(ns sam-diff.levenshtein.diff
  (:require
    [clojure.set :as set]
    [sam-diff.levenshtein
     [cost-of :as cost-of]
     [classify :as classify]]
    [sam-diff.optimised.meyer-string :as meyer-string]
    [sam-diff.schema :as schema]
    [clojure.pprint :as pp]))

;; Forward declaration: diff-seq-impl (below) calls diff-seq before diff-seq
;; itself is defined further down in this namespace.
(declare diff-seq)

(defn- default-diff
  "Fallback comparison of two values as opaque atoms.

  Returns `[context diff]`. `diff` is nil when `a` and `b` have the same type
  and are equal; otherwise it is an `:atom` diff that deletes `a` and inserts
  `b`, costed as the sum of the costs of both values. `context` is returned
  unchanged."
  [context a b]
  (let [changed? (or (not= (type a) (type b))
                     (not= a b))]
    [context
     (when changed?
       {:type        :atom
        :a           a
        :deleted     a
        :inserted    b
        :levenshtein (+ (cost-of/cost-of a) (cost-of/cost-of b))})]))

(defmulti diff-impl
  "Diffs `a` against `b`, dispatching on the two-element vector of their
  classifications as produced by `classify/classify`. The first argument (the
  context) takes no part in dispatch."
  (fn [_context a b]
    [(classify/classify a) (classify/classify b)]))

;; Fallback for any classification pair without a dedicated method: delegate to
;; default-diff, which compares the two values as opaque atoms.
(defmethod diff-impl :default
  [context a b]
  (default-diff context a b))

;; Set-vs-set diff: elements only in `a` are deleted, elements only in `b` are
;; inserted. Returns [context diff], with diff nil when neither set has an
;; element the other lacks. The cost is the summed cost of both difference sets.
(defmethod diff-impl [:set :set]
  [context a b]
  (let [added    (set/difference b a)
        removed  (set/difference a b)
        cost     (+ (cost-of/cost-of added)
                    (cost-of/cost-of removed))
        changed? (or (seq removed) (seq added))]
    [context
     (when changed?
       {:type        :set
        :a           a
        :deleted     removed
        :inserted    added
        :levenshtein cost})]))

;; Map-vs-map diff. Keys only in `a` are reported (with their values) under
;; :deleted, keys only in `b` under :inserted, and keys present in both are
;; diffed recursively; only those whose values differ end up under :modified.
;; Returns [context diff], with diff nil when nothing changed. The context is
;; threaded through every recursive diff-impl call.
(defmethod diff-impl [:map :map]
  [context a b]
  (let [keys-of-a   (set (keys a))
        keys-of-b   (set (keys b))

        only-in-a   (set/difference keys-of-a keys-of-b)
        only-in-b   (set/difference keys-of-b keys-of-a)
        shared      (set/intersection keys-of-a keys-of-b)

        ;; Build {k (look-up k)} for every k in ks.
        entries-for (fn [look-up ks]
                      (into {} (map (juxt identity look-up)) ks))
        deleted     (entries-for a only-in-a)
        inserted    (entries-for b only-in-b)

        ;; Recursively diff the values of shared keys, keeping only real diffs.
        [context modified]
        (reduce
         (fn [[ctx acc] k]
           (let [[ctx entry-diff] (diff-impl ctx (a k) (b k))]
             [ctx (if entry-diff
                    (assoc acc k entry-diff)
                    acc)]))
         [context {}]
         shared)

        levenshtein (+ (cost-of/cost-of deleted)
                       (cost-of/cost-of inserted)
                       (reduce + 0 (map :levenshtein (vals modified))))

        changed?    (or (seq deleted) (seq inserted) (seq modified))]
    [context
     (when changed?
       {:type        :map
        :a           a
        :deleted     deleted
        :inserted    inserted
        :modified    modified
        :levenshtein levenshtein})]))

(defn update-ds
  "Adds `cost` to the `:levenshtein` total of `ds` (a missing total counts as
  0) and prepends `value` to the sequence stored at `[update-type location]`,
  starting a new sequence when none is there yet. A nil `ds` is treated as an
  empty map."
  [ds update-type cost location value]
  (let [base     (or ds {})
        total    (+ (or (get base :levenshtein) 0) cost)
        existing (get-in base [update-type location])]
    (-> base
        (assoc :levenshtein total)
        (assoc-in [update-type location] (cons value (or existing '()))))))

(defn update-ds-singleton
  "Adds `cost` to the `:levenshtein` total of `ds` (a missing total counts as
  0) and stores `value` at `[update-type location]`, replacing whatever was
  there. A nil `ds` is treated as an empty map."
  [ds update-type cost location value]
  (let [base  (or ds {})
        total (+ (or (get base :levenshtein) 0) cost)]
    (-> base
        (assoc :levenshtein total)
        (assoc-in [update-type location] value))))

(defn- diff-seq-impl
  "Computes the diff between the remaining elements `a` and `b` of two
  sequences, exploring delete / insert / modify at the first differing position
  and keeping the cheapest (ties favour delete, then insert, then modify).

  Arguments:
    cache      - memo map threaded through the recursive `diff-seq` calls
    context    - map whose `:complexity-count` is incremented on every call
    original-a - the complete original sequence, attached as `:a` to results
                 produced by the delete / insert / modify branch
    a, b       - the remaining (suffix) elements still to be compared

  Returns `[cache context diff]`; `diff` is nil when `a` equals `b`.

  Locations are counted from the end of `a`: a deleted or modified element is
  keyed by the number of `a` elements that follow it, and an insertion is keyed
  by the number of `a` elements remaining at the point of insertion."
  [cache context original-a [a-h & a-r :as a] [b-h & b-r :as b]]
  (let [context (update context :complexity-count inc)]
    (cond
      (= a b)
      [cache context nil]

      ;; `a` is exhausted: everything left in `b` is an insertion.
      ;; Emptiness is tested on the sequence itself rather than on its head,
      ;; because a nil head is a legitimate element (e.g. [nil 1]) and must not
      ;; be mistaken for the end of the sequence.
      (empty? a)
      [cache
       context
       {:levenshtein (cost-of/cost-of b)
        :inserted    {0 b}}]

      ;; `b` is exhausted: everything left in `a` is a deletion, each element
      ;; keyed by how many elements of `a` follow it.
      (empty? b)
      [cache
       context
       {:levenshtein (cost-of/cost-of a)
        :deleted     (into {} (map vector (range (dec (count a)) -1 -1) a))}]

      ;; Equal heads cost nothing; continue with the tails.
      (= a-h b-h)
      (recur cache context original-a a-r b-r)

      :else
      (let [
            ;; Deleted
            [cache context delete]
            (diff-seq cache context original-a a-r b)

            {lev-del :levenshtein :as delete}
            (update-ds-singleton delete :deleted (cost-of/cost-of a-h) (count a-r) a-h)

            ;; Inserted
            [cache context insert]
            (diff-seq cache context original-a a b-r)

            {lev-ins :levenshtein :as insert}
            (update-ds insert :inserted (cost-of/cost-of b-h) (count a) b-h)

            ;; Modified
            [context {lev-mod-h :levenshtein :as mod-h}]
            (diff-impl context a-h b-h)

            [cache context modified]
            (diff-seq cache context original-a a-r b-r)

            {lev-mod :levenshtein :as modified}
            (update-ds-singleton modified :modified (or lev-mod-h 0) (count a-r) mod-h)

            ;; Prioritise delete over insert over modify
            result
            (cond
              (and (<= lev-del lev-ins) (<= lev-del lev-mod))
              delete

              (<= lev-ins lev-mod)
              insert

              :else
              modified)]
        [cache context (assoc result :a original-a)]))))

(defn- diff-seq
  "Memoising front-end to `diff-seq-impl`, keyed on the pair `[a b]`.

  On a cache hit the stored diff is returned with `cache` and `context`
  unchanged. On a miss the diff is computed, tagged with `:type :seq`, stored
  in the cache, and returned. Returns `[cache context diff]`."
  [cache context original-a a b]
  (if-let [hit (find cache [a b])]
    [cache context (val hit)]
    (let [[cache context raw] (diff-seq-impl cache context original-a a b)
          tagged              (assoc raw :type :seq)]
      [(assoc cache [a b] tagged) context tagged])))

;; Sequence-vs-sequence diff. Equal sequences short-circuit to [context nil];
;; otherwise diff-seq is run with a fresh cache and the cache is dropped from
;; its [cache context diff] result, leaving (context diff).
(defmethod diff-impl [:seq :seq]
  [context a b]
  (if (= a b)
    [context nil]
    (let [outcome (diff-seq {} context a a b)]
      (rest outcome))))

;; String-vs-string diff. Returns [context diff], where diff is whatever
;; meyer-string/raw-diff produces for the two strings; context is unchanged.
(defmethod diff-impl [:string :string]
  [context a b]
  ;; TODO: temporarily delegating to the Meyer string diff for performance.
  ;; The commented-out code below is the diff-seq based implementation that it
  ;; stands in for:
   ;(if-not (= a b)
   ;  (let [[context result] (rest (diff-seq {} context a (seq a) (seq b)))
   ;        result           (assoc result :type :string
   ;                                       :a a)]
   ;    [context result])
   ;  [context nil])
  [context (meyer-string/raw-diff a b)])

(defn diff
  "Public entry point: diffs `a` against `b` via `diff-impl`, starting from a
  context whose `:complexity-count` is 0, then passes the resulting diff (the
  second element of the `diff-impl` result) to `schema/diff-info-validator`
  and returns what that returns."
  [a b]
  (-> (diff-impl {:complexity-count 0} a b)
      second
      schema/diff-info-validator))

(defn edit-distance
  "Returns the `:levenshtein` value of the diff between `a` and `b`, or 0 when
  `diff` yields no diff."
  [a b]
  (let [d (diff a b)]
    (if-not d
      0
      (get d :levenshtein))))



