; Copyright (c) Sławek Gwizdowski
;
; Permission is hereby granted, free of charge, to any person obtaining
; a copy of this software and associated documentation files (the "Software"),
; to deal in the Software without restriction, including without limitation
; the rights to use, copy, modify, merge, publish, distribute, sublicense,
; and/or sell copies of the Software, and to permit persons to whom the
; Software is furnished to do so, subject to the following conditions:
;
; The above copyright notice and this permission notice shall be included
; in all copies or substantial portions of the Software.
;
; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
; OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
; THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
; IN THE SOFTWARE.
;
(ns ^{:author "Sławek Gwizdowski"
      :doc "Useful row/map/record related functions."}
 szew.io.util
 (:require
   [clojure.set :as sets]
   [clojure.walk :as walk]
   [clojure-csv.core :as csv]
   [camel-snake-kebab.core :as csk]
   [clojure.spec.alpha :as s]))


;; ## Row fixers


(defn row-adjuster
  "Creates a function that will return vectors of same length as default-row.
  Missing columns will be filled by defaults. Extra columns dropped.

  Why?

  Because *SV does not have to be well formed, numbers of column may vary.

  How?

  ((row-adjuster [1 2 3]) [:x])
  => [:x 2 3]
  ((row-adjuster [1 2 3]) [:1 :2 :3 :4])
  => [:1 :2 :3]
  "
  ([default-row]
   {:pre [(vector? default-row)]}
   (if (empty? default-row)
     identity
     (let [default        (vec default-row)
           default-length (count default)]
       (with-meta
         (fn adjuster
           ([a-row]
            {:pre [(vector? a-row)]}
            (let [row-length (count a-row)]
              (with-meta
                (if (= row-length default-length)
                  a-row
                  (if (> row-length default-length)
                    (subvec a-row 0 default-length)
                    (into a-row (subvec default row-length))))
                {:original-row a-row
                 :default-row  default-row}))))
         {:default-row    default
          :default-length default-length
          :doc "Adjust row based on default-row and default-length."})))))


;; ## Line Splitters


(defn fixed-width-split
  "Accept vector of slice sizes, returns function that will cut String.

  Why?

  Because somebody thought fixed width data records are a good thing.

  How?

  ((fixed-width-split [4 3 4]) \"Ala ma Kota.\")
  => [\"Ala \" \"ma \" \"Kota\"]
  "
  ([fields]
   {:pre [(not (empty? fields)) (every? number? fields) (every? pos? fields)]}
   (let [steps   (partition 2 1 (reductions + 0 fields))
         idx-end (last (last steps))
         slicer  (->> steps
                      (mapv (fn [[start stop]] #(subs % start stop)))
                      (apply juxt))]
     (with-meta
       (fn fixed-width-splitter
         ([^String row]
          {:pre [(string? row)]}
          (let [len (count row)]
            (when (> idx-end len)
              (throw (ex-info
                       (format "Given row is too short: is %d, want %d."
                               len idx-end)
                       {:row     row
                        :idx-end idx-end
                        :length  len})))
            (with-meta
              (slicer row)
              {:original-row row
               :steps        steps
               :fields       fields}))))
       {:steps  steps
        :fields fields
        :doc    "Accepts String, returns [String] slices."}))))


;; ## Record curators


(defn field-unpacker
  "See if field contains a str/char delimiter, if so -- tries to parse as CSV.

  Why?

  Sometimes your fields have fields. (So many fields, fields for days.)

  How?

  (field-unpacker \\, \"ala,ma,kota\")
  => [\"ala\", \"ma\", \"kota\"]
  "
  [delimiter ^String field]
  (if-not (and (string? field) (.contains field (str delimiter)))
    field
    (first (csv/parse-csv field :delimiter delimiter))))


(defn row-field-unpacker
  "Run field unpacker over entire row.

  Why?

  Partial it away and apply to nested CSV/TSV stuff.

  How?

  (row-field-unpacker \\, [\"xnay\" \"ala,ma,kota\" \"unpackey\")
  => [\"xnay\" [\"ala\" \"ma\" \"kota\"] \"unpackey\"]
  "
  [delimiter a-row]
  (mapv (partial field-unpacker delimiter) a-row))


;; ## Record helpers


(defn vec->map
  "Takes header and values vector, zipmaps, adds metadata. Allows defaults.

  Why?

  Singular operation for vecs->maps, with inputs attached as meta.

  How?
  (vec->map [:k1 :k2] [1 2])
  => {:k1 1, :k2 2}
  (vec->map {:kx 100} [:k1 :k2] [1 2])
  => {:kx 100, :k1 1, :k2 2}
  (meta (vec->map {:kx 100} [:k1 :k2] [1 2]))
  => {:defaults {:kx 100}, :header [:k1 :k2], :values [1 2]}
  "
  ([defaults header values]
   (with-meta
     (merge-with #(if (nil? %2) %1 %2)
                 defaults
                 (zipmap header values))
     {:defaults defaults
      :header   header
      :values   values}))
  ([header values]
   (with-meta
     (zipmap header values)
     {:defaults nil
      :header   header
      :values   values})))


(defn vecs->maps
  "Takes header vector and tails, returns seq of maps.

  Zipmaps head with tails. Merges with defaults, doing nil substitution.

  Why?

  Maps are friendlier then vectors.

  How?

  (vecs->maps [[:k1 :k2] [1 2] [3 4]])
  => ({:k1 1, :k2 2}, {:k1 3 :k2 4})
  (vecs->maps [:k1 :k2] [[1 2] [3 4]])
  => ({:k1 1, :k2 2}, {:k1 3 :k2 4})
  (let [s [[:a :b] [1 2] [3 4]]]
    (vecs->maps (first s) (rest s)))
  => ({:a 1, :b 2}, {:a 3, :b 4})
  (let [d {:x 100, :b 9}
        s [[:a :b] [1 nil] [3 4]]]
    (vecs->maps d (first s) (rest s)))
  => ({:a 1 :b 9 :x 100} {:a 3 :b 4 :x 100})
  "
  ([defaults head tails]
   (map (partial vec->map defaults head) tails))
  ([head tails]
   (map (partial vec->map head) tails))
  ([a-seq]
   (vecs->maps (first a-seq) (rest a-seq))))


(defn map->vec
  "Takes header vector and map. Returns vector with meta. Allows defaults.

  Why?

  Singular version of maps->vecs, with inputs attached as meta.

  How?
  (map->vec [:k1 :k2] {:k1 1, :k2 2})
  => [1 2]
  (map->vec {:kx 100} [:k1 :k2 :kx] {:k1 1, :k2 2})
  => [1 2 100]
  (meta (map->vec {:kx 100} [:k1 :k2 :kx] {:k1 1 :k2 2}))
  => {:defaults {:kx 100}, :header [:k1 :k2 :kx], :mapped {:k1 1, :k2 2}}
  "
  ([defaults header mapped]
   (with-meta
     (mapv (merge defaults mapped) header)
     {:defaults defaults
      :header   header
      :mapped   mapped}))
  ([header mapped]
   (with-meta
     (mapv mapped header)
     {:defaults nil
      :header   header
      :mapped   mapped})))


(defn maps->vecs
  "Takes header vector and maps, returns seq of vectors.

  Vector maps tails over head. Defaults are done by merging before mapping.

  Why?

  Because maps are so nice, but it's also nice to be able to dump them back.

  How?

  (maps->vecs [[:k1 :k2] {:k1 1, :k2 2}])
  => ([1 2])
  (maps->vecs [:k1 :k2] [{:k1 1, :k2 2}])
  => ([1 2])
  (maps->vecs {:kx 100} [:k1 :k2 :kx] [{:k1 1, :k2 2}])
  => ([1 2 100])
  (maps->vecs [:k1 :k2] [{:k1 1, :k2 2} {:k1 1, :k2 0}])
  => ([1 2] [1 0])
  "
  ([defaults head tails]
   (map (partial map->vec defaults head) tails))
  ([head tails]
   (map (partial map->vec head) tails))
  ([a-seq]
   (maps->vecs (first a-seq) (rest a-seq))))


(defn maps-maker
  "Create maps-maker over two functions of input sequence: head and tails.

  Defaults: head is first row as kebab-case-keyword vector, tails is rest.

  Why?

  Because it's a common scenario. I wrote it too many times by hand.

  How?

  ((maps-maker) [[\"X\" \"Y\"] [0 0] [1 1] [10 10]])
  => ({:x 0 :y 0} {:x 1 :y 1} {:x 10 :y 10})
  "
  ([head tails]
   (with-meta
     (fn make-maps
       ([defaults a-sequence]
        (let [header      (head a-sequence)
              header-meta #(vary-meta %2
                                      assoc
                                      :number   %1
                                      :header   header
                                      :defaults defaults)
              datas       (tails a-sequence)]
          (if (empty? defaults)
            (map header-meta (range) (vecs->maps header datas))
            (map header-meta (range) (vecs->maps defaults header datas)))))
       ([a-sequence]
        (make-maps {} a-sequence)))
     {:head head :tails tails}))
  ([]
   (maps-maker (comp (partial mapv csk/->kebab-case-keyword) first) rest)))


(defn bespoke-header
  "Accepts vector and a map, returns header vector derived from both.

  Header consists of base-header and remaining keys from the map, sorted.

  If strict: base-header must be a subset of keys in the map, otherwise
  ExceptionInfo. If not strict then just 'do your best'.

  If only given the map - returns vector of keys.

  Why?

  You want some key columns first, while keeping all the data.

  How?

  (bespoke-header [:x :y :z] {:state :resting :x 0 :y 0 :z 0})
  => [:x :y :z :state]
  (bespoke-header [:x :y :z :cycle] {:state :resting :x 0 :y 0 :z 0})
  => [:x :y :z :cycle :state]
  (bespoke-header [:x :y :z :cycle] true {:state :resting :x 0 :y 0 :z 0})
  => clojure.lang.ExceptionInfo: Missing keys in header?
  "
  ([base-header strict? a-map]
   (let [header-set    (set base-header)
         present-set   (set (keys a-map))
         remainder-set (sets/difference present-set header-set)]
     (if (empty? a-map)
       (vec base-header)
       (if (and strict? (not (sets/subset? header-set present-set)))
         (throw
           (ex-info "Missing keys in header?"
                    {:a-map a-map
                     :missing (sets/difference header-set present-set)
                     :header-set header-set
                     :present-set present-set
                     :remainder-set remainder-set}))
         (into (vec base-header) (sort remainder-set))))))
  ([base-header a-map]
   (bespoke-header base-header false a-map))
  ([a-map]
   (vec (sort (keys a-map)))))


(defn vecs-maker
  "Accepts defaults and two output producing functions of header and row.

  By default header-prep is vector of SCREAMING_SNAKE_CASE_STRING.

  By default row-prep is a vector of string with nil becoming \"NULL\".

  Bespoke header based on first records, so they better be uniform!

  Why?

  Quick and dirty dumps!

  How?
  ((vecs-maker) [[:x :y :z] [{:x 0 :y 0} {:x 10 :y 10}]])
  => ([\"X\" \"Y\" \"Z\"] [\"0\" \"0\" \"NULL\"] [\"10\" \"10\" \"NULL\"])
  ((vecs-maker {:z 0}) [[:x :y :z] [{:x 0 :y 0} {:x 10 :y 10}]])
  => ([\"X\" \"Y\" \"Z\"] [\"0\" \"0\" \"0\"] [\"10\" \"10\" \"0\"])
  ((vecs-maker {:z 0} identity identity)
   [[:x :y :z] [{:x 0 :y 0} {:x 10 :y 10}]])
  => ([:x :y :z] [0 0 0] [10 10 0])
  ((vecs-maker nil identity identity)
   [[:x :y :z] [{:x 0 :y 0} {:x 10 :y 10}]])
  => ([:x :y :z] [0 0 nil] [10 10 nil])
  "
  ([defaults header-prep row-prep]
   (with-meta
     (fn make-vecs
       ([[header records]]
        (let [extended (bespoke-header header (first records))]
          (cons (header-prep extended)
                (map row-prep
                     (maps->vecs defaults extended records))))))
     {:defaults    defaults
      :header-prep header-prep
      :row-prep    row-prep
      :doc "Wants [header records], returns seq of vectors."}))
  ([defaults]
   (vecs-maker defaults
              (partial mapv csk/->SCREAMING_SNAKE_CASE_STRING)
              (partial mapv (fnil str "NULL"))))
  ([]
   (vecs-maker nil
               (partial mapv csk/->SCREAMING_SNAKE_CASE_STRING)
               (partial mapv (fnil str "NULL")))))


(defn ^{:deprecated "0.3.0"}
  keywordify
  "Take something, take string of it, then make it into keyword.

  DEPRECATED! Consider: camel-snake-kebab.core/->kebab-case

  Why?

  Sometimes you just need a keyword that looks semi decent.

  How?

  (keywordify \"Account: OPEX\")
  => :account-opex
  "
  [a-something]
  (-> a-something
      (str)
      (.toLowerCase)
      (.replaceAll "\\s" "_")      ; whites        -> "_"
      (.replaceAll "[^\\w]" "_")   ; non alphas    -> "_"
      (.replaceAll "_+" "_")       ; multiple "_"  -> "_"
      (.replace \_ \-)             ; every    "_"  -> "-"
      (.replaceAll "^-+" "")       ; beginning "-" -> ""
      (.replaceAll "-+$" "")       ; ending   "-"  -> ""
      (.replaceAll "-+" "-")
      (keyword)))


(defn ^{:deprecated "0.3.0"}
  bastardify
  "Take something, string it and keywordify it, underscores for hyphens.

  DEPRECATED! Consider: camel-snake-kebab.core/->snake_case_keyword

  Why?

  Underscores are OK for things like H2, hyphens? Not so much.

  How?

  (bastardify \"Account: OPEX\")
  => :account_opex
  "
  [a-something]
  (-> a-something
      (str)
      (.toLowerCase)
      (.replaceAll "\\s" "_")      ; whites        -> "_"
      (.replaceAll "[^\\w]" "_")   ; non alphas    -> "_"
      (.replaceAll "_+" "_")       ; multiple "_"  -> "_"
      (.replaceAll "^_+" "")       ; beginning "_" -> ""
      (.replaceAll "_+$" "")       ; ending "_"    -> ""
      (keyword)))


(defn friendlify
  "Take Clojure object and try to make a pretty String from its Class.

  Why?

  Display function name at runtime, but similar to how it looks in code.

  How?

  (friendlify friendlify)
  => \"szew.io.util/friendlify\"
  "
  [a-something]
  (-> a-something
      (class)
      (str)
      (.replaceFirst "(^class )" "")
      (.replaceFirst "(__\\d+$)" "")
      (.replaceFirst "(@[a-fA-F0-0]+$)" "")
      (.replaceAll "_BANG_" "!")
      (.replaceAll "_BAR_" "|")
      (.replaceAll "_SHARP_" "#")
      (.replaceAll "_PERCENT_" "%")
      (.replaceAll "_AMPERSTAND_" "&")
      (.replace \$ \/)
      (.replaceAll "_QMARK_" "?")
      (.replaceAll "_PLUS_" "+")
      (.replaceAll "_STAR_" "*")
      (.replaceAll "_LT_" "<")
      (.replaceAll "_GT_" ">")
      (.replaceAll "_EQ_" "=")
      (.replaceAll "_DOT_" ".")
      (.replaceAll "_COLON_" ":")
      (.replaceAll "_SINGLEQUOTE_" "'")
      (.replace \_ \-)))


(defn getter
  "Takes key and default value, returns a function that gets it.

  Why?

  Builtin get takes collection as first argument, this is the other way around.

  How?

  (mapv (getter :yolo :no) [{:yolo :yes} {:wat? :wat}])
  => [:yes :no]
  (meta (getter :yolo :no))
  => {:key :yolo, :default :no}
  (mapv (getter 0 :no) [[:yes] []])
  => [:yes :no]
  (meta (getter 1 :no))
  => {:key 1, :default :no}
  "
  ([a-key default]
   (with-meta
     (fn sub-getter [gettable]
       (get gettable a-key default))
     {:key a-key :default default}))
  ([a-key]
   (getter a-key nil)))


(defn juxt-map
  "Give keys and values, get a juxt map of keys-values. Hint: zipmap over juxt.

  Why?

  Maps are bees knees and juxt is just cool.

  How?

  ((juxt-map :+ inc :- dec := identity) 2)
  => {:+ 3, :- 1, := 2}
  "
  [& keys-fns]
  {:pre [(-> keys-fns count even?)]}
  (let [parts  (partition 2 2 nil keys-fns)
        keysv  (mapv first parts)
        juxt*  (apply juxt (map second parts))]
    (with-meta
      (fn juxt-mapper [& args]
          (zipmap keysv (apply juxt* args)))
      {:keys-fns keys-fns})))


(defn deep-sort
  "Take a map of maps, return sorted-map of sorted-maps.

  Why?

  Sometimes you want to be able to just look at the map and know if a key
  is there. Ordering keys helps.

  How?

  (deep-sort {100 :a, 90 {10 :maybe, 9 {:yes :no}}, 80 :b})
  => {80 :b, 90 {9 {:yes :no}, 10 :maybe}, 100 :a}
  "
  [map-of-maps]
  (walk/postwalk #(if (map? %) (into (sorted-map) %) %) map-of-maps))


(defn roll-in
  "Take sequence of sequences, last item is value, butlast items are the key.
  Return map of maps.

  If agg callable is given -- it's used with update-in, otherwise entries
  action is assoc-in.

  Why?

  I just like maps.

  How?

  (roll-in [[:a :b 3] [:a :c 4] [:x :z 0]])
  => {:a {:b 3 :c 4} :x {:z 0}}
  (roll-in (fnil + 0) [[:a :b 3] [:a :c 4] [:x :z 0] [:a :c 2]])
  => {:a {:b 3 :c 6} :x {:z 0}}
  "
  ([seq-of-seqs]
   (reduce (partial apply assoc-in)
           {}
           (map (juxt butlast last) seq-of-seqs)))
  ([agg seq-of-seqs]
   (reduce (partial apply update-in)
           {}
           (map (juxt butlast (constantly agg) last) seq-of-seqs))))


(defn roll-out
  "Take a map of maps, return sequence of vectors.

  Why?

  I like maps, but vectors are also pretty neat.

  How?

  (set (roll-out {:a {:b 3 :c 4} :x {:z 0}}))  ;; because ordering.
  => #{[:a :b 3] [:a :c 4] [:x :z 0]}
  (set (roll-out #(contains? % :b) {:a {:b 3 :c 4} :x {:z 0}}))
  => #{[:a {:b 3 :c 4}] [:x :z 0]}
  "
  ([map-of-maps]
   (roll-out (constantly false) map-of-maps))
  ([stop-at? map-of-maps]
   (letfn[(step [path stack]
            (when (seq stack)
              (let [path* (if (zero? (count path)) [] (pop path))]
                (if (empty? (peek stack)) ;; nothing to do on current level
                  (recur path* (pop stack))  ;; POP-POP!
                  [(conj path* (peek (peek stack)))  ;; switch in path
                   (conj (pop stack) (pop (peek stack)))])))) ;; up in stack
          (snek [path stack]
            (let [x (get-in map-of-maps path)]
              (if-not (or (not (map? x)) (stop-at? x))
                (let [descent (vec (sort (keys x)))]
                  (recur (conj path (peek descent))
                         (conj stack (pop descent))))
                (let [[path* stack*] (step path stack)]
                  (cons (conj path x)
                        (lazy-seq
                          (when (seq path*)
                            (snek path* stack*))))))))]
     (if (stop-at? map-of-maps)
       [map-of-maps]
       (let [[path stack] (step [] [(vec (sort (keys map-of-maps)))])]
         (snek path stack))))))


;; Parallel transducers


(defn default-parallel-n
  "Return advised parallel workload. Logical processor count + 2.

  Why?

  Sometimes you just want the answer.

  How?

  (default-parallel-n)
  => 10
  "
  []
  (+ 2 (.availableProcessors (Runtime/getRuntime))))


(defn ppmap
  "Parametrized parallel map. Like pmap, but with max of n threads at once.

  Accepts number of threads n, function f and collections. If no collections
  are provided it produces a stateful transducer.

  Why?

  Better pmap for multicore world.

  How?

  (vec (map inc [1 2 3]))
  => [2 3 4]
  (vec (ppmap 1 inc [1 2 3]))
  => [2 3 4]
  "
  ([n f]
    (fn [rf]
      (let [buffer (volatile! [])]
        (fn
          ([] (rf))
          ([result]
           (let [buff @buffer]
             (if (empty? buff)
               (rf result)
               (transduce (map deref) rf result @buffer))))
          ([result input]
           (let [buff (vswap! buffer conj (future (f input)))]
             (if (= n (count buff))
               (do (vreset! buffer [])
                   (transduce (map deref) rf result buff))
               (rf result))))
          ([result input & inputs]
           (let [buff (vswap! buffer conj (future (apply f input inputs)))]
             (if (= n (count buff))
               (do (vreset! buffer [])
                   (transduce (map deref) rf result buff))
               (rf result))))))))
  ([n f coll]
   (sequence (ppmap n f) coll))
  ([n f coll & colls]
   (apply sequence (ppmap n f) coll colls)))


(defn ppfilter
  "Parametrized parallel filter, with max of n threads at once.

  Accepts number of threads n, predicate p and a collection. If no collection
  is provided it produces a stateful transducer.

  Why?

  The real question is: why no pfilter?!

  How?

  (vec (filter odd? [1 2 3]))
  => [1 3]
  (vec (ppfilter 1 odd? [1 2 3]))
  => [1 3]
  "
  ([n p]
    (fn [rf]
      (let [buffer (volatile! [])
            subxf  (comp (filter (comp deref first)) (map second))]
        (fn
          ([] (rf))
          ([result]
           (let [buff @buffer]
             (if (empty? buff)
               (rf result)
               (transduce subxf rf result @buffer))))
          ([result input]
           (let [buff (vswap! buffer conj [(future (p input)) input])]
             (if (= n (count buff))
               (do (vreset! buffer [])
                   (transduce subxf rf result buff))
               (rf result))))))))
  ([n f coll]
   (sequence (ppfilter n f) coll)))


(defn ppmapcat
  "Parametrized parallel mapcat, with max of n threads at once for mapping.
  If no collection is provided ir produces a stateful transducer.

  Accepts number of threads n, function f and a collection.

  Why?

  Easy once ppmap is defined so why not?

  How?

  (mapcat (juxt dec identity inc) [1 2 3 4])
  => (0 1 2 1 2 3 2 3 4 3 4 5)
  (ppmapcat 4 (juxt dec identity inc) [1 2 3 4])
  => (0 1 2 1 2 3 2 3 4 3 4 5)
  "
  ([n f]
   (comp (ppmap n f) cat))
  ([n f & colls]
   (apply sequence (ppmapcat n f) colls)))


;; ### Some backwards compatibility


(def ^{:doc "Compatibility shim for recordify, see vecs->maps."
       :deprecated "0.3.0"}
  recordify vecs->maps)


(def ^{:doc "Compatibility shim for de-recordify, see maps->vecs."
       :deprecated "0.3.0"}
  de-recordify map->vec)
