(ns witan.phyrexian.utils
  (:require [clojure.core.matrix.dataset :as ds]
            [clojure.data.csv :as data-csv]
            [clojure.edn :as edn]
            [clojure.java.io :as io]
            [clojure.string :as str]
            [schema.coerce :as coerce]))

;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; READ, WRITE CSV FILES ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;

(defn open-csv
  "Reads the csv file at filename and returns every parsed row as a vector.
   The rows are fully realised before the reader is closed."
  [filename]
  (with-open [rdr (io/reader filename)]
    (into [] (data-csv/read-csv rdr))))

(defn load-csv
  "Reads the csv file at filename and returns a lazy seq with one map per
   data row. The first row supplies the keys (each header turned into a
   keyword); the cells of each following row supply the vals."
  [filename]
  (let [all-rows (open-csv filename)
        column-keys (mapv keyword (first all-rows))]
    (map (fn [row] (zipmap column-keys row))
         (rest all-rows))))

(defn convert-row-maps-to-vector-of-vectors
  "Takes a sequence of maps and returns a vector of vectors where the first
   vector contains the column names and the following vectors contain the
   values for each row. This is the format needed to save as csv using
   data-csv/write-csv.

   Column order is taken from the keys of the first map, and every row's
   values are looked up by those same keys, so each value lands under its own
   header even when the maps enumerate their keys in different orders. A key
   missing from a row yields nil in that position; keys that are absent from
   the first map are not emitted."
  [rows-as-maps]
  (let [column-keys (keys (first rows-as-maps))
        colnames (mapv name column-keys)
        ;; Look values up by key instead of using (vals row): the order of
        ;; (vals row) only matches the (keys ...) of that same map, not the
        ;; key order of the first row that the header is built from.
        rows (mapv (fn [row] (mapv #(get row %) column-keys)) rows-as-maps)]
    (into [colnames] rows)))

(defn write-csv
  "Prints a progress line, then writes data (a sequence of row maps) to the
   csv file at path. The maps are turned into a header row plus value rows by
   convert-row-maps-to-vector-of-vectors."
  [path data]
  (println "Writing" path)
  (with-open [wtr (io/writer path)]
    (->> data
         convert-row-maps-to-vector-of-vectors
         (data-csv/write-csv wtr))))

(defn write-ordered-csv
  "Prints a progress line, then writes data to the csv file at path with the
   columns in the order given by ordered-colnames. ordered-colnames is written
   as the header row as-is; each row of data is invoked as a function on every
   column name to produce that row's cells."
  [path data ordered-colnames]
  (println "Writing" path)
  (with-open [wtr (io/writer path)]
    (let [body-rows (for [row data]
                      (mapv row ordered-colnames))]
      (data-csv/write-csv wtr (cons ordered-colnames body-rows)))))

;;;;;;;;;;;;;;
;; DATASETS ;;
;;;;;;;;;;;;;;

(defn coerce-if-num
  "If x is a string that parses as a number once its commas (thousands
   separators) are removed, returns that number as a double. Any other
   string, and any non-string value, is returned unchanged.

   Parsing is done with java.lang.Double/parseDouble, so integers, decimals
   and exponent notation are converted; ratio syntax such as \"1/2\" is not
   and comes back as the original string."
  [x]
  (if (string? x)
    (try
      (-> x
          (clojure.string/replace #"," "")
          java.lang.Double/parseDouble)
      ;; A string that is not a number is expected input, not an error:
      ;; hand it back untouched.
      (catch NumberFormatException _
        x))
    x))

(defn coerce-vec
  "Applies coerce-if-num to every element of vec and returns the results as
   a vector."
  [vec]
  (into [] (map coerce-if-num) vec))

(defn coerce-dataset
  "Returns a new dataset with the same column names as ds in which every
   cell has been passed through coerce-if-num (via coerce-vec), so numeric
   strings become doubles and everything else is left as it was.

   The columns of ds are coerced one by one and then transposed into row
   vectors, which are handed to ds/dataset together with the column names.
   The row count is taken from the first column."
  [ds]
  (let [column-names (:column-names ds)
        coerced-cols (mapv coerce-vec (:columns ds))
        n-rows (count (first coerced-cols))
        ;; Row i is the i-th element of every coerced column.
        rows (mapv (fn [i] (mapv #(nth % i) coerced-cols))
                   (range n-rows))]
    (ds/dataset column-names rows)))

(defn csv-to-dataset
  "Takes in a file path and, when that file exists, builds a dataset from its
   csv contents; returns nil when it does not exist. The first csv row gives
   the column names. Any data row containing the string \"NULL\" in one of its
   cells is dropped, and the resulting dataset is passed through
   coerce-dataset."
  [filename]
  (let [csv-file (io/file filename)]
    (when (.exists csv-file)
      (let [[headers & data-rows] (with-open [rdr (io/reader csv-file)]
                                    (doall (data-csv/read-csv rdr)))
            null-free-rows (remove (fn [row] (some #(= "NULL" %) row))
                                   data-rows)]
        (coerce-dataset (ds/dataset headers (vec null-free-rows)))))))

;;;;;;;;;;;;;;
;; COERCION ;;
;;;;;;;;;;;;;;

(defn schema-coercion
  "Builds a coercer for schema using schema.coerce's string-coercion-matcher
   and applies it to data, returning whatever the coercer returns."
  [schema data]
  (let [coerce-fn (coerce/coercer schema coerce/string-coercion-matcher)]
    (coerce-fn data)))

;;;;;;;;;;;;;;;;
;; PREDICATES ;;
;;;;;;;;;;;;;;;;

(defn year-key?
  "True when the name of k is exactly four digits of the form matched below:
   a leading 1 or 2, then one of 8, 9, 0, 1 or 2, then any two digits
   (e.g. \"1999\" or \"2011\"). False otherwise."
  [k]
  (boolean (re-matches #"^[1-2][89012][0-9][0-9]$" (name k))))

(defn integer-key?
  "True when the name of k is written as an integer: an optional sign
   followed by one or more digits (e.g. :42 or \"-7\"). False otherwise.

   The name of a key is always a string, so it is matched against a pattern;
   calling integer? on it directly can never succeed."
  [k]
  (boolean (re-matches #"[+-]?\d+" (name k))))

(defn numeric-value?
  "True when the string v reads as a floating-point or integer literal,
   false when it reads as anything else (including ratios and the empty
   string). Throws when v cannot be read at all.

   v is read with the edn reader rather than clojure.core/read-string, so
   reader-eval forms such as \"#=(...)\" in the data are rejected instead of
   being executed."
  [v]
  (let [value (edn/read-string v)]
    (or (float? value) (integer? value))))

(defn age-group-equals-total?
  "True when the :Age entry of m is the string \"TOT\"."
  [m]
  (let [age (get m :Age)]
    (= age "TOT")))

(defn year-before-2011?
  "True when the :year entry of m is numerically less than 2011."
  [m]
  (let [year (get m :year)]
    (> 2011 year)))

(defn year-not-2015?
  "True when the :year entry of m is anything other than 2015."
  [m]
  (let [year (get m :year)]
    (not (= 2015 year))))

(defn is-90-or-over?
  "True when the :AGE_GROUP entry of m is the string \"90 and over\"."
  [m]
  (let [age-group (get m :AGE_GROUP)]
    (= "90 and over" age-group)))

(defn age-group-equals-all-ages?
  "True when the :AGE_GROUP entry of m is the string \"All ages\"."
  [m]
  (let [age-group (get m :AGE_GROUP)]
    (= age-group "All ages")))

(defn missing-gss-code?
  "True when m has no :gss-code entry, or the entry is nil."
  [m]
  (let [code (get m :gss-code)]
    (nil? code)))

;; Read once, when this namespace is loaded: the edn file's contents, parsed
;; with the edn reader. The path is relative to the working directory.
(def valid-gss-codes
  (edn/read-string
   (slurp (io/file "data/harmonization/gss_codes_2011_census.edn"))))

(defn valid-gss-code?
  "True when valid-gss-codes contains the :gss-code entry of m, as decided by
   the .contains method of that collection."
  [m]
  (let [code (get m :gss-code)]
    (.contains valid-gss-codes code)))

;;;;;;;;;;;;;
;; SORTING ;;
;;;;;;;;;;;;;

(defn sort-dclg-data
  "Sorts the rows in m by :gss-code, then :year, :sex, :relationship and
   :age-group, in that order of precedence."
  [m]
  (let [sort-keys [:gss-code :year :sex :relationship :age-group]]
    (sort-by (fn [row] (mapv #(get row %) sort-keys)) m)))

(defn sort-dwellings-data
  "Sorts the rows in m by :gss-code, then by :year."
  [m]
  (sort-by (fn [row] [(get row :gss-code) (get row :year)]) m))

;;;;;;;;;;;;;;;
;; GATHERING ;;
;;;;;;;;;;;;;;;

(defn gather-by-year
  "Turns the year columns of a single row map into one map per year.

   keys is a vector of the keys of m to carry over unchanged. Every entry of m
   whose key satisfies year-key? produces one output map made of the
   carried-over entries plus :year (the name of the year key, as a string)
   and val-keyname (the value that was stored under that year key). Entries
   whose key is not a year contribute nothing.
   NOTE: all year columns are gathered, regardless of the type of their value."
  [keys m val-keyname]
  (let [carried (select-keys m keys)
        gathered (reduce-kv
                  (fn [acc k v]
                    (if (year-key? k)
                      (conj acc (merge carried
                                       {:year (name k)
                                        val-keyname v}))
                      acc))
                  []
                  m)]
    (sequence gathered)))
