(ns witan.phyrexian.gss-harmonizer
  (:require [clojure.java.io :as io]
            [clojure.edn :as edn]))

(defn load-gss-data
  "Reads the gss code changes EDN file from disk and returns the parsed data."
  []
  (let [source (io/file "data/harmonization/gss_code_changes.edn")]
    (edn/read-string (slurp source))))

;; GSS code lookup table, read from the EDN file (see load-gss-data) when this
;; namespace is loaded. apply-census-codes looks codes up here by keyword.
(def code-changes (load-gss-data))

(defn +nil
  "Nil-tolerant addition. When both arguments are present, returns their sum.
  When exactly one is nil, returns the other one. When both are nil, returns
  nil, so that missing data can be told apart from a genuine zero."
  [a b]
  (if (and (some? a) (some? b))
    (+ a b)
    ;; At least one side is nil: hand back whichever is left (possibly nil).
    (if (nil? a) b a)))

(defn sum-over-with-nil
  "Reducing step: adds the values of `columns` found in `row` onto the
  matching entries of `aggregated`, combining them with +nil so that nils
  are tolerated. Keys of `row` outside `columns` are ignored."
  [columns aggregated row]
  (let [to-add (select-keys row columns)]
    (merge-with +nil aggregated to-add)))

(defn sum-over-grouped-map
  "Takes a vector of keys (columns) to sum over and data that has already been
  grouped with the group-by fn (group key -> collection of rows).
  For every group the rows are reduced with sum-over-with-nil, using the first
  row of the group as the starting value, and the result is merged onto the
  group key. Returns a lazy sequence with one entry per group.
  FIXME: Assumes all gss codes are present for multiple codes that get merged
  into one. If any are missing, the sum is not correct."
  [keys-to-sum grouped-data]
  (for [[group-key rows] grouped-data]
    (let [summed-row (reduce (fn [acc row]
                               (sum-over-with-nil keys-to-sum acc row))
                             rows)]
      (merge group-key summed-row))))

(defn apply-census-codes
  "Updates gss-code to match 2011 Census code for consistency across datasets.
   The code is keywordized and looked up in the code-changes table.
   When the table has no entry for the code (or the code is nil/missing), the
   original value is kept rather than being replaced by nil, so unmapped areas
   are not collapsed together by later grouping.
   Returns map with updated gss-code."
  [m]
  (update m :gss-code
          (fn [g]
            ;; `get` with a default avoids invoking nil as a function when
            ;; (keyword g) is nil, and falls back to the untouched code.
            (get code-changes (keyword g) g))))

(defn reduce-summed-data-to-maps
  "Takes summed, grouped data in which each entry carries its data as the last
   element (the group-by keys come before it). Drops the group-by keys and
   returns a vector holding only that last element of every entry."
  [data]
  (mapv last data))

(defn group-by-and-sum
  "Groups the rows of `data` by the values of `group-by-cols`, sums the
   `keys-to-sum` columns within each group (nil-tolerant, via
   sum-over-grouped-map) and returns a vector with one summed map per group."
  [group-by-cols keys-to-sum data]
  (let [group-key-fn (apply juxt group-by-cols)
        grouped      (group-by group-key-fn data)
        summed       (vec (sum-over-grouped-map keys-to-sum grouped))]
    (reduce-summed-data-to-maps summed)))

(defn harmonize
  "Rewrites the gss-code of every row in `dataset` with apply-census-codes,
   then groups the rows by `group-by-cols` and sums `keys-to-sum` within each
   group using group-by-and-sum."
  [group-by-cols keys-to-sum dataset]
  (let [recoded (map apply-census-codes dataset)]
    (group-by-and-sum group-by-cols keys-to-sum recoded)))




