(ns witan.phyrexian.ons-ingester
  (:require [clojure.java.io :as io]
            [clojure.string :as str]
            [clojure.set :as st]
            [clojure.data.csv :as data-csv]
            [clojure.core.matrix.dataset :as ds]
            [schema.coerce :as coerce]
            [schema.core :as s]
            [schema.utils :as su]
            [witan.phyrexian.utils :as u]
            [witan.phyrexian.gss-harmonizer :as gss]))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ONS 2014 Population Projection,               ;;
;; with total population (not split by gender)   ;;
;;                                               ;;
;; No manual processing needed                   ;;
;; Used to compare with the CCM                  ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Schema describing one row of the total-population (no gender split)
;; projection: an area code string, an integer age, an integer year and a
;; population value.
;; NOTE(review): presumably this is the schema handed to `process-ons-data`;
;; no call site is visible in this file -- confirm.
;; NOTE(review): :popn uses the `double` primitive schema here, whereas
;; ONSPopulationByGenderSchema uses java.lang.Double -- confirm that
;; u/schema-coercion treats both the same way.
(def onsSchema
  {:gss.code s/Str
   :age s/Int
   :year s/Int
   :popn double})

(defn coerce-by-over-90
  "Returns `row` with :AGE_GROUP set to the string \"90\" when
  `u/is-90-or-over?` is truthy for it; otherwise returns `row` unchanged."
  [row]
  (cond-> row
    (u/is-90-or-over? row) (assoc :AGE_GROUP "90")))

(def scrub-data
  "Transducer applied to raw csv rows, in this order:
   1. `coerce-by-over-90` on every row,
   2. drop rows matching `u/age-group-equals-all-ages?`,
   3. rename :AGE_GROUP -> :age and :AREA_CODE -> :gss.code."
  (let [renames {:AGE_GROUP :age
                 :AREA_CODE :gss.code}
        rename-row (fn [row] (st/rename-keys row renames))]
    (comp
     (map coerce-by-over-90)
     (remove u/age-group-equals-all-ages?)
     (map rename-row))))

(defn format-data
  "Builds a transducer that, for each scrubbed row:
   1. expands it with `u/gather-by-year`, keyed on [:gss.code :age] with the
      value stored under :popn,
   2. runs `u/schema-coercion` against `schema` on every expanded row,
   3. discards every result for which `su/error?` is true."
  [schema]
  (let [gather (fn [row] (u/gather-by-year [:gss.code :age] row :popn))
        coerce-row (fn [row] (u/schema-coercion schema row))]
    (comp
     (mapcat gather)
     (map coerce-row)
     (remove su/error?))))

(defn process-ons-data
  "Loads `filename` with `u/load-csv`, pushes the rows through `scrub-data`
  followed by `(format-data schema)`, and returns the results as a vector."
  [filename schema]
  (let [rows (u/load-csv filename)
        pipeline (comp scrub-data (format-data schema))]
    (->> rows
         (into [] pipeline))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ONS 2014 Population Projection,                              ;;
;; with population split by gender                              ;;
;;                                                              ;;
;; Manual processing needed:                                    ;;
;;  - Combine rows from 2 csv files, one each for males/females ;;
;;                                                              ;;
;; Can be used as population input for household model          ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Schema describing one row of the gender-split projection: an area code
;; string, an integer age, a sex restricted to "F" or "M", an integer year and
;; a Double population. Passed to `process-ons-data-with-gender` by
;; `ingest-ons-data-with-gender`.
;; NOTE(review): `convert-sex-to-M-F` yields nil for unrecognised values, which
;; is outside this enum -- presumably such rows fail coercion and are removed
;; by the `su/error?` filter; confirm against u/schema-coercion.
(def ONSPopulationByGenderSchema
  {:gss-code s/Str
   :age s/Int
   :sex (s/enum "F" "M")
   :year s/Int
   :population java.lang.Double})

(defn convert-sex-to-M-F
  "Returns `row` with :sex replaced by \"M\" when it was \"males\", \"F\" when
  it was \"females\", and nil for any other value (including a missing key).
  The match is exact and case-sensitive."
  [row]
  (let [sex-codes {"males" "M"
                   "females" "F"}]
    (assoc row :sex (get sex-codes (:sex row)))))

(def scrub-data-with-gender
  "Transducer applied to raw gender-split csv rows, in this order:
   1. `coerce-by-over-90` on every row,
   2. drop rows matching `u/age-group-equals-all-ages?`,
   3. rename :AGE_GROUP -> :age, :AREA_CODE -> :gss-code and :SEX -> :sex,
   4. `convert-sex-to-M-F` on every remaining row."
  (let [renames {:AGE_GROUP :age
                 :AREA_CODE :gss-code
                 :SEX :sex}
        rename-row (fn [row] (st/rename-keys row renames))]
    (comp
     (map coerce-by-over-90)
     (remove u/age-group-equals-all-ages?)
     (map rename-row)
     (map convert-sex-to-M-F))))

(defn format-data-with-gender
  "Builds a transducer that, for each scrubbed row:
   1. expands it with `u/gather-by-year`, keyed on [:gss-code :age :sex] with
      the value stored under `val-keyname`,
   2. runs `u/schema-coercion` against `schema` on every expanded row,
   3. discards every result for which `su/error?` is true."
  [schema val-keyname]
  (let [gather (fn [row] (u/gather-by-year [:gss-code :age :sex] row val-keyname))
        coerce-row (fn [row] (u/schema-coercion schema row))]
    (comp
     (mapcat gather)
     (map coerce-row)
     (remove su/error?))))

(defn process-ons-data-with-gender
  "Loads `filename` with `u/load-csv`, runs the rows through
  `scrub-data-with-gender` and `(format-data-with-gender schema val-keyname)`,
  passes the resulting vector to `gss/harmonize` with key columns
  [:gss-code :age :sex :year] and value columns [val-keyname], and returns the
  harmonized rows sorted by :gss-code, :year, :sex, then :age."
  [filename schema val-keyname]
  (let [rows (u/load-csv filename)
        pipeline (comp scrub-data-with-gender
                       (format-data-with-gender schema val-keyname))
        cleaned (into [] pipeline rows)]
    (->> cleaned
         (gss/harmonize [:gss-code :age :sex :year] [val-keyname])
         (sort-by (juxt :gss-code :year :sex :age)))))

(defn ingest-ons-data-with-gender
  "Scrub and format ONS population projection for loading into household projection.

  Arities:
    []               reads the raw 2014-based SNPP csv from its default path
                     and writes the prepped csv to its default path.
    [ons-in ons-out] same processing, with explicit input and output csv paths.

  The rows are produced by `process-ons-data-with-gender` using
  `ONSPopulationByGenderSchema` and the :population value key, then written
  with `u/write-csv`. Returns whatever `u/write-csv` returns."
  ([]
   ;; Default locations, kept so existing zero-argument callers are unaffected.
   (ingest-ons-data-with-gender
    "data/data_to_ingest/ons_popn_projections/data_inputs_raw/ons_2014_based_snpp.csv"
    "data/data_to_ingest/ons_popn_projections/data_inputs_prepped/ons_2014_based_snpp.csv"))
  ([ons-in ons-out]
   (let [population (process-ons-data-with-gender ons-in ONSPopulationByGenderSchema :population)]
     (u/write-csv ons-out population))))

(defn -main
  "Command-line entry point: runs `ingest-ons-data-with-gender` with its
  default paths. Command-line arguments are accepted and ignored, so launching
  with extra arguments no longer fails with an arity error."
  [& _args]
  (ingest-ons-data-with-gender))
