(ns leafgrabber.free-text.attribute
  (:use [cascalog.api :only (hfs-textline lfs-textline stdout <- ?<- ?-
                             defmapop defmapcatop defbufferop)]
        [clojure.set :only (union)]
        [clojure.contrib.duck-streams :only (read-lines)]
        [clojure.data.json :only (json-str read-json)]
        [clojure.contrib.string :only (split as-str)])
  (:require [leafgrabber.core :as lgc]
            [leafgrabber.free-text.extractor :as ext]
            [clojure.string :as str]
            [cascalog [vars :as v] [ops :as c]])
  )

; The table of attributes, keyed by attribute name. Each entry is a map
; holding :extractors (the extractors registered for the attribute) and
; :aggregator (the aggregator function paired with it). Entries are added
; by add-context-reg-att, add-enum-reg-att and add-capture-reg-att.
(def ^:dynamic *attribute-table* {})

(defn sum-att-agg-helper
  "Fold a sequence of JSON-encoded count triples into `ret`.

   values - sequence of JSON strings; each is decoded with read-json and
            must be indexable at positions 0, 1 and 2.
   ret    - running totals, indexable at 0, 1 and 2 (e.g. [0 0 0]).

   Returns `ret` with positions 0, 1 and 2 incremented by the matching
   positions of every decoded value. An empty `values` returns `ret` as is."
  [values ret]
  (reduce (fn [totals encoded]
            (let [counts (read-json encoded)]
              ;; Add the decoded triple position by position.
              (reduce (fn [acc idx]
                        (assoc acc idx (+ (totals idx) (counts idx))))
                      totals
                      [0 1 2])))
          ret
          values))

(defn majority-att-aggregator
  "Decide an attribute by majority vote.

   values - sequence of tuples whose second element is a JSON string
            accepted by sum-att-agg-helper (a triple of counts; position 0
            is treated as the 'true' count and position 1 as the 'false'
            count).

   Returns true when the summed true count is larger, false when the summed
   false count is larger, and the string \"no-evidence\" when they are equal."
  [values]
  (let [[trues falses] (sum-att-agg-helper (map second values) [0 0 0])]
    (cond
      (> trues falses) true
      (< trues falses) false
      (= trues falses) "no-evidence")))

(defn add-context-reg-att
  "Register attribute `name` in *attribute-table*.

   name - key under which the attribute is stored.
   exts - extractors for the attribute, stored under :extractors.

   The entry's :aggregator is majority-att-aggregator. The update is applied
   to the root binding of *attribute-table*. Returns the *attribute-table*
   var."
  [name exts]
  ;; alter-var-root updates the root value atomically. A `def` nested inside
  ;; a function body re-interns the var on every call and performs a
  ;; non-atomic read-then-write, so concurrent registrations could be lost.
  (alter-var-root #'*attribute-table*
                  assoc
                  name
                  {:extractors exts
                   :aggregator majority-att-aggregator})
  ;; Keep returning the var, as the nested `def` form did.
  #'*attribute-table*)

(defn sum-maps
  "Add every value of `map1` into `map2`.

   map1 - map (or sequence of key/value pairs) with numeric values.
   map2 - accumulator map; keys absent from it are treated as 0.

   Returns `map2` with each key of `map1` set to the sum of both values.
   An empty `map1` returns `map2` unchanged."
  [map1 map2]
  (reduce (fn [totals [k amount]]
            (assoc totals k (+ amount (get totals k 0))))
          map2
          map1))

(defn sum-enum-att-agg-helper
  "Sum a sequence of count maps into `ret`.

   values - sequence of maps with numeric values.
   ret    - accumulator map the counts are added to.

   Returns `ret` after every map in `values` has been merged in with
   sum-maps. An empty `values` returns `ret` unchanged."
  [values ret]
  (reduce (fn [totals counts] (sum-maps counts totals))
          ret
          values))

(defn mode-key
  "Get the key with the highest value, not counting no-evidence.
   If there are ties, return no-evidence.

   in-map    - map (or sequence of key/value pairs) of key -> numeric count;
               keys are converted to strings with as-str before comparison.
   high-keys - list of the best keys seen so far (seed value).
   high-val  - the count shared by `high-keys` (seed value).

   Returns the single winning key as a string, or \"no-evidence\" when more
   than one key shares the highest count."
  [in-map high-keys high-val]
  (let [step (fn [[leaders best :as state] [raw-key cnt]]
               (let [label (as-str raw-key)]
                 (cond
                   ;; The no-evidence bucket never competes.
                   (= label "no-evidence") state
                   ;; Same count as the current best: remember the tie.
                   (= cnt best) [(cons label leaders) best]
                   ;; Strictly better: start a fresh leader list.
                   (> cnt best) [(list label) cnt]
                   :else state)))
        [leaders _] (reduce step [high-keys high-val] in-map)]
    (if (second leaders)
      "no-evidence"
      (first leaders))))

(defn mode-enum-att-aggregator
  "Pick the most frequent enum value across extractor results.

   values - sequence of tuples whose second element is a JSON string that
            decodes (via read-json) to a map of key -> count.

   The decoded maps are summed with sum-enum-att-agg-helper and the winner
   is chosen by mode-key, seeded with (\"no-evidence\") and a count of 0."
  [values]
  (let [totals (->> values
                    (map second)
                    (map read-json))]
    (mode-key (sum-enum-att-agg-helper totals {})
              '("no-evidence")
              0)))

(defn add-enum-reg-att
  "Register attribute `name` in *attribute-table*.

   name - key under which the attribute is stored.
   exts - extractors for the attribute, stored under :extractors.

   The entry's :aggregator is mode-enum-att-aggregator. The update is applied
   to the root binding of *attribute-table*. Returns the *attribute-table*
   var."
  [name exts]
  ;; alter-var-root updates the root value atomically. A `def` nested inside
  ;; a function body re-interns the var on every call and performs a
  ;; non-atomic read-then-write, so concurrent registrations could be lost.
  (alter-var-root #'*attribute-table*
                  assoc
                  name
                  {:extractors exts
                   :aggregator mode-enum-att-aggregator})
  ;; Keep returning the var, as the nested `def` form did.
  #'*attribute-table*)

(defn make-count-map
  "Count occurrences of each item in `values`, starting from `map`.

   values - sequence of items to count.
   map    - existing item -> count map to extend; a missing (or nil/false)
            count is treated as 0.

   Returns `map` with the count of every item in `values` incremented once
   per occurrence."
  [values map]
  (reduce (fn [counts item]
            (assoc counts item (inc (or (get counts item) 0))))
          map
          values))

(defn max-val
  "Return the key of `count-map` with the largest count.

   count-map - map (or sequence of key/count pairs).
   ret       - best key so far (seed value; nil when none).
   cnt       - count a key must strictly exceed to replace `ret`.

   On equal counts the key encountered first is kept. Returns the string
   \"null\" when the resulting key is nil."
  [count-map ret cnt]
  (let [[winner _] (reduce (fn [[best top] [k n]]
                             [(if (> n top) k best)
                              (max n top)])
                           [ret cnt]
                           count-map)]
    (if (nil? winner) "null" winner)))

(defn majority-cap-aggregator
  "Pick the most frequently captured value across extractor results.

   values - sequence of tuples whose second element is a JSON string that
            decodes (via read-json) to a sequence of captured items.

   All decoded items are pooled and counted with make-count-map, then the
   most frequent one is chosen by max-val. Returns the string \"null\" when
   nothing was captured."
  [values]
  (let [captures (mapcat #(read-json (second %)) values)
        counts (make-count-map captures {})]
    (max-val counts nil 0)))

(defn add-capture-reg-att
  "Register attribute `name` in *attribute-table*.

   name - key under which the attribute is stored.
   exts - extractors for the attribute, stored under :extractors.

   The entry's :aggregator is majority-cap-aggregator. The update is applied
   to the root binding of *attribute-table*. Returns the *attribute-table*
   var."
  [name exts]
  ;; alter-var-root updates the root value atomically. A `def` nested inside
  ;; a function body re-interns the var on every call and performs a
  ;; non-atomic read-then-write, so concurrent registrations could be lost.
  (alter-var-root #'*attribute-table*
                  assoc
                  name
                  {:extractors exts
                   :aggregator majority-cap-aggregator})
  ;; Keep returning the var, as the nested `def` form did.
  #'*attribute-table*)