(ns beam-aws.es
  (:require [cheshire.core :as json]
            [taoensso.timbre :as log]
            [clj-http.client :as client]
            [environ.core :as environ]
            [aws-sig4.middleware :as aws-sig4]))

;; Maximum number of bulk entries sent in one bulk request; `post-bulk!`
;; partitions its entries into chunks of this size.
(def chunk-size 1000)
;; Base delay in milliseconds before a retry. The exception handlers sleep for
;; `sleep-ms` plus a random extra in the range [0, sleep-ms).
(def sleep-ms 200)

;; Forward declarations: the retry handlers are defined before `post!` and
;; `put!` but call back into them.
(declare post!)
(declare put!)

;; Middleware value built by `aws-sig4/build-wrap-aws-auth` for the "es"
;; service. The request helpers in this file pass it, together with
;; `aws-sig4/wrap-aws-date`, to `client/with-additional-middleware`.
;; Region and credentials are looked up in `environ/env` when this `def` is
;; evaluated.
;; NOTE(review): presumably the credentials are therefore fixed at namespace
;; load time — confirm how the builder uses them (e.g. expiring session tokens).
(def wrap-aws-auth
  (aws-sig4/build-wrap-aws-auth {:region     (:region environ/env)
                                 :service    "es"
                                 :access-key (:aws-access-key-id environ/env)
                                 :secret-key (:aws-secret-access-key environ/env)
                                 :token      (:aws-session-token environ/env)}))

(defn- post-bulk-uri
  "Returns the `_bulk` endpoint URL (plain http) for the host given in `uri`."
  [uri]
  (let [scheme "http://"
        endpoint "/_bulk"]
    (str scheme uri endpoint)))

(defn- put-uri
  "Returns the document URL `http://<es-uri>/<index>/doc/<id>` used for
  single-document puts."
  [es-uri index id]
  (let [base (str "http://" es-uri)]
    (str base "/" index "/doc/" id)))

(defn- auth-put!
  "Sends `doc` (serialised as JSON) with an HTTP PUT to the document URL built
  from `es-uri`, `index` and `id`. The request is issued with the sig4 auth
  and date middleware added. Returns the response body parsed from JSON into
  a map with keyword keys."
  [es-uri index id doc]
  (let [uri (put-uri es-uri index id)]
    (log/info "Posting to elasticsearch uri:" uri)
    (let [response (client/with-additional-middleware
                     [wrap-aws-auth aws-sig4/wrap-aws-date]
                     (client/put uri {:content-type :json
                                      :body         (json/generate-string doc)}))]
      (json/parse-string (:body response) true))))

(defn- auth-post!
  "Issues an HTTP POST of `body` (already a JSON string) to `uri` with the
  sig4 auth and date middleware added. Returns the response body parsed from
  JSON into a map with keyword keys."
  [uri body]
  (let [response (client/with-additional-middleware
                   [wrap-aws-auth aws-sig4/wrap-aws-date]
                   (client/post uri {:content-type :json :body body}))
        raw-body (:body response)]
    (json/parse-string raw-body true)))

(defn- auth-get!
  "Issues an HTTP GET to `uri`, sending `body` (a JSON string) as the request
  body, with the sig4 auth and date middleware added. Returns the response
  body parsed from JSON into a map with keyword keys."
  [uri body]
  (let [response (client/with-additional-middleware
                   [wrap-aws-auth aws-sig4/wrap-aws-date]
                   (client/get uri {:content-type :json :body body}))
        raw-body (:body response)]
    (json/parse-string raw-body true)))

(defn- check-for-put-errors
  "Inspects a parsed put response. Throws an Exception when its `:errors`
  entry is truthy; otherwise returns nil."
  [response-body]
  (when (:errors response-body)
    (throw (Exception. "Elasticsearch put! errors flagged by elasticsearch response"))))

(defn- handle-put-exceptions
  "Reacts to exception `e` raised by a put attempt. While `retries` is
  positive it logs a warning, sleeps for `sleep-ms` plus a random extra below
  `sleep-ms`, and calls `put!` again with one fewer retry. Once retries are
  used up it logs an error and rethrows `e`."
  [es-uri index id doc retries e]
  (if-not (pos? retries)
    (do (log/error e "Could not post body to Elasticsearch after retry count exhausted.")
        (throw e))
    (let [jitter (rand-int sleep-ms)
          pause  (+ sleep-ms jitter)]
      (log/warn e "Error flagged in Elasticsearch put.")
      (Thread/sleep pause)
      (put! es-uri index id doc (dec retries)))))

(defn put!
  "Writes `doc` to `index` under `id` on the elasticsearch host `es-uri`.
  Returns `{:success true}` when the put goes through. Any Exception,
  including one raised because the response flags errors, is handed to
  `handle-put-exceptions`, which retries up to `retries` times before
  rethrowing."
  [es-uri index id doc retries]
  (try
    (check-for-put-errors (auth-put! es-uri index id doc))
    {:success true}
    (catch Exception ex
      (handle-put-exceptions es-uri index id doc retries ex))))

(defn- bulk-errors
  "Returns the `:_id` of every item in a bulk response whose `:index` status
  is above 299. Returns an empty vector when the response's `:errors` entry
  is falsey."
  [results]
  (if-not (:errors results)
    []
    (for [item (:items results)
          :when (< 299 (get-in item [:index :status]))]
      (get-in item [:index :_id]))))

(defn- filter-bulk-es-by_id
  "Returns the bulk entries in `list` whose `[:action :index :_id]` equals
  `id`, as a lazy sequence."
  [list id]
  (filter (fn [entry]
            (= id (get-in entry [:action :index :_id])))
          list))

(defn- bulk-body-string
  "Returns the string used as the `:body` of an elasticsearch bulk index
  request. `bulk-insert-entries` is a sequence of maps with `:action` and
  `:doc` keys; each entry contributes two newline-terminated JSON lines,
  the action followed by the doc. Returns an empty string for no entries."
  [bulk-insert-entries]
  ;; A single `apply str` builds the body in one pass; re-concatenating an
  ;; ever-growing string per entry copies the whole body on every step.
  (apply str
         (mapcat (fn [entry]
                   [(json/generate-string (:action entry)) "\n"
                    (json/generate-string (:doc entry)) "\n"])
                 bulk-insert-entries)))

(defn- bulk-post-fails
  "Returns, for each failed id reported in `result`, the first entry of
  `bulk-docs` carrying that id (nil when no entry matches)."
  [bulk-docs result]
  (for [failed-id (bulk-errors result)]
    (first (filter-bulk-es-by_id bulk-docs failed-id))))

(defn- handle-post-error
  "Handles a bulk response that flagged errors. While `retries` is positive
  it logs a warning and re-posts only the failed docs with one fewer retry.
  Once retries are used up it logs an error and throws an Exception."
  [uri bulk-docs result retries]
  (if (> retries 0)
    (let [error-docs (bulk-post-fails bulk-docs result)
          ;; :docs-failed is the number of docs that failed, i.e. the ones
          ;; about to be retried, not the number that succeeded.
          message {:docs-attempted (count bulk-docs)
                   :docs-failed    (count error-docs)
                   :retries        retries}]
      (log/warn "Elasticsearch errors detected in bulk insert. Message: " message)
      (post! uri error-docs (dec retries)))
    (do (log/error "Elasticsearch failed to insert all bulk docs.")
        (throw (Exception. "Elasticsearch bulk insert failed with errors")))))

(defn- check-for-post-errors
  "Inspects a parsed bulk response. When its `:errors` entry is truthy the
  response is passed to `handle-post-error` together with `uri`, `bulk-docs`
  and `retries`; otherwise returns nil."
  [response uri bulk-docs retries]
  (when (:errors response)
    (handle-post-error uri bulk-docs response retries)))

(defn- handle-post-exceptions
  "Handles exception `e` raised by a bulk post attempt. While `retries` is
  positive it logs a warning, sleeps for `sleep-ms` plus a random extra below
  `sleep-ms`, and calls `post!` again with one fewer retry. Once retries are
  used up it logs an error and rethrows `e`."
  [bulk-uri bulk-docs retries e]
  (if (> retries 0)
    (let [sleep-time (+ sleep-ms (rand-int sleep-ms))]
      (log/warn e "Error flagged in Elasticsearch post.")
      (Thread/sleep sleep-time)
      (post! bulk-uri bulk-docs (dec retries)))
    ;; The throwable goes first, as in the other log calls in this file, so
    ;; the logger treats it as the error rather than as a message argument.
    (do (log/error e "Could not post body to Elasticsearch after retry count exhausted.")
        (throw e))))

(defn post!
  "Posts a bulk request to elasticsearch, retrying in accordance with
  `retries`. `bulk-docs` is a sequence of maps of the form produced by
  `bulk-entry`. An Exception raised while sending is handed to
  `handle-post-exceptions`; a response that flags errors is handed to
  `check-for-post-errors`, which retries only the failed docs.
  Returns `{:success true}` if no Exception escapes. Otherwise throws."
  [bulk-uri bulk-docs retries]
  (log/debug "Posting to elasticsearch. Retry count" retries)
  (let [response (try
                   (auth-post! bulk-uri (bulk-body-string bulk-docs))
                   (catch Exception e
                     ;; Either throws or finishes the work through a retry.
                     ;; Returning nil makes the error check below a no-op.
                     (handle-post-exceptions bulk-uri bulk-docs retries e)
                     nil))]
    ;; The partial-failure retry runs outside the try on purpose. Inside it,
    ;; the exception from an exhausted nested retry would be caught again at
    ;; every level and trigger another full retry, making the number of
    ;; requests grow exponentially with `retries`.
    (check-for-post-errors response bulk-uri bulk-docs retries)
    {:success true}))

(defn- bulk-entry
  "Builds one bulk entry as a clojure map: `:action` holds the elasticsearch
  index action line (index name, type \"doc\" and id) and `:doc` holds the
  document itself. Format reference:
  https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-bulk.html."
  [index id doc]
  (let [target {:_index index
                :_type  "doc"
                :_id    id}]
    {:action {:index target}
     :doc    doc}))

(defn- bulk-entries
  "Pairs each id in `ids` with the doc at the same position in `data` and
  returns a lazy sequence of bulk entries for `index`. The entries are later
  turned into a request body by `bulk-body-string`."
  [index ids data]
  (map (partial bulk-entry index) ids data))

(defn post-bulk!
  "Writes `data` to `index` on the elasticsearch host `es-uri` through the
  bulk endpoint. `ids` and `data` are parallel sequences: the doc at each
  position is indexed under the id at the same position. Entries are sent in
  chunks of `chunk-size`, each chunk with a retry count of 10.
  Returns a sequence with one `post!` result per chunk. Throws if a chunk
  still fails after its retries."
  [es-uri index ids data]
  (log/info "Writing" (count data) "docs to" index "in elasticsearch")
  (let [bulk-uri (post-bulk-uri es-uri)
        bulk-docs (bulk-entries index ids data)
        chunked-bulk-docs (partition-all chunk-size bulk-docs)]
    ;; `map` is lazy: without `doall` no request is sent until a caller
    ;; happens to consume the return value. `post!` returns a map, which
    ;; `flatten` leaves untouched, so no flattening is needed.
    (doall (map #(post! bulk-uri % 10) chunked-bulk-docs))))

(defn- criteria->query
  "Turns a `criteria` map into an elasticsearch bool query with one
  `match_phrase` clause per key/value pair. When `start`, `end` and
  `timezone` are also given, a `range` clause on the `epoch` field is
  appended to the `must` clauses."
  ([criteria]
   {:query {:bool {:must (for [[field value] criteria]
                           {:match_phrase {field value}})}}})
  ([criteria start end timezone]
   (let [phrase-clauses (vec (for [[field value] criteria]
                               {:match_phrase {field value}}))
         range-clause   {:range {:epoch {:gte       start
                                         :lte       end
                                         :time_zone timezone}}}]
     {:query {:bool {:must (conj phrase-clauses range-clause)}}})))

(defn- histogram-agg-query
  "Builds the `aggs` part of an elasticsearch query: a date histogram named
  `month` over the `epoch` field using `interval` and `timezone`, with one
  `sum` sub-aggregation per entry of `field-list`."
  [interval timezone field-list]
  (let [sum-aggs (into {}
                       (for [field field-list]
                         [(keyword field) {:sum {:field (name field)}}]))
        histogram {:field     "epoch"
                   :interval  interval
                   :time_zone timezone}]
    {:aggs {:month {:date_histogram histogram
                    :aggs           sum-aggs}}}))

(defn- flat-bucket
  "Flattens one histogram `bucket`: for every name in `field-list` (strings
  or keywords) the nested `{:value x}` entry is replaced by `x` itself. All
  other keys of the bucket are kept as they are. A field missing from the
  bucket ends up with a nil value."
  [bucket field-list]
  (let [field-keys   (map keyword field-list)
        field-values (map #(get-in bucket [% :value]) field-keys)
        ;; `dissoc` takes keys as separate arguments; passing the sequence as
        ;; a single argument would look for a key equal to the sequence
        ;; itself and remove nothing.
        clean-bucket (apply dissoc bucket field-keys)]
    (merge clean-bucket (zipmap field-keys field-values))))

(defn delete-by-query!
  "Deletes the docs in `index` that match `criteria`, a map of
  `field = match-value` pairs, by posting to the `_delete_by_query` endpoint
  with `conflicts=proceed`. Returns the parsed response. Any Exception is
  logged and rethrown."
  [es-uri index criteria]
  (log/info "Deleting docs from elasticsearch index:" index "with criteria:" criteria)
  (let [body (json/generate-string (criteria->query criteria))]
    (try
      (auth-post! (str "http://" es-uri "/" index "/_delete_by_query?conflicts=proceed") body)
      (catch Exception ex
        (log/error ex)
        (throw ex)))))

(defn parse-histogram-result
  "Reads the buckets of the `month` aggregation from `es-result` and returns
  them as a lazy sequence of flat maps: the `field-list` values are
  flattened and `:key_as_string` is renamed to `:timestamp`."
  [es-result field-list]
  (let [buckets (get-in es-result [:aggregations :month :buckets])]
    (map (fn [bucket]
           (let [flat (flat-bucket bucket field-list)]
             (-> flat
                 (assoc :timestamp (:key_as_string flat))
                 (dissoc :key_as_string))))
         buckets)))

(defn date-histogram!
  "Requests a date histogram from `index` and returns it as a sequence of
  flat maps (see `parse-histogram-result`). Params:
  `interval` hour | day | week | month | year,
  `start` a local date - for example `2017-05-04`,
  `end` a local date,
  `timezone` for example `Australia/Sydney`,
  `criteria` a key value map of field criteria to search by,
  `field-list` a keyword/string list of field names to aggregate.
  Any Exception is logged and rethrown."
  [es-uri index interval start end timezone criteria field-list]
  (log/info "Requesting data histogram from elasticsearch. Index:" index "timespan:" interval "start:" start "end:" end "timezone:" timezone)
  (let [agg-part    (histogram-agg-query interval timezone field-list)
        query-part  (criteria->query criteria start end timezone)
        body-string (json/generate-string (merge agg-part query-part))]
    (try
      (parse-histogram-result
        (auth-get! (str "http://" es-uri "/" index "/_search?size=0") body-string)
        field-list)
      (catch Exception ex
        (log/error ex)
        (throw ex)))))

(defn- parse-query-result
  "Pulls the `:_source` of every hit out of a parsed search response and
  returns them as a lazy sequence of clojure maps."
  [query-result]
  (let [hits (get-in query-result [:hits :hits])]
    (map (fn [hit] (:_source hit)) hits)))

(defn query!
  "Searches `index` for docs matching `criteria` (a clojure map) and returns
  the source of each hit. The request asks for at most 1000 docs.
  Any Exception is logged and rethrown."
  [es-uri index criteria]
  (log/info "Querying docs from elasticsearch index:" index "with criteria:" criteria)
  (let [max-hits     1000
        search-query (assoc (criteria->query criteria) :size max-hits)
        body         (json/generate-string search-query)]
    (try
      (parse-query-result
        (auth-post! (str "http://" es-uri "/" index "/_search?") body))
      (catch Exception ex
        (log/error ex)
        (throw ex)))))