(ns onyx.plugin.s3
  (:require [clojure.core.async :refer [chan >!! <!! alts!! timeout close!]]
            [onyx.peer.pipeline-extensions :as p-ext]
            [onyx.static.default-vals :refer [defaults]]
            [onyx.peer.operation :refer [kw->fn]]
            [onyx.extensions :as extensions]
            [taoensso.timbre :refer [info warn fatal]]
            [aws.sdk.s3 :as s3]))

(def s3-defaults
  "Fallback values used when the task map omits the corresponding :s3/* key."
  {;; Milliseconds the checkpoint loop sleeps between passes.
   :s3/checkpoint-interval 5000
   ;; Buffer size of the channel the reader puts lines on.
   :s3/chan-capacity 10000})

(defn reader-process
  "Reads `input-stream` line by line and puts each line on `ch` (blocking put)
  as {:line (decompress-fn line) :offset n}, followed by the sentinel :done
  once the stream is exhausted.

  The first `offset` lines are skipped. `n` is the absolute, zero-based line
  index within the stream, so numbering continues from `offset` instead of
  restarting at 0 after a resume. Restarting at 0 would make a later
  checkpoint record an offset relative to the resume point."
  [input-stream ch decompress-fn offset]
  (with-open [rdr (clojure.java.io/reader input-stream)]
    (doseq [[line line-offset] (map vector
                                    (drop offset (line-seq rdr))
                                    ;; absolute line numbers: offset, offset+1, ...
                                    (iterate inc offset))]
      (>!! ch {:line (decompress-fn line) :offset line-offset}))
    (>!! ch :done)))

(defn highest-offset-to-commit
  "Returns the largest offset reachable from the smallest element of `offsets`
  by steps of exactly 1, or nil when `offsets` is empty.

  `offsets` may be any collection, in any order (the caller keeps them in a
  hash set), so it is sorted and de-duplicated first. Only a gap of exactly 1
  counts as contiguous: [0 2 3] yields 0, not 2.

  The run always starts at the smallest offset present; this function has no
  knowledge of offsets below it."
  [offsets]
  (when-let [[lowest & higher] (seq (sort (distinct offsets)))]
    (reduce (fn [highest offset]
              (if (= offset (inc highest))
                offset
                ;; first gap ends the contiguous run
                (reduced highest)))
            lowest
            higher)))

(defn checkpoint-process
  "Loops forever: sleeps `checkpoint-interval` ms, then, if
  `highest-offset-to-commit` yields an offset for the contents of the
  `pending-commits` atom, writes it with `extensions/force-write-chunk` under
  `task-id` and drops every offset <= it from `pending-commits`.

  InterruptedException is rethrown; any other Throwable is logged with `fatal`
  and ends the loop."
  [log pending-commits checkpoint-interval task-id]
  (try
    (loop []
      (Thread/sleep checkpoint-interval)
      (when-let [offset (highest-offset-to-commit @pending-commits)]
        (info "Writing offset to ZooKeeper: " offset)
        (extensions/force-write-chunk log :chunk offset task-id)
        ;; Rebuild a set: a bare `remove` would swap a lazy seq into the atom,
        ;; so later `conj` calls would lose set semantics and each pass would
        ;; stack another lazy wrapper on top of the previous one.
        (swap! pending-commits
               (fn [coll] (set (remove (fn [k] (<= k offset)) coll)))))
      (recur))
    (catch InterruptedException e
      (throw e))
    (catch Throwable e
      (fatal e))))

(defn find-initial-offset
  "Returns whatever `extensions/read-chunk` yields for the :chunk entry of
  `task-id` in `log`. If that call throws anything at all, returns 0."
  [log task-id]
  (try
    (extensions/read-chunk log :chunk task-id)
    (catch Throwable _
      ;; Nothing readable was stored: begin at the first line.
      0)))

(defn inject-buffered-reader
  "Lifecycle hook (registered under :lifecycle/before-task-start in
  `reader-calls`). Opens the S3 object, starts the reader and checkpoint
  futures, and returns the map of reader state.

  Task-map keys read:
    :s3/access-key, :s3/secret-key  credentials passed to `s3/get-object`
    :s3/bucket                      bucket name, defaults to \"onyx-s3\"
    :s3/key                         object key, defaults to \"blah.edn\"
    :s3/decompress-fn               keyword resolved with `kw->fn`; when absent
                                    lines are passed through unchanged
    :s3/chan-capacity               reader channel buffer size (see `s3-defaults`)
    :s3/checkpoint-interval         ms between checkpoints (see `s3-defaults`)

  Returns a map with :s3/reader-ch, :s3/reader-fut, :s3/checkpoint-fut,
  :s3/pending-commits (atom holding a set) and :s3/pending-messages (atom
  holding a map)."
  [{:keys [onyx.core/log onyx.core/task-map onyx.core/task-id] :as event} lifecycle]
  (let [ch (chan (or (:s3/chan-capacity task-map) (:s3/chan-capacity s3-defaults)))
        creds {:access-key (:s3/access-key task-map)
               :secret-key (:s3/secret-key task-map)}
        ;; Previously hard-coded; the old literals remain the defaults.
        bucket (or (:s3/bucket task-map) "onyx-s3")
        object-key (or (:s3/key task-map) "blah.edn")
        input-stream (:content (s3/get-object creds bucket object-key))
        decompress-fn (if-let [fn-kw (:s3/decompress-fn task-map)]
                        (kw->fn fn-kw)
                        identity)
        pending-commits (atom #{})
        checkpoint-interval (or (:s3/checkpoint-interval task-map) (:s3/checkpoint-interval s3-defaults))
        initial-offset (find-initial-offset log task-id)]
    (info (format "Reading from S3, dropping the first %s lines" initial-offset))
    (let [s3-reader-fut (future (reader-process input-stream ch decompress-fn initial-offset))
          s3-checkpoint-fut (future (checkpoint-process log pending-commits checkpoint-interval task-id))]
      {:s3/reader-ch ch
       :s3/reader-fut s3-reader-fut
       :s3/checkpoint-fut s3-checkpoint-fut
       :s3/pending-commits pending-commits
       :s3/pending-messages (atom {})})))

(defn close-reader-resources
  "Lifecycle hook (registered under :lifecycle/after-task-end in
  `reader-calls`). Closes the reader channel and cancels the reader and
  checkpoint futures stored in `event`. Returns an empty map."
  [event lifecycle]
  ;; The channel is injected as :s3/reader-ch. Looking it up under any other
  ;; key yields nil, which makes close! throw before the futures are cancelled.
  (when-let [ch (:s3/reader-ch event)]
    (close! ch))
  (doseq [fut [(:s3/reader-fut event) (:s3/checkpoint-fut event)]
          :when fut]
    (future-cancel fut))
  {})

;; Reads up to a batch of segments from the reader channel.
;;
;; The number of segments taken is capped by both :onyx/batch-size and the
;; remaining in-flight budget (:onyx/max-pending minus the number of pending
;; messages). When there is no budget left the call just waits out the batch
;; timeout and the batch is nil. Otherwise it takes from the channel until the
;; cap is reached or the batch timeout fires, whichever comes first.
;;
;; Every segment gets a fresh UUID under :id and is recorded in
;; pending-messages (keyed by that id, keeping :message and :offset).
;; The :done sentinel is forwarded as a segment with :message :done and no
;; :offset.
;;
;; Returns {:onyx.core/batch batch}.
(defmethod p-ext/read-batch :s3/read-file
  [{:keys [onyx.core/task-map s3/reader-ch s3/pending-messages] :as event}]
  (let [pending (count @pending-messages)
        max-pending (or (:onyx/max-pending task-map) (:onyx/max-pending defaults))
        batch-size (:onyx/batch-size task-map)
        max-segments (min (- max-pending pending) batch-size)
        ms (or (:onyx/batch-timeout task-map) (:onyx/batch-timeout defaults))
        timeout-ch (timeout ms)
        ;; `pos?` rather than `zero?`: the budget can be negative when more
        ;; messages are pending than max-pending allows.
        batch (if-not (pos? max-segments)
                (<!! timeout-ch)
                (loop [segments [] cnt 0]
                  ;; Bound by max-segments (not batch-size) so the pending
                  ;; budget computed above is actually enforced.
                  (if (>= cnt max-segments)
                    segments
                    (if-let [message (first (alts!! [reader-ch timeout-ch] :priority true))]
                      (recur (conj segments
                                   (if (= message :done)
                                     {:id (java.util.UUID/randomUUID)
                                      :input :s3
                                      :message :done}
                                     {:id (java.util.UUID/randomUUID)
                                      :input :s3
                                      :message (:line message)
                                      :offset (:offset message)}))
                             (inc cnt))
                      ;; nil means the timeout fired or the channel was closed
                      segments))))]
    (doseq [m batch]
      (swap! pending-messages assoc (:id m) (select-keys m [:message :offset])))
    {:onyx.core/batch batch}))

;; Acknowledges a message: if its pending entry carries an :offset, that
;; offset is added to pending-commits; the entry is then removed from
;; pending-messages either way.
(defmethod p-ext/ack-message :s3/read-file
  [{:keys [s3/pending-messages s3/pending-commits]} message-id]
  (let [{:keys [offset]} (get @pending-messages message-id)]
    (when offset
      (swap! pending-commits conj offset)))
  (swap! pending-messages dissoc message-id))

;; Re-enqueues a pending message on the reader channel and removes it from
;; pending-messages (read-batch registers it again under a new id).
;;
;; Pending entries are stored as {:message .. :offset ..}, whereas read-batch
;; expects channel items shaped {:line .. :offset ..}, so the entry is
;; converted back before the put; otherwise the retried payload would be read
;; as nil. An unknown message-id is ignored, because putting nil on a channel
;; throws.
(defmethod p-ext/retry-message :s3/read-file
  [{:keys [s3/pending-messages s3/reader-ch]} message-id]
  (when-let [msg (get @pending-messages message-id)]
    (if (= (:message msg) :done)
      (>!! reader-ch :done)
      (>!! reader-ch {:line (:message msg) :offset (:offset msg)}))
    (swap! pending-messages dissoc message-id)))

;; Returns the pending entry stored for message-id, or nil when there is none.
(defmethod p-ext/pending? :s3/read-file
  [{:keys [s3/pending-messages]} message-id]
  (-> pending-messages
      deref
      (get message-id)))

;; True only when exactly one message is pending and that message is the
;; :done sentinel.
(defmethod p-ext/drained? :s3/read-file
  [{:keys [s3/drained? s3/pending-messages] :as event}]
  (let [in-flight @pending-messages
        first-entry (first (vals in-flight))]
    (and (= 1 (count in-flight))
         (= :done (:message first-entry)))))

(defn inject-buffered-writer
  "Lifecycle hook (registered under :lifecycle/before-task-start in
  `writer-calls`). The writer has no resources to set up yet, so this returns
  an empty map, matching the map-returning reader hooks. Previously the body
  consisted solely of a reader-discarded form, which made the function
  return nil."
  [{:keys [onyx.core/task-map]} lifecycle]
  {})

(defn close-writer-resources
  "Lifecycle hook (registered under :lifecycle/after-task-end in
  `writer-calls`). There is nothing to release, so it returns an empty map.

  Accepts the same [event lifecycle] arguments as `close-reader-resources`,
  which is registered under the same lifecycle key; a zero-argument-only
  function would throw an arity error when called that way. The zero-argument
  form is kept for existing callers."
  ([] (close-writer-resources nil nil))
  ([event lifecycle] {}))

;; Stub writer for :s3/write-file. Nothing is written anywhere: the only
;; side-effecting form in the body is discarded by the reader (`#_`), so the
;; method simply reports the batch as written.
(defmethod p-ext/write-batch :s3/write-file
  [{:keys [onyx.core/results onyx.core/task-map] :as pipeline}]
  ;; `messages` is a lazy seq that is never realized, because its only
  ;; consumer is the discarded form below.
  (let [messages (mapcat :leaves results)]
  ;; The `#_` reader macro discards the entire @(d/transact ...) form that
  ;; follows (three lines); it is never compiled or evaluated.
  #_  @(d/transact (:datomic/conn pipeline)
                 (map #(assoc % :db/id (d/tempid (:datomic/partition task-map)))
                      (map :message messages)))
    {:onyx.core/written? true}))

;; Sealing is a no-op for :s3/write-file; the event is ignored and an empty
;; map is returned.
(defmethod p-ext/seal-resource :s3/write-file
  [_event]
  {})

(def reader-calls
  "Lifecycle call map for the S3 reader: sets the reader up before the task
  starts and tears it down after the task ends."
  {:lifecycle/after-task-end close-reader-resources
   :lifecycle/before-task-start inject-buffered-reader})

(def writer-calls
  "Lifecycle call map for the S3 writer: one hook before the task starts and
  one after the task ends."
  {:lifecycle/after-task-end close-writer-resources
   :lifecycle/before-task-start inject-buffered-writer})
