(ns s3sync.core
  (:refer-clojure :exclude [sync file-seq])
  (:use s3sync.misc
        clojure.pprint)
  (:require [clojure.java.io :as io]
            [clj-time core coerce]
            digest
            amazonica.core [amazonica.aws.s3 :as s3]))

(defn- file
  "Describes a single java.io.File as a map with the file itself under :file,
  its last-modified time (converted with clj-time.coerce/from-long) under
  :last-modified, and a delay under :md5-thunk that runs digest/md5 on the
  file only when dereferenced."
  [f]
  (let [modified-at (-> f .lastModified clj-time.coerce/from-long)
        md5-later (delay (debug-result *out*
                                       (str "Calculating MD5 for " (.getName f))
                                       (digest/md5 f)))]
    {:file f
     :last-modified modified-at
     :md5-thunk md5-later}))

(declare file-tree)

(defn- directory
  "Builds a map from each child name of directory f to the file tree of
  that child."
  [f]
  (let [parent-path (.getPath f)]
    (into {}
          (for [child-name (.list f)]
            [child-name (file-tree (java.io.File. parent-path child-name))]))))

(defn file-tree
  "Creates a file tree of the given file (anything clojure.java.io/as-file
  accepts). Returns nil when f is nil or does not exist.
  Directories are maps of string filenames to file trees.
  Leaf files are maps with three keys:
  :file          the java.io.File
  :last-modified the file's time last modified, org.joda.time.DateTime
  :md5-thunk     a delayed md5 calculation"
  [f]
  ; The docstring must precede the argument vector; placed after it, the
  ; string is just an expression whose value is thrown away.
  (let [f (io/as-file f)]
    (cond
      (nil? f) nil
      (not (.exists f)) nil
      (.isDirectory f) (directory f)
      :else (file f))))

(defn flatten-file-tree
  "Flattens a file tree into a sequence of leaf file maps and generates a
  :key for each one from the given base path plus the file's nested path,
  joined with \"/\".

  t    - a file tree: nil, a leaf map containing :file, or a map of
         child name -> subtree.
  path - base path for the keys, \"\" by default.

  A single leading \"/\" is removed from the generated key, so base paths
  \"\", \"prefix\" and \"/prefix\" all yield keys without a leading slash.
  A leaf passed directly with the default path gets the key \"\"."
  ([t] (flatten-file-tree t ""))
  ([t path]
   (cond
     (nil? t) []
     ; Only strip a separator that is actually there: an unconditional
     ; (subs path 1) throws on \"\" and eats the first character of a base
     ; path that does not start with \"/\".
     (:file t) [(assoc t :key (if (.startsWith ^String path "/")
                                (subs path 1)
                                path))]
     :else (mapcat (fn [[child-name subtree]]
                     (flatten-file-tree subtree (str path "/" child-name)))
                   t))))

(defn file-seq
  "Builds the file tree for f (a file or directory) and flattens it into a
  sequence of leaf file maps, generating :keys for the files as in
  flatten-file-tree. path is the base path for the keys, \"\" by default."
  ([f] (file-seq f ""))
  ([f path]
   (-> f
       file-tree
       (flatten-file-tree path))))

(defn- s3-summaries-helper [cred bucket key]
  ; Gets a lazy seq of object listings (one element per s3/list-objects
  ; response) for the objects in bucket whose keys start with the prefix key.
  ; Each listing is fetched only when its element of the seq is realized, and
  ; progress messages are written to *out* as that happens.
  ;
  ; The inner fn f is called with the marker to resume from (nil for the very
  ; first request) and the number of object summaries seen so far.
  ((fn f [next-marker running-total]
     (lazy-seq
       (let [listing (if next-marker
                       ; Follow-up page: resume from the marker of the previous listing.
                       (s3/list-objects cred :bucket-name bucket :prefix key :marker next-marker)
                       ; First page: announce the retrieval, then list without a marker.
                       (do
                         (write-safe *out* "Retrieving objects from " bucket "\n")
                         (s3/list-objects cred :bucket-name bucket :prefix key)))
             new-total (+ running-total (count (:object-summaries listing)))]
         (write-safe *out* "Got " new-total " objects\n")
         ; Coerce with `boolean` before branching: per the original author's
         ; note, a boxed Boolean false would otherwise be treated as true by `if`.
         (if (boolean (:truncated? listing)) ; Impressively, Boolean/FALSE evals to true
           ; More pages remain: emit this listing and lazily continue from its :next-marker.
           (cons listing (f (:next-marker listing) new-total))
           ; Last page: report completion and end the seq with this listing.
           (do
             (write-safe *out* "Done\n")
             [listing])))))
     nil 0))

(defn s3-summaries
  "Creates a lazy seq of the object summaries in the given S3 bucket whose
  keys start with the prefix key (empty by default). Realizing the seq may
  require further requests to S3. Every summary additionally carries a
  :metadata-thunk, a delay that fetches the object's metadata with
  s3/get-object-metadata when dereferenced.
  The path and opts arguments are accepted but not used by this function."
  ([cred bucket opts] (s3-summaries cred bucket "" opts))
  ([cred bucket key opts] (s3-summaries cred bucket key "" opts))
  ([cred bucket key path opts]
   (let [with-metadata-thunk
         (fn [summary]
           (assoc summary :metadata-thunk
                  (delay (debug-wrap *out* (str "Getting metadata for " (:key summary))
                                     (s3/get-object-metadata cred bucket (:key summary))))))]
     (->> (s3-summaries-helper cred bucket key)
          (mapcat :object-summaries)
          (map with-metadata-thunk)))))

(defn- update-map? [src dest]
  ; Returns true when some entry of src is missing from dest or is mapped to
  ; a different value there; returns nil otherwise.
  (some (fn [[k v]] (not= v (get dest k))) src))

; The metadata keys that amzn-meta selects out of a metadata map.
(def ^:private amzn-meta-keys
  #{:cache-control
    :content-disposition
    :content-encoding
    :content-type
    :expiration-time
    :expiration-time-rule-id
    :restore-expiration-time
    :server-side-encryption
    :user-metadata})

; Some meta keys can be auto-generated; this is amzn-meta-keys without
; :content-type.
(def ^:private amzn-custom-meta-keys
  (set (remove #{:content-type} amzn-meta-keys)))

(defn amzn-meta
  "Returns the entries of map m whose keys are in amzn-meta-keys."
  [m]
  (select-keys m amzn-meta-keys))

(defn- update-meta?
  "Tells whether the Amazon metadata entries of local contain anything that
  is missing from, or different in, remote-meta."
  [local remote-meta]
  (let [wanted (amzn-meta local)
        current (amzn-meta remote-meta)]
    (update-map? wanted current)))

; Per Amazon docs, an etag is an MD5 iff it's 32 hex chars.
; The Javadoc says the etag is always an MD5 -- this is wrong.
; Returns the etag when it looks like an MD5, nil otherwise (including when
; the summary has no string :etag, which would make re-matches throw).
(defn- md5-from-summary [{etag :etag}]
  (when (and (string? etag)
             (re-matches #"[\dA-Fa-f]{32}" etag))
    etag))

(defn- update-data?
  "Tells whether to update data for a local file and remote summary.
  Logical true if data should be updated, logical false otherwise.

  All of the following must hold:
  - local has a :file;
  - opts has :ignore-last-modified, or local's :last-modified is after the
    remote summary's (a missing timestamp on either side counts as the epoch);
  - opts has :ignore-md5, or the remote etag is an MD5 that differs from the
    local file's MD5."
  [local remote-summary opts]
  ; clj-time.core/after? compares date-time values, so the fallback for a
  ; missing timestamp has to be a DateTime rather than the number 0.
  (let [epoch (clj-time.coerce/from-long 0)]
    (and
      (:file local)
      (or (:ignore-last-modified opts)
          (clj-time.core/after? (or (:last-modified local) epoch)
                                (or (:last-modified remote-summary) epoch)))
      (or (:ignore-md5 opts)
          ; The local MD5 is only computed (thunk dereferenced) when the
          ; remote etag is usable as an MD5.
          (let [rmd5 (md5-from-summary remote-summary)]
            (and rmd5 (not= rmd5 @(:md5-thunk local))))))))

(defn- gen-sync-instr
  "Decides what has to happen for one local file and its remote summary:
  ::data when there is no remote summary or update-data? says the data should
  be updated, ::metadata when opts has :check-metadata and update-meta? reports
  a difference against the remote metadata, nil otherwise."
  [local remote-summary opts]
  (if (or (nil? remote-summary)
          (update-data? local remote-summary opts))
    ::data
    (when (and (:check-metadata opts)
               (update-meta? local @(:metadata-thunk remote-summary)))
      ::metadata)))

(defn- gen-sync-instrs
  "Pairs every local file with the remote summary that has the same :key (nil
  when there is none) and returns a seq of [instruction local remote] triples,
  where instruction comes from gen-sync-instr."
  [local-file-seq remote-summary-seq opts]
  (let [remote-by-key (into {} (map (juxt :key identity) remote-summary-seq))
        instr-for (fn [local]
                    (let [remote (remote-by-key (:key local))]
                      [(gen-sync-instr local remote opts) local remote]))]
    (map instr-for local-file-seq)))

(defn gen-amzn-metadata
  "For a file, get the metadata that will be sent to s3: the remote object's
  metadata (if any) overridden by the entries of local, restricted to the keys
  selected by amzn-meta, with nil/false values removed.

  remote may be nil, or lack a :metadata-thunk, for an object that does not
  exist in S3 yet; only local's metadata is used in that case."
  [local remote]
  ; Dereferencing a missing thunk would throw, so only deref when present.
  (let [remote-meta (some-> remote :metadata-thunk deref)]
    (into {} (filter (fn [[_ v]] v) ; drop entries without a value
                     (amzn-meta (merge remote-meta local))))))

(defn check-cache-control
  "Writes a notice to *out* when the remote object's :cache-control metadata
  contains a max-age greater than 60. Does nothing and returns nil when remote
  is nil, has no :metadata-thunk, or has no such cache-control value."
  [remote]
  ; remote is nil for objects that are not in S3 yet; some-> stops at the
  ; first nil instead of throwing on (deref nil).
  (let [cache-control (some-> remote :metadata-thunk deref :cache-control)
        match (when cache-control
                (re-find #"max-age\s*=\s*(\d+)" cache-control))
        long-lived? (some-> match (get 1) Long/parseLong (> 60))]
    (when long-lived?
      (write-safe *out* "Notice: overwriting data with cache-control: " (get match 0) "\n"))))

(defn sync-data
  "Sync the data (and metadata) for the given file to s3.

  cred   - credentials passed through to s3/put-object.
  local  - local file map; its :key and :file are uploaded.
  remote - the matching S3 object summary; its :bucket-name is the upload
           target and its metadata is merged in via gen-amzn-metadata."
  [cred local remote]
  ; NOTE(review): the bucket is read from remote only, so when remote is nil
  ; (object not in S3 yet) :bucket-name is nil here -- TODO confirm how new
  ; uploads are meant to get their bucket.
  (check-cache-control remote)
  (debug-wrap *out* (str "Writing " (:key local) ", "
                         (.length (:file local)) " bytes")
              (s3/put-object cred
                             :bucket-name (:bucket-name remote)
                             :key (:key local)
                             :file (:file local)
                             :metadata (gen-amzn-metadata local remote))))

(defn sync-metadata
  "Sync the metadata for the given file to s3. Metadata
  is overwritten, not updated.

  The object is copied onto itself (same bucket, same key, both taken from
  remote's :bucket-name and local's :key) with the metadata produced by
  gen-amzn-metadata, which is also pretty-printed first."
  [cred local remote]
  (debug-wrap *out* (str "Updating metadata for " (:key local))
              (pprint (gen-amzn-metadata local remote))
              (s3/copy-object cred
                              :source-bucket-name (:bucket-name remote)
                              :destination-bucket-name (:bucket-name remote)
                              :source-key (:key local)
                              :destination-key (:key local)
                              :new-object-metadata (gen-amzn-metadata local remote))))

(defn- do-sync-instr
  "Carries out one [instruction local remote] triple: ::data uploads the file
  with sync-data, ::metadata rewrites the metadata with sync-metadata, and nil
  does nothing. Any other instruction raises an IllegalArgumentException.
  opts is accepted but not used."
  [cred [action local-file remote-summary] opts]
  (condp = action
    ::data (sync-data cred local-file remote-summary)
    ::metadata (sync-metadata cred local-file remote-summary)
    nil nil))

(defn sync-summaries
  "Sync the given local-file-seq to the given remote-summaries, as in 'sync.

  First works out the instruction for every local file and reports how many
  uploads and metadata updates are pending, then executes the instructions in
  order. Returns nil."
  [cred local-file-seq remote-summaries opts]
  (let [instrs (gen-sync-instrs local-file-seq remote-summaries opts)
        ; Counting realizes every instruction before any of them is executed.
        counts (frequencies (map first instrs))]
    (write-safe *out* (get counts ::data 0) " files to upload\n")
    (write-safe *out* (get counts ::metadata 0) " metadata sets to update\n")
    (doseq [instr instrs]
      (do-sync-instr cred instr opts))))

(defn sync
  "Sync the given local-file-seq to the given s3 bucket. Valid opts include
  :check-metadata, :ignore-md5, :ignore-last-modified.

  The remote side is obtained with s3-summaries for the whole bucket and the
  work is delegated to sync-summaries."
  [cred bucket local-file-seq opts]
  (sync-summaries cred local-file-seq (s3-summaries cred bucket opts) opts))

(defn re-map-files
  "Maps a function to a file-seq while matching the given regex to the file key.
  f should be a fn of (file, regex match), where the match is whatever
  re-find returns for the file's :key.
  Elements that are nil, have no :key, or whose :key does not match the regex
  produce nil in the result instead of a call to f."
  [f regex coll]
  (map #(some->> % :key (re-find regex) (f %)) coll))

(defn add-meta-by-extension
  "Adds file metadata based on a map of file extensions -> metadata.
  The extension is the text after the last \".\" of the file's :key, without
  the dot. Files whose extension is not in extension-map, and files whose key
  has no extension at all, are returned unchanged.
  Example usage:
  (add-meta-by-extension {\"jpg\" {:cache-control \"public, max-age=3600\"}} my-files)"
  [extension-map coll]
  (let [with-meta-added (re-map-files
                          (fn [file match]
                            (merge file (get extension-map (subs match 1) {})))
                          #"\.[^\/\\\.]+$" coll)]
    ; re-map-files yields nil for keys without an extension; fall back to the
    ; original element so that such files are not lost from the sequence.
    (map (fn [original updated] (or updated original))
         coll
         with-meta-added)))
