;; Copyright (c) Sławek Gwizdowski
;;
;; Permission is hereby granted, free of charge, to any person obtaining
;; a copy of this software and associated documentation files (the "Software"),
;; to deal in the Software without restriction, including without limitation
;; the rights to use, copy, modify, merge, publish, distribute, sublicense,
;; and/or sell copies of the Software, and to permit persons to whom the
;; Software is furnished to do so, subject to the following conditions:
;;
;; The above copyright notice and this permission notice shall be included
;; in all copies or substantial portions of the Software.
;;
;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
;; OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
;; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
;; THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
;; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
;; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
;; IN THE SOFTWARE.
;;
(ns ^{:author "Sławek Gwizdowski"
      :doc "Input and Output processors."}
  szew.io
  (:require [clojure.java.io :as clj.io]
            [clojure.string :as string]
            [clojure-csv.core :as csv]
            [clojure.data.zip :as data.zip]
            [clojure.data.xml :as data.xml]
            [clojure.zip :as clj.zip]
            [szew.io.util :as util])
  (:import [java.io File
                    BufferedReader BufferedInputStream InputStream
                    BufferedWriter]
           [java.security MessageDigest]
           [java.util.zip GZIPInputStream GZIPOutputStream]))


;; # Inputs!

;; ## The Protocols

;; Read side of a spec. The spec record carries the configuration
;; (including the processor), `source` is whatever the implementation
;; knows how to open.
(defprotocol Input
  (in! [spec source] "Read source, feed data to processor, return result."))

;; Write side of a spec. Calling `sink` does not write anything by itself:
;; it returns a function, and the write happens when that function is called
;; with the data.
(defprotocol Output
  (sink [spec target] "Returned callable will eat seq and write to target."))

;; ## Text files, line reader.

(defrecord Lines [encoding   ;; r+w
                  eol        ;; w
                  append     ;; w
                  final-eol  ;; w
                  part       ;; w
                  processor  ;; r
                  reader     ;; r
                  writer]    ;; w
  Input
  ;; Opens source through `reader`, hands the lazy seq of lines to
  ;; `processor` and returns whatever processor returns. The reader is
  ;; closed on the way out, so processor has to realize what it needs.
  (in! [spec source]
    (io! "Reading from file!"
         (with-open [^BufferedReader rdr (reader source :encoding encoding)]
           (-> rdr line-seq processor))))
  Output
  ;; Returns a function of one seq. Items are stringified, grouped into
  ;; chunks of `part` items and each chunk is written with a single call.
  (sink [spec target]
    (fn lines-sink [a-seq]
      (io! "Writing to file!"
           (with-open [^BufferedWriter out (writer target
                                                   :encoding encoding
                                                   :append append)]
             (loop [chunks (->> a-seq
                                (map str)
                                (partition part part nil))]
               (when-let [chunk (first chunks)]
                 (let [remaining (rest chunks)]
                   (.write out ^String (string/join eol chunk))
                   ;; Separator between chunks; after the very last chunk
                   ;; only when final-eol is requested.
                   (when (or (seq remaining) final-eol)
                     (.write out ^String eol))
                   (recur remaining)))))))))

(defn lines
  "Builds a Lines spec, usable for both reading and writing.

  Called without arguments it returns the default spec. Called with a map
  it returns the default spec with the given entries laid over it:

  :encoding  (read, write) charset name, defaults to UTF-8
  :eol       (write) line separator, defaults to \\n
  :append    (write) append to target instead of overwriting, defaults to false
  :final-eol (write) emit :eol after the last line, defaults to false
  :part      (write) number of rows written at once, defaults to 64
  :processor (read) function fed with the seq of lines, defaults to vec
  :reader    (read) reader factory, defaults to clojure.java.io/reader
  :writer    (write) writer factory, defaults to clojure.java.io/writer
  "
  ([]
   (lines {}))
  ([spec]
   (let [defaults (map->Lines {:encoding  "UTF-8"
                               :eol       "\n"
                               :append    false
                               :final-eol false
                               :part      64
                               :processor vec
                               :reader    clj.io/reader
                               :writer    clj.io/writer})]
     (into defaults spec))))

;; ## Delimiter Separated Values & Friends

(defrecord DSV [encoding    ;; r+w
                append      ;; w
                delimiter   ;; r+w
                strict      ;; r
                eol         ;; r+w
                quote-char  ;; r+w
                force-quote ;; w
                part        ;; w
                processor   ;; r
                reader      ;; r
                writer]     ;; w
  Input
  ;; Parses source with clojure-csv and feeds the rows to `processor`.
  ;; Every row carries {:line n} metadata, n being the 1-based position of
  ;; the row in the parsed output. Exceptions raised while obtaining a row
  ;; are rethrown as ex-info with {:line n} and the original as cause.
  ;; Empty input yields an empty seq for the processor (it used to blow up
  ;; with an "Issue on line: 1" error caused by attaching metadata to nil).
  (in! [spec source]
    (letfn [(issue [idx cause]
              (ex-info (format "Issue on line: %d (1-indexed)." idx)
                       {:line idx}
                       cause))
            (indexer [idx rows]
              (cons
                ;; Goof on the get-go:
                (try
                  (with-meta (first rows) {:line idx})
                  (catch Exception ex
                    (throw (issue idx ex))))
                ;; Goof on the next candidate:
                (try
                  (when (seq (rest rows))
                    (lazy-seq (indexer (inc idx) (rest rows))))
                  (catch Exception ex
                    (throw (issue (inc idx) ex))))))
            (index-all [rows]
              ;; Forcing the first row may already fail, report it as line 1.
              (if (try
                    (seq rows)
                    (catch Exception ex
                      (throw (issue 1 ex))))
                (indexer 1 rows)
                ()))]
      (io! "Reading from file!"
        (with-open [^BufferedReader r (reader source :encoding encoding)]
          (processor (index-all (csv/parse-csv r
                                               :delimiter delimiter
                                               :end-of-line eol
                                               :quote-char quote-char
                                               :strict strict)))))))
  Output
  ;; Returns a function of one seq of rows. Rows are serialized with
  ;; clojure-csv in chunks of `part` rows, one write call per chunk.
  (sink [spec target]
    (fn csv-sink [a-seq]
      (io! "Writing to file!"
           (with-open [^BufferedWriter w (writer target
                                                 :encoding encoding
                                                 :append append)]
             (loop [parts (partition part part nil a-seq)]
               (when (seq parts)
                 (.write w ^String (csv/write-csv (first parts)
                                                  :delimiter   delimiter
                                                  :end-of-line eol
                                                  :quote-char  quote-char
                                                  :force-quote force-quote))
                 (recur (rest parts)))))))))

(defn csv
  "Builds a DSV spec preset for comma separated values, usable for both
  reading and writing.

  Called without arguments it returns the default spec. Called with a map
  it returns the default spec with the given entries laid over it:

  :encoding    (read, write) charset name, defaults to UTF-8
  :append      (write) append to target, defaults to false
  :delimiter   (read, write) defaults to \\, can be a String
  :strict      (read) defaults to true
  :eol         (read, write) defaults to \\n, can be a String (\\r\\n)
  :quote-char  (read, write) defaults to \\\"
  :force-quote (write) defaults to false
  :part        (write) number of rows written at once, defaults to 64
  :processor   (read) function fed with the seq of rows, defaults to vec
  :reader      (read) reader factory, defaults to clojure.java.io/reader
  :writer      (write) writer factory, defaults to clojure.java.io/writer

  When reading, setting :eol to nil causes parser to accept both LF and CRLF.
  "
  ([]
   (csv {}))
  ([spec]
   (let [defaults (map->DSV {:encoding    "UTF-8"
                             :append      false
                             :delimiter   \,
                             :strict      true
                             :eol         "\n"
                             :quote-char  \"
                             :force-quote false
                             :part        64
                             :processor   vec
                             :reader      clj.io/reader
                             :writer      clj.io/writer})]
     (into defaults spec))))

(defn tsv
  "Builds a DSV spec preset for tab separated values: the default csv spec
  with :delimiter replaced by \\tab. Usable for both reading and writing.

  Called without arguments it returns that preset. Called with a map it
  returns the preset with the given entries laid over it:

  :encoding    (read, write) charset name, defaults to UTF-8
  :append      (write) append to target, defaults to false
  :delimiter   (read, write) defaults to \\tab can be a String
  :strict      (read) defaults to true
  :eol         (read, write) defaults to \\n, can be a String (\\r\\n)
  :quote-char  (read, write) defaults to \\\"
  :force-quote (write) defaults to false
  :part        (write) number of rows written at once, defaults to 64
  :processor   (read) function fed with the seq of rows, defaults to vec
  :reader      (read) reader factory, defaults to clojure.java.io/reader
  :writer      (write) writer factory, defaults to clojure.java.io/writer

  When reading, setting :eol to nil causes parser to accept both LF and CRLF.
  "
  ([]
   (tsv {}))
  ([spec]
   (-> (csv)
       (assoc :delimiter \tab)
       (into spec))))

;; ## Fixed Width Processing

(defrecord FixedWidth [encoding   ;; r+w
                       append     ;; w
                       eol        ;; w
                       strict     ;; r
                       widths     ;; r+w
                       final-eol  ;; w
                       part       ;; w
                       processor  ;; r
                       reader     ;; r
                       writer]    ;; w
  Input
  ;; Reads source line by line, splits every accepted line with
  ;; util/fixed-width-split and feeds the result to `processor`.
  ;; With a truthy `strict` every line is accepted; otherwise lines shorter
  ;; than the sum of `widths` are dropped before splitting.
  (in! [spec source]
    ;;TODO: line indexing and error raising?
    (let [total   (reduce + widths)
          wanted? (if strict
                    (fn [_] true)
                    (fn [line] (<= total (count line))))
          split   (util/fixed-width-split widths)]
      (io! "Reading from file!"
           (with-open [^BufferedReader rdr (reader source :encoding encoding)]
             (processor (map split (filter wanted? (line-seq rdr))))))))
  Output
  ;; Returns a function of one seq of rows. Each row is stringified, passed
  ;; through util/row-adjuster, every cell is padded with spaces and cut to
  ;; its width, and the cells are glued into one line. Lines are written in
  ;; chunks of `part`.
  (sink [spec target]
    (let [blanks  (mapv (fn [width] (string/join (repeat width " "))) widths)
          adjust  (util/row-adjuster blanks)
          pad-cut (fn [width fill content]
                    ;; Append the filler, then keep exactly `width` chars.
                    (subs (str content fill) 0 width))
          render  (fn [row]
                    (->> row
                         (mapv str)
                         adjust
                         (mapv pad-cut widths blanks)
                         string/join))]
      (fn fixed-width-sink [a-seq]
        (io! "Writing to file!"
             (with-open [^BufferedWriter out (writer target
                                                     :encoding encoding
                                                     :append append)]
               (loop [chunks (partition part part nil (map render a-seq))]
                 (when-let [chunk (first chunks)]
                   (let [remaining (rest chunks)]
                     (.write out ^String (string/join eol chunk))
                     ;; Separator between chunks; after the last chunk
                     ;; only when final-eol is requested.
                     (when (or (seq remaining) final-eol)
                       (.write out ^String eol))
                     (recur remaining))))))))))

(defn fixed-width
  "Accepts map, creates FixedWidth spec, good for reading and writing. Fields are:

  :encoding used for reading and writing, defaults to UTF-8
  :append used for writing, defaults to false
  :eol used for writing, defaults to \\n, can be a String (\\r\\n)
  :strict used for reading, defaults to false; when falsy, lines shorter
          than the sum of :widths are skipped, when truthy every line is
          handed to the splitter
  :widths used for reading and writing, defaults to [], should be [int]
  :final-eol used for writing, defaults to false
  :part used for writing, this many rows get written at once, defaults to 64
  :processor used for reading, defaults to vec
  :reader, used for reading, defaults to clojure.java.io/reader
  :writer, used for writing, defaults to clojure.java.io/writer

  Invoking (fixed-width) produces default spec.
  "
  ([]
   (map->FixedWidth {:encoding  "UTF-8"
                     :append    false
                     :eol       "\n"
                     :strict    false
                     :widths    []
                     :final-eol false
                     :part      64
                     :processor vec
                     :reader    clj.io/reader
                     :writer    clj.io/writer}))
  ([spec]
   (into (fixed-width) spec)))


;; ## Basic XML processing

(defrecord XML [encoding append processor reader writer]
  Input
  ;; Opens source through `reader`, parses it with clojure.data.xml and
  ;; feeds the parsed tree to `processor`. The reader is closed on the way
  ;; out, so processor has to realize what it needs before returning.
  (in! [spec source]
    (io! "Reading from file!"
         (with-open [^BufferedReader r (reader source :encoding encoding)]
           (processor (data.xml/parse r)))))
  Output
  ;; Returns a function that emits its argument (despite the name it is
  ;; handed to data.xml/emit as-is, so it should be an element tree)
  ;; to target.
  (sink [spec target]
    (fn xml-sink [a-seq]
      (io! "Writing to file!"
           (with-open [^BufferedWriter w (writer target
                                                 :encoding encoding
                                                 :append append)]
             ;; Pass the encoding on, otherwise the XML declaration always
             ;; announces UTF-8, even when the writer uses another charset.
             (data.xml/emit a-seq w :encoding encoding))))))

(defn xml
  "Builds an XML spec, usable for both reading and writing.

  Called without arguments it returns the default spec. Called with a map
  it returns the default spec with the given entries laid over it:

  :encoding  (read, write) charset name, defaults to UTF-8
  :append    (write) append to target, defaults to false
  :processor (read) defaults to a realizer that walks all descendants of
             the parsed document and returns the document (see source)
  :reader    (read) reader factory, defaults to clojure.java.io/reader
  :writer    (write) writer factory, defaults to clojure.java.io/writer

  Tips:

  1. clojure.zip/xml-zip first
  2. clojure.data.zip.xml next: xml->, attr=, attr, :tag, tag=, text & text=
  3. clojure.zip/node within! xml-> will extract whole loc from zip
  4. xml-> returns sequence of matches.
  "
  ([]
   (xml {}))
  ([spec]
   (let [realizer (fn realizer [some-xml]
                    ;; Visit every descendant so that the whole tree is
                    ;; realized, then hand back the tree itself.
                    (dorun (data.zip/descendants (clj.zip/xml-zip some-xml)))
                    some-xml)
         defaults (map->XML {:encoding  "UTF-8"
                             :append    false
                             :processor realizer
                             :reader    clj.io/reader
                             :writer    clj.io/writer})]
     (into defaults spec))))

;; ## File tree processor

(defn prunning-file-seq
  "Walks the file tree rooted at `entry`, like file-seq does.

  With one argument this is exactly file-seq. With a `follow?` predicate,
  children of every directory are filtered through it: entries it rejects
  are left out, and rejected directories are never descended into.
  The root entry itself is always part of the result."
  ([entry]
   (file-seq entry))
  ([^java.io.File entry follow?]
   (letfn [(directory? [^java.io.File candidate]
             (.isDirectory candidate))
           (kept-children [^java.io.File directory]
             (filter follow? (.listFiles directory)))]
     (tree-seq directory? kept-children entry))))

(defrecord Files [only-existing-root follow? processor]
  Input
  ;; Walks the tree under source with prunning-file-seq and feeds the
  ;; resulting seq to `processor`. When `only-existing-root` is truthy and
  ;; the root does not exist or cannot be read, nothing is walked and nil
  ;; is returned.
  (in! [spec source]
    (let [^File root (clj.io/as-file source)]
      (when-not (and only-existing-root
                     (not (and (.exists root) (.canRead root))))
        (io! "Walking over files!"
             (-> root
                 (prunning-file-seq follow?)
                 processor))))))

(defn files
  "Accepts map, creates Files spec, good for reading only (Files implements
  Input, it has no sink). Fields are:

  :only-existing-root used for reading, defaults to true; when truthy and
                      the root does not exist or cannot be read, in!
                      returns nil without calling the processor
  :follow? used for reading, prune/keep predicate, defaults to (constantly true)
  :processor used for reading, defaults to vec

  Invoking (files) produces default spec.
  "
  ([]
   (map->Files {:only-existing-root true
                :follow?            (constantly true)
                :processor          vec}))
  ([spec]
   (into (files) spec)))

;; ## Single file -> hash

;; This code is written like this, because old Java.
(defrecord Hasher [hash-name skip-bytes sample-size input-stream]
  Input
  ;; Opens source through `input-stream`, skips `skip-bytes` bytes, then
  ;; digests up to `sample-size` bytes (everything when :full) with the
  ;; `hash-name` algorithm. Returns the digest as a lowercase hex string,
  ;; always two characters per digest byte.
  (in! [spec source]
    (let [a-file      ^File (clj.io/as-file source)
          sample-size (long (if (= :full sample-size)
                              (* 128 1099511627776) sample-size)) ;; 128TiB
          buff-size   (long (* 256 1024))
          buffer      (byte-array buff-size (byte 0))
          mess        (MessageDigest/getInstance hash-name)]
      (io! "Reading from file!"
           ;; Hinted as plain InputStream so that custom :input-stream
           ;; factories do not have to return a BufferedInputStream.
           (with-open [^InputStream r (input-stream a-file)]
             ;; Skipping happens inside with-open, so the stream gets closed
             ;; even if skip throws. InputStream.skip may skip fewer bytes
             ;; than asked for: keep going until done or no progress is made.
             (loop [to-skip (long skip-bytes)]
               (when (pos? to-skip)
                 (let [skipped (.skip r to-skip)]
                   (when (pos? skipped)
                     (recur (- to-skip skipped))))))
             (loop [read-bytes (.read r buffer 0 (min buff-size sample-size))
                    to-read    sample-size]
               ;; A negative read-bytes means end of stream: nothing is left.
               (let [left-to-read (if (neg? read-bytes)
                                    0 (- to-read read-bytes))]
                 (when-not (neg? read-bytes)
                   (.update mess buffer 0 read-bytes))
                 (if (zero? left-to-read)
                   ;; Format byte by byte: BigInteger.toString(16) drops
                   ;; leading zeros, which made some digests come out short.
                   (string/join (map #(format "%02x" %) (.digest mess)))
                   (recur (.read r buffer 0 (min buff-size left-to-read))
                          left-to-read)))))))))

(defn hasher
  "Accepts map, creates Hasher spec, good for reading only (Hasher implements
  Input, it has no sink). Fields are:

  :hash-name used for reading, defaults to SHA-256
  :skip-bytes used for reading, skips this many bytes, defaults to 0
  :sample-size used for reading, defaults to :full, can be :full or long
  :input-stream used for reading, defaults to clojure.java.io/input-stream

  Invoking (hasher) produces default spec. When a map is given the resulting
  :sample-size must be :full or a positive number, otherwise the
  post-condition fails with an AssertionError.
  "
  ([]
   (map->Hasher {:hash-name    "SHA-256"
                 :skip-bytes   0
                 :sample-size  :full
                 :input-stream clj.io/input-stream}))
  ([spec]
   {:post [(or (= :full (:sample-size %))
               (and (number? (:sample-size %))
                    (pos? (:sample-size %))))]}
   (into (hasher) spec)))


;; ## Helpers

(defn gzip-input-stream
  "Return input-stream with GZIPInputStream within.

  `file-like` and `opts` are passed to clojure.java.io/input-stream to open
  the raw stream. GZIPInputStream reads the header when constructed; if that
  fails the raw stream is closed before the exception is rethrown, so a bad
  input does not leak an open stream.
  "
  [file-like & opts]
  (let [^java.io.InputStream raw (apply clj.io/input-stream file-like opts)]
    (try
      (clj.io/input-stream (GZIPInputStream. raw))
      (catch Throwable t
        (.close raw)
        (throw t)))))

(defn gzip-output-stream
  "Opens `file-like` with clojure.java.io/output-stream (passing `opts`
  along), wraps it in a GZIPOutputStream and returns that as an
  output-stream: bytes written to the result end up gzipped in the target.
  "
  [file-like & opts]
  (let [raw     (apply clj.io/output-stream file-like opts)
        gzipped (GZIPOutputStream. raw)]
    (clj.io/output-stream gzipped)))

(defn gzip-reader
  "Opens `file-like` with gzip-input-stream and returns
  a clojure.java.io/reader over it. The same `opts` are given to both
  the stream and the reader.
  "
  [file-like & opts]
  (let [unzipped (apply gzip-input-stream file-like opts)]
    (apply clj.io/reader unzipped opts)))

(defn gzip-writer
  "Opens `file-like` with gzip-output-stream and returns
  a clojure.java.io/writer over it. The same `opts` are given to both
  the stream and the writer.
  "
  [file-like & opts]
  (let [zipped (apply gzip-output-stream file-like opts)]
    (apply clj.io/writer zipped opts)))

