(ns morri.meth450k.common.utils
  (:require [clojure.java.io :as io]
            [clojure.string :as str]
            [clojure.data.csv :as csv]
            [clojure.edn :as edn]
            [clojure.java.shell :as shell])
  (:import
   [org.apache.commons.io FilenameUtils]
   [org.apache.commons.math3.stat.descriptive SummaryStatistics]
   [org.apache.commons.math3.stat.inference TestUtils]))

(defn transparent
  "Call `f` on `arg` for its side effect, discard whatever `f` returns,
  and return `arg` itself. Handy for tapping a value in the middle of an
  expression."
  [f arg]
  (doto arg f))

;; tprn: `transparent` specialised to `prn` -- prints its argument with `prn`
;; and then returns that same argument, so it can be wrapped around any
;; expression to trace its value.
(def tprn (partial transparent prn))

(defn full-path
  "Canonical path of `file` as a string. `file` is coerced with `io/file`."
  [file]
  (-> file
      io/file
      .getCanonicalPath))

(defn file-root
  "Basename without extension (my own terminology): the result of
  `FilenameUtils/getBaseName` on `file`, i.e. the file name with both the
  directory part and the extension removed."
  ;; The docstring must precede the argument vector; placed after it, the
  ;; string is just a discarded expression and never reaches the var's :doc.
  [file]
  (FilenameUtils/getBaseName file))
;; tested

(defn base-name
  "File basename with extension, like unix basename.
  `file` is coerced to a java.io.File first."
  ;; The docstring must precede the argument vector; placed after it, the
  ;; string is just a discarded expression and never reaches the var's :doc.
  [file]
  ;; io/file is type-hinted to return java.io.File (io/as-file is not), so
  ;; the .getName call resolves without reflection.
  (.getName (io/file file)))
;; tested

(defn dir-name
  "Directory name for a file, like unix dirname.
  Returns nil when `file` has no parent component (e.g. a bare file name)."
  ;; The docstring must precede the argument vector; placed after it, the
  ;; string is just a discarded expression and never reaches the var's :doc.
  [file]
  ;; io/file is type-hinted to return java.io.File (io/as-file is not), so
  ;; the .getParent call resolves without reflection.
  (.getParent (io/file file)))
;;tested

(defn file-exists?
  "True when `file` (coerced with `io/file`) exists on disk."
  [file]
  (-> file
      io/file
      .exists))
;;tested

(defn build-path
  "Join the given path components with `io/file` and return the resulting
  path as a string."
  [& files]
  (let [joined (apply io/file files)]
    (str joined)))

(defn cat-files
  "Concatenate `files` into `file-name` by feeding a `cat ... > file-name`
  command line to `sh` on stdin. The command line is printed (via `tprn`)
  before it runs. Returns the result map of `shell/sh`.

  NOTE: the paths are joined with spaces and are not quoted, so the shell
  will split or expand names containing spaces or metacharacters."
  [files file-name]
  (let [tokens  (concat ["cat"] files [">" file-name])
        cat-cmd (str/join " " tokens)]
    (shell/sh "sh" :in (tprn cat-cmd))))

(defn zip-file
  "Run `gzip -f` on `file-name`, printing the file name first (via `tprn`).
  Returns the result map of `shell/sh`."
  [file-name]
  (let [target (tprn file-name)]
    (shell/sh "gzip" "-f" target)))

(defn zip-cat
  "Concatenate `files-to-zip` into one file named `zip-name` and gzip it.

  The combined file is created in the directory of the first file, e.g.
  zip-name \"combined.bed\" ends up as combined.bed.gz holding the contents
  of all the files. The files must be in the same directory."
  [zip-name files-to-zip]
  (let [target-dir    (dir-name (first files-to-zip))
        zip-file-path (str (io/file target-dir zip-name))]
    ;; cat first, then gzip the concatenated result in place
    (cat-files files-to-zip zip-file-path)
    (zip-file zip-file-path)))

(defn quoted
  "Return the string form of `s` wrapped in double-quote characters."
  [s]
  (str \" s \"))
;; tested

(defn convert-to-number
  "Return `x` as a number.

  Numbers are returned unchanged. Any other value is parsed as EDN and the
  parsed value is returned if it is a number.

  Throws ClassCastException when the parsed value is not a number (this
  includes the empty string, which parses to nil). Malformed input may also
  raise the reader's own exception."
  [x]
  (if (number? x)
    x
    ;; edn/read-string rather than clojure.core/read-string: the core reader
    ;; can evaluate code embedded in the input (the `#=` form), which is not
    ;; acceptable for strings that come out of data files. Numeric literals
    ;; are read the same way by both readers.
    (let [conv-x (edn/read-string x)]
      (if (number? conv-x)
        conv-x
        (throw (ClassCastException.
                (str "Can't convert \"" x "\" into a number")))))))
;; tested

(defn coll->nums
  "Lazily convert every element of `a-coll` with `convert-to-number`."
  [a-coll]
  (for [item a-coll]
    (convert-to-number item)))
;; tested

(defn remove-empties
  "Lazy seq of the elements of `a-coll` for which `empty?` is false
  (drops nil, empty strings and empty collections)."
  [a-coll]
  (filter (complement empty?) a-coll))

(defn populate-summarizer
  "Create a fresh SummaryStatistics and add every value of `a-vec` to it.
  Returns the populated SummaryStatistics."
  ^SummaryStatistics [a-vec]
  (reduce (fn [^SummaryStatistics acc x]
            (.addValue acc x)
            acc)
          (SummaryStatistics.)
          a-vec))

(defn average
  "Mean of the numbers in `a-vec`, as reported by SummaryStatistics."
  [a-vec]
  (.getMean (populate-summarizer a-vec)))
;; tested

(defn std-dev
  "Standard deviation of the numbers in `a-vec`, as reported by
  SummaryStatistics."
  [a-vec]
  (.getStandardDeviation (populate-summarizer a-vec)))

(defn str-std-dev
  "Standard deviation of a collection of number-like values: empty
  entries are dropped, the rest are converted to numbers, then `std-dev`
  is applied."
  [a-coll]
  (-> a-coll
      remove-empties
      coll->nums
      std-dev))

(defn str-avg
  "Average of a collection of number-like values: empty entries are
  dropped, the rest are converted to numbers, then `average` is applied."
  [a-coll]
  (-> a-coll
      remove-empties
      coll->nums
      average))

(defn ttest
  "Run `TestUtils/tTest` on summaries of `vec1` and `vec2`.
  Returns nil unless both inputs hold at least two values."
  [vec1 vec2]
  (when (every? #(>= (count %) 2) [vec1 vec2])
    (TestUtils/tTest (populate-summarizer vec1)
                     (populate-summarizer vec2))))

(defn equal-var-ttest
  "Run `TestUtils/homoscedasticTTest` (the equal-variance t-test) on
  summaries of `vec1` and `vec2`.
  Returns nil unless both inputs hold at least two values."
  [vec1 vec2]
  (when (every? #(>= (count %) 2) [vec1 vec2])
    (TestUtils/homoscedasticTTest (populate-summarizer vec1)
                                  (populate-summarizer vec2))))

(defn str-ttest
  "`ttest` for collections of number-like values: empty entries are
  dropped and the rest converted to numbers before testing."
  [coll1 coll2]
  (let [nums1 (coll->nums (remove-empties coll1))
        nums2 (coll->nums (remove-empties coll2))]
    (ttest nums1 nums2)))

(defn str-equal-var-ttest
  "`equal-var-ttest` for collections of number-like values: empty entries
  are dropped and the rest converted to numbers before testing."
  [coll1 coll2]
  (let [nums1 (coll->nums (remove-empties coll1))
        nums2 (coll->nums (remove-empties coll2))]
    (equal-var-ttest nums1 nums2)))

;; Regex matching a literal "chr" prefix at the very start of a string;
;; shared by strip-chr and require-chr.
(def chr-re #"^chr")

(defn strip-chr
  "Return the name of `chr` (string, keyword or symbol) with a leading
  \"chr\" removed, e.g. \"chr1\" -> \"1\"."
  [chr]
  (-> chr
      name
      (str/replace-first chr-re "")))

(defn require-chr
  "Return `chr` as a string that starts with \"chr\", adding the prefix
  when it is missing.

  Keywords and symbols are converted with `name`, anything else with `str`,
  so \"1\", 1 and :1 all give \"chr1\", and \"chr1\" and :chr1 both give
  \"chr1\"."
  [chr]
  ;; `str` on a keyword keeps the leading colon (":chr1"), which can never
  ;; match chr-re and used to produce "chr:chr1"; `name` drops the colon, in
  ;; line with how strip-chr reads its argument.
  (let [label (if (instance? clojure.lang.Named chr)
                (name chr)
                (str chr))]
    (if (re-find chr-re label)
      label
      (str "chr" label))))
;tested

(defn ucsc
  "Format a region as \"<chr>:<start>-<stop>\", where the chromosome is
  passed through `require-chr` first."
  [chr start stop]
  (let [chrom (require-chr chr)]
    (str chrom ":" start "-" stop)))
;tested

(defn print-and-flush
  "Print the arguments with `print` (no trailing newline) and flush the
  output stream right away so the text shows up immediately. Returns nil."
  [& s]
  (let [_ (apply print s)]
    (flush)))

(defn show-progress
  "Return a lazy seq with the same elements as `sequence` that reports
  progress as it is consumed: a \"Processing ...\" banner is printed when
  the first element is realized, and the running count is printed for
  every `n`-th element."
  [n sequence]
  (let [report (fn [idx item]
                 (when (= idx 1)
                   (print-and-flush (str \newline "Processing ...")))
                 (when (= 0 (rem idx n))
                   (print-and-flush (str " " idx " ")))
                 ;; the element itself passes through untouched
                 item)]
    (map report (iterate inc 1) sequence)))

(defn csv-write
  "Write `data` (a sequence of rows) to `output-file` as CSV. The writer is
  closed when done."
  [output-file data]
  (with-open [w (io/writer output-file)]
    ;; realize the rows into a vector before handing them to the CSV writer
    (let [rows (vec data)]
      (csv/write-csv w rows))))

(defn csv-read
  "Read all of `csv-file` as CSV and return the fully realized rows."
  [csv-file]
  (with-open [rdr (io/reader csv-file)]
    ;; doall: the lazy rows must be realized before the reader is closed
    (-> rdr
        csv/read-csv
        doall)))

(defn read-edn
  "Read the first EDN form from `edn-file` (anything `io/reader` accepts).
  `readers`, when truthy, is passed to the EDN reader as its :readers
  option (a map of tag symbol -> reader fn)."
  ([edn-file]
   (read-edn edn-file false))
  ([edn-file readers]
   (let [opts (if readers {:readers readers} {})]
     (with-open [rdr (java.io.PushbackReader. (io/reader edn-file))]
       (edn/read opts rdr)))))

(defn between?
  "True when `value` lies within `range`, a two-element [low high]
  collection; both bounds are inclusive."
  [value range]
  (let [low  (first range)
        high (second range)]
    (<= low value high)))

(defn dist-rel-to-tss
  "Signed distance of `pos` from `tss`, oriented by `strand`:
  \"+\" gives pos - tss, \"-\" gives tss - pos.
  Any other strand value throws IllegalArgumentException."
  [strand tss pos]
  (condp = strand
    "+" (- pos tss)
    "-" (- tss pos)))

(defn meth-level
  "Translate between methylation values and level names.

  Given a keyword (:low, :med or :high), return its [min max] range;
  any other keyword throws IllegalArgumentException.
  Given a number, return the keyword of the range containing it
  (bounds inclusive), or nil when it falls in a gap between the ranges."
  [meth]
  (let [low-band  [0 0.15]
        med-band  [0.4 0.7]
        high-band [0.85 1]]
    (if-not (keyword? meth)
      ;; number -> first level whose band contains it, else nil
      (some (fn [[level band]]
              (when (between? meth band) level))
            [[:low low-band] [:med med-band] [:high high-band]])
      ;; keyword -> its band
      (condp = meth
        :low  low-band
        :med  med-band
        :high high-band))))

;; (meth-level 0.1) => :low
;; (meth-level 0.3) => nil
;; (meth-level 0.5) => :med
;; (meth-level 0.8) => nil
;; (meth-level 0.9) => :high

;; (meth-level :low)  => [0 0.15]
;; (meth-level :med)  => [0.4 0.7]
;; (meth-level :high) => [0.85 1]
