(ns morri.table-import
  (:require
   [morri.meth450k.common
    [db-utils :as db-utils]
    [command-line :as cli]
    [utils :as utils :refer [tprn]]]
   [clojure.string :as string]
   [clojure.java.io :as io]
   [clojure.java.jdbc :as jdbc]
   [java-jdbc.ddl :as ddl]
   [java-jdbc.ddl :as sql]
   [pallet.thread-expr :refer [if->> when->> when-not->>]]
   [clojure.data.csv :as csv])
  (:gen-class))

;; Functions for converting a data table into a sqlite table

;; planning to use it on a very large table

(defn db-safe-name
  "Returns the name of `s` (a string, keyword or symbol) with every
  period, hyphen and space replaced by an underscore."
  [s]
  (-> s
      name
      (string/replace #"\.|-| " "_")))
;tested

(defn prefix-if-needed
  "Returns `in` unchanged when it already starts with an underscore;
  otherwise returns `in` with a single underscore prepended."
  [in]
  (if (re-find #"^_" in)
    in
    (str "_" in)))
;tested

(defn illumina-col-base
  "Returns the part of `s` after its final `#`, or `s` itself when it
  contains no `#`."
  [s]
  ;; string/split returns a vector, so peek yields its last element.
  (-> s
      (string/split #"#")
      peek))

(defn make-colspec
  "Returns a map from each entry of `columns` to `type-name`.

  The result is an array map, so iterating it yields the columns in the
  order they were supplied. A hash map (what `zipmap` produces once
  there are more than a handful of keys) would iterate in hash order,
  and callers that turn the entries into column definitions would then
  create the table with its columns shuffled.

  Duplicate column names collapse to a single entry, kept at the
  position of the first occurrence."
  [type-name columns]
  (apply array-map
         (interleave (distinct columns) (repeat type-name))))

;; switch to new api
(defn create-sqlite-table
  "Recreates `table` in `db`: first calls `db-utils/try-drop-table`,
  then issues a create-table command in which every entry of `columns`
  is given the :numeric type. Returns whatever `jdbc/db-do-commands`
  returns."
  [db table columns]
  (db-utils/try-drop-table db table)
  (let [colspec   (make-colspec :numeric columns)
        ;; Each map entry of the colspec is passed as one column spec.
        create-cmd (apply ddl/create-table table colspec)]
    (jdbc/db-do-commands db create-cmd)))
;tested

;; Command-line option specs. `-main` hands this vector, together with the
;; raw arguments, to `cli/parse-command-line`; the exact spec format is
;; defined by that helper (not shown in this file).
;;
;; How the parsed options are consumed in this namespace:
;;   :file-name, :sqlite-file, :table-name, :index-col
;;       read by `data-table->sqlite`.
;;   :n-extra-lines, :sep, :csv, :first-col-name, :genome-studio,
;;   :prefix, :lower-case
;;       read by `header-and-data`. :n-extra-lines is the number of leading
;;       rows dropped before the header row; :sep is only used when :csv is
;;       falsey; :genome-studio forces prefixing and lower-casing on.
;;
;; NOTE(review): "-indx" is a multi-character short switch -- confirm that
;; the parser accepts it as written.
(def options-config
  [["-h" "--help" "Show help" :default false :flag true]
   ["-f" "--file-name" "File to insert into the sqlite db"]
   ["-p" "--prefix" "Prefix the column names with an underscore (_) for sql compatability" :default false :flag true]
   ["-l" "--lower-case" "Convert column names to lower case" :default false :flag true]
   ["-g" "--genome-studio" "Geneome Studio file? Implies prefix and lower-case true" :default false :flag true]
   ["-s" "--sqlite-file" "File for storing the sqlite db"]
   ["-t" "--table-name" "Name for the resulting sqlite table" :parse-fn keyword]
   ["--n-extra-lines" "Extra lines before header" :default 8 :parse-fn cli/ensure-int]
   ["--csv" "CSV File?" :default false :flag true]
   ["--first-col-name" "Use this as the name for the first column?" :default false :parse-fn keyword]
   ["--sep" "Optional Separator" :default '#"\t" :parse-fn #(re-pattern %)]
   ["-indx" "--index-col" "Index column" :parse-fn keyword]])

;; I'm not sure if remove-quotes works any more or if we need it for anything.

(defn header-and-data
  "Reads a delimited table from `rdr` and returns `[header rows]`.

  Options read from `opts`:
    :n-extra-lines   number of leading rows to discard before the header
    :csv             when truthy, parse with `csv/read-csv`; otherwise
                     split each line on the regex :sep
    :first-col-name  when truthy, replaces the first header cell
    :genome-studio   when truthy, keep only the text after the last `#`
                     in each header cell and force :prefix and
                     :lower-case on
    :prefix          when truthy, prepend `_` to names lacking one
    :lower-case      when truthy, lower-case the names

  `header` is a sequence of keywords, made database-safe with
  `db-safe-name`. `rows` is the remaining rows as sequences of strings
  (nil when there are none). `rows` is lazy and backed by `rdr`, so it
  must be consumed while the reader is still open.

  NOTE(review): in the non-CSV path `string/split` is called without a
  limit, so trailing empty fields on a line are dropped and such a row
  can be shorter than the header."
  [opts rdr]
  (let [{:keys [n-extra-lines sep csv first-col-name genome-studio]} opts
        ;; Genome Studio files always get prefixed, lower-cased names.
        prefix? (or genome-studio (:prefix opts))
        lower-case? (or genome-studio (:lower-case opts))
        split-data (drop n-extra-lines
                         (if csv
                           (csv/read-csv rdr)
                           (map #(string/split % sep) (line-seq rdr))))
        header (->> (first split-data)
                    ;; The override may arrive as a keyword (the option's
                    ;; parse-fn is `keyword`). Convert it to a string so the
                    ;; string-only steps below, illumina-col-base in
                    ;; particular, do not throw on it.
                    (if->> first-col-name
                           (#(cons (name first-col-name) (next %))))
                    (if->> genome-studio (map illumina-col-base) identity)
                    (map db-safe-name)
                    (if->> prefix? (map prefix-if-needed) identity)
                    (if->> lower-case? (map string/lower-case) identity)
                    (map keyword))]
    [header (next split-data)]))

(defn data-table->sqlite
  "Imports the file named by (:file-name opts) into table
  (:table-name opts) of the sqlite database (:sqlite-file opts).

  The table is recreated with one column per header field, all rows
  are inserted, and an index is then created on (:index-col opts) or,
  when that option is absent, on the first header column. Progress
  messages are printed to standard output. Returns the result of
  `db-utils/try-create-index`."
  [opts]
  (let [{:keys [file-name sqlite-file table-name index-col]} opts]
    (println "Importing" file-name)
    ;; The rows are read lazily, so all work happens inside with-open.
    (with-open [raw-rdr (io/reader file-name)]
      (let [[header data] (header-and-data opts raw-rdr)
            db (db-utils/sqlite-db sqlite-file)
            index-on (or index-col (first header))]
        (create-sqlite-table db table-name header)
        (println "Inserting Data")
        (apply jdbc/insert! db table-name header
               (utils/show-progress 1000 data))
        (println "Creating Index on" index-on)
        (db-utils/try-create-index db table-name [index-on])))))

(defn -main
  "Command-line entry point: parses `args` against `options-config`,
  runs the import, then shuts down the agent thread pools."
  [& args]
  (-> args
      (cli/parse-command-line options-config)
      data-table->sqlite)
  (shutdown-agents))
