(ns morri.unique-tss
  (:require [clojure.string :as str]
            [honeysql.core :as sql]
            [honeysql.helpers :refer :all]
            [morri.meth450k.common.command-line :as cli]
            [morri.meth450k.common.ucsc-db :as ucsc-db]
            [morri.meth450k.common.utils :as utils]
            [morri.unique-tss.gencode :as gencode]
            [morri.unique-tss.known-gene :as known-gene])
  (:gen-class))

(defn get-tx-coord-map
  "Queries `table` for the row whose `name` column equals `transcript` and
  returns the first row of the result (nil when the result is empty).

  The query selects `chrom` aliased to `chr`, `txStart` aliased to `start`,
  `txEnd` aliased to `stop`, and `strand`. The exact shape of the returned
  row is whatever `ucsc-db/ucsc-query` produces; callers in this namespace
  read it with the keys :chr, :start, :stop and :strand."
  [table transcript]
  (let [selection (select [:chrom :chr]
                          [:txStart :start]
                          [:txEnd :stop]
                          :strand)
        statement (sql/format (where (from selection table)
                                     [:= :name transcript]))
        rows      (ucsc-db/ucsc-query statement)]
    (first rows)))

(defn get-tx-tss
  "Returns a map {:chr .. :strand .. :tss ..} describing the transcription
  start site of transcript `tx` in `table`.

  The :tss value is the transcript's :start when the strand is \"+\" and its
  :stop for any other strand value. The returned map is used as the key of
  the tx-map built by `try-add-tx`.

  When the coordinate lookup yields nil, every value in the returned map is
  nil."
  [table tx]
  (let [{:keys [chr strand start stop]} (get-tx-coord-map table tx)
        plus-strand? (= "+" strand)]
    {:chr    chr
     :strand strand
     :tss    (if plus-strand? start stop)}))

(defn try-add-tx
  "Reducing step: returns `tx-map` with the entry for `this-transcript`'s
  TSS key set to whichever transcript `which-tx-is-better` picks.

  `which-tx-is-better` is called with `this-transcript` first and the
  transcript currently stored under the same TSS key second; the second
  argument is nil when no transcript has been stored for that key yet."
  [table which-tx-is-better tx-map this-transcript]
  (let [tss-key   (get-tx-tss table this-transcript)
        incumbent (tx-map tss-key)
        winner    (which-tx-is-better this-transcript incumbent)]
    (assoc tx-map tss-key winner)))

(defn unique-tss-transcripts
  "Folds `some-transcripts` into a map keyed by TSS (one `try-add-tx` step
  per transcript, starting from an empty map) and returns the map's values,
  i.e. the single transcript kept for each distinct TSS.

  Returns nil when `some-transcripts` is empty, because `vals` of an empty
  map is nil."
  [table which-tx-is-better some-transcripts]
  (let [add-tx (fn [tss->tx transcript]
                 (try-add-tx table which-tx-is-better tss->tx transcript))]
    (->> some-transcripts
         (reduce add-tx {})
         vals)))

(def gene-model-options
  "The set of gene-model keywords accepted by the --gene-model option."
  #{:knownGene
    :wgEncodeGencodeBasicV17})

;; Command-line option specs handed to `cli/parse-command-line` by `-main`.
;; Each entry is [short-flag long-flag description & keyword-options].
;; NOTE(review): the layout looks like tools.cli-style specs, but the exact
;; semantics of :flag / :default / :parse-fn are defined by the project-local
;; `cli` namespace -- confirm there.
(def options-config
  ;; --help: declared as a flag that defaults to false.
  [["-h" "--help" "Generate a unique list of all the tss"
    :default false :flag true]
   ;; --output-file: no default is declared here; `unique-tss` passes the
   ;; value straight to `utils/csv-write`.
   ["-f" "--output-file" "Output file for the Unique tss list"]
   ;; --limit: defaults to false, in which case `unique-tss` does not
   ;; truncate the transcript list.
   ["-l" "--limit" "Limit for testing" :default false :parse-fn cli/ensure-int]
   ;; --gene-model: the description embeds the printed set of allowed values;
   ;; the default is :knownGene.
   ["-g" "--gene-model"
    (str "Database for gene model, choose from " gene-model-options)
    :default :knownGene
    :parse-fn (cli/validate gene-model-options)]])

(defn unique-tss
  "Writes one transcript per distinct TSS to `output-file` as CSV, one
  single-column row per transcript.

  Takes a map with the keys:
    :output-file  destination passed to `utils/csv-write`
    :limit        when truthy, only the first `limit` transcript names are
                  processed
    :gene-model   :wgEncodeGencodeBasicV17 or :knownGene; selects both the
                  source of candidate transcripts and the tie-breaking
                  function, and is also used as the table to query

  Any other :gene-model value makes `case` throw, since no default clause is
  given."
  [{:keys [output-file limit gene-model]}]
  (let [[good-txs which-tx-is-better]
        (case gene-model
          :wgEncodeGencodeBasicV17 [@gencode/good-txs
                                    gencode/which-tx-is-better]
          :knownGene               [@known-gene/good-txs
                                    known-gene/which-tx-is-better])
        all-names (map :name good-txs)
        txs       (if limit
                    (take limit all-names)
                    all-names)
        kept      (unique-tss-transcripts gene-model which-tx-is-better txs)
        rows      (for [tx kept] [tx])]
    (utils/csv-write output-file rows)))

(defn -main
  "Program entry point: parses the command-line `args` against
  `options-config` and passes the parsed options to `unique-tss`."
  [& args]
  (-> args
      (cli/parse-command-line options-config)
      unique-tss))
