(ns morri.unique-tss.known-gene
  (:require [honeysql.core :as sql]
            [honeysql.helpers :refer :all]
            [morri.lib :as lib :refer [tprn]]
            [morri.meth450k.common.ucsc-db :as ucsc-db]))

(defn is-canonical?
  "Returns true when the knownCanonical table holds at least one row whose
  `transcript` column equals the given transcript id, false otherwise."
  [transcript]
  (let [lookup (sql/format (-> (select :transcript)
                               (from :knownCanonical)
                               (where [:= :transcript transcript])))]
    ;; Any matching row at all means the transcript is listed as canonical.
    (-> lookup
        ucsc-db/ucsc-query
        count
        pos?)))

(defn is-ref-seq?
  "Looks up the `isRefSeq` column in kgTxInfo for the row whose `name`
  equals the given transcript id. Returns true only when the first matching
  row carries the value 1; returns false when there is no row or the value
  differs."
  [transcript]
  (let [lookup    (sql/format (-> (select :isRefSeq)
                                  (from :kgTxInfo)
                                  (where [:= :name transcript])))
        first-row (first (ucsc-db/ucsc-query lookup))
        flag      (:isRefSeq first-row)]
    (= 1 flag)))

(defn transcript-score
  "Fetches the `cdsScore` column from kgTxInfo for the row whose `name`
  equals the given transcript id. Returns the value from the first matching
  row, or nil when no row matches."
  [transcript]
  (let [lookup (sql/format (-> (select :cdsScore)
                               (from :kgTxInfo)
                               (where [:= :name transcript])))]
    (-> lookup
        ucsc-db/ucsc-query
        first
        :cdsScore)))

(defn which-tx-is-better
  "Chooses between two transcript ids by trying three criteria in order:
  `is-canonical?`, then `is-ref-seq?`, then `transcript-score`. Each
  criterion is handed to `lib/which-is-better` together with both
  transcripts; the first truthy result is returned. When every criterion
  yields a falsey result, `tx-a` is returned as the default.

  NOTE(review): the exact tie/decision semantics live in
  `lib/which-is-better`, which is not visible here — presumably it returns
  nil when the criterion cannot separate the two; confirm against that
  helper."
  [tx-a tx-b]
  (let [pick-by (fn [criterion]
                  (lib/which-is-better tx-a tx-b criterion))]
    ;; `or` evaluates lazily, so later (database-backed) criteria are only
    ;; consulted when the earlier ones produced nothing truthy.
    (or (pick-by is-canonical?)
        (pick-by is-ref-seq?)
        (pick-by transcript-score)
        tx-a)))

;; piRNA X
;; rRNA? Can't find any
;; tRNA X
;; snRNA X
;; snoRNA X
;; mir X

;; Also drop every noncoding/nearcoding transcript whose span (txEnd - txStart) is under 200.

;; result rows: 77028


(def good-txs
  "Delay that, when dereferenced, runs one query joining knownGene, kgXref
  and kgTxInfo and returns the rows of knownGene.name that survive the
  filters in the SQL below: descriptions mentioning microRNA, small
  nucleolar RNA, small nuclear RNA or piRNA are excluded, rows with a
  non-empty tRnaName are excluded, and noncoding/nearcoding entries with
  txEnd - txStart < 200 are excluded. The query runs at most once because
  the result is cached by the delay."
  (delay
    (let [filter-sql "
SELECT knownGene.name from knownGene
join kgXref on knownGene.name = kgXref.kgID
join kgTxInfo on knownGene.name = kgTxInfo.name
where description not like '%microRNA%'
and description not like '%small nucleolar RNA.'
and description not like '%small nuclear RNA.'
and tRnaName = ''
and description not like '%piRNA%'
and not ((category = 'noncoding' or category = 'nearcoding')
and txEnd - txStart < 200)
"]
      (ucsc-db/ucsc-query [filter-sql]))))

;; (count @good-txs)


;; (defn only-one? [pred? a-list]
;;   (if-let [valid-items (filter pred? a-list)]
;;     (if (= 1 (count valid-items))
;;       (first valid-items))))

;; (defn top-scoring [tx-list]
;;   (last (sort-by transcript-score tx-list)))

;; (defn best-transcript [tx-list]
;;   (if-let [canonical (only-one? is-canonical? tx-list)]
;;     canonical
;;     (if-let [refseq (only-one? is-ref-seq? tx-list)]
;;       refseq
;;       (top-scoring tx-list))))


;; (defn get-gene-symbol-transcripts [gene-symbol]
;;   (map :kgId (ucsc-db/ucsc-query (jdbc-sql/select :kgId :kgXref
;;                                            (jdbc-sql/where
;;                                             {:geneSymbol gene-symbol})))))


;; (defn unique-tss-gs-transcripts [gene-symbol]
;;   (unique-tss-transcripts (get-gene-symbol-transcripts gene-symbol)))
