(ns babble.db
  (:import [net.sourceforge.pinyin4j PinyinHelper]
           [net.sourceforge.pinyin4j.format
               HanyuPinyinCaseType
               HanyuPinyinOutputFormat
               HanyuPinyinToneType
               HanyuPinyinVCharType])
  (:require [base.core :as base]
            [clj-http.client :as client]
            [clojure.data.json :as json]
            [clojure.java.io :as io]
            [clojure.java.jdbc :as jdbc]
            [clojure.java.shell :refer [sh]]
            [clojure.string :as string]
            [babble.core :as core]
            [stardate :as sd])
  (:gen-class))

(defn- drop-cedict-table
  "Drop the babble.cedict table if it exists, cascading to dependent objects.
  Returns whatever jdbc/execute! returns for the statement."
  []
  (let [sql "DROP TABLE IF EXISTS babble.cedict CASCADE;"]
    (jdbc/execute! base/pgdb [sql])))

(defn- create-cedict-tmp-table
  "Create an empty babble.cedicttmp staging table.

  Ensures the babble schema exists, drops any staging table left over from a
  previous run, then creates a fresh one. Returns the result of the final
  CREATE TABLE statement."
  []
  (let [ensure-schema "CREATE SCHEMA IF NOT EXISTS babble;"
        drop-stale    "DROP TABLE IF EXISTS babble.cedicttmp CASCADE;"
        create-table  "CREATE TABLE babble.cedicttmp(
       id SERIAL PRIMARY KEY,
       simplified text NOT NULL,
       simplified_crc32 text NOT NULL,
       traditional text NOT NULL,
       pinyin text NOT NULL,
       english text NOT NULL);"]
    (jdbc/execute! base/pgdb [ensure-schema])
    (jdbc/execute! base/pgdb [drop-stale])
    (jdbc/execute! base/pgdb [create-table])))

(defn- populate-cedict-tmp-table
  "Insert every entry of @core/cc-cedict into babble.cedicttmp.

  Issues one INSERT per entry; the english column receives the entry's
  definitions joined with \"/\". Returns nil."
  []
  (let [insert-sql "INSERT INTO babble.cedicttmp
      (simplified, simplified_crc32, traditional, pinyin, english)
      VALUES (?, ?, ?, ?, ?);"]
    (doseq [entry @core/cc-cedict]
      (let [simplified (:simplified entry)]
        (jdbc/execute! base/pgdb
                       [insert-sql
                        simplified
                        (core/crc32 simplified)
                        (:traditional entry)
                        (:pinyin entry)
                        ;; NOTE(review): :English is capitalised unlike the
                        ;; other keys -- confirm it matches the entry maps
                        ;; produced by core/cc-cedict.
                        (string/join "/" (:English entry))])))))

(defn- create-cedict-indexes
  "Create the text_pattern_ops lookup indexes on babble.cedicttmp.

  One index named <column>_idx is created for each of pinyin, simplified,
  simplified_crc32 and traditional.

  PostgreSQL index names are unique per schema, and an index keeps its name
  when its table is renamed. After a first refresh the live babble.cedict
  table therefore already owns indexes with these names, and a plain
  CREATE INDEX on the staging table would fail with \"relation already
  exists\". To keep the refresh repeatable, any existing index with the
  target name is first renamed to <column>_idx_old; it stays usable on the
  live table and disappears when that table is dropped.

  Returns the result of the last CREATE INDEX statement."
  []
  (reduce
    (fn [_ column]
      (let [idx (str column "_idx")]
        ;; Move a same-named index (owned by the live table) out of the way.
        (jdbc/execute! base/pgdb
          [(str "ALTER INDEX IF EXISTS babble." idx
                " RENAME TO " idx "_old;")])
        (jdbc/execute! base/pgdb
          [(str "CREATE INDEX " idx
                " ON babble.cedicttmp(" column " text_pattern_ops);")])))
    nil
    ["pinyin" "simplified" "simplified_crc32" "traditional"]))

(defn- rename-cedict-tmp-table
  "Rename the babble.cedicttmp staging table to babble.cedict.
  Returns whatever jdbc/execute! returns for the statement."
  []
  (let [sql "ALTER TABLE babble.cedicttmp RENAME TO cedict;"]
    (jdbc/execute! base/pgdb [sql])))

(defn refresh-cedict
  "Re-create the babble.cedict table from a freshly downloaded data file.

  Steps:
    1. Download the gzipped CC-CEDICT export with wget into the resources
       directory and decompress it with gzip.
    2. Build and index a staging table, drop the live table and rename the
       staging table into its place.
    3. Log and send a notification that the table was updated.

  Throws ex-info (with :command, :exit and :err in the data map) when wget
  or gzip exits with a non-zero status. In that case the database is left
  untouched, and a failed download also leaves the previous data file in
  place."
  []
  (let [dir "/home/c/Documents/Clojure/babble/resources"
        txt (str dir "/cedict_1_0_ts_utf-8_mdbg.txt")
        gz  (str txt ".gz")
        checked-sh (fn [& args]
                     (let [{:keys [exit err] :as result} (apply sh args)]
                       (when-not (zero? exit)
                         (throw (ex-info (str "Command failed: "
                                              (string/join " " args))
                                         {:command args :exit exit :err err})))
                       result))]
    ;; A stale archive would make wget save the new download under a ".1"
    ;; suffix, so clear it first (silently: it normally does not exist).
    (io/delete-file gz true)
    (checked-sh "wget" "http://www.mdbg.net/chindict/export/cedict/cedict_1_0_ts_utf-8_mdbg.txt.gz" "-P" dir)
    ;; Only remove the old text file once the download has succeeded; pass
    ;; `true` so a missing file (e.g. first run) is not an error.
    (io/delete-file txt true)
    (checked-sh "gzip" "-d" gz))

  (create-cedict-tmp-table)
  (populate-cedict-tmp-table)
  (create-cedict-indexes)
  (drop-cedict-table)
  (rename-cedict-tmp-table)
  (base/log-all "babble" "info" "babble.cedict table updated")
  (base/notify "babble.cedict table updated"))

(defn- drop-cjkdecomp-table
  "Drop the babble.cjkdecomp table if it exists, cascading to dependent
  objects. Returns whatever jdbc/execute! returns for the statement."
  []
  (let [sql "DROP TABLE IF EXISTS babble.cjkdecomp CASCADE;"]
    (jdbc/execute! base/pgdb [sql])))

(defn- create-cjk-decomp-table
  "Create an empty babble.cjkdecomp table with columns k (primary key),
  k_crc32, cmd and components (jsonb). Returns whatever jdbc/execute!
  returns for the statement."
  []
  (let [sql "CREATE TABLE babble.cjkdecomp(
       k text PRIMARY KEY,
       k_crc32 text,
       cmd text NOT NULL,
       components jsonb NOT NULL);"]
    (jdbc/execute! base/pgdb [sql])))

(defn- parse-cjk-decomp-line
  "Parse a single cjk-decomp entry of the form `x:cmd(a,b,...)`.

  Returns a map with:
    :x     - the text before the colon
    :crc32 - (core/crc32 x), or nil when x consists only of digits
    :cmd   - the text between the colon and the opening parenthesis
    :sub   - vector of the comma-separated items inside the parentheses;
             empty when the parentheses are empty

  Throws ex-info (with the offending :line in the data map) when `s` does
  not have that form."
  [s]
  (let [[matched x cmd sub] (re-matches #"(.+):(.+)\((.*)\)" s)]
    (when-not matched
      (throw (ex-info (str "Malformed cjk-decomp line: " (pr-str s))
                      {:line s})))
    {:x     x
     :crc32 (when-not (re-find #"^\d+$" x) (core/crc32 x))
     :cmd   cmd
     ;; Splitting \"\" yields [\"\"], which would record a bogus empty
     ;; component for entries written as `x:cmd()`.
     :sub   (if (empty? sub) [] (string/split sub #","))}))

(defn populate-cjk-decomp-table
  "Load the cjk-decomp data file into babble.cjkdecomp.

  Reads the whole file at the path given by
  (base/home \"Dropbox/Mandarin/cjk-decomp-0.4.0.txt\"), parses each line with
  parse-cjk-decomp-line and issues one INSERT per line, storing the
  components as a JSON array. Returns nil."
  []
  (let [path       (base/home "Dropbox/Mandarin/cjk-decomp-0.4.0.txt")
        insert-sql "INSERT INTO babble.cjkdecomp
                    (k, k_crc32, cmd, components)
                    VALUES (?, ?, ?, ?::jsonb);"]
    (doseq [line (string/split-lines (slurp path))]
      (let [{:keys [x crc32 cmd sub]} (parse-cjk-decomp-line line)]
        (jdbc/execute! base/pgdb
                       [insert-sql x crc32 cmd (json/write-str sub)])))))
