(ns thi.ng.fabric.facts.io.ntriples
  (:require
   [thi.ng.strf.core :as strf]
   [thi.ng.xerror.core :as err]))

(defn vocab-map
  "Builds a map from each of the given named items `xs` (e.g. keywords)
  to the string formed by appending the item's name to `uri`."
  [uri & xs]
  (reduce
   (fn [acc term] (assoc acc term (str uri (name term))))
   {}
   xs))

(def xsd
  "Map of XSD datatype keywords to their full XML Schema URI strings."
  (vocab-map
   "http://www.w3.org/2001/XMLSchema#"
   :boolean
   :byte
   :short
   :integer
   :int
   :long
   :decimal
   :double
   :float))

(defmulti literal-value
  "Converts the lexical form of a typed literal into a native value.
  Takes the literal string and its datatype URI and dispatches on the
  URI. Unknown datatypes yield nil."
  (fn [_lit type-uri] type-uri))

;; Fallback for datatype URIs without a dedicated conversion.
(defmethod literal-value :default [_lit _type] nil)

;; Integral types
(defmethod literal-value (:byte xsd) [lit _type]    (strf/parse-int lit))
(defmethod literal-value (:short xsd) [lit _type]   (strf/parse-int lit))
(defmethod literal-value (:int xsd) [lit _type]     (strf/parse-int lit 10))
(defmethod literal-value (:integer xsd) [lit _type] (strf/parse-int lit 10))
(defmethod literal-value (:long xsd) [lit _type]    (strf/parse-long lit 10))

;; Floating point types (xsd:decimal is parsed via parse-double as well)
(defmethod literal-value (:float xsd) [lit _type]   (strf/parse-float lit))
(defmethod literal-value (:double xsd) [lit _type]  (strf/parse-double lit))
(defmethod literal-value (:decimal xsd) [lit _type] (strf/parse-double lit))

;; Booleans
(defmethod literal-value (:boolean xsd) [lit _type] (strf/parse-boolean lit))

#?(:clj
   (defn hex->str
     "Takes a seq of hex digit chars and returns the string for the
  Unicode code point they denote. Code points above 0xFFFF yield a
  surrogate pair (via Character/toChars)."
     [chars]
     (let [cp (Integer/parseInt (apply str chars) 16)]
       (String. (Character/toChars cp))))
   :cljs
   (defn hex->str
     "Takes a seq of hex digit chars and returns the string for the
  Unicode code point they denote. Code points above 0xFFFF are encoded
  as a UTF-16 surrogate pair, since String.fromCharCode only accepts
  16 bit code units."
     [chars]
     (let [cp (js/parseInt (apply str chars) 16)]
       (if (> cp 0xFFFF)
         ;; split supplementary code point into high/low surrogates
         (let [off (- cp 0x10000)]
           (.fromCharCode js/String
                          (+ 0xD800 (bit-shift-right off 10))
                          (+ 0xDC00 (bit-and off 0x3FF))))
         (.fromCharCode js/String cp)))))

;; Whitespace chars allowed between terms. Includes carriage return so
;; that input with CRLF line endings is accepted.
(def WS #{\space \tab \newline \return})

(defn parse-escape
  "Takes escape seq w/o leading backslash. Returns vector of parsed
  char (or single char string) & stream after the escape seq.

  Supported escapes: xHH, uHHHH, UHHHHHHHH (hex digits are converted
  via hex->str), n, r, b, t, f, double quote, single quote and
  backslash. Any other char (incl. end of input) raises an illegal
  argument error."
  [str]
  (let [more (next str)]
    (condp = (first str)
      \x [(hex->str (take 2 more)) (drop 2 more)]
      \u [(hex->str (take 4 more)) (drop 4 more)]
      ;; N-Triples long form: 8 hex digits
      \U [(hex->str (take 8 more)) (drop 8 more)]
      \n ["\n" more]
      \r ["\r" more]
      \b ["\b" more]
      \t ["\t" more]
      \f ["\f" more]
      \" [\" more]
      \' [\' more]
      \\ [\\ more]
      (err/illegal-arg! "illegal escape sequence"))))

(defn discard-until
  "Skips chars of stream until `stop` returns truthy for the current
  char or the stream is exhausted. Returns the stream starting at the
  stop char (nil at end of input).

  A `#` char starts a comment, which is skipped up to and including
  the next newline. Any other char must satisfy `allowed`, else an
  illegal argument error is raised."
  [stream allowed stop]
  (loop [stream (seq stream)]
    (let [ch (first stream)]
      (cond
        (or (nil? ch) (stop ch))
        stream

        ;; Skip the comment with a plain scan for the newline. Further
        ;; `#` chars inside the comment have no special meaning and
        ;; must not extend the comment beyond the current line.
        (= \# ch)
        (recur (next (drop-while #(not= \newline %) stream)))

        (allowed ch)
        (recur (next stream))

        :else
        (err/illegal-arg! (str "illegal character: " ch))))))

(defn read-until
  "Collects chars from stream until `stop` returns truthy for the
  current char or the input ends. A backslash starts an escape seq,
  which is resolved via parse-escape. Returns [token stream], where
  token is the collected string and stream begins at the stop char."
  [stream stop]
  (loop [buf (transient []), src (seq stream)]
    (let [ch (first src)]
      (cond
        (or (nil? ch) (stop ch))
        [(apply str (persistent! buf)) src]

        (= \\ ch)
        (let [[esc more] (parse-escape (next src))]
          (recur (conj! buf esc) more))

        :else
        (recur (conj! buf ch) (next src))))))

(defn parse-uri
  "Reads a URI (w/o its leading `<`) up to the closing `>`.
  Returns [uri stream], with stream positioned at the `>`."
  [str]
  (let [closing? (fn [ch] (= \> ch))]
    (read-until str closing?)))

(defn parse-bnode
  "Reads a blank node label up to the next whitespace char (any
  member of WS, not only space, since terms may also be separated by
  other whitespace). Returns [label stream], with stream positioned
  at the whitespace char."
  [str] (read-until str WS))

(defn parse-literal-type
  "Parses the datatype URI following a string literal and attempts to
  convert `lit` via literal-value. Returns [value stream], where value
  is the converted literal or, if no conversion exists for the type
  (literal-value returned nil), a map {:type uri :lit lit}."
  [lit stream]
  (let [stream (discard-until stream #(= \^ %) #(= \< %))
        [turi stream] (parse-uri (next stream))
        val (literal-value lit turi)]
    ;; Explicit nil check: a successfully parsed `false` is a valid
    ;; value and must not be replaced by the fallback map.
    [(if (nil? val) {:type turi :lit lit} val) stream]))

(defn parse-lang
  "Reads the language tag following a string literal's `@` marker.
  The tag ends at the next whitespace char (member of WS) or at a `.`
  (statement terminator, which can't be part of a language tag).
  Returns [{:lit lit :lang tag} stream], with stream positioned at
  the terminating char."
  [lit stream]
  (let [[lang stream] (read-until stream #(or (WS %) (= \. %)))]
    [{:lit lit :lang lang} stream]))

(defn parse-string
  "Reads a string literal (w/o its opening quote) up to the closing
  quote, then inspects the char following that quote: `^` delegates to
  parse-literal-type, `@` to parse-lang. Otherwise returns
  [string stream], with stream positioned at the closing quote."
  [stream]
  (let [[text after] (read-until stream #(= \" %))
        marker       (fnext after)
        tail         (nnext after)]
    (cond
      (= \^ marker) (parse-literal-type text tail)
      (= \@ marker) (parse-lang text tail)
      :else         [text after])))

(defn parse-subject
  "NT subject is <uri> or _:bnode. Skips leading whitespace, then
  dispatches on the first char to parse-uri or parse-bnode. If neither
  marker is found, the remaining stream is returned as is."
  [str]
  (let [src  (discard-until str WS #(or (= \< %) (= \_ %)))
        head (first src)]
    (cond
      (= \< head) (parse-uri (next src))
      (= \_ head) (parse-bnode (next src))
      :else       src)))

(defn parse-pred
  "NT predicate is always a <uri>. Skips whitespace up to the opening
  `<` and parses the URI following it."
  [str]
  (-> str
      (discard-until WS #(= \< %))
      (next)
      (parse-uri)))

(defn parse-object
  "NT object is: <uri>, _:bnode or \"string\". Skips leading
  whitespace, then dispatches on the first char to the matching
  parser. If no marker is found, the remaining stream is returned
  as is."
  [str]
  (let [src  (discard-until str WS #{\< \_ \"})
        head (first src)
        body (next src)]
    (cond
      (= \< head) (parse-uri body)
      (= \_ head) (parse-bnode body)
      (= \" head) (parse-string body)
      :else       src)))

(defn parse-ntriple
  "NT triple is \"subject <uri> object .\" (trailing dot).
  Parses a single statement and returns [triple stream], where triple
  is [s p o] or nil if subject or predicate is empty, and stream is
  the input following the trailing dot."
  [str]
  (let [[s after-s] (parse-subject str)
        [p after-p] (parse-pred (next after-s))
        [o after-o] (parse-object (next after-p))
        [_ at-dot]  (read-until after-o #(= \. %))
        remaining   (next at-dot)
        triple      (when-not (or (empty? s) (empty? p)) [s p o])]
    [triple remaining]))

(defn parse-ntriples
  "Takes NT string and eagerly parses it statement by statement.
  Returns vector of [s p o] triples; statements for which
  parse-ntriple yields no triple are skipped."
  [str]
  (loop [triples (transient []), input str]
    (if-not input
      (persistent! triples)
      (let [[spo more] (parse-ntriple input)]
        (recur (if spo (conj! triples spo) triples) more)))))

(defn parse-ntriples-lazy
  "Takes NT string and returns lazy-seq of [s p o] triples.
  Statements for which parse-ntriple yields no triple are skipped
  and parsing continues with the remaining input (same behavior as
  parse-ntriples)."
  [str]
  (lazy-seq
   (when str
     (let [[spo str] (parse-ntriple str)]
       (if spo
         (cons spo (parse-ntriples-lazy str))
         ;; no triple for this statement: keep going instead of
         ;; terminating the seq prematurely
         (parse-ntriples-lazy str))))))
