(ns jre2
  (:use [reader-macros.core :only [set-dispatch-macro-character macro-read-regex]])
  (:require [clojure.string :as str])
  (:import [jre2 JRE2 StringUTF8 StringUTF8Buffer])
  (:refer-clojure :exclude [re-pattern re-matcher re-matches re-find re-seq re-groups replace]))


;; When true, re-pattern compiles with clojure.core/re-pattern (java.util.regex.Pattern)
;; instead of constructing a JRE2 instance. Defaults to false, i.e. RE2.
(def ^:dynamic *use-java-regexes* false)

(defmacro with-java-regexes
  "Evaluates body with *use-java-regexes* bound to true, so that the re- functions
   use the Java regex engine rather than RE2."
  [& body]
  ;; Build the binding form directly: (binding [*use-java-regexes* true] body...)
  (list* `binding `[*use-java-regexes* true] body))


;; When true, the JRE2 code paths of re-groups, split, replace and replace-first
;; return StringUTF8 values instead of converting them to Strings. Defaults to false.
(def ^:dynamic *return-utf8-strings* false)

(defmacro with-return-utf8-strings
  "Evaluates body with *return-utf8-strings* bound to true, so that re-groups, re-seq,
   re-find and re-matches hand back StringUTF8 instances rather than Strings."
  [& body]
  ;; Build the binding form directly: (binding [*return-utf8-strings* true] body...)
  (list* `binding `[*return-utf8-strings* true] body))


;; Either nil (the default, caching disabled) or an atom holding a list.
;; While it holds an atom, re-pattern conj-es every JRE2 instance it creates onto
;; that list so the instances can be closed later.
(def ^:dynamic *re-close-cache* nil)

(defn init-close-cache
  "Initialize *re-close-cache* so that re-pattern will cache each new JRE2 instance
   so they can be closed later.

   Does nothing and returns nil when *re-close-cache* already holds a cache.
   Otherwise installs a fresh atom containing an empty list as the root value of
   the var and returns the var."
  []
  (when-not *re-close-cache*
    ;; Change the root value with alter-var-root rather than re-running `def`
    ;; from inside a function body; the var and its ^:dynamic metadata stay as
    ;; originally defined.
    (alter-var-root #'*re-close-cache* (constantly (atom ())))
    #'*re-close-cache*))


(defn re-pattern
  "Compiles re-str into a regex object: a java.util.regex.Pattern when
   *use-java-regexes* is true, otherwise a JRE2 instance.
   The optional second argument is the max memory size for the compiled JRE2 regex.
   When *re-close-cache* is non-nil, the new JRE2 instance is also added to it."
  ([re-str]
     (re-pattern re-str nil))
  ([re-str max-mem]
     (if-not *use-java-regexes*
       (let [compiled (if max-mem
                        (JRE2. re-str max-mem)
                        (JRE2. re-str))]
         ;; Remember the instance so it can be closed later, if caching is on.
         (when-let [cache *re-close-cache*]
           (swap! cache conj compiled))
         compiled)
       (clojure.core/re-pattern re-str))))

;; Install ##"..." as reader syntax for JRE2 regexes, mirroring #"..." for Java Patterns.
;; For example, ##"foo\sbar" reads as (re-pattern "foo\\sbar").
(set-dispatch-macro-character
 \#
 (fn [reader c]
   ;; The character following ## must be a double quote (ASCII 34).
   (let [next-char (.read reader)]
     (when-not (= 34 next-char)
       (throw (Exception. "Unsupported dispatch character after ##")))
     `(re-pattern
       (str ~(macro-read-regex reader \"))))))


(defmulti re-close
  "Closes a compiled regex. For a JRE2 instance this calls its close method,
   freeing the allocated memory; for a java.util.regex.Pattern it is a no-op."
  class)

;; JRE2: delegate to the instance's own close method.
(defmethod re-close JRE2
  [^JRE2 re]
  (. re close))

;; java.util.regex.Pattern: nothing to release.
(defmethod re-close java.util.regex.Pattern
  [_]
  nil)


(defn re-close-all
  "Closes every regex recorded in *re-close-cache* and then clears the cache.
   Does nothing when *re-close-cache* is nil. Always returns nil.

   After closing, the cached list is emptied so the same instances are not closed
   a second time. When *re-close-cache* is not thread-bound (i.e. it was set up via
   its root value), the root value is also reset to nil, which turns caching off
   again."
  []
  (when-let [cache *re-close-cache*]
    ;; doseq is eager; a lazy `map` here would close nothing unless the caller
    ;; happened to realize the returned sequence.
    (doseq [re @cache]
      (re-close re))
    (reset! cache ())
    ;; Only touch the root value when no thread-local binding is in effect;
    ;; a thread-local binding belongs to whoever established it.
    (when-not (thread-bound? #'*re-close-cache*)
      (alter-var-root #'*re-close-cache* (constantly nil))))
  nil)


(defmacro with-auto-re-close
  "Binds *re-close-cache* to a fresh atom so that re-pattern will add JRE2 instances
   to the atom while body runs. Returns the value of body.
   re-close is called on every cached instance before exiting, whether body
   returns normally or throws."
  [& body]
  `(binding [*re-close-cache* (atom ())]
     (try
       ~@body
       (finally
         ;; doseq forces the calls to happen; a discarded lazy `map` would never
         ;; invoke re-close at all.
         (doseq [re# @*re-close-cache*]
           (re-close re#))))))


(defmulti re-matcher
  "Creates a matcher for regex re over input s, dispatching on the class of re.

   JRE2: returns an instance of jre2.Matcher for use, e.g. in re-find;
   s is either a String or an instance of jre2.StringUTF8.

   java.util.regex.Pattern: returns an instance of java.util.regex.Matcher;
   s is a String."
  (fn [re _] (class re)))

;; JRE2: ask the compiled regex for its own matcher.
(defmethod re-matcher JRE2
  [^JRE2 re s]
  (. re (matcher s)))

;; Java Pattern: defer to the core implementation.
(defmethod re-matcher java.util.regex.Pattern
  [re s]
  (clojure.core/re-matcher re s))


(defmulti re-groups
  "Returns the groups from the most recent match/find. For java.util.regex.Matcher
   they are Strings; for jre2.Matcher they are Strings or StringUTF8 objects,
   depending on *return-utf8-strings*.
   With no nested groups the result is the entire match as a single value.
   With nested groups the result is a vector whose first element is the entire
   match, followed by each group."
  class)

(defmethod re-groups jre2.Matcher
  [^jre2.Matcher m]
  (let [utf8? *return-utf8-strings*
        n (.groupCount m)]
    (if (zero? n)
      ;; No capture groups: return just the whole match.
      (if utf8?
        (.groupUTF8 m)
        (.group m))
      ;; Collect groups 0..n inclusive; index 0 is the whole match.
      (reduce (fn [acc i]
                (conj acc (if utf8?
                            (.groupUTF8 m (int i))
                            (.group m (int i)))))
              []
              (range (inc n))))))

;; Java Matcher: defer to the core implementation.
(defmethod re-groups java.util.regex.Matcher
  [m]
  (clojure.core/re-groups m))


(defmulti re-seq
  "Returns a lazy sequence of successive matches of pattern in string,
   using either jre2.Matcher.find() or java.util.regex.Matcher.find(),
   each such match processed with re-groups.
   Dispatches on the class of the regex."
  (fn [re s] (class re)))

(defmethod re-seq JRE2
  [^JRE2 re s]
  (let [^jre2.Matcher m (re-matcher re s)
        ;; Snapshot the dynamic setting at call time. The tail of the sequence is
        ;; realized later, possibly after the caller's binding has been popped;
        ;; without the snapshot the first element could be a StringUTF8 while
        ;; later ones are Strings.
        utf8? *return-utf8-strings*]
    ((fn step []
       (when (.find m)
         (cons (binding [*return-utf8-strings* utf8?]
                 (re-groups m))
               (lazy-seq (step))))))))

;; Java Pattern: defer to the core implementation.
(defmethod re-seq java.util.regex.Pattern
  [re s]
  (clojure.core/re-seq re s))


(defmulti re-matches
  "Returns the match of string against pattern, or nil when there is none.
   Uses jre2.Matcher.matches() or java.util.regex.Matcher.matches() depending on
   the class of the regex; the groups are produced by re-groups."
  (fn [re _] (class re)))

(defmethod re-matches JRE2
  [^JRE2 re s]
  (let [matcher (re-matcher re s)]
    (if (.matches matcher)
      (re-groups matcher)
      nil)))

;; Java Pattern: defer to the core implementation.
(defmethod re-matches java.util.regex.Pattern
  [re s]
  (clojure.core/re-matches re s))


(defmulti re-find
  "Returns the next regex match, if any, using jre2.Matcher.find() or
   java.util.regex.Matcher.find(). Accepts either a regex plus a string, or an
   existing matcher; dispatches on the class of the first argument.
   The groups are produced by re-groups."
  (fn [re-or-matcher & _] (class re-or-matcher)))

;; JRE2 regex + input: build a matcher and search with it.
(defmethod re-find JRE2
  [re s]
  (re-find (re-matcher re s)))

;; Existing jre2.Matcher: advance to the next match.
(defmethod re-find jre2.Matcher
  [^jre2.Matcher m]
  (if (.find m)
    (re-groups m)
    nil))

;; Java Pattern + input: defer to the core implementation.
(defmethod re-find java.util.regex.Pattern
  [re s]
  (clojure.core/re-find re s))

;; Existing Java Matcher: defer to the core implementation.
(defmethod re-find java.util.regex.Matcher
  [m]
  (clojure.core/re-find m))


(defn- utf8-string
  "Returns s unchanged when it already is a StringUTF8; otherwise wraps the result
   of calling toString on s in a new StringUTF8."
  [s]
  (if-not (instance? StringUTF8 s)
    (StringUTF8. (.toString s))
    s))


(defn- split-internal
  "Splits s (converted with utf8-string) using the JRE2 regex re and the given limit.
   Returns a vector of the pieces, left as returned by JRE2 when
   *return-utf8-strings* is true and converted with str otherwise."
  [^JRE2 re s limit]
  (let [pieces (.split re (utf8-string s) limit)
        pieces (if *return-utf8-strings*
                 pieces
                 (map str pieces))]
    (vec pieces)))


(defmulti split
  "Splits s, a String or StringUTF8 instance, on the regular expression re.
   Parameters and behavior are similar to clojure.string/split.
   The optional argument limit is the maximum number of splits.
   Returns a vector of the splits. Dispatches on the class of re."
  (fn [s re & _] (class re)))

;; JRE2: a missing limit is passed on as 0.
(defmethod split JRE2
  ([s re] (split-internal re s 0))
  ([s re limit] (split-internal re s limit)))

;; Java Pattern: defer to clojure.string/split.
(defmethod split java.util.regex.Pattern
  ([s re] (str/split s re))
  ([s re limit] (str/split s re limit)))


(defn- replace-by
  "Replaces every match found by matcher m with (f (re-groups m)), coerced through
   utf8-string. s is only consulted for its length, which sizes the output buffer.
   Returns the buffer's contents via toStringUTF8."
  [s ^jre2.Matcher m f]
  (let [out (StringUTF8Buffer. (.length s))]
    ;; Append each replacement in turn, then whatever follows the last match.
    (while (.find m)
      (.appendReplacement m out (utf8-string (f (re-groups m)))))
    (.appendTail m out)
    (.toStringUTF8 out)))

(defmulti replace
  "Replaces all matches of 'match' in 's' with 'replacement'.
   Parameters and behavior are similar to clojure.string/replace.
   When 'match' is an instance of JRE2, 'replacement' may be a CharSequence
   (such as a String), a StringUTF8, or a function that receives the value of
   re-groups for each match and produces a String or StringUTF8 to substitute.
   When 'replacement' is a StringUTF8 or a function, the result is either a
   String or a StringUTF8, depending on *return-utf8-strings*."
  (fn [s match replacement] (class match)))

(defmethod replace JRE2
  [s match replacement]
  (let [m (re-matcher match s)
        ;; Applied only to the StringUTF8 and function cases.
        finish (fn [result]
                 (if *return-utf8-strings*
                   result
                   (str result)))]
    (cond
      (instance? CharSequence replacement)
      (.replaceAll m (.toString ^CharSequence replacement))

      (instance? StringUTF8 replacement)
      (finish (.replaceAll m replacement))

      :else
      (finish (replace-by s m replacement)))))

;; Anything that is not a JRE2: defer to clojure.string/replace.
(defmethod replace :default
  [s match replacement]
  (str/replace s match replacement))


(defn- replace-first-by
  "Replaces the first match found by matcher m with (f (re-groups m)), coerced
   through utf8-string, and returns the result via the buffer's toStringUTF8.
   s is the input the matcher was built over.

   When there is no match, returns s itself (coerced with utf8-string) so the
   caller gets the unchanged input back instead of nil."
  [s ^jre2.Matcher m f]
  (if (.find m)
    (let [buffer (StringUTF8Buffer. (.length s))]
      (.appendReplacement m buffer (utf8-string (f (re-groups m))))
      ;; Copy everything after the first match unchanged.
      (.appendTail m buffer)
      (.toStringUTF8 buffer))
    ;; No match: previously this fell through to nil, which replace-first then
    ;; turned into "" (or returned as nil), losing the input.
    (utf8-string s)))

(defmulti replace-first
  "Replaces the first match of 'match' in 's' with 'replacement'.
   Parameters and behavior are similar to clojure.string/replace-first.
   When 'match' is an instance of JRE2, 'replacement' may be a CharSequence
   (such as a String), a StringUTF8, or a function that receives the value of
   re-groups for the match and produces a String or StringUTF8 to substitute.
   When 'replacement' is a StringUTF8 or a function, the result is either a
   String or a StringUTF8, depending on *return-utf8-strings*."
  (fn [s match replacement] (class match)))

(defmethod replace-first JRE2
  [s match replacement]
  (let [m (re-matcher match s)
        ;; Applied only to the StringUTF8 and function cases.
        finish (fn [result]
                 (if *return-utf8-strings*
                   result
                   (str result)))]
    (cond
      (instance? CharSequence replacement)
      (.replaceFirst m (.toString ^CharSequence replacement))

      (instance? StringUTF8 replacement)
      (finish (.replaceFirst m replacement))

      :else
      (finish (replace-first-by s m replacement)))))

;; Anything that is not a JRE2: defer to clojure.string/replace-first.
(defmethod replace-first :default
  [s match replacement]
  (str/replace-first s match replacement))


(defn- make-utf8-bindings
  "Helper function for with-utf8-bindings. Not intended for use by itself.
   Takes a flat sequence of name/expression pairs and returns a let-style binding
   vector in which every expression is wrapped in a call to utf8-string."
  [bindings]
  (vec (mapcat (fn [[sym expr]]
                 ;; utf8-string is private, so the expansion refers to it through
                 ;; its var (#'utf8-string). A plain symbol would fail to compile
                 ;; when the macro is expanded in another namespace.
                 [sym `(#'utf8-string ~expr)])
               ;; partition-all keeps a trailing unpaired name, which is bound to
               ;; the conversion of nil exactly as before.
               (partition-all 2 bindings))))

(defmacro with-utf8-bindings
  "Evaluates body inside a let in which each name in bindings is bound to a
   StringUTF8 instance built from the corresponding String expression."
  [bindings & body]
  `(let ~(make-utf8-bindings bindings)
     ~@body))


(defn- make-regex-bindings
  "Helper function for with-regex-bindings. Not intended for use by itself.
   Turns a flat sequence of name/expression entries, each optionally followed by
   :mem and a max memory size, into a let-style binding vector whose values are
   calls to re-pattern."
  [bindings]
  (loop [pending bindings
         acc []]
    (if (empty? pending)
      acc
      (let [[sym pattern & more] pending
            [marker mem-size & after-mem] more]
        (if (= :mem marker)
          ;; name expr :mem size -> two-argument re-pattern call
          (recur after-mem (conj acc sym `(re-pattern ~pattern ~mem-size)))
          ;; name expr -> one-argument re-pattern call
          (recur more (conj acc sym `(re-pattern ~pattern))))))))

(defn- make-close-calls
  "Helper function for with-regex-bindings. Not intended for use by itself.
   Given a flat binding vector of name/value pairs, returns one (re-close name)
   form per complete pair."
  [bindings]
  (map (fn [[sym]]
         `(re-close ~sym))
       (partition 2 bindings)))

(defmacro with-regex-bindings
  "Let-bind names in the bindings to compiled regexs, either instances of JRE2 or of java.util.regex.Pattern.
   Bindings may be followed by :mem and a max memory size. E.g.
     (with-regex-bindings [r1 \"a.*b\" :mem 100000
                           r2 \"a.b\"]
       (re-matcher r1 \"abab\"))
   Returns the value of body. re-close is called on every bound regex before
   exiting, whether body returns normally or throws."
  [bindings & body]
  (let [regex-bindings (make-regex-bindings bindings)
        close-calls (make-close-calls regex-bindings)]
    `(let ~regex-bindings
       ;; try/finally so the regexes are closed even when body throws.
       (try
         ~@body
         (finally
           ~@close-calls)))))
