(ns cjd.vox.vocalizer
  (:use
    [cjd.vox.vox-utilz]
    [cjd.vox.vocalization-maps]
    [cjd.util.string-utils]
    )
  )


;(def token-mods (token-rep-map token-modifications))

;(def prefix-mods (prefix-rep-list prefix-modifications))

;; Applies func to each element of coll and returns all the results as a
;; single sequence, flattened with clojure.core/flatten (so nested
;; sequential results are flattened at every depth, not just one level).
(defn flat-map [func coll]
  (->> coll
       (map func)
       flatten))

;; Maps a partially written call over a collection.
;; wfn is a call form that is missing its LAST argument, e.g. (str "a");
;; each element of targets is appended to that form as the final argument,
;; so (whiz (str "a") ["x" "y"]) behaves like (map (fn [e] (str "a" e)) ["x" "y"]).
;; Taking the collection last makes the macro usable inside a ->> pipeline.
(defmacro whiz [wfn targets]
  `(map (fn [elem#] (~@wfn elem#)) ~targets))

;; Like whiz, but the mapped results are passed through clojure.core/flatten.
;; wfn is a call form that is missing its LAST argument; each element of
;; targets is appended to it. The call may return either a single value or a
;; sequential collection of values; either way the overall result is one
;; flat sequence.
(defmacro fwhiz [wfn targets]
  `(flatten (map (fn [elem#] (~@wfn elem#)) ~targets)))


#_ (* Accepts a string which notionally represents a Java (or similar) identifier
      and uses it to generate a collection of strings that are plausible 
      vocalized representations of the identifier.
     
      @p Sequence of operations\:
      @(ol
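         @li "XXX_YYY9" → "xxx_yyy9". An identifier consisting solely of upper-case
         letters, digits and underscores is lower-cased before anything else happens.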
         @li "xxx" → " xxx ". We're fundamentally generating a string of tokens
         here, where the tokens are separated by spaces. This ensures that tests
         for tokens beginning and ending work across the entire string, and we 
         don't have to special-case start and end of string.
         @li "xxx.yyy" → ["xxx dot yyy" "xxx yyy"]
         @li "xxx-yyy" → ["xxx yyy"]
         @li "xxxYyyyZzzz" → "xxx Yyyy Zzzz"
         @li "aaaa9876" → "aaaa 9876"
         @li "1234zzzz" → "1234 zzzz"
         @li "xxx?" → "xxx question" 
         @li "xxx!" → "xxx bang" 
         @li " _xxx" and "xxx_ " → "underscore xxx" and "xxx underscore", respectively.
         We only pronounce underscores at the start and end of IDs.
         @li "xxx?yyy" → "xxx yyy", where "?" is any of "_(),". Note that 
         underscores in the middle are just separators.
         @li "X9X9_ZZZ" → "X9X9 ZZZ". This deals with upper-cased IDs that have words
         separated by underscores.
         @li "1234" → ["1 2 3 4" "1234"]. Digits individually, or as a number.
         @li " 0 " → [" 0 ", " o "]. Zero as "zero" or "o".
         @li Replace token prefixes from the @(link prefix-mods) list with their modified strings.
         @li Replace entire tokens with replacements from the @(link token-mods) list.
         @li "XYZ" → "X Y Z". This deals with any remaining strings of upper-case letters, notably
         abbreviations.
         @li Condense multiple spaces to a single space.
         @li Remove spaces at the beginning
         @li Remove spaces at the end
         @li Lower-case everything.
        )
     

;      @(session
;         (id-vocalizer "JavaIdentifier")
;         ["java identifier"] 
;         (id-vocalizer "javax.thing.JavaIdentifier")
;         ["java x dot thing dot java identifier" "java x thing java identifier"] 
;         )
      @arg term A term to be "vocalized".
      @returns A collection of strings representing alternative possible
      vocalizations for @(arg term).
      )
(defn id-vocalizer [^String term]
  ; The term is threaded through a pipeline of textual rewrites. After the
  ; first three steps the value being threaded is a COLLECTION of candidate
  ; strings; `whiz` applies a rewrite to every candidate and `fwhiz` does the
  ; same but lets a rewrite fan one candidate out into several.
  ; re-replace-all, re-replace-first, re-repeat, prefix-mods and token-mods
  ; come from the namespaces pulled in by the ns form (not visible here);
  ; the helpers are called as (helper regex replacement string).
  (->> 
    term
    ; An identifier made up solely of upper-case letters, digits and
    ; underscores is lower-cased up front.
    ((fn [string] 
      (if (re-matches #"[\p{Upper}\d_]+" string)
        (.toLowerCase string)
        string)))
    ; Pad with a space on each side so that token-boundary patterns also work
    ; at the very start and end of the string.
    ((fn [string] (str " " string " ")))
    ; A dotted name yields two candidates: dots spoken as "dot", and dots
    ; treated as plain separators. From here on we carry a collection.
    ((fn [string] 
       (if (re-find #"\." string)
         [(re-replace-all #"\." " dot " string) (re-replace-all #"\." " " string)]
         [string])))
    ; Hyphens are separators.
    (whiz (re-replace-all #"-" " "))
    ; Split camelCase humps and letter/digit boundaries.
    (whiz (re-replace-all #"(\p{Lower})(\p{Upper})" "$1 $2"))
    (whiz (re-replace-all #"(\p{Alpha})(\p{Digit})" "$1 $2"))
    (whiz (re-replace-all #"(\p{Digit})(\p{Alpha})" "$1 $2"))
    ; Underscores adjacent to a space (i.e. leading/trailing a token) are spoken.
    (whiz (re-replace-all #" _|_ " " underscore "))
    ; Spoken punctuation.
    (whiz (re-replace-all #"\?" " question "))
    (whiz (re-replace-all #"!" " bang "))
    (whiz (re-replace-all #"\*" " star "))
    (whiz (re-replace-all #"\$" " dollar "))
    ; Remaining underscores, parentheses and commas are silent separators.
    (whiz (re-replace-all #"[_(),]" " "))
    ; NOTE(review): the step above already replaced every underscore, so this
    ; pattern looks like it can no longer match -- confirm whether it was
    ; meant to run earlier in the pipeline.
    (whiz (re-repeat #" ([\p{Upper}\d_]+)_" "$1 "))
    ; A run of two or more digits yields two candidates: the number as written
    ; and the digits spelled out one by one.
    (fwhiz 
      ((fn [string]
         (if (re-find #"\d{2,}" string)
           [string (re-repeat #"(\d)(\d)" "$1 $2" string)]
           string))))
    ; A stand-alone zero yields two candidates: "0" and "o".
    (fwhiz 
      ((fn [string]
         (if (re-find #" 0 " string)
           [string (re-replace-all #" 0 " " o " string)]
           string))))
    ; Apply every [pattern replacement] pair from prefix-mods, in order.
    (whiz 
      ((fn [string]
         (reduce (fn [s [pref mod]] (re-replace-all pref mod s)) string prefix-mods ))))
    ; Apply every [pattern replacement] pair from token-mods, in order.
    (whiz 
      ((fn [string]
         (reduce (fn [s [tok mod]] (re-replace-all tok mod s)) string token-mods ))))
    ; Spell out whatever runs of upper-case letters are left (abbreviations).
    (whiz (re-repeat #"(\p{Upper})(\p{Upper})" "$1 $2"))
    ; Collapse each run of spaces to a single space. The pattern matches the
    ; WHOLE run: a fixed two-space pattern would turn a run of three or four
    ; spaces into two, leaving the result only partly condensed.
    (whiz (re-replace-all #" {2,}" " "))
    ; Trim the padding added at the start of the pipeline.
    (whiz (re-replace-first #"^ +" ""))
    (whiz (re-replace-first #" +$" ""))
    ; Vocalizations are always reported in lower case.
    (whiz (.toLowerCase))
    ))

