(ns jlk.data.office.pdf
  (:use [clojure.repl]
        [jlk.repl]
        [jlk.utility :only [not-nil?]])
  (:import [org.apache.pdfbox.pdmodel PDDocument PDPage]
           [org.apache.pdfbox.pdmodel.common PDRectangle]
           [org.apache.pdfbox.pdmodel.graphics.color PDGamma]
           [org.apache.pdfbox.pdmodel.interactive.annotation PDAnnotationSquareCircle PDBorderStyleDictionary PDAnnotationText PDAnnotation]))

;;
;; this actually works pretty well...
;; 

(defn test
  "Write a single-page PDF containing one square annotation.

  The annotation has the contents text \"square annotation\", a colour with
  its R component set to 1, a border of width 1 and a rectangle running
  from (1, 2) to (100, 200).

  With no arguments the file is written to \"/tmp/out.pdf\"; `path` may be
  given to write somewhere else (it is converted with `str`).  Returns nil."
  ([] (test "/tmp/out.pdf"))
  ([path]
   ;; with-open guarantees the document is closed even when saving throws;
   ;; previously the PDDocument was never closed.
   (with-open [doc (PDDocument.)]
     (let [page (PDPage.)
           colour (doto (PDGamma.)
                    (.setR 1))
           border (doto (PDBorderStyleDictionary.)
                    (.setWidth 1))
           rect (doto (PDRectangle.)
                  (.setLowerLeftX 1)
                  (.setLowerLeftY 2)
                  (.setUpperRightX 100)
                  (.setUpperRightY 200))
           square (doto (PDAnnotationSquareCircle. PDAnnotationSquareCircle/SUB_TYPE_SQUARE)
                    (.setContents "square annotation")
                    (.setColour colour)
                    (.setBorderStyle border)
                    (.setRectangle rect))]
       (.add (.getAnnotations page) square)
       (.addPage doc page)
       (.save doc (str path))))))

(defn test2
  "Read a PDF and return the non-nil contents text of every annotation on
  its FIRST page (page index 0); other pages are not inspected.

  With no arguments the file \"/tmp/out2.pdf\" is read; `path` may be given
  to read a different file (it is converted with `str`).  The result is a
  fully realized seq of strings."
  ([] (test2 "/tmp/out2.pdf"))
  ([path]
   (with-open [doc (PDDocument/load (str path))]
     (let [annotations (-> doc .getDocumentCatalog .getAllPages (.get 0) .getAnnotations)]
       ;; `keep` drops nil contents.  `doall` forces the seq while the
       ;; document is still open; a lazy seq returned from inside with-open
       ;; would only be realized after the document had been closed.
       (doall (keep #(.getContents %) annotations))))))

;;
;; these are for pseudo-Twitter-like inline tagging: #tag, #"long tag", @user and @"long user"
;;

(defn find-tags
  "Return a flat lazy seq of every short tag found in the strings of `x`.

  A short tag is a `#` followed by one or more ASCII letters, digits or
  underscores.  The leading `#` is kept in each result, e.g. \"#foo\".
  Strings without tags contribute nothing; an empty or nil `x` gives an
  empty seq."
  [x]
  ;; The character class is spelled out because the range A-z also covers
  ;; the punctuation between Z and a ([ \ ] ^ _ `).  Underscore is kept on
  ;; purpose; the other punctuation no longer counts as part of a tag.
  (mapcat #(re-seq #"#[A-Za-z0-9_]+" %) x))

(defn find-long-tags
  "Return a flat lazy seq of every quoted (long) tag found in the strings
  of `x`.

  A long tag is `#\"` followed by one or more ASCII letters, digits,
  underscores or spaces and a closing `\"`.  Each result is the full match,
  including the leading `#` and both quotes, e.g. \"#\\\"big tag\\\"\".
  Strings without long tags contribute nothing; an empty or nil `x` gives
  an empty seq."
  [x]
  ;; The character class is spelled out because the range A-z also covers
  ;; the punctuation between Z and a ([ \ ] ^ _ `).  Underscore is kept on
  ;; purpose; the other punctuation no longer counts as part of a tag.
  (mapcat #(re-seq #"#\"[A-Za-z0-9_ ]+\"" %) x))

(defn find-users
  "Return a flat lazy seq of every short user mention found in the strings
  of `x`.

  A short mention is an `@` followed by one or more ASCII letters, digits
  or underscores.  The leading `@` is kept in each result, e.g. \"@bob\".
  Strings without mentions contribute nothing; an empty or nil `x` gives an
  empty seq."
  [x]
  ;; The character class is spelled out because the range A-z also covers
  ;; the punctuation between Z and a ([ \ ] ^ _ `), and the old class also
  ;; contained a literal comma, so \"@bob,\" was returned with the comma.
  ;; Underscore is kept on purpose; the other punctuation is excluded.
  (mapcat #(re-seq #"@[A-Za-z0-9_]+" %) x))

(defn find-long-users
  "Return a flat lazy seq of every quoted (long) user mention found in the
  strings of `x`, with the surrounding `@\"` and `\"` removed.

  A long mention is `@\"` followed by one or more ASCII letters, digits,
  underscores or spaces and a closing `\"`; for `@\"john smith\"` the
  result is \"john smith\".  Strings without long mentions contribute
  nothing; an empty or nil `x` gives an empty seq."
  [x]
  ;; The character class is spelled out because the range A-z also covers
  ;; the punctuation between Z and a ([ \ ] ^ _ `).  Underscore is kept on
  ;; purpose; the other punctuation no longer counts as part of a name.
  (->> x
       (mapcat #(re-seq #"@\"[A-Za-z0-9_ ]+\"" %))
       ;; drop the two leading characters (@ and the opening quote) and the
       ;; trailing closing quote
       (map (fn [match] (subs match 2 (dec (count match)))))))
