(ns utilities.jsoup
  (:import (org.jsoup Jsoup)
           (org.jsoup.select Elements)
           (org.jsoup.nodes Node Element)))


(defn parse
  "Takes a string holding a complete HTML document and hands it to
   Jsoup/parse, returning the parsed result.
   Returns nil when html is nil (or otherwise falsey)."
  [^String html]
  (when-let [^String markup html]
    (Jsoup/parse markup)))


(defn select
  "Runs a CSS-esque selector against HTML and returns whatever matched.

   html         - either a raw HTML string (run through `parse` first) or
                  an already-parsed Jsoup value that supports `.select`
                  (an Element, which includes a parsed document, or an
                  Elements collection).
   css-selector - the selector string. For more on selector syntax, see:
                  http://jsoup.org/cookbook/extracting-data/selector-syntax

   Returns the selected Jsoup Elements, or nil when nothing matched or
   when html is nil."
  [html ^String css-selector]
  (let [node (if (string? html)
               (parse html)
               html)]
    ;; `parse` yields nil for nil input; guard here so a nil html gives nil
    ;; back instead of a NullPointerException from the interop call.
    (when node
      (let [^Elements result
            (cond
              ;; Hinted calls for the two Jsoup types that declare `select`,
              ;; so the common paths avoid reflection.
              (instance? Element node)  (.select ^Element node css-selector)
              (instance? Elements node) (.select ^Elements node css-selector)
              ;; Anything else keeps the reflective lookup, so inputs that
              ;; worked before still work.
              :else                     (.select node css-selector))]
        (when-not (.isEmpty result)
          result)))))


(defprotocol Selectable
  "Accessors for pulling attributes and text out of DOM-like data structures."
  (attrs [node] "Returns the attributes of the provided HTML element(s).")
  (text [node] "Returns the text that belongs to just this element.")
  (text-content [node] "Returns the text of this element together with all of its child elements."))

(extend-type Elements
  Selectable
  ;; Lazily yields one `attrs` result per element held in the collection.
  (attrs [elements]
    (for [element elements]
      (attrs element)))

  ;; NOTE: for an Elements collection `text` and `text-content` are the same
  ;; operation; both delegate to the collection's own `.text`.
  (text [elements]
    (.text elements))

  (text-content [elements]
    (.text elements)))

(extend-type Element
  Selectable
  ;; Returns a map of keywordized attribute name -> attribute value for
  ;; this element; an element without attributes gives an empty map.
  (attrs [this]
    (reduce
      ;; jsoup's Attribute implements java.util.Map$Entry, so `key`/`val`
      ;; read the same getKey/getValue pair through hinted interop instead
      ;; of a reflective method lookup on every attribute.
      (fn [res attr]
        (assoc res (keyword (key attr)) (val attr)))
      {}
      (.attributes this)))

  ;; Only the text owned directly by this element (`.ownText`).
  (text [this]
    (.ownText this))

  ;; Text of this element as returned by `.text`, which per the protocol
  ;; contract covers the element and all of its children.
  (text-content [this]
    (.text this)))
