(ns burningswell.wikipedia.languages
  (:refer-clojure :exclude [replace])
  (:require [clj-http.client :as http]
            [clojure.string :refer [replace]]
            [net.cgrand.enlive-html :refer :all]))

;; Default Wikipedia page to fetch when `languages` is called without an
;; explicit url. Declared dynamic, so it can be rebound with `binding`.
(def ^:dynamic *url*
  "http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes")

(defn- fetch
  "Issue an HTTP GET request for `url`, asking clj-http for the body as
  a stream, and pass that body to `html-resource`. Returns whatever
  `html-resource` returns."
  [url]
  (let [request {:url url
                 :method :get
                 :as :stream}
        response (http/request request)]
    (html-resource (:body response))))

(defn- select-table
  "Return the first node of `document` matching the `table.wikitable`
  selector, or nil when nothing matches."
  [document]
  (-> document
      (select [:table.wikitable])
      first))

(defn- select-rows
  "Select every `tr` node in `table` except those that are the first of
  their type (presumably this skips the header row)."
  [table]
  (let [all-but-first-row [[:tr (but first-of-type)]]]
    (select table all-but-first-row)))

(defn- select-content
  "Return the first element of the `:content` of the first node in
  `node` that matches `selector`, or nil when there is no match or the
  matched node has no content."
  [node selector]
  (-> node
      (select selector)
      first
      :content
      first))

(defn- select-family
  "Read the content of the first `a` node inside the second `td` of
  `row` and return it with every en dash replaced by a plain hyphen.
  Returns nil when no such content is found."
  [row]
  (when-let [text (select-content row [[:td (nth-child 2)] :a])]
    (replace text "–" "-")))

(defn- select-name
  "Read the content of the first `a` node inside the third `td` of
  `row` and return it with every en dash replaced by a plain hyphen.
  Returns nil when no such content is found."
  [row]
  (when-let [label (select-content row [[:td (nth-child 3)] :a])]
    (replace label "–" "-")))

(defn- select-iso-639-1
  "Return the first content element of the fifth `td` of `row`, or nil
  when it is missing."
  [row]
  (let [fifth-cell [[:td (nth-child 5)]]]
    (select-content row fifth-cell)))

(defn- select-iso-639-2
  "Return the first content element of the sixth `td` of `row`, or nil
  when it is missing."
  [row]
  (let [sixth-cell [[:td (nth-child 6)]]]
    (select-content row sixth-cell)))

(defn- select-language
  "Build a language map from a table `row` with the keys `:name`,
  `:family`, `:iso-639-1` and `:iso-639-2`. Any value may be nil when
  the corresponding cell content is not found."
  [row]
  (let [language-name (select-name row)
        family        (select-family row)
        code-639-1    (select-iso-639-1 row)
        code-639-2    (select-iso-639-2 row)]
    {:name language-name
     :family family
     :iso-639-1 code-639-1
     :iso-639-2 code-639-2}))

(defn select-languages
  "Extract a lazy sequence of language maps from the parsed Wikipedia
  `document`: find the table, take its rows, and convert each row."
  [document]
  (->> document
       select-table
       select-rows
       (map select-language)))

(defn languages
  "Fetch the page at `url` (falling back to `*url*` when `url` is
  omitted or nil), parse the list of languages from it, and return them
  as a set of language maps."
  [& [url]]
  (let [source (or url *url*)]
    (-> source
        fetch
        select-languages
        set)))

(defn families
  "Return the set of `:family` values of the languages fetched from
  `url`. The optional `url` is passed straight through to `languages`.
  The set contains nil if any language map has a nil `:family`."
  [& [url]]
  (->> (languages url)
       (map :family)
       set))
