;;; 一些杂项的工具

(ns com.kahui.spiders.tools.misc
  (:import [org.jsoup Jsoup]
           [java.net URL]
           [clojure.lang ISeq])
  (:require [clojure.java.io :as io]))

(defn- extract-fn
  "Returns the text of the child at position `index` of `a`, obtained by
  calling `.child` and then `.text` on the result. Callers in this file pass
  rows selected from a Jsoup-parsed document."
  [a index]
  (let [cell (.child a index)]
    (.text cell)))

(defn- write-proxy
  "Writes every entry of `high-anonymous-proxy` to `out-file`, one per line,
  formatted as the entry's :host, a single space, its :port and a newline.
  Does nothing when `out-file` is nil. Always returns nil."
  [high-anonymous-proxy out-file]
  (when-not (nil? out-file)
    ;; with-open closes the writer even if a write throws.
    (with-open [^java.io.Writer sink (-> out-file io/file io/writer)]
      (doseq [entry high-anonymous-proxy]
        (let [line (str (:host entry) " " (:port entry) "\n")]
          (.write sink line))))))

;; Page URLs on cn-proxy.com. They are bound with `defonce`, so re-evaluating
;; this file does not rebind them once they have a value.
;; NOTE(review): judging by the names, `global-url` presumably lists proxies
;; outside China and `china-url` the domestic ones -- confirm against the site.
;; Presumably intended as the `url` argument of resolve-proxy-from-cn-proxy.
(defonce global-url "http://cn-proxy.com/archives/218")
(defonce china-url "http://cn-proxy.com/")
(defn resolve-proxy-from-cn-proxy
  "Parses proxy servers out of a cn-proxy.com page (e.g. http://cn-proxy.com/).

  [url]
    Fetches and parses `url` with Jsoup (timeout argument 10000) and returns a
    lazy seq of {:host .. :port ..} maps, one per row matched by the selector
    `table.sortable tbody tr`. :host is the text of the row's first child
    element and :port the text of the second. No filtering by anonymity level
    is applied here; every matched row is returned.

  [url out-file]
    Resolves the proxies as above and hands them to write-proxy together with
    `out-file`. Returns nil."
  ([url]
   (let [page (Jsoup/parse (URL. url) 10000)
         row->proxy (fn [row]
                      {:host (extract-fn row 0)
                       :port (extract-fn row 1)})]
     (map row->proxy (.select page "table.sortable tbody tr"))))
  ([url out-file]
   (-> (resolve-proxy-from-cn-proxy url)
       (write-proxy out-file))))

(defn resolve-proxy-from-cn-letushide
  "Extracts the proxies whose type column reads \"HAP\" from locally saved
  HTML files.

  [targets]
    `targets` is either a collection of file designators or a single one
    (anything `clojure.java.io/file` accepts); a non-collection value is
    treated as a one-element collection. Each file is parsed with Jsoup and
    every row matched by `table#basic tr` becomes a map with :host, :port and
    :type taken from the text of the row's child elements at positions 1, 2
    and 4. Only maps whose :type equals \"HAP\" are kept. Returns one flat
    lazy seq covering all files.

  [targets out-file]
    Resolves the proxies as above and hands them to write-proxy together with
    `out-file`. Returns nil."
  ([targets]
   (let [row->proxy (fn [row]
                      {:host (extract-fn row 1)
                       :port (extract-fn row 2)
                       :type (extract-fn row 4)})
         hap? (fn [entry] (= "HAP" (:type entry)))
         parse-target (fn [target]
                        ;; nil is passed as Jsoup's charset argument.
                        (let [page (Jsoup/parse (io/file target) nil)]
                          (->> (.select page "table#basic tr")
                               (map row->proxy)
                               (filter hap?))))
         sources (seq (if (coll? targets) targets [targets]))]
     ;; flatten only descends into sequential values, so the per-file seqs
     ;; are merged while the proxy maps themselves stay intact.
     (flatten (map parse-target sources))))
  ([targets out-file]
   (write-proxy (resolve-proxy-from-cn-letushide targets) out-file)))
