;;; Miscellaneous utilities
(ns com.kahui.spiders.tools.utils
  (:import [com.kahui.spiders.proxy Proxy ProxyPool]
           [java.util.regex Pattern])
  (:import [java.util.concurrent BlockingQueue]
           [java.util.concurrent TimeUnit BlockingQueue]
           [java.util.concurrent.atomic AtomicBoolean]
           [clojure.lang IFn])
  (:require [clojure.tools.logging :as logging]))

(defn- poll-and-process
  "One step of an agent-driven worker loop; meant to be run as an agent action.

  `running` is the agent's state: an AtomicBoolean used as a stop flag.
  While the flag is true, waits up to one second for an item on
  `task-queue`; if one arrives it is passed to `process-fn`. Any Exception
  raised while polling or processing is logged, and the step is always
  re-sent to the current agent so the loop keeps going.
  When the flag is false, logs a message and does not re-send, which ends
  the loop for this agent.

  Returns `running` so the agent's state stays the stop flag in both cases.
  Returns nil when not called from inside an agent action (`*agent*` is nil)."
  [^AtomicBoolean running ^IFn process-fn ^BlockingQueue task-queue]
  (when-let [a *agent*]
    (if (.get running)
      (try
        ;; poll with a timeout so the stop flag is re-checked regularly
        (when-let [item (.poll task-queue 1 TimeUnit/SECONDS)]
          (logging/info "Start item " item)
          (process-fn item)
          (logging/info "Finish item " item))
        (catch Exception e
          (logging/error e "process error"))
        (finally
          ;; Sends made inside an action are held until the action returns,
          ;; so this schedules the next step instead of recursing.
          (send-off a #(poll-and-process % process-fn task-queue))))
      (logging/info "Stop agent " a))
    ;; Keep the stop flag as the agent state on both branches.
    running))

(defn agent-batch-process
  "Processes queue items in parallel using agents and send-off.

  Creates `agents-count` agents that all hold the same AtomicBoolean
  (initially true) as their state, and sends the polling step
  `poll-and-process` with `process-fn` and `task-queue` to each of them.

  Returns a map `{:running flag}`. Setting the flag to false makes each
  worker stop re-scheduling itself the next time it checks the flag."
  [agents-count process-fn ^BlockingQueue task-queue]
  (let [stop-flag (AtomicBoolean. true)
        step (fn [state] (poll-and-process state process-fn task-queue))]
    (doseq [worker (repeatedly agents-count #(agent stop-flag))]
      (send-off worker step))
    {:running stop-flag}))

(defn create-proxy-pool
  "Builds a ProxyPool from a text file listing one proxy per line.

  Each line is split on whitespace into `host port [user [password]]`.
  When a line has no user or password, `proxy-user` / `proxy-password`
  are used instead. Lines without both a host and a port (including blank
  lines) are skipped. Every proxy gets a fresh session id: a random UUID
  with the dashes removed, upper-cased.

  `proxy-file` is anything accepted by clojure.java.io/reader.
  `cool-down-ms` is passed to the ProxyPool constructor and defaults to
  ProxyPool/COOL_DOWN_MS in the three-argument arity.

  Returns the populated pool. Throws NumberFormatException if a port is
  not a valid integer."
  ([proxy-file ^String proxy-user ^String proxy-password]
   (create-proxy-pool proxy-file ProxyPool/COOL_DOWN_MS proxy-user proxy-password))
  ([proxy-file cool-down-ms ^String proxy-user ^String proxy-password]
   (let [pool (ProxyPool. cool-down-ms)]
     ;; with-open guarantees the reader is closed even if a line fails to parse
     (with-open [rdr (clojure.java.io/reader proxy-file)]
       (doseq [^String line (line-seq rdr)]
         ;; trim first: leading whitespace would otherwise yield an empty
         ;; first token and shift host/port by one position
         (let [[host port user password] (.split (.trim line) "\\s+")]
           (when (and host port)
             (let [u (if (nil? user) proxy-user user)
                   p (if (nil? password) proxy-password password)
                   session-id (-> (java.util.UUID/randomUUID)
                                  str
                                  (.replace "-" "")
                                  (.toUpperCase))]
               ;; NOTE(review): the meaning of the third Proxy constructor
               ;; argument (0) is not visible in this file - confirm.
               (.put pool (Proxy. host (Integer/parseInt port) 0 u p session-id)))))))
     pool)))


(defn re-match-group
  "Finds the first match of `re-exp` in `content` and returns capture group
  `group-index` of that match with surrounding whitespace trimmed.

  Returns nil when `content` is nil, when the pattern does not match, or
  when the pattern matches but the requested group did not take part in
  the match (for example an optional group)."
  [^Pattern re-exp ^String content ^Integer group-index]
  (when content
    (let [matcher (re-matcher re-exp content)]
      (when (.find matcher)
        ;; Matcher.group returns null for a group that did not participate;
        ;; guard it so .trim is never called on null.
        (when-let [captured (.group matcher group-index)]
          (.trim captured))))))

(defn write-gzip-file
  "Writes `content` to `file` with gzip compression.

  `file` is anything accepted by clojure.java.io/output-stream. The text
  written is what `print` emits for `content`, encoded with the default
  encoding of clojure.java.io/writer. The writer (and the streams it wraps)
  is closed before returning."
  [file content]
  (with-open [gz-writer (clojure.java.io/writer
                          (java.util.zip.GZIPOutputStream.
                            (clojure.java.io/output-stream file)))]
    ;; route `print` output into the compressed writer
    (binding [*out* gz-writer]
      (print content))))

(defn read-gzip-file
  "Reads a gzip-compressed file and returns its decompressed content as a
  String.

  `file` is anything accepted by clojure.java.io/input-stream. The stream
  is closed before returning."
  [file]
  (with-open [gz-in (-> file
                        clojure.java.io/input-stream
                        java.util.zip.GZIPInputStream.)]
    (slurp gz-in)))


(defn read-file-by-suffix
  "Reads a file's content as a String, choosing the reader by file suffix.

  If the file name ends with `.gz` the content is read through
  `read-gzip-file`; any other file is read with `slurp`."
  [file]
  (let [base-name (.getName (clojure.java.io/file file))]
    (if (.endsWith base-name ".gz")
      (read-gzip-file file)
      (slurp file))))
