(ns analysis
  (:gen-class)
  (:import [java.io File])
  (:require [clojure.string :as s]
	    [clojure.contrib.duck-streams :as ds])
  (:use	[clojure.pprint]
        [clojure.contrib.shell-out]))

;; Sentinel standing in for "an unbounded number of runs"; returned by
;; number-of-independent-runs-required instead of evaluating the log formula
;; when the cumulative success probability is 0 or 1.
(def really-huge-number 99999999999999999999)

(defn probability-of-success
  "Koza's Y: the number of entries in `success-generations` that are equal
  to generation `i`, divided by `number-of-runs`."
  [success-generations number-of-runs i]
  (/ (count (for [generation success-generations
                  :when (= generation i)]
              generation))
     number-of-runs))

(defn cumulative-probability-of-success
  "Koza's P: the sum of `probability-of-success` over generations 0 through
  `i` inclusive. Returns 0 when `i` is negative.

  `i` is expected to be an integer generation index. `M` is accepted only to
  keep the signature parallel with the other statistics; it is not used.

  The sum is accumulated iteratively (not by self-recursion) so that a large
  `i` cannot exhaust the JVM stack. Terms are added starting from generation
  0, which is the same order in which the recursive formulation combined
  them."
  [success-generations number-of-runs M i]
  (if (< i 0)
    0
    (reduce +
            0
            (map #(probability-of-success success-generations number-of-runs %)
                 (range (inc i))))))

(defn number-of-independent-runs-required
  "Koza's R: the number of independent runs needed to obtain at least one
  success by generation `i` with probability `z`, i.e.
  ceil(log(1 - z) / log(1 - P)) where P is the cumulative probability of
  success by generation `i`.

  Special cases:
  - P = 0: no number of runs suffices, so `really-huge-number` is returned.
  - P >= 1: every run succeeds by generation `i`, so a single run is enough
    and 1.0 is returned (a double, like the result of Math/ceil)."
  [success-generations number-of-runs M i z]
  (let [cum-prob (cumulative-probability-of-success
                  success-generations number-of-runs M i)]
    (cond
      (zero? cum-prob) really-huge-number
      ;; log(1 - P) is undefined here; the formula's limit is a single run.
      (>= cum-prob 1) 1.0
      :else (Math/ceil (/ (Math/log (- 1 z))
                          (Math/log (- 1 cum-prob)))))))

(defn individuals-that-must-be-processed
  "Product of `M`, the number of generations up to and including `i`
  (that is, i + 1), and the number of independent runs required for
  generation `i` at probability `z`. Presumably Koza's I(M, i, z), with `M`
  the population size -- confirm against callers."
  [success-generations number-of-runs M i z]
  (let [runs-required (number-of-independent-runs-required
                       success-generations number-of-runs M i z)
        individuals-per-run (* M (inc i))]
    (* individuals-per-run runs-required)))

(defn computational-effort
  "Smallest value of `individuals-that-must-be-processed` taken over the
  generations 0 through G - 1."
  [success-generations number-of-runs M z G]
  (reduce min
          (for [generation (range G)]
            (individuals-that-must-be-processed
             success-generations number-of-runs M generation z))))

;; -----------------------------------------------------------------------------------------------------------

(defn str-contains?
  "Return the string `s` if the regex `rx` matches anywhere inside it,
  otherwise nil.

  Uses `re-find`, not a count of the pieces produced by splitting on `rx`:
  splitting discards trailing empty strings, so a match at the very end of
  `s` (or one covering all of `s`) would go undetected."
  [s rx]
  (when (re-find rx s)
    s))

(defn read-log-file
  "Read the log file `filename` and return a map summarising it:

  :filename       the argument as given
  :total-errs     values parsed from every line matching \"Total:\"
  :best-err       the minimum of :total-errs
  :avg-total-errs values parsed from lines matching
                  \"Average total errors in population:\"
  :med-total-errs values parsed from lines matching
                  \"Median total errors in population:\"
  :success-gen    value parsed from the first line matching
                  \"SUCCESS at generation\", or nil if there is none

  Each value is the last space-separated token of its line, parsed with
  `read-string`. NOTE(review): `read-string` runs the Clojure reader on the
  log text, so this should only be pointed at trusted log files."
  [filename]
  (let [lines (s/split-lines (slurp filename))
        ;; Parse the last space-separated token of a line.
        parse-last-token (fn [line]
                           (read-string (last (s/split line #" "))))
        ;; Eagerly collect the parsed trailing value of every line matching rx.
        values-for (fn [rx]
                     (doall (for [line lines
                                  :when (str-contains? line rx)]
                              (parse-last-token line))))
        success-line (some #(str-contains? % #"SUCCESS at generation") lines)
        total-errs (values-for #"Total:")
        avg-total-errs (values-for #"Average total errors in population:")
        med-total-errs (values-for #"Median total errors in population:")]
    {:filename filename
     :best-err (apply min total-errs)
     :total-errs total-errs
     :avg-total-errs avg-total-errs
     :med-total-errs med-total-errs
     :success-gen (when success-line
                    (parse-last-token success-line))}))

(defn analyze-logfiles
  "Read every log file named in `filenames` and return a summary map:

  :num-runs             number of filenames given
  :num-successes        number of logs that recorded a success generation
  :average-success-gen  mean success generation as a float, nil if no successes
  :successful-programs  a (filename success-program) list for one randomly
                        chosen successful log, nil if there were no successes
  :mean-best-fitness    mean of each log's :best-err as a float, nil if there
                        are no logs
  :computational-effort result of `computational-effort` at z = 0.99, nil if
                        there are no logs

  `max-gens` (default 1001) is the number of generations scanned by the
  effort calculation and `population-size` (default 1000) is passed to it as
  M. The one-argument arity uses those defaults.

  With an empty `filenames` the averaged entries are nil; no division by
  zero is attempted."
  ([filenames]
     (analyze-logfiles filenames 1001 1000))
  ([filenames max-gens population-size]
     (let [processed-logs (doall (map read-log-file filenames))
           success-logs (doall (remove #(nil? (:success-gen %)) processed-logs))
           success-gens (doall (map :success-gen success-logs))
           num-runs (count filenames)]
       {:num-runs num-runs
        :num-successes (count success-gens)
        :average-success-gen (when (seq success-gens)
                               (float (/ (reduce + success-gens)
                                         (count success-gens))))
        ;; NOTE(review): read-log-file as visible in this file never sets
        ;; :success-program, so the second element is nil unless that changes.
        :successful-programs (when (seq success-logs)
                               (let [chosen (rand-nth success-logs)]
                                 (list (:filename chosen)
                                       (:success-program chosen))))
        :mean-best-fitness (when (seq processed-logs)
                             (float (/ (apply + (map :best-err processed-logs))
                                       (count processed-logs))))
        :computational-effort (when (pos? num-runs)
                                (computational-effort
                                 success-gens num-runs population-size
                                 0.99 max-gens))})))

;; Disabled stdin-driven entry point (the #_ reader macro discards the form):
;; it would read lines from *in* as log filenames, print how many were read,
;; and print the result of analyze-logfiles for them.
#_(let [filenames (ds/read-lines (java.io.BufferedReader. *in*))]
  (println "Analyzing" (count filenames) "log files.")
  (println (analyze-logfiles filenames)))

;ls stacktags-dsoar-8x8/* | lein trampoline run analysis

;(defn grouper-fn
;  "Create a grouping function based upon a filepattern."

;; lein -m trampoline run analysis --group-a group-a-*.log --group-b group-b-has-adifferentname-*.log
(defn -main
  "Command-line entry point. `args` is read as alternating option/value
  pairs, e.g. `--group-a <pattern> --group-b <pattern>`. For each pair the
  pattern is expanded by running `ls <pattern>` through bash, and the
  resulting files are analysed with `analyze-logfiles` and printed.

  A group whose pattern matches no files is reported and skipped; its empty
  `ls` output is not treated as a filename."
  [& args]
  (let [;; Option names: strip the leading two characters (the \"--\").
        ;; `apply str` (unlike `reduce str`) always yields a string, even
        ;; when only one character remains.
        group-names (map #(keyword (apply str (drop 2 %))) (take-nth 2 args))
        ;; NOTE(review): the first character of every pattern is dropped, as
        ;; in the original code. Presumably callers prefix the pattern with
        ;; one character to stop their shell expanding it -- confirm.
        group-patterns (map #(apply str (drop 1 %)) (take-nth 2 (drop 1 args)))
        argmap (zipmap group-names group-patterns)]
    (println "Beginning analysis...")
    (doseq [[group-name group-pattern] argmap]
      ;; NOTE(review): the pattern is interpolated into a shell command so
      ;; that bash performs the glob expansion; only pass trusted patterns.
      (let [ls (sh "bash" :in (str "ls " group-pattern))
            ;; Empty output splits to [\"\"]; drop blank entries so they are
            ;; not mistaken for filenames.
            group (remove s/blank? (s/split-lines ls))]
        (println "Group" group-name "has" (count group) "members.")
        (if (empty? group)
          (println "No log files matched" group-pattern "- skipping analysis.")
          (println (analyze-logfiles group)))))))