;;   Copyright (c) Zachary Tellman. All rights reserved.
;;   The use and distribution terms for this software are covered by the
;;   Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
;;   which can be found in the file epl-v10.html at the root of this distribution.
;;   By using this software in any fashion, you are agreeing to be bound by
;;   the terms of this license.
;;   You must not remove this notice, or any other, from this software.

(ns lamina.stats
  (:use
    [lamina.core.utils :only (log-warn)]
    [lamina core api]
    [lamina.core.channel :only (mimic)]
    [lamina.stats.utils :only (update)])
  (:require
    [lamina.time :as t]
    [lamina.stats.sample :as sample]
    [lamina.stats.moving-average :as avg]
    [lamina.stats.variance :as var]
    [lamina.stats.math :as math])
  (:import
    [java.util.concurrent.atomic
     AtomicLong
     AtomicBoolean]))

;;;

(defn moving-sample
  "Accumulates a representative sample of the values passing through `ch`, biased towards
   values seen within the last `window` milliseconds.

   The current sample is emitted every `period` milliseconds.  The `options` map is handed
   as-is to the moving sampler and is also merged into the options of the accumulating bridge."
  ([ch]
     (moving-sample nil ch))
  ([{:keys [window period sample-size task-queue] :as options} ch]
     (let [sampler (sample/moving-sampler options)
           ;; each incoming message updates the sampler; each emission derefs it
           handlers {:accumulator (fn [msg] (update sampler msg))
                     :emitter (fn [] @sampler)}]
       (bridge-accumulate ch (mimic ch) "moving-sample"
         (merge options handlers)))))

(defn sample
  "Accumulates a representative sample of the values passing through `ch`.

   The current sample is emitted every `period` milliseconds.  The `options` map is handed
   as-is to the sampler and is also merged into the options of the accumulating bridge."
  ([ch]
     (sample nil ch))
  ([{:keys [window period sample-size task-queue] :as options} ch]
     (let [sampler (sample/sampler options)
           ;; each incoming message updates the sampler; each emission derefs it
           handlers {:accumulator (fn [msg] (update sampler msg))
                     :emitter (fn [] @sampler)}]
       (bridge-accumulate ch (mimic ch) "sample"
         (merge options handlers)))))

;;;

(defn- number-accumulator
  "Builds an options map whose :accumulator applies `f` to every numeric message.  A message
   that is not a number is reported through `log-warn`, labelled with `name`, and is never
   passed to `f`."
  [name f]
  (let [prefix (format "non-numerical value in '%s':" name)]
    {:accumulator (fn [msg]
                    (if (number? msg)
                      (f msg)
                      (log-warn prefix (pr-str msg))))}))

(defn sum
  "Returns a channel that will periodically emit the sum of all messages emitted by the source
   channel over the last `period` milliseconds, with a default of 1000.

   Each numeric message is coerced to a double before being added, and the emitted sum is a
   double.  Non-numeric messages are logged and skipped."
  ([ch]
     (sum nil ch))
  ([{:keys [period task-queue] :as options} ch]
     (let [;; the running total is a double whose raw bits live in an AtomicLong,
           ;; which lets it be updated with compare-and-set
           bits (AtomicLong. 0)
           add! (fn [n]
                  (loop []
                    (let [seen (.get bits)
                          total (+ (Double/longBitsToDouble seen) (double n))]
                      ;; retry if another update landed between the read and the write
                      (when-not (.compareAndSet bits seen
                                  (Double/doubleToRawLongBits total))
                        (recur)))))
           ;; atomically read the total and reset it to zero
           drain! (fn []
                    (Double/longBitsToDouble
                      (.getAndSet bits
                        (Double/doubleToRawLongBits 0))))]
       (bridge-accumulate ch (mimic ch) "sum"
         (merge options
           (number-accumulator "sum" add!)
           {:emitter drain!})))))

(defn rate
  "Returns a channel that will periodically emit the number of messages emitted by the source
   channel over the last `period` milliseconds, with a default of 1000.

   Every message counts once, whatever its value."
  ([ch]
     (rate nil ch))
  ([{:keys [period task-queue] :as options} ch]
     (let [counter (AtomicLong. 0)
           ;; the message itself is ignored; only its arrival is counted
           count-message (fn [_] (.incrementAndGet counter))
           ;; atomically read the count and reset it to zero
           take-count (fn [] (.getAndSet counter 0))]
       (bridge-accumulate ch (mimic ch) "rate"
         (merge options
           {:accumulator count-message
            :emitter take-count})))))

(defn moving-average
  "Returns a channel that will periodically emit the moving average over all messages emitted by
   the source channel every `period` milliseconds, defaulting to once every five seconds.  This
   moving average is exponentially weighted to the last `window` milliseconds, defaulting to the
   last five minutes.

   Non-numeric messages are logged and skipped."
  ([ch]
     (moving-average nil ch))
  ([{:keys [period window task-queue]
     :or {window (t/minutes 5)
          period (t/period)}
     :as options}
    ch]
     (let [average (avg/moving-average period window)
           accumulate (number-accumulator "moving-average"
                        (fn [n] (update average n)))
           emit {:emitter (fn [] @average)}]
       (bridge-accumulate ch (mimic ch) "moving-average"
         (merge options accumulate emit)))))

(defn moving-quantiles
  "Returns a channel that will periodically emit a map of quantile values every `period`
   milliseconds, which represent the statistical distribution of values emitted by the source
   channel, weighted towards the last `window` milliseconds.

   The map will be of quantile onto quantile value, so for a uniform distribution of values from
   1..1000, it would emit

     {0.5 500, 0.75 750, 0.95 950, 0.99 990, 0.999 999}

   By default, the above quantiles will be used, these can be specified as a sequence of quantiles
   of the form [0.5 0.75 0.99 0.999].

   Non-numeric messages are logged and skipped."
  ([ch]
     (moving-quantiles nil ch))
  ([{:keys [period
            window
            quantiles
            task-queue
            sample-size]
     :or {quantiles [0.5 0.75 0.95 0.99 0.999]
          task-queue (t/task-queue)
          window (t/minutes 5)}
     :as options}
    ch]
     (let [sampler (sample/moving-sampler options)
           accumulate (number-accumulator "moving-quantiles"
                        (fn [n] (update sampler n)))
           samples (bridge-accumulate ch (mimic ch) "moving-quantiles"
                     (merge options accumulate {:emitter (fn [] @sampler)}))
           ;; turn each emitted sample into a map of quantile -> value
           summarize (fn [sample]
                       (zipmap quantiles (math/quantiles sample quantiles)))]
       (map* summarize samples))))

(defn quantiles
  "Returns a channel that will periodically emit a map of quantile values every `period`
   milliseconds, which represent the statistical distribution of values emitted by the source
   channel.

   The map will be of quantile onto quantile value, so for a uniform distribution of values from
   1..1000, it would emit

     {0.5 500, 0.75 750, 0.95 950, 0.99 990, 0.999 999}

   By default, the above quantiles will be used, these can be specified as a sequence of quantiles
   of the form [0.5 0.75 0.99 0.999].

   Non-numeric messages are logged and skipped."
  ([ch]
     ;; delegate to this function's own two-argument arity, so that the unweighted
     ;; sampler is used rather than the moving (time-weighted) one
     (quantiles nil ch))
  ([{:keys [period
            quantiles
            task-queue
            sample-size]
     :or {quantiles [0.5 0.75 0.95 0.99 0.999]
          task-queue (t/task-queue)}
     :as options}
    ch]
     (let [sampler (sample/sampler options)
           ch* (bridge-accumulate ch (mimic ch) "quantiles"
                 (merge options
                   (number-accumulator "quantiles" #(update sampler %))
                   {:emitter #(deref sampler)}))]
       ;; turn each emitted sample into a map of quantile -> value
       (map*
         #(zipmap quantiles (math/quantiles % quantiles))
         ch*))))

(defn variance
  "Returns a channel that will periodically emit the variance of all values emitted by the source
   channel every `period` milliseconds.

   Non-numeric messages are logged and skipped."
  ([ch]
     (variance nil ch))
  ([{:keys [period task-queue]
     :as options}
    ch]
     (let [state (atom (var/create-variance))
           accumulate (number-accumulator "variance"
                        (fn [n] (swap! state update n)))
           emit {:emitter (fn [] (var/variance @state))}]
       (bridge-accumulate ch (mimic ch) "variance"
         (merge options accumulate emit)))))

(defn- abs
  "Returns the absolute value of `x`, coerced to a double."
  [x]
  (-> x double Math/abs))

(defn outliers
  "Returns a channel that will emit outliers from the source channel, as measured by the standard
   deviations from the mean value of (facet msg).  Outlier status is determined by
   'variance-predicate', which is given the standard deviations from the mean, and returns true
   or false.  By default, it will return true for any value where the absolute value is greater
   than three.

   Messages for which (facet msg) is nil or false are ignored.  No outliers are emitted until a
   predicate has been computed from a non-zero standard deviation.

   For instance, to monitor function calls that take an unusually long or short time via a
   `return` probe:

     (outliers :duration (probe-channel :name:return))

   To only receive outliers that are longer than the mean, define a custom :variance-predicate

     (outliers
       :duration
       {:window (lamina.time/minutes 15)
        :variance-predicate #(< 3 %)}
       (probe-channel :name:return))

   :window describes the window of the moving average, which defaults to five minutes.  This can
   be used to adjust the responsiveness to long-term changes to the mean."
  ([facet ch]
     (outliers facet nil ch))
  ([facet
    {:keys [window
            variance-predicate
            task-queue]
     :or {window (t/minutes 5)
          variance-predicate #(< 3 (abs (double %)))
          task-queue (t/task-queue)}}
    ch]
     (let [avg (avg/moving-average (t/seconds 5) window)
           vr (atom (var/create-variance))
           ch* (mimic ch)
           ;; every 5000 milliseconds, capture the current mean and standard deviation in a
           ;; fresh predicate; nil is produced while the standard deviation is still zero
           predicates (periodically 5000
                        (fn []
                          (let [mean @avg
                                std-dev (var/std-dev @vr)]
                            (when-not (zero? std-dev)
                              ;; the argument is the already-extracted facet value, so the
                              ;; facet must not be applied to it a second time
                              (fn [val]
                                (variance-predicate (/ (- val mean) std-dev))))))
                        task-queue)
           ;; NOTE(review): presumably holds the most recently emitted predicate -- confirm
           ;; against atom-sink
           f (atom-sink predicates)]

       ;; stop producing predicates once the source channel is drained
       (on-drained ch #(close predicates))

       (bridge-join ch ch* "outliers"
         (fn [msg]
           (when-let [val (facet msg)]
             (update avg val)
             (swap! vr update val)
             (when-let [f @f]
               (when (f val)
                 (enqueue ch* msg))))))
       ch*)))
