; Copyright (c) Sławek Gwizdowski
;
; Permission is hereby granted, free of charge, to any person obtaining
; a copy of this software and associated documentation files (the "Software"),
; to deal in the Software without restriction, including without limitation
; the rights to use, copy, modify, merge, publish, distribute, sublicense,
; and/or sell copies of the Software, and to permit persons to whom the
; Software is furnished to do so, subject to the following conditions:
;
; The above copyright notice and this permission notice shall be included
; in all copies or substantial portions of the Software.
;
; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
; OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
; THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
; IN THE SOFTWARE.
;
(ns ^{:author "Sławek Gwizdowski"
      :doc "Essbase ASO export.

* Space separated.
* Quoted member names, non-quoted values.
* Variable column count, last column is always a value, every line is a cell.
* First line is a complete POV, other lines do minimal POV update only.
* So this whole file must be parsed in order...
* ... and all members must be mapped to dimensions properly.

To parse the export file you need to know one thing:

* Complete mapping of member name to dimension name in data storing dimensions.
"}
 szew.essbase.aso
  (:gen-class)
  (:require [szew.io :as io]
            [clojure.java.io :as clj.io :refer [reader]]
            [clojure-csv.core :as csv])
  (:import [java.io BufferedReader]))

;; Input spec that hands `processor` every member name found in the source,
;; in order of occurrence (duplicates included).
;;
;;   processor -- function of one argument: a seq of member names.
;;   encoding  -- encoding passed on to the reader.
(defrecord Members [processor encoding]
  io/Input
  (io/in! [spec source]
    (io! "Reading files here!"
         ;; mapcat is lazy and the reader is closed as soon as with-open
         ;; exits, so processor should consume the seq before it returns.
         (with-open [^BufferedReader rdr (reader source :encoding encoding)]
           (->> (csv/parse-csv rdr :delimiter \space)
                ;; Last column of each row is the value, not a member.
                (mapcat butlast)
                (processor))))))

(defn members
  "Builds a Members input spec.

  The processor is called once with every member name occurrence found in
  the file; the trailing value of each line is dropped before that.

  With no arguments: the processor collects names into a hash-set and the
  encoding is UTF-8.

  With spec: its entries are put on top of those defaults.
  "
  ([]
   (->Members (partial into #{}) "UTF-8"))
  ([spec]
   (let [defaults (members)]
     (into defaults spec))))

;; Input spec that turns every line of the source into a complete cell.
;;
;;   processor -- function of one argument: a seq of [{dimension member} value]
;;                vectors, one per line, each carrying the metadata keys
;;                :line, :source and :row.
;;   m->d      -- function (or map) from member name to dimension name.
;;   encoding  -- encoding passed on to the reader.
;;
;; Throws ex-info "Unknown dimension!" when m->d yields nil for any member.
;; A source without any rows gives processor an empty seq.
(defrecord Cells [processor m->d encoding]
  io/Input
  (io/in! [spec source]
    (letfn [;; Row -> partial cell: every column but the last is a member,
            ;; keyed by its dimension; the last column is the value.
            ;; idx comes from map-indexed, so :line is zero-based.
            (row->cell [idx row]
              (with-meta [(->> (butlast row)
                               (mapv (juxt m->d identity))
                               (into (hash-map)))
                          (last row)]
                {:line idx :source source :row row}))
            ;; A nil key means m->d did not know some member of this row.
            (ok? [cell]
              (if (contains? (first cell) nil)
                (throw (ex-info "Unknown dimension!"
                                {:cell cell
                                 :line (:line (meta cell))}))
                cell))
            ;; Carries the previous POV forward, overridden by whatever the
            ;; current row updates; value and metadata come from current row.
            (reductioner [prev curr]
              (with-meta [(merge (first prev) (first curr))
                          (last curr)]
                         (meta curr)))
            ;; reductions without an init calls (f) for an empty collection,
            ;; which reductioner has no arity for -- so only fold when there
            ;; is at least one row. lazy-seq keeps the pipeline lazy.
            (fill-pov [partials]
              (lazy-seq
                (when-let [s (seq partials)]
                  (reductions reductioner s))))]
      (io! "Reading files here!"
           (with-open [^BufferedReader r (reader source :encoding encoding)]
             (->> (csv/parse-csv r :delimiter \space)
                  (map-indexed row->cell)
                  (map ok?)
                  (fill-pov)
                  (processor)))))))

(defn cells
  "Builds a Cells input spec.

  The processor is called once with a seq of [{dimension member} value],
  one entry per cell (line) in the file.

  m->d must map every member in the file to its dimension; a member mapped
  to nil makes reading fail with ex-info.

  With no arguments: the processor is vec, m->d is an empty map and the
  encoding is UTF-8.

  With spec: its entries are put on top of those defaults.
  "
  ([]
   (->Cells vec {} "UTF-8"))
  ([spec]
   (let [defaults (cells)]
     (into defaults spec))))

;; Helper functions -- Members

(defn sniff-unknown
  "Returns a processor for Members that yields the distinct member names
  for which m->d gives nil, in order of first occurrence.
  "
  [m->d]
  (let [unmapped? (comp nil? m->d)]
    (fn distinct-unknown [names]
      ;; Filter eagerly into a vector first, so nothing lazy is left
      ;; reading from names once this function returns.
      (->> names
           (into [] (filter unmapped?))
           (distinct)))))

;; Helper functions -- Cells

(defn sniff-dimensions
  "A processor for Cells: returns the set of dimensions (map keys) of the
  first cell. Returns an empty set when there are no cells.
  "
  [cells]
  (-> cells
      ffirst   ; {dimension member} map of the first cell
      keys
      set))

(defn dump->tsv
  "Consolidates one or more dump files into a single, row-expanded TSV.

  m->d     -- member to dimension mapping, passed to the Cells spec.
  order    -- dimensions in the column order of the output; every row is
              the cell's member for each of them, followed by the value.
  out-path -- target of the TSV sink.
  in-path  -- first dump file, written through a default io/tsv sink.
  in-paths -- further dump files, written through an {:append true} sink.

  Returns nil.
  "
  [m->d order out-path in-path & in-paths]
  (let [cell->row (fn [cell]
                    (let [pov   (first cell)
                          value (last cell)]
                      (conj (mapv pov order) value)))
        ;; Cells spec that pushes expanded rows into out-path via tsv-spec.
        feeder    (fn [tsv-spec]
                    (let [sink (io/sink tsv-spec out-path)]
                      (cells {:processor (comp sink (partial map cell->row))
                              :m->d m->d})))]
    (io/in! (feeder (io/tsv)) in-path)
    (when (seq in-paths)
      (let [appender (feeder (io/tsv {:append true}))]
        (doseq [path in-paths]
          (io/in! appender path))))))
