(ns signal.pipeline_schemas.messages
  (:require [malli.core :as m]
            [malli.util :as mu]
            [clojure.test.check.generators :as gen]))

(def ^:private s3-bucket-name?
  "A schema for S3 bucket names.

   A value is valid when it is a string of 3-63 characters that matches the
   bucket-name pattern below: dot-separated labels of lower-case letters,
   digits and dashes, each label starting and ending with a letter or digit,
   and the whole name not consisting solely of digits and dots.

   The attached generator only produces a subset of the valid names
   (a lower-case letter followed by lower-case letters/digits), which is
   enough to guarantee that every generated value passes validation."
  (m/-simple-schema
   {;; NOTE(review): the type keyword says `arn` although this schema validates
    ;; bucket *names*. It is left unchanged because callers may dispatch on it.
    :type :aws/s3-bucket-arn
    :pred #(and (string? %)
                ;; The pattern alone does not bound the length; enforce the
                ;; 3-63 range that the error message and JSON schema advertise.
                (<= 3 (count %) 63)
                ;; re-matches returns a vector of groups; coerce to a boolean.
                (some? (re-matches #"(?!^(\d+\.?)+$)(^(([a-z0-9]|[a-z0-9][a-z0-9\-]*[a-z0-9])\.)*([a-z0-9]|[a-z0-9][a-z0-9\-]*[a-z0-9])$)" %)))
    :type-properties {:error/message "Should match an AWS S3 bucket name (3-63 characters containing only lower-case characters, numbers, periods, and dashes.)"
                      :json-schema/type "string"
                      :json-schema/minLength 3
                      :json-schema/maxLength 63
                      ;; :gen/gen must be a generator value (not a function).
                      ;; A leading letter rules out the all-digit names that
                      ;; the pattern rejects; the tail brings the total length
                      ;; to 3-63 characters.
                      :gen/gen (gen/fmap (fn [[head tail]] (apply str head tail))
                                         (gen/tuple
                                          (gen/elements (vec "abcdefghijklmnopqrstuvwxyz"))
                                          (gen/vector
                                           (gen/elements (vec "abcdefghijklmnopqrstuvwxyz0123456789"))
                                           2 62)))}}))

(def path-bucket
  "Closed map schema for legacy path-bucket style messages.

   Entries: `:path` (a string) and `:bucket` (an S3 bucket name)."
  (-> [:map
       [:path string?]
       [:bucket s3-bucket-name?]]
      (mu/closed-schema)))

(def document-bucket
  "Closed map schema for legacy document-bucket style messages.

   Entries: `:document` (a string) and `:bucket` (an S3 bucket name)."
  (-> [:map
       [:document string?]
       [:bucket s3-bucket-name?]]
      (mu/closed-schema)))

(defn v2
  "Builds the schema for a v2 pipeline message.

   `data-labels` must be a non-empty collection of keywords. The resulting
   closed map schema has `:version` (always 2), `:id` (a string) and `:data`,
   where `:data` holds one entry per label, each a map with a string `:key`
   and an S3 bucket name `:bucket`."
  [data-labels]
  {:pre [(and (seq data-labels)
              (every? keyword? data-labels))]}
  (let [;; Every label under :data shares the same key/bucket shape.
        location-schema [:map
                         [:key string?]
                         [:bucket s3-bucket-name?]]
        add-label (fn [acc label]
                    (mu/assoc-in acc [:data label] location-schema))]
    (->> data-labels
         (reduce add-label [:map
                            [:version [:= 2]]
                            [:id string?]
                            [:data [:map]]])
         ;; Close the finished schema once all labels are in place.
         (mu/closed-schema))))
(comment
  ;; REPL example: build the v2 message schema for a single :topics label.
  (v2 [:topics]))

(defn output
  "Builds the schema for one line of pipeline bucket output.

   `data-labels` must be non-empty and is either:

   1. A collection of keywords - each label is added with the `any?` schema,
      so only its presence is validated.
   2. A hash-map of keyword to malli schema - each label is validated against
      the schema supplied for it.

   The result is a closed map schema that always contains a string `:id`."
  [data-labels]
  {:pre [(and (seq data-labels)
              (or
               (and (map? data-labels)
                    (every? keyword? (keys data-labels)))
               (every? keyword? data-labels)))]}
  (letfn [;; Normalise a label into a [key schema] pair: bare keywords get
          ;; `any?`, key/value entries are split into their two parts.
          (label->entry [label]
            (if (and (not (keyword? label)) (seq label))
              [(first label) (second label)]
              [label any?]))
          (add-entry [acc label]
            (let [[entry-key entry-schema] (label->entry label)]
              (mu/assoc acc entry-key entry-schema)))]
    (reduce add-entry
            [:map {:closed true}
             [:id string?]]
            data-labels)))

(comment
  ;; REPL examples: labels given as a list of keywords (presence only) ...
  (output [:topics])
  ;; ... and as a map of keyword -> schema (each value validated).
  (output {:topics [:map
                    [:name string?]]
           :entities any?}))
