(ns clj-uuid.node
  (:require [clj-uuid.util      :refer [java6? compile-if]]
            [clj-uuid.bitmop    :refer [sb8 assemble-bytes ldb dpb mask]]
            [clj-uuid.constants :refer :all]
            [clj-uuid.random    :as random])
  (:import  [java.net           InetAddress
                                NetworkInterface]
            [java.security      MessageDigest]
            [java.util          Properties]))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Clock Sequence                            [RFC4122:4.1.5 "CLOCK SEQUENCE"] ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; For time-based UUID's the "clock-sequence" value is a somewhat counter-
;; intuitively named value that is used to reduce the potential that duplicate
;; UUID's might be generated under unusual situations, such as if the system
;; hardware clock is set backward in time or if, despite all efforts otherwise,
;; a duplicate +node-id+ (see below) happens to be generated. This value is
;; initialized to a random 16-bit number once per lifetime of the system.

;; Chosen once: `rand-int` yields an integer in 0..0xfffe, so after `inc` the
;; value lies in 1..0xffff.  `defonce` leaves an already-bound value untouched
;; when the namespace is reloaded.
(defonce +clock-sequence+
  (-> 0xffff rand-int inc))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; NodeID Representation                               [RFC4122:4.1.6 "NODE"] ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; The representation of NodeID used for construction of time-based (v1) UUIDs
;; is a list with the following encoding semantics:
;;
;;               SIZE    TYPE      REPRESENTATION
;;  -----------+------+---------+---------------------------------------------
;;  node       |    6 |  ub48   |  (<BYTE> <BYTE> <BYTE> <BYTE> <BYTE> <BYTE>)
;;
;; Two other (computed) bytes are prepended to the node-id before
;; bitwise assembly:
;;
;;  (cons clock-high (cons clock-low @+node-id+))
;;
;;
;;      ( <BYTE> . <BYTE> . <BYTE> <BYTE> <BYTE> <BYTE> <BYTE> <BYTE>)
;;

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; NodeID Calculation  [RFC4122:4.5 "NODE IDS THAT DO NOT IDENTIFY THE HOST"] ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; This turns out to be surprisingly problematic.  I've tried various
;; approaches.  The most straightforward is the use of IEEE 802 MAC Address:
;;
;;     (.getHardwareAddress
;;       (java.net.NetworkInterface/getByInetAddress
;;         (java.net.InetAddress/getLocalHost))))))
;;
;; Unfortunately, I got reports of NPE on some platforms (openjdk?).  Also, it
;; discloses the hardware address of the host system -- this is how the
;; creator of the Melissa virus was actually tracked down and caught.
;;
;; Choosing the node-id randomly, on the other hand, does not provide
;; consistent generation of UUID's across runtimes.
;;
;; This topic is specifically addressed by the RFC:
;;
;;
;;   "A better solution is to obtain a 47-bit cryptographic quality random
;;   number and use it as the low 47-bits of the Node-ID, with the least
;;   significant bit of the first octet of the Node-ID set to one.  This
;;   bit is the unicast/multicast bit, which will never be set in IEEE 802
;;   addresses obtained from network cards.  Hence, there can never be a
;;   conflict between UUID's generated by machines with and without network
;;   cards."
;;
;;                               . . .
;;
;;   "In addition, items such as the computer's name and the name of the
;;   operating system, while not strictly speaking random, will help
;;   differentiate the results from those obtained by other systems...
;;   ... A generic approach... IS TO ACCUMULATE AS MANY SOURCES AS POSSIBLE
;;   INTO A BUFFER, USE A MESSAGE DIGEST SUCH AS MD5 OR SHA1, TAKE AN
;;   ARBITRARY 6 BYTES FROM THE HASH VALUE, AND SET THE MULTICAST BIT
;;   AS DESCRIBED ABOVE."
;;
;;     -- [RFC4122:4.5 "Node IDs that do not Identify the Host"]
;;
;;
;; We do exactly that, taking into account that the term "first octet"
;; in the above excerpt refers to network transmission order, and
;; 'bit-or' the corresponding bytes:
;;
;;     hi-byte | byte5 | byte4 | byte3 | byte2 | lo-byte
;;    ---------+-------+-------+-------+-------+---------
;;       0x00  |  0x00 |  0x00 |  0x00 |  0x00 |   0x01
;;
;; Thanks to Datastax and to @jjcomer for submitting the original patch
;; from which this current implementation is largely derived.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;


(def ^:private datasources
  "Names of the JVM system properties whose values are looked up and fed,
  together with the local addresses, into the node-id digest."
  ["java.vendor"
   "java.vendor.url"
   "java.version"
   "os.arch"
   "os.name"
   "os.version"])

(defn- all-local-addresses
  "Gathers string descriptions of this host's network identity.

  The result contains: `(str local-host)`, the canonical host name, the
  `str` of every InetAddress bound to every network interface, and the `str`
  of every address the canonical host name resolves to.  Duplicates are not
  removed once at least one interface has been visited, and the element
  order is whatever the `concat`/`conj` sequence below produces; callers
  that hash the result depend on that order, so it must not be changed.

  Exceptions raised by the underlying java.net calls (for example
  UnknownHostException from `InetAddress/getLocalHost`) are not caught here
  and propagate to the caller."
  []
  (let [^InetAddress local-host (InetAddress/getLocalHost)
        host-name (.getCanonicalHostName local-host)
        base-addresses #{(str local-host) host-name}
        ;; On Java 8 and earlier getNetworkInterfaces is documented to
        ;; return null when no interface can be found, and enumeration-seq
        ;; throws NullPointerException when handed null.  Only wrap a
        ;; non-nil enumeration; a nil here makes the reduce below return
        ;; base-addresses unchanged.
        interfaces (when-let [ifs (NetworkInterface/getNetworkInterfaces)]
                     (enumeration-seq ifs))
        network-interfaces (reduce (fn [acc ^NetworkInterface ni]
                                     (concat acc
                                       (map str (enumeration-seq
                                                  (.getInetAddresses ni)))))
                             base-addresses
                             interfaces)]
    (reduce conj network-interfaces
      (map str (InetAddress/getAllByName host-name)))))

(defn- make-node-id
  "Derives a six-element node identifier from host-specific data.

  The strings returned by `all-local-addresses`, plus the values of the
  system properties named in `datasources`, are fed into an MD5 digest
  (as UTF-8 bytes, or platform-default bytes when `(java6?)` holds at
  compile time).  The first six digest bytes are each combined via
  `bit-or` with the mask [0 0 0 0 0 1] and returned as a lazy sequence.
  Because Java bytes are signed, the elements may be negative.

  System properties that are not set are skipped rather than digested."
  []
  (let [addresses (all-local-addresses)
        ^MessageDigest digest (MessageDigest/getInstance "MD5")
        ^Properties    props  (System/getProperties)
        ;; `.getProperty` returns nil for an unset property and `.getBytes`
        ;; on nil would throw NullPointerException, so unset properties are
        ;; left out.  When every property is present the accumulated
        ;; collection (and therefore the digest) is the same as if each
        ;; value had been conj'ed unconditionally.
        to-digest (reduce (fn [acc key]
                            (if-let [value (.getProperty props key)]
                              (conj acc value)
                              acc))
                    addresses datasources)]
    (doseq [^String d to-digest]
      (compile-if (java6?)
        (.update digest (.getBytes d))
        (.update digest
          (.getBytes d java.nio.charset.StandardCharsets/UTF_8))))
    ;; NOTE(review): the mask sets the low bit of the LAST of the six bytes.
    ;; Whether that corresponds to the multicast bit RFC 4122 section 4.5
    ;; describes depends on the byte order used by `assemble-bytes` --
    ;; confirm before relying on it.  Changing it would alter every
    ;; generated node-id, so it is deliberately left as is.
    (map bit-or
      [0x00 0x00 0x00 0x00 0x00 0x01]
      (take 6 (seq (.digest digest))))))


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Public NodeID API
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(def node-id
  "Public name for the private `make-node-id` function: call with no
  arguments to compute the six-element node identifier."
  make-node-id)

;; Wrapped in a delay, so `node-id` is only invoked on first dereference and
;; the result is cached afterwards.  Two zero elements are placed in front of
;; the node-id elements before the whole sequence is passed to
;; `assemble-bytes`.
(def +node-id+
  (delay
    (->> (node-id)
         (list* 0 0)
         assemble-bytes)))

(defn- +v1-lsb+'
  "Combines `+clock-sequence+` with the dereferenced `+node-id+` into a
  single value.

  Two fields are extracted from the clock sequence with `ldb`: the one
  selected by `(mask 8 0)` and the one selected by `(mask 6 8)`.  The
  latter additionally has 0x2 deposited at `(mask 2 6)`.  Both are then
  deposited into `@+node-id+` at `(mask 8 48)` and `(mask 8 56)`
  respectively.

  NOTE(review): presumably `(mask width offset)`, `ldb` and `dpb` follow
  the Common Lisp byte-specifier conventions, which would make 0x2 the
  RFC 4122 variant bits -- confirm against clj-uuid.bitmop."
  []
  (let [seq-lo  (ldb (mask 8 0) +clock-sequence+)
        seq-hi  (dpb (mask 2 6) (ldb (mask 6 8) +clock-sequence+) 0x2)
        with-lo (dpb (mask 8 48) @+node-id+ seq-lo)]
    (dpb (mask 8 56) with-lo seq-hi)))

;; Zero-argument function memoized: the value is computed on the first call
;; and the cached result is returned on every later call.
(def +v1-lsb+
  (memoize +v1-lsb+'))

;; v6 lsb uses a cryptographically secure random node identifier that is
;; initialized at runtime.

(defn- +v6-lsb+'
  "Combines `+clock-sequence+` with a value obtained from `(random/long)`
  into a single value.

  Two fields are extracted from the clock sequence with `ldb`: the one
  selected by `(mask 8 0)` and the one selected by `(mask 6 8)`.  The
  latter additionally has 0x2 deposited at `(mask 2 6)`.  Both are then
  deposited into the random value at `(mask 8 48)` and `(mask 8 56)`
  respectively.

  NOTE(review): presumably `(mask width offset)`, `ldb` and `dpb` follow
  the Common Lisp byte-specifier conventions -- confirm against
  clj-uuid.bitmop."
  []
  (let [seq-lo  (ldb (mask 8 0) +clock-sequence+)
        seq-hi  (dpb (mask 2 6) (ldb (mask 6 8) +clock-sequence+) 0x2)
        with-lo (dpb (mask 8 48) (random/long) seq-lo)]
    (dpb (mask 8 56) with-lo seq-hi)))

;; Zero-argument function memoized: `(random/long)` is therefore drawn only
;; on the first call, and the same result is returned on every later call.
(def +v6-lsb+
  (memoize +v6-lsb+'))
