(ns antistock.db.wikipedia
  (:refer-clojure :exclude [distinct group-by update])
  (:require [clj-time.coerce :refer [to-sql-time]]
            [datumbazo.core :refer :all]))

;; Table definition for `wikipedia.pages`: one row per Wikipedia page.
;;
;; NOTE(review): the primary key is declared on :url (which is also marked
;; :unique?), while the page-edits and page-views tables in this namespace
;; reference :wikipedia.pages/id. The :id column carries no unique or
;; primary-key option here -- confirm the real schema gives it one, since a
;; foreign key normally has to point at a unique column.
(deftable wikipedia-pages
  "The Wikipedia pages database table."
  (table :wikipedia.pages)
  ;; Auto-incrementing surrogate id, used as the foreign key target elsewhere.
  (column :id :serial)
  ;; Full page URL; required and unique.
  (column :url :text :not-null? true :unique? true)
  ;; Page title; nullable. `update-wikipedia-page-titles` fills it from :url.
  (column :title :text)
  ;; Row bookkeeping timestamps, both defaulting to the insertion time.
  (column :created-at :timestamp-with-time-zone :not-null? true :default "now()")
  (column :updated-at :timestamp-with-time-zone :not-null? true :default "now()")
  (primary-key :url))

;; Table definition for `wikipedia.page-edits`: one row per recorded edit of
;; a page in `wikipedia.pages`.
(deftable wikipedia-page-edits
  "The Wikipedia page edits database table."
  (table :wikipedia.page-edits)
  (column :id :serial :primary-key? true)
  ;; The edited page; required foreign key to wikipedia.pages/id.
  (column :page-id :integer :not-null? true :references :wikipedia.pages/id)
  ;; Author of the edit; nullable.
  (column :author :text)
  ;; NOTE(review): presumably the size of the page or edit in bytes -- confirm
  ;; the unit against the code that populates this table.
  (column :size :integer :not-null? true)
  ;; When the edit happened (as opposed to when this row was stored).
  (column :edited-at :timestamp-with-time-zone :not-null? true)
  ;; Row bookkeeping timestamps, both defaulting to the insertion time.
  (column :created-at :timestamp-with-time-zone :not-null? true :default "now()")
  (column :updated-at :timestamp-with-time-zone :not-null? true :default "now()"))

;; Table definition for `wikipedia.page-views`: a view counter per page,
;; language and date.
;;
;; NOTE(review): this table declares a primary key twice -- once via
;; `:primary-key? true` on :id and once via the composite
;; `(primary-key :page-id :language-id :date)`. A SQL table can only have one
;; primary key; confirm which one the real schema uses (the composite one was
;; possibly meant to be a unique constraint).
(deftable wikipedia-page-views
  "The Wikipedia page views database table."
  (table :wikipedia.page-views)
  (column :id :serial :primary-key? true)
  ;; The viewed page; required foreign key to wikipedia.pages/id.
  (column :page-id :integer :not-null? true :references :wikipedia.pages/id)
  ;; Language the views are counted for; required foreign key to languages/id.
  (column :language-id :integer :not-null? true :references :languages/id)
  ;; Number of views recorded for this page, language and date.
  (column :count :integer :not-null? true)
  ;; Date the views are counted for. Note the column type is :date, so it
  ;; carries no time-of-day component.
  (column :date :date :not-null? true)
  ;; Row bookkeeping timestamps, both defaulting to the insertion time.
  (column :created-at :timestamp-with-time-zone :not-null? true :default "now()")
  (column :updated-at :timestamp-with-time-zone :not-null? true :default "now()")
  (primary-key :page-id :language-id :date))

;; Counts rows (not the sum of the `count` column) in wikipedia.page-views
;; whose `date` falls on the same day as `time`.
;; The trailing :count is passed to `defquery1` after the query body;
;; presumably it is applied to the result row to extract the count value --
;; confirm against the datumbazo `defquery1` documentation.
(defquery1 count-page-views-at-day
  "Count the page view rows whose date is on the same day as `time`.
  `db` is the database to query, `time` is converted with `to-sql-time`."
  [db time]
  (select db ['(count *)]
    (from :wikipedia.page-views)
    ;; Both sides are cast to timestamp and truncated to the day before
    ;; being compared, so the time-of-day part of `time` is ignored.
    (where `(= (date_trunc "day" (cast :page-views.date :timestamp))
               (date_trunc "day" (cast ~(to-sql-time time) :timestamp)))))
  :count)

;; Counts rows (not the sum of the `count` column) in wikipedia.page-views
;; whose `date`, truncated to the hour, equals `time` truncated to the hour.
;;
;; NOTE(review): the `date` column is declared with type :date in the
;; wikipedia-page-views table, so truncating it to the hour presumably always
;; yields midnight. If so, this query can only match when `time` lies in the
;; 00:00 hour -- confirm whether an hourly timestamp column was intended.
;;
;; The trailing :count is passed to `defquery1` after the query body;
;; presumably it is applied to the result row to extract the count value --
;; confirm against the datumbazo `defquery1` documentation.
(defquery1 count-page-views-at-hour
  "Count the page view rows whose date is in the same hour as `time`.
  `db` is the database to query, `time` is converted with `to-sql-time`."
  [db time]
  (select db ['(count *)]
    (from :wikipedia.page-views)
    ;; Both sides are cast to timestamp and truncated to the hour before
    ;; being compared.
    (where `(= (date_trunc "hour" (cast :page-views.date :timestamp))
               (date_trunc "hour" (cast ~(to-sql-time time) :timestamp)))))
  :count)

;; Derives the `title` column of wikipedia.pages from the `url` column.
;; No where clause is given, so the update is not restricted to a subset of
;; rows (existing titles are overwritten as well).
(defquery update-wikipedia-page-titles
  "Update the Wikipedia page titles from their url."
  [db]
  (update db :wikipedia.pages
          ;; Sets title to the url with the leading "http(s)://<host>/wiki/"
          ;; part removed, i.e. whatever follows "/wiki/". The remainder is
          ;; stored as is; no URL-decoding or underscore replacement is done.
          '((= :title (regexp_replace :url "https?://[^/]+/wiki/" "")))))
