(ns leafgrabber.free-text.author-gvelez
  (:use [leafgrabber.free-text.extractor :only (add-ext-group add-enum-regex)]
        )
  )

;; Declare the :price group; its members are the :raw_price_1 and
;; :raw_price_2 definitions that appear further down in this file.
(add-ext-group
  :price
  #{:raw_price_1
    :raw_price_2})

;; :dum_test -- judging by its name, a dummy entry used for testing.
;; Each pattern is a [regex result] pair; the literal token CORE inside a
;; pattern is presumably replaced by the :core regex, and a numeric result
;; presumably selects that capture group (confirm against the extractor).
(add-enum-regex
  {:name     :dum_test
   :core     #"test"
   :patterns [[#"CORE (stuff)" 1]
              [#"any (stuff)" 1]]
   :default  ""})

;; :raw_rating_1 -- captures a rating value written as "d", "d.d" or ".d"
;; from four markup/text shapes that mention "rating" / "ratingvalue":
;;   1. class="average <core> ..." title="<value>"
;;   2. itemprop="<core>"><value><
;;   3. property="review:<core>" content="<value>"
;;   4. user <core>: <value> out of 5 stars
;; Group 1 of each pattern is the value; the default is the empty string.
(add-enum-regex
  {:name     :raw_rating_1
   :core     #"(?:rating|ratingvalue)"
   :patterns [[#"class=\"average CORE [^\"]+\" title=\"(\d(?:\.\d)?|\.\d)\"" 1]
              [#"itemprop=\"CORE\">(\d(?:\.\d)?|\.\d)<" 1]
              [#"property=\"review:CORE\" content=\"(\d(?:\.\d)?|\.\d)\"" 1]
              [#"user CORE: (\d(?:\.\d)?|\.\d) out of 5 stars" 1]]
   :default  ""})

;; :raw_hours_1 -- opening hours that directly follow an "hours" heading.
;;
;; Shape of the single pattern:
;;   CORE, an optional ":", then any run of separators, then capture group 1.
;;   A separator is an HTML tag, one of . whitespace - , ; or the tokens
;;   &nbsp; &amp; "and" "& ".
;;   Group 1 is one or more repetitions of:
;;     one or more day names (mon, tue(s), wed(nes), thu(r)(s), fri, sat(ur),
;;     sun, each with an optional "day" suffix) separated as above, followed
;;     by one or more times (digits, optional :mm, optional am/pm with
;;     optional dots) separated as above.
;; NOTE(review): CORE is presumably replaced by the :core regex before
;; matching -- confirm in leafgrabber.free-text.extractor.
(add-enum-regex { :name :raw_hours_1
                  :core #"(?:open hours|hours of operation|hours)"
                 :patterns [
                     ;; [regex capture-group]; group 1 is the whole day/time run
                     [#"CORE\s*\:?(?:<[^>]+>|[\.\s\-,\;]|\&nbsp;|\&amp;|and|& )*((?:(?:(?:mon|tues?|wed(?:nes)?|thur?s?|fri|sat(?:ur)?|sun)(?:day)?(?:<[^>]+>|[\.\s\-,\;]|\&nbsp;|\&amp;|and|& )*)+(?:(?:\d+(?::\d\d)?\s*(?:a\.?m\.?|p\.?m\.?)?)(?:<[^>]+>|[\.\s\-,\;]|\&nbsp;|\&amp;|and|& )*)+(?:<[^>]+>|[\.\s\-,\;]|\&nbsp;|\&amp;|and|& )*)+)" 1]
                  ]
                  :default "" 
                  :comment " Match core with optional : 
                             Allow intervening tags, whitespace, . , ; & ' and ' or html codes for same
                             Match any sequence of days and times with possible intervening chars as above 
                   "
                })


;; :raw_hours_3 -- opening hours that follow a tag mentioning
;; "bizAdditionalInfo".
;;
;; Shape of the single pattern:
;;   an HTML tag whose text contains CORE, then a lazy skip (.*?) to the
;;   first day/time run, which is captured as group 1.
;;   The day/time run uses the same grammar as :raw_hours_1: repeated
;;   (day names + times), with tags, . whitespace - , ; &nbsp; &amp; "and"
;;   and "& " allowed as separators.
;; NOTE(review): CORE is presumably replaced by the :core regex before
;; matching -- confirm in leafgrabber.free-text.extractor.
(add-enum-regex { :name :raw_hours_3
                  :core #"bizAdditionalInfo"
                 :patterns [
                     ;; [regex capture-group]; group 1 is the whole day/time run
                     [#"<[^>]*CORE[^>]+>.*?((?:(?:(?:mon|tues?|wed(?:nes)?|thur?s?|fri|sat(?:ur)?|sun)(?:day)?(?:<[^>]+>|[\.\s\-,\;]|\&nbsp;|\&amp;|and|& )*)+(?:(?:\d+(?::\d\d)?\s*(?:a\.?m\.?|p\.?m\.?)?)(?:<[^>]+>|[\.\s\-,\;]|\&nbsp;|\&amp;|and|& )*)+(?:<[^>]+>|[\.\s\-,\;]|\&nbsp;|\&amp;|and|& )*)+)" 1]
                  ]
                  :default "" 
                  :comment "Match core inside a tag
                            Match the first following sequence of days and times with possible intervening chars
                            Allow intervening tags, whitespace, . , ; & ' and ' or html codes for same
                  "
                })


;; Declare the :full_hours group; its members are the :full_raw_hours_1 and
;; :full_raw_hours_3 definitions that follow.
(add-ext-group
  :full_hours
  #{:full_raw_hours_1
    :full_raw_hours_3})

;; :full_raw_hours_1 -- coarse grab of the text after "hours": up to 200
;; characters, then at least one digit, then up to 1000 more characters.
;; Group 1 is everything after the core; the default is the empty string.
(add-enum-regex
  {:name     :full_raw_hours_1
   :core     #"hours"
   :patterns [[#"CORE(.{0,200}\d+.{0,1000})" 1]]
   :default  ""})

;; :full_raw_hours_3 -- coarse grab of the text after "bizAdditionalInfo":
;; up to 300 characters, the word "hours", then up to 1000 more characters.
;; Group 1 is everything after the core; the default is the empty string.
(add-enum-regex
  {:name     :full_raw_hours_3
   :core     #"bizAdditionalInfo"
   :patterns [[#"CORE(.{0,300}hours.{0,1000})" 1]]
   :default  ""})



;; :raw_price_1 -- price statements introduced by a price-like phrase
;; ("price", "price range", "cost", "pricing", "average meal", "menu items"...).
;;
;; Patterns are [regex result] pairs. A string result is the literal value
;; to report; a numeric result presumably selects that capture group
;; (confirm against leafgrabber.free-text.extractor).
;;
;;   1. core appears inside a quoted attribute value (=  "...core) -> no-evidence
;;   2. a word hinting at group/shared/non-food spending           -> no-evidence
;;   3. "<core> is/was/at/for/: <price words>"                     -> group 1
;;
;; Changes from the previous revision:
;;   * "=s*" was a typo for "=\s*" (optional whitespace after "=").
;;   * :core is wrapped in (?:...) so its top-level alternation cannot split
;;     the surrounding pattern if CORE is substituted textually; the other
;;     alternation cores in this file are already grouped.
;;   * the exclusion words are anchored with \b, as in :raw_price_2, so
;;     short words such as "us" or "our" no longer match inside "house" or
;;     "hour".
(add-enum-regex
  {:name     :raw_price_1
   :core     #"(?:prices?(?: ranges?)?|cost|pricing|(?:average|most) (?:meal|entree)s?|menu items?)"
   :patterns [[#"=\s*\"[^\"]*CORE" "no-evidence"]
              [#"\b(?:group|two|three|shared?|both|us|our|bottle|gift|wine|appetizer|coupon|cover|sale|certificate)\b" "no-evidence"]
              [#"CORE ?(?:is|was|are|were|run|ran|at|for|range|:) ((?:from|somewhere|under|about|\$\d+\.?\d?\d?|to|around|[ \-])+)" 1]]
   :default  "no-evidence"})

;; :raw_price_2 -- price statements anchored on a dollar amount ($d or $d.dd).
;; The core itself is a capturing group, so in patterns containing CORE
;; group 1 is the dollar amount.
;;
;; Patterns are [regex result] pairs. A string result is the literal value
;; to report; a numeric result presumably selects that capture group
;; (confirm against leafgrabber.free-text.extractor).
;;
;;   1. amount appears inside a quoted attribute value        -> no-evidence
;;   2. words hinting at non-meal spending (drinks, tickets,
;;      coupons, parking, ...)                                -> no-evidence
;;   3. "<amount> value"                                      -> no-evidence
;;   4. "<amount> each / per person / a person"               -> group 1
;;   5. words hinting at a group or shared bill               -> no-evidence
;;   6. words naming a dish or meal                           -> group 1
;;
;; NOTE(review): pattern 6 contains neither CORE nor a capturing group of
;; its own, yet its result is 1 -- confirm how the extractor resolves it.
;;
;; Change from the previous revision: "=s*" was a typo for "=\s*"
;; (optional whitespace after "=").
(add-enum-regex
  {:name     :raw_price_2
   :core     #"(\$\d+(?:\.\d\d)?)"
   :patterns [[#"=\s*\"[^\"]*CORE" "no-evidence"]
              [#"\b(?:\=|save|gadget|drink|tea|perrier|beer|coffee|worth|seat|ticket|cover|coupon|groupon|million|parking|sales|pet|night|spa|wine|bottle|appetizer|gift)\b" "no-evidence"]
              [#"CORE value" "no-evidence"]
              [#"CORE (?:each|(?:per|a) person)" 1]
              [#"\b(?:group|two|people|we had|three|shared?|both|us|our|bottle|gift|wine|appetizer|coupon|cover|sale|certificate)\b" "no-evidence"]
              [#"(?:dish(?:es)?|meals?|specials?|combos?|lunch|breakfast|dinner|buffet|plate|roll|platter|got(?:ten)? the|had the|menu item|entree|order|all[\- ]you[\- ]can[\- ]eat)" 1]]
   :default  "no-evidence"})


;; :raw_chef_1 -- a two-word personal name next to "chef" / "chefs".
;;
;; Patterns are [regex result] pairs. A string result is the literal value
;; to report; a numeric result presumably selects that capture group
;; (confirm against leafgrabber.free-text.extractor).
;;
;;   1. core appears inside a quoted attribute value    -> no-evidence
;;   2. "<core>[,] Firstname Lastname"                  -> group 1
;;   3. "Firstname Lastname is the [executive ]<core>"  -> group 1
;;
;; Changes from the previous revision:
;;   * "=s*" was a typo for "=\s*" (optional whitespace after "=").
;;   * :core is now a regex literal, as in the earlier definitions in this
;;     file, instead of a plain string.
(add-enum-regex
  {:name     :raw_chef_1
   :core     #"chefs?"
   :patterns [[#"=\s*\"[^\"]*CORE" "no-evidence"]
              ;; Matching is currently case-insensitive only, so the
              ;; [A-Z]/[a-z] classes do not enforce capitalisation here;
              ;; case is to be validated later, in Scarecrow.
              [#"CORE,? ([A-Z][a-z]+ [A-Z][a-z'\-]+)" 1]
              [#"([A-Z][a-z]+ [A-Z][a-z'\-]+) is the (?:executive )?CORE" 1]]
   :default  "no-evidence"})


;; :raw_owner_1 -- a two-word personal name next to "owner" / "owners".
;;
;; Patterns are [regex result] pairs. A string result is the literal value
;; to report; a numeric result presumably selects that capture group
;; (confirm against leafgrabber.free-text.extractor).
;;
;;   1. core appears inside a quoted attribute value    -> no-evidence
;;   2. "<core>[,] Firstname Lastname"                  -> group 1
;;   3. "Firstname Lastname is the [executive ]<core>"  -> group 1
;;
;; Changes from the previous revision:
;;   * "=s*" was a typo for "=\s*" (optional whitespace after "=").
;;   * :core is now a regex literal, as in the earlier definitions in this
;;     file, instead of a plain string.
(add-enum-regex
  {:name     :raw_owner_1
   :core     #"owners?"
   :patterns [[#"=\s*\"[^\"]*CORE" "no-evidence"]
              ;; Matching is currently case-insensitive only, so the
              ;; [A-Z]/[a-z] classes do not enforce capitalisation here;
              ;; case is to be validated later, in Scarecrow.
              [#"CORE,? ([A-Z][a-z]+ [A-Z][a-z'\-]+)" 1]
              [#"([A-Z][a-z]+ [A-Z][a-z'\-]+) is the (?:executive )?CORE" 1]]
   :default  "no-evidence"})


;; :founded_1 -- the four-digit year in "founded in YYYY".
;;
;; Patterns are [regex result] pairs. A string result is the literal value
;; to report; a numeric result presumably selects that capture group
;; (confirm against leafgrabber.free-text.extractor).
;;
;;   1. core appears inside a quoted attribute value -> no-evidence
;;   2. "<core> in <4 digits>"                       -> group 1
;;
;; Changes from the previous revision:
;;   * "=s*" was a typo for "=\s*" (optional whitespace after "=").
;;   * :core is now a regex literal, as in the earlier definitions in this
;;     file, instead of a plain string.
(add-enum-regex
  {:name     :founded_1
   :core     #"founded"
   :patterns [[#"=\s*\"[^\"]*CORE" "no-evidence"]
              [#"CORE in (\d{4})" 1]]
   :default  "no-evidence"})
