def setup(self):
    """Build the grammar elements for dates written with English words.

    Populates ``self.plusdate``, ``self.dayname``, ``self.time``,
    ``self.month``, ``self.dmy``, ``self.datetime``, ``self.bundle``,
    ``self.torange`` and ``self.all``. Reads ``self.time12``,
    ``self.time24``, ``self.day``, ``self.year`` and ``self.simple``,
    which must already be set.
    """

    def at_hour(hour):
        # Callback producing an exact time on the given hour.
        return lambda p, dt: adatetime(hour=hour, minute=0, second=0,
                                       microsecond=0)

    def day_offset(days):
        # Callback producing the calendar date `days` away from the base date.
        def to_date(p, dt):
            d = dt.date() + timedelta(days=days)
            return adatetime(year=d.year, month=d.month, day=d.day)
        return to_date

    # Relative offsets: one alternation of unit names per unit.
    self.plusdate = PlusMinus(
        "years|year|yrs|yr|ys|y",
        "months|month|mons|mon|mos|mo",
        "weeks|week|wks|wk|ws|w",
        "days|day|dys|dy|ds|d",
        "hours|hour|hrs|hr|hs|h",
        "minutes|minute|mins|min|ms|m",
        "seconds|second|secs|sec|s",
    )

    # "next"/"last" followed by a day name.
    weekday_patterns = (
        "monday|mon|mo",
        "tuesday|tues|tue|tu",
        "wednesday|wed|we",
        "thursday|thur|thu|th",
        "friday|fri|fr",
        "saturday|sat|sa",
        "sunday|sun|su",
    )
    self.dayname = Daynames("next", "last", weekday_patterns)

    # Named times of day.
    midnight = Regex("midnight", at_hour(0))
    noon = Regex("noon", at_hour(12))
    now = Regex("now", lambda p, dt: dt)
    self.time = Choice((self.time12, self.time24, midnight, noon, now),
                       name="time")

    # Named dates relative to the base date.
    tomorrow = Regex("tomorrow", day_offset(+1))
    yesterday = Regex("yesterday", day_offset(-1))
    thisyear = Regex("this year", lambda p, dt: adatetime(year=dt.year))
    thismonth = Regex("this month",
                      lambda p, dt: adatetime(year=dt.year, month=dt.month))
    today = Regex("today",
                  lambda p, dt: adatetime(year=dt.year, month=dt.month,
                                          day=dt.day))

    self.month = Month(
        "january|jan",
        "february|febuary|feb",
        "march|mar",
        "april|apr",
        "may",
        "june|jun",
        "july|jul",
        "august|aug",
        "september|sept|sep",
        "october|oct",
        "november|nov",
        "december|dec",
    )

    # If you specify a day number you must also specify a month... this
    # Choice captures that constraint
    date_forms = (
        Sequence((self.day, self.month, self.year), name="dmy"),
        Sequence((self.month, self.day, self.year), name="mdy"),
        Sequence((self.year, self.month, self.day), name="ymd"),
        Sequence((self.year, self.day, self.month), name="ydm"),
        Sequence((self.day, self.month), name="dm"),
        Sequence((self.month, self.day), name="md"),
        Sequence((self.month, self.year), name="my"),
        self.month,
        self.year,
        self.dayname,
        tomorrow,
        yesterday,
        thisyear,
        thismonth,
        today,
        now,
    )
    self.dmy = Choice(date_forms, name="date")

    # A time and a date in either order.
    self.datetime = Bag((self.time, self.dmy), name="datetime")
    self.bundle = Choice((self.plusdate, self.datetime, self.simple),
                         name="bundle")
    # "<bundle> to <bundle>" ranges, tried before a single bundle.
    self.torange = Combo((self.bundle, "to", self.bundle), name="torange")
    self.all = Choice((self.torange, self.bundle), name="all")
class DateParser(object):
    """Base class for locale-specific parser classes.

    Subclasses implement :meth:`setup`, which must set ``self.all`` (the
    element returned by :meth:`get_parser`).
    """

    # Day-of-month number (1-39 as written) followed by a non-word character
    # or end of input.
    # NOTE(review): the trailing ``(?!=:)`` is a negative lookahead for the
    # literal text "=:"; if the intent was "not followed by a colon" it would
    # be ``(?!:)`` -- confirm before changing.
    day = Regex("(?P<day>([123][0-9])|[1-9])(?=(\\W|$))(?!=:)",
                lambda p, dt: adatetime(day=p.day))

    # Four-digit year followed by a non-word character or end of input.
    year = Regex("(?P<year>[0-9]{4})(?=(\\W|$))",
                 lambda p, dt: adatetime(year=p.year))

    # 24-hour time: hh:mm with optional :ss and optional fractional part.
    time24 = Regex("(?P<hour>([0-1][0-9])|(2[0-3])):(?P<mins>[0-5][0-9])(:(?P<secs>[0-5][0-9])(\\.(?P<usecs>[0-9]{1,5}))?)?(?=(\\W|$))",
                   lambda p, dt: adatetime(hour=p.hour, minute=p.mins,
                                           second=p.secs,
                                           microsecond=p.usecs))
    time12 = Time12()

    def __init__(self):
        """Create the locale-independent "simple" numeric parser, then call
        :meth:`setup` for the locale-specific elements.
        """
        # Fixed-width numeric fields from year down to microsecond.
        simple_fields = (
            "(?P<year>[0-9]{4})",
            "(?P<month>[0-1][0-9])",
            "(?P<day>[0-3][0-9])",
            "(?P<hour>([0-1][0-9])|(2[0-3]))",
            "(?P<minute>[0-5][0-9])",
            "(?P<second>[0-5][0-9])",
            "(?P<microsecond>[0-9]{6})",
        )
        simple_seq = Sequence(simple_fields, sep="[- .:/]*", name="simple",
                              progressive=True)
        # The numeric run must be followed by whitespace or end of input.
        self.simple = Sequence((simple_seq, "(?=(\\s|$))"), sep='')

        self.setup()

    def setup(self):
        """Define the locale-specific elements; must be overridden."""
        raise NotImplementedError

    # Public parsing interface

    def get_parser(self):
        """Return the top-level parser element (``self.all``)."""
        return self.all

    def parse(self, text, dt, pos=0, debug=-9999):
        """Parse ``text`` from ``pos`` relative to the base datetime ``dt``.

        Returns a ``(result, newpos)`` tuple as produced by the top-level
        element; ``adatetime``/``timespan`` results are first passed through
        ``disambiguated(dt)``.
        """
        result, newpos = self.get_parser().parse(text, dt, pos=pos,
                                                 debug=debug)
        if isinstance(result, (adatetime, timespan)):
            result = result.disambiguated(dt)
        return (result, newpos)

    def date_from(self, text, basedate=None, pos=0, debug=-9999, toend=True):
        """Return the date parsed from ``text``.

        ``basedate`` defaults to ``datetime.utcnow()``. When ``toend`` is
        true the top-level element is wrapped in ``ToEnd``.
        ``adatetime``/``timespan`` results are passed through
        ``disambiguated(basedate)``.
        """
        if basedate is None:
            basedate = datetime.utcnow()

        parser = self.get_parser()
        if toend:
            parser = ToEnd(parser)

        result = parser.date_from(text, basedate, pos=pos, debug=debug)
        if not isinstance(result, (adatetime, timespan)):
            return result
        return result.disambiguated(basedate)
def test_date_range():
    """Bracketed ``[a to b]`` date ranges, quoted and unquoted, including
    open-ended ranges, parse to ``DateRange`` queries.
    """
    schema = fields.Schema(text=fields.TEXT, date=fields.DATETIME)
    qp = qparser.QueryParser("text", schema)
    basedate = datetime(2010, 9, 20, 15, 16, 6, 454000)
    qp.add_plugin(dateparse.DateParserPlugin(basedate))

    march_30 = adatetime(2010, 3, 30).floor()
    next_wednesday = adatetime(2010, 9, 22).ceil()

    # (query string, expected start, expected end)
    cases = [
        ("date:['30 march' to 'next wednesday']", march_30, next_wednesday),
        ("date:[to 'next wednesday']", None, next_wednesday),
        ("date:['30 march' to]", march_30, None),
        ("date:[30 march to next wednesday]", march_30, next_wednesday),
        ("date:[to next wednesday]", None, next_wednesday),
        ("date:[30 march to]", march_30, None),
    ]
    for qstring, expected_start, expected_end in cases:
        q = qp.parse(u(qstring))
        assert_equal(q.__class__, query.DateRange)
        assert_equal(q.startdate, expected_start)
        assert_equal(q.enddate, expected_end)
def _parse_datestring(self, qstring):
    """Parse a very simple datetime representation into an ``adatetime``.

    The accepted form is ``YYYY[MM[DD[hh[mm[ss[uuuuuu]]]]]]``. Spaces,
    hyphens and periods are removed before the fields are read. Fields the
    string is too short to contain are left as ``None``; microseconds are
    only read when the stripped string is exactly 20 characters long.

    :param qstring: the date string to parse.
    :returns: the result of ``fix(adatetime(...))``.
    :raises ValueError: if a field is not an integer, or if ``is_void``
        reports the resulting date as void.
    """
    from whoosh.support.times import adatetime, fix, is_void

    qstring = qstring.replace(" ", "").replace("-", "").replace(".", "")
    length = len(qstring)

    # Slice bounds for year, month, day, hour, minute, second (in order).
    bounds = ((0, 4), (4, 6), (6, 8), (8, 10), (10, 12), (12, 14))
    parts = []
    for start, end in bounds:
        if length >= end:
            parts.append(int(qstring[start:end]))
        else:
            parts.append(None)

    # Microseconds require the full 14 + 6 character form.
    if length == 20:
        parts.append(int(qstring[14:]))
    else:
        parts.append(None)

    at = fix(adatetime(*parts))
    if is_void(at):
        # ValueError is still an Exception, so existing handlers keep
        # working, and it matches what int() raises for a bad field.
        raise ValueError("%r is not a parseable date" % qstring)
    return at
def test_daterange_multi():
    """Two date-range clauses on different fields joined by AND each parse
    to their own ``DateRange``.
    """
    schema = fields.Schema(text=fields.TEXT, start=fields.DATETIME,
                           end=fields.DATETIME)
    qp = qparser.QueryParser("text", schema)
    basedate = datetime(2010, 9, 20, 15, 16, 6, 454000)
    qp.add_plugin(dateparse.DateParserPlugin(basedate))

    q = qp.parse("start:[2008 to] AND end:[2011 to 2011]")
    assert_equal(q.__class__, query.And)
    for subquery in (q[0], q[1]):
        assert_equal(subquery.__class__, query.DateRange)

    # start:[2008 to] is open-ended.
    assert_equal(q[0].startdate, adatetime(2008).floor())
    assert_equal(q[0].enddate, None)

    # end:[2011 to 2011] spans the whole year.
    assert_equal(q[1].startdate, adatetime(2011).floor())
    assert_equal(q[1].enddate, adatetime(2011).ceil())
def props_to_date(self, props, dt):
    """Turn matched properties into a date object.

    If ``self.fn`` is set, its result for ``(props, dt)`` is returned.
    Otherwise an ``adatetime`` is built by looking up each name in
    ``adatetime.units`` with ``props.get``.
    """
    if self.fn:
        return self.fn(props, dt)

    # No custom callback: copy each unit straight from the match.
    unit_values = dict((unit, props.get(unit)) for unit in adatetime.units)
    return adatetime(**unit_values)
def parse(self, text, dt, pos=0, debug=-9999):
    """Parse with ``self.element``, treating a failed match as empty.

    :param text: the text to parse.
    :param dt: the base datetime passed through to the wrapped element.
    :param pos: the position in ``text`` to start parsing at.
    :param debug: debug level; the wrapped element receives ``debug + 1``.
    :returns: ``(date, newpos)`` from the wrapped element when it yields a
        true value; otherwise ``(adatetime(), pos)`` with ``pos`` unchanged.
    """
    try:
        d, newpos = self.element.parse(text, dt, pos, debug + 1)
    except TimeError:
        d = None

    if d:
        return (d, newpos)

    # The element did not match. Return the caller's original position,
    # not the None a failed element returns, so parsing can carry on from
    # the same place.
    return (adatetime(), pos)
def parse(self, text, dt, pos=0, debug=-9999):
    """Match ``self.elements`` in order, merging their results.

    Between elements (but not before the first) ``self.sep_expr``, if set,
    must match. Each element's result is merged with ``fill_in``.

    Returns ``(date, pos)`` when every element matched, or when
    ``self.progressive`` is true and at least one element matched before
    the sequence stopped. Returns ``(None, None)`` otherwise, including
    whenever a ``TimeError`` was raised.
    """
    name = self.name
    result = adatetime()
    matched_any = False
    complete = False
    errored = False

    print_debug(debug, "Seq %s sep=%r text=%r", name, self.sep_pattern,
                text[pos:])
    for element in self.elements:
        print_debug(debug, "Seq %s text=%r", name, text[pos:])

        # A separator is only required between elements.
        if self.sep_expr and matched_any:
            print_debug(debug, "Seq %s looking for sep", name)
            sep_match = self.sep_expr.match(text, pos)
            if not sep_match:
                print_debug(debug, "Seq %s didn't find sep", name)
                break
            pos = sep_match.end()

        print_debug(debug, "Seq %s trying=%r at=%s", name, element, pos)
        try:
            at, newpos = element.parse(text, dt, pos=pos, debug=debug + 1)
        except TimeError:
            errored = True
            break

        print_debug(debug, "Seq %s result=%r", name, at)
        if not at:
            break
        pos = newpos

        print_debug(debug, "Seq %s adding=%r to=%r", name, at, result)
        try:
            result = fill_in(result, at)
        except TimeError:
            print_debug(debug, "Seq %s Error in fill_in", name)
            errored = True
            break
        print_debug(debug, "Seq %s filled date=%r", name, result)

        matched_any = True
    else:
        # The loop ran to the end without a break.
        complete = True

    if errored or not (complete or (matched_any and self.progressive)):
        print_debug(debug, "Seq %s failed", name)
        return (None, None)

    print_debug(debug, "Seq %s final=%r", name, result)
    return (result, pos)
def parse(self, text, dt, pos=0, debug=-9999):
    """Match ``self.elements`` in any order, merging their results.

    Each pass tries the elements in turn at the current position and merges
    the first true result with ``fill_in``. From the second pass onward
    ``self.sep_expr`` must match first. Matching stops when the separator is
    missing, when no element matches, or when ``self.onceper`` is set and
    every element has been seen.

    Returns ``(date, pos)`` on success. Returns ``(None, None)`` when no
    element matched, when an element matches a second time while
    ``self.onceper`` is set, or when the ``allof`` / ``anyof`` /
    ``requireall`` constraints are not met.
    """
    first = True
    d = adatetime()
    # seen[i] is True once self.elements[i] has matched.
    seen = [False] * len(self.elements)

    while True:
        # newpos is a tentative position; it is only committed to pos once
        # an element has matched on this pass.
        newpos = pos
        print_debug(debug, "Bag %s text=%r", self.name, text[pos:])
        if not first:
            print_debug(debug, "Bag %s looking for sep", self.name)
            m = self.sep_expr.match(text, pos)
            if m:
                newpos = m.end()
            else:
                print_debug(debug, "Bag %s didn't find sep", self.name)
                break

        for i, e in enumerate(self.elements):
            print_debug(debug, "Bag %s trying=%r", self.name, e)

            try:
                at, xpos = e.parse(text, dt, newpos, debug + 1)
            except TimeError:
                # A TimeError from an element counts as "no match".
                at, xpos = None, None

            print_debug(debug, "Bag %s result=%r", self.name, at)
            if at:
                # With onceper, a repeated element fails the whole bag.
                if self.onceper and seen[i]:
                    return (None, None)

                d = fill_in(d, at)
                newpos = xpos
                seen[i] = True
                break
        else:
            # No element matched on this pass; pos keeps its last committed
            # value, so a separator consumed above is not included.
            break

        pos = newpos
        if self.onceper and all(seen):
            break

        first = False

    # allof / anyof hold indexes into self.elements (used to index seen).
    if (not any(seen)
        or (self.allof and not all(seen[pos] for pos in self.allof))
        or (self.anyof and not any(seen[pos] for pos in self.anyof))
        or (self.requireall and not all(seen))):
        return (None, None)

    print_debug(debug, "Bag %s final=%r", self.name, d)
    return (d, pos)
def parse(self, text, dt, pos=0, debug=-9999):
    """Match ``self.elements`` in order, merging their results.

    Between elements (but not before the first) ``self.sep_expr``, if set,
    must match. Each element's result is merged with ``fill_in``.

    Returns ``(date, pos)`` when every element matched, or when
    ``self.progressive`` is true and at least one element matched before
    the sequence stopped. Returns ``(None, None)`` otherwise, including
    whenever a ``TimeError`` was raised.
    """
    name = self.name
    result = adatetime()
    matched_any = False
    complete = False
    errored = False

    print_debug(debug, "Seq %s sep=%r text=%r", name, self.sep_pattern,
                text[pos:])
    for element in self.elements:
        print_debug(debug, "Seq %s text=%r", name, text[pos:])

        # A separator is only required between elements.
        if self.sep_expr and matched_any:
            print_debug(debug, "Seq %s looking for sep", name)
            sep_match = self.sep_expr.match(text, pos)
            if not sep_match:
                print_debug(debug, "Seq %s didn't find sep", name)
                break
            pos = sep_match.end()

        print_debug(debug, "Seq %s trying=%r at=%s", name, element, pos)
        try:
            at, newpos = element.parse(text, dt, pos=pos, debug=debug + 1)
        except TimeError:
            errored = True
            break

        print_debug(debug, "Seq %s result=%r", name, at)
        if not at:
            break
        pos = newpos

        print_debug(debug, "Seq %s adding=%r to=%r", name, at, result)
        try:
            result = fill_in(result, at)
        except TimeError:
            print_debug(debug, "Seq %s Error in fill_in", name)
            errored = True
            break
        print_debug(debug, "Seq %s filled date=%r", name, result)

        matched_any = True
    else:
        # The loop ran to the end without a break.
        complete = True

    if errored or not (complete or (matched_any and self.progressive)):
        print_debug(debug, "Seq %s failed", name)
        return (None, None)

    print_debug(debug, "Seq %s final=%r", name, result)
    return (result, pos)
def props_to_date(self, p, dt):
    """Convert a matched 12-hour clock time into an ``adatetime``.

    ``p.ampm`` counts as AM when it starts with "a" (case-insensitive).
    12 AM maps to hour 0 and 12 PM to hour 12. Any other hour is kept as
    is for AM and has 12 added for PM.
    """
    is_am = p.ampm.lower().startswith("a")

    if p.hour == 12:
        hour24 = 0 if is_am else 12
    elif is_am:
        hour24 = p.hour
    else:
        hour24 = p.hour + 12

    return adatetime(hour=hour24, minute=p.mins, second=p.secs,
                     microsecond=p.usecs)
def props_to_date(self, p, dt):
    """Convert a matched "<direction> <dayname>" phrase into an ``adatetime``.

    :param p: match properties; ``p.dir`` is the matched direction word and
        ``p.day`` the matched day name.
    :param dt: the base datetime the phrase is relative to.
    :returns: an ``adatetime`` with year, month and day set to ``dt``'s date
        shifted by ``relative_days(dt.weekday(), daynum, direction)`` days.
    """
    # re.match takes (pattern, string). The original call had them swapped,
    # so it only worked when the matched word equalled the pattern exactly.
    # IGNORECASE keeps differently-cased matches such as "Last" working.
    # NOTE(review): assumes direction words are matched case-insensitively
    # upstream -- confirm against the class's own pattern.
    if re.match(self.last_pattern, p.dir, re.IGNORECASE):
        direction = -1
    else:
        direction = 1

    # Find the index of the first day-name expression that matches. If none
    # does, daynum is left at the last index, as before.
    for daynum, expr in enumerate(self._dayname_exprs):
        if expr.match(p.day):
            break

    current_daynum = dt.weekday()
    days_delta = relative_days(current_daynum, daynum, direction)
    d = dt.date() + timedelta(days=days_delta)
    return adatetime(year=d.year, month=d.month, day=d.day)
def test_dateparser():
    """Quoted date expressions parse to ``DateRange`` queries, and
    unparseable ones produce null queries and are reported to the callback.
    """
    schema = fields.Schema(text=fields.TEXT, date=fields.DATETIME)
    qp = default.QueryParser("text", schema)

    reported = []

    def record_error(arg):
        reported.append(arg)

    basedate = datetime(2010, 9, 20, 15, 16, 6, 454000)
    qp.add_plugin(dateparse.DateParserPlugin(basedate, callback=record_error))

    # A relative day name inside a larger query.
    q = qp.parse(u("hello date:'last tuesday'"))
    assert_equal(q.__class__, query.And)
    assert_equal(q[1].__class__, query.DateRange)
    assert_equal(q[1].startdate, adatetime(2010, 9, 14).floor())
    assert_equal(q[1].enddate, adatetime(2010, 9, 14).ceil())

    # A time range on the base date.
    q = qp.parse(u("date:'3am to 5pm'"))
    assert_equal(q.__class__, query.DateRange)
    assert_equal(q.startdate, adatetime(2010, 9, 20, 3).floor())
    assert_equal(q.enddate, adatetime(2010, 9, 20, 17).ceil())

    # An unparseable date on its own.
    q = qp.parse(u("date:blah"))
    assert_equal(q, query.NullQuery)
    assert_equal(reported[0], "blah")

    # Unparseable dates inside a larger query: (query string, reported text).
    failures = [
        ("hello date:blarg", "blarg"),
        ("hello date:20055x10", "20055x10"),
        ("hello date:'2005 19 32'", "2005 19 32"),
    ]
    for index, (qstring, bad_text) in enumerate(failures):
        q = qp.parse(u(qstring))
        assert_equal(q.__unicode__(), "(text:hello AND <_NullQuery>)")
        assert_equal(q[1].error, bad_text)
        # reported[0] was filled by the "date:blah" case above.
        assert_equal(reported[index + 1], bad_text)

    # A month/day range.
    q = qp.parse(u("date:'march 24 to dec 12'"))
    assert_equal(q.__class__, query.DateRange)
    assert_equal(q.startdate, adatetime(2010, 3, 24).floor())
    assert_equal(q.enddate, adatetime(2010, 12, 12).ceil())

    # A group of dates applied to the field.
    q = qp.parse(u("date:('30 june' OR '10 july') quick"))
    assert_equal(q.__class__, query.And)
    assert_equal(len(q), 2)
    assert_equal(q[0].__class__, query.Or)
    for alternative in (q[0][0], q[0][1]):
        assert_equal(alternative.__class__, query.DateRange)
def test_date_range():
    """Bracketed ``[a to b]`` date ranges, quoted and unquoted, including
    open-ended ranges, parse to ``DateRange`` queries.
    """
    schema = fields.Schema(text=fields.TEXT, date=fields.DATETIME)
    qp = qparser.QueryParser("text", schema)
    basedate = datetime(2010, 9, 20, 15, 16, 6, 454000)
    qp.add_plugin(dateparse.DateParserPlugin(basedate))

    def check_range(qstring, startdate, enddate):
        # Parse qstring and verify it is a DateRange with the given bounds.
        q = qp.parse(u(qstring))
        assert_equal(q.__class__, query.DateRange)
        assert_equal(q.startdate, startdate)
        assert_equal(q.enddate, enddate)

    march_30 = adatetime(2010, 3, 30).floor()
    next_wednesday = adatetime(2010, 9, 22).ceil()

    # Quoted endpoints.
    check_range("date:['30 march' to 'next wednesday']",
                march_30, next_wednesday)
    check_range("date:[to 'next wednesday']", None, next_wednesday)
    check_range("date:['30 march' to]", march_30, None)

    # Unquoted endpoints (the leftover debug print calls were removed here).
    check_range("date:[30 march to next wednesday]", march_30, next_wednesday)
    check_range("date:[to next wednesday]", None, next_wednesday)
    check_range("date:[30 march to]", march_30, None)
def yesterday_to_date(p, dt):
    """Return an ``adatetime`` for the calendar day before ``dt``.

    ``p`` (the match properties) is unused.
    """
    previous_day = dt.date() - timedelta(days=1)
    return adatetime(year=previous_day.year, month=previous_day.month,
                     day=previous_day.day)
def tomorrow_to_date(p, dt):
    """Return an ``adatetime`` for the calendar day after ``dt``.

    ``p`` (the match properties) is unused.
    """
    next_day = dt.date() + timedelta(days=1)
    return adatetime(year=next_day.year, month=next_day.month,
                     day=next_day.day)
def test_free_dates():
    """With ``free=True``, unquoted date expressions after ``date:`` are
    recognized and the surrounding words stay ordinary terms.
    """
    a = analysis.StandardAnalyzer(stoplist=None)
    schema = fields.Schema(text=fields.TEXT(analyzer=a), date=fields.DATETIME)
    qp = qparser.QueryParser("text", schema)
    basedate = datetime(2010, 9, 20, 15, 16, 6, 454000)
    qp.add_plugin(dateparse.DateParserPlugin(basedate, free=True))

    def assert_range(node, *date_args):
        # node must be a DateRange covering exactly adatetime(*date_args).
        assert_equal(node.__class__, query.DateRange)
        assert_equal(node.startdate, adatetime(*date_args).floor())
        assert_equal(node.enddate, adatetime(*date_args).ceil())

    # Date after a plain term.
    q = qp.parse(u("hello date:last tuesday"))
    assert_equal(q.__class__, query.And)
    assert_equal(len(q), 2)
    assert_equal(q[0].__class__, query.Term)
    assert_equal(q[0].text, "hello")
    assert_range(q[1], 2010, 9, 14)

    # Date before a plain term.
    q = qp.parse(u("date:mar 29 1972 hello"))
    assert_equal(q.__class__, query.And)
    assert_equal(len(q), 2)
    assert_range(q[0], 1972, 3, 29)
    assert_equal(q[1].__class__, query.Term)
    assert_equal(q[1].text, "hello")

    # The whole unquoted run is one date.
    q = qp.parse(u("date:2005 march 2"))
    assert_range(q, 2005, 3, 2)

    # Quoting limits the date to the quoted part.
    q = qp.parse(u("date:'2005' march 2"))
    assert_equal(q.__class__, query.And)
    assert_equal(len(q), 3)
    assert_range(q[0], 2005)
    assert_equal(q[1].__class__, query.Term)
    assert_equal(q[1].fieldname, "text")
    assert_equal(q[1].text, "march")

    # Unquoted "to" range.
    q = qp.parse(u("date:march 24 to dec 12"))
    assert_equal(q.__class__, query.DateRange)
    assert_equal(q.startdate, adatetime(2010, 3, 24).floor())
    assert_equal(q.enddate, adatetime(2010, 12, 12).ceil())

    # A 12-hour time on the base date.
    q = qp.parse(u("date:5:10pm"))
    assert_range(q, 2010, 9, 20, 17, 10)

    # Free dates inside a group.
    q = qp.parse(u("(date:30 june OR date:10 july) quick"))
    assert_equal(q.__class__, query.And)
    assert_equal(len(q), 2)
    assert_equal(q[0].__class__, query.Or)
    assert_equal(q[0][0].__class__, query.DateRange)
    assert_equal(q[0][1].__class__, query.DateRange)