示例#1
0
文件: __init__.py 项目: BKTO/bdates
def extract_dates(text, sorting=None):
    """Return every complete date found in ``text`` as ``datetime`` objects.

    A match counts as complete when it supplies a day, a month and a year.
    Matches lacking any of those parts are ignored, as are matches whose
    parts do not form a real calendar date (for example day 31 of month 2).

    Args:
        text: Text to scan. A bytestring is decoded as UTF-8 first.
        sorting: When truthy, the result is ordered by how often each date
            occurs in the text (most frequent first), ties broken by most
            recent date, and then passed through ``remove_duplicates``.
            When falsy, dates are returned in the order they were matched.

    Returns:
        A list of ``datetime`` objects built without a tzinfo argument.
    """
    # Work on unicode text: it is easier to handle and copes with foreign
    # languages much better. ``bytes`` is an alias of ``str`` on Python 2,
    # so this check is unchanged there and also correct on Python 3.
    if isinstance(text, bytes):
        text = text.decode('utf-8')

    date_regex = re.compile(patterns['date'], flags)

    completes = []
    for found in date_regex.finditer(text):
        # Drop empty groups and change the keys back to their plain names,
        # e.g. from month_myd to month.
        fields = {}
        for group_name, raw in found.groupdict().items():
            value = num(raw)
            if value:
                fields[group_name.split("_")[0]] = value

        if not all(part in fields for part in ("day", "month", "year")):
            continue

        try:
            completes.append(datetime(normalize_year(fields['year']),
                                      int(fields['month']),
                                      int(fields['day'])))
        except ValueError:
            # The pattern matched something that is not a valid calendar
            # date; skip it instead of aborting the whole extraction.
            continue

    if sorting:
        counter = Counter(completes)
        ranked = sorted(completes,
                        key=lambda d: (counter[d], d.toordinal()),
                        reverse=True)
        completes = remove_duplicates(ranked)

    return completes
示例#2
0
文件: __init__.py 项目: BKTO/bdates
def date_from_dict(match):
    """Build a ``datetime`` from a mapping of parsed date parts.

    Args:
        match: Mapping indexed by ``'month'``, ``'year'`` and optionally
            ``'day'``. The month may be a number, a numeric string, or a
            month name that is looked up in ``month_to_number`` after
            title-casing.

    Returns:
        A ``datetime`` carrying the module-level ``tzinfo``, or ``None``
        when the year is missing or the parts do not form a valid date
        (the error is printed in that case).

    Raises:
        KeyError: If ``'month'`` is missing, or is a name that is not in
            ``month_to_number``.
    """
    month = match['month']
    try:
        month = int(month)
    except (TypeError, ValueError):
        # Not numeric, so treat it as a month name.
        month = month_to_number[month.title()]

    # The day is optional: fall back to the first of the month.
    # Subscripting (rather than ``.group``) is what a dict supports.
    try:
        day = int(match['day'])
    except (KeyError, IndexError, TypeError, ValueError):
        day = 1

    try:
        return datetime(int(match['year']), month, day, tzinfo=tzinfo)
    except (KeyError, IndexError, TypeError, ValueError, OverflowError) as e:
        # Best effort: report the problem and signal failure with None.
        print(e)
        return None
示例#3
0
文件: __init__.py 项目: BKTO/bdates
def getFirstDateFromText(text):
    """Return the first complete date found in ``text``.

    Matches are examined in order. A match is skipped when
    ``isDefinitelyNotDate`` rejects its text, when it lacks a day, month
    or year, or when its parts do not form a real calendar date.

    Args:
        text: Text to scan. A bytestring is decoded as UTF-8 first.

    Returns:
        A ``datetime`` carrying the module-level ``tzinfo`` for the first
        usable match, or ``None`` when there is none.
    """
    # Work on unicode text: it is easier to handle and copes with foreign
    # languages much better. ``bytes`` is an alias of ``str`` on Python 2,
    # so this check is unchanged there and also correct on Python 3.
    if isinstance(text, bytes):
        text = text.decode('utf-8')

    date_regex = re.compile(patterns['date'], flags)

    for found in date_regex.finditer(text):
        if isDefinitelyNotDate(found.group(0)):
            continue

        # Drop empty groups and strip the pattern suffix from the group
        # names, e.g. month_myd becomes month.
        fields = {}
        for group_name, raw in found.groupdict().items():
            value = num(raw)
            if value:
                fields[group_name.split("_")[0]] = value

        if not all(part in fields for part in ("day", "month", "year")):
            continue

        try:
            return datetime(normalize_year(fields['year']),
                            int(fields['month']),
                            int(fields['day']),
                            tzinfo=tzinfo)
        except ValueError:
            # Not a valid calendar date; keep looking at later matches.
            continue

    return None