Пример #1
0
def test_from_iso_date_string():
    """Check that loading an ISO date string sets the Datum's day, month and year."""
    iso_text = "2017-08-23"

    parsed = Datum()
    parsed.from_iso_date_string(iso_text)

    # Compare all three components at once, in day/month/year order.
    loaded = parsed.value
    assert (loaded.day, loaded.month, loaded.year) == (23, 8, 2017)
Пример #2
0
def date_finder(text):
    """Run several date-extraction strategies over ``text`` and print each result.

    Every strategy runs inside its own ``try`` block: when one of them fails,
    the exception is printed and the remaining strategies still run.

    Args:
        text: The input string to search for dates.

    Returns:
        None. Results and errors are written to standard output only.
    """
    date_pattern = '%{YEAR:year}-%{MONTHNUM:month}-%{MONTHDAY:day}'

    try:
        print("====using dateutil")
        # Each line is parsed on its own; the first line that cannot be
        # parsed raises and ends this strategy.
        for line in text.splitlines():
            parsed = parser.parse(line)
            print(parsed.strftime("%Y-%m-%d"))
    except Exception as e:
        print(e)

    try:
        print("====pygrok===")
        grok = Grok(date_pattern)
        print(grok.match(text))
    except Exception as e:
        print(e)

    try:
        print("====using date===")
        # Looked up inside the try so a datefinder failure cannot abort the
        # other strategies.
        matches = list(datefinder.find_dates(text))
        if matches:
            print(matches[0])
        else:
            print('No dates found')
    except Exception as e:
        print(e)

    try:
        print("====using date===")
        # Regex strategy: first YYYY-MM-DD shaped substring in the text.
        match_date = re.search(r'\d{4}-\d{2}-\d{2}', text)
        if match_date is None:
            print('No dates found')
        else:
            # strptime still validates the digits (e.g. month 13 raises).
            print(datetime.datetime.strptime(match_date.group(), '%Y-%m-%d').date())
    except Exception as e:
        print(e)

    try:
        print("====using Chunkgrams===")
        # Chunk grammar: one or more proper nouns followed by a cardinal number.
        chunk_gram = r"""NE:{<NNP>+<CD>}"""
        chunk_parser = nltk.RegexpParser(chunk_gram)
        sentences = nltk.sent_tokenize(text)
        tokenized_sentences = [nltk.word_tokenize(sentence.strip()) for sentence in sentences]
        tagged_sentences = [nltk.pos_tag(tokens) for tokens in tokenized_sentences]
        chunked_sentences = [chunk_parser.parse(tagged) for tagged in tagged_sentences]
        entity_names = []
        for tree in chunked_sentences:
            entity_names.extend(extract_entity_names(tree))
        print(entity_names)
    except Exception as e:
        print(e)

    try:
        print("===using pydatum==")
        datum = Datum()
        print(datum.from_iso_date_string(text))
    except Exception as e:
        print(e)

    try:
        print("===using dateparser==")
        # Byte strings are reduced to ASCII first (undecodable bytes dropped);
        # text that is already unicode is passed through unchanged.
        if isinstance(text, bytes):
            searchable = text.decode('ascii', 'ignore')
        else:
            searchable = text
        print(search_dates(searchable))
    except Exception as e:
        print(e)