def test_from_iso_date_string():
    """Datum.from_iso_date_string parses an ISO-8601 date into y/m/d parts."""
    iso_text = "2017-08-23"
    parsed = Datum()
    parsed.from_iso_date_string(iso_text)
    assert parsed.value.year == 2017
    assert parsed.value.month == 8
    assert parsed.value.day == 23
def date_finder(text):
    """Try several independent strategies to find a date in *text*, printing
    the result of each one.

    Each strategy (dateutil, pygrok, datefinder, a plain ISO regex, NLTK
    chunk grammars, pydatum, dateparser) is wrapped in its own try/except so
    that one library failing does not stop the others.

    Args:
        text: free-form string that may contain one or more dates.

    Returns:
        None. All results are printed to stdout.
    """
    date = ""
    # pygrok pattern for an ISO-style YYYY-MM-DD date.
    date_pattern = '%{YEAR:year}-%{MONTHNUM:month}-%{MONTHDAY:day}'
    # BUGFIX: the original body referenced an undefined name `s`; it clearly
    # meant the `text` parameter, which is used here instead.
    matches = list(datefinder.find_dates(text))
    match_date = re.search(r'\d{4}-\d{2}-\d{2}', text)

    try:
        print("====using dateutil")
        # dateutil parses one candidate line at a time.
        for line in text.splitlines():
            d = parser.parse(line)
            print(d.strftime("%Y-%m-%d"))
    except Exception as e:
        print(e)

    try:
        print("====pygrok===")
        grok = Grok(date_pattern)
        print(grok.match(text))
    except Exception as e:
        print(e)

    try:
        print("====using date===")
        if matches:
            date = matches[0]
            print(date)
        else:
            print('No dates found')
    except Exception as e:
        print(e)

    try:
        print("====using date===")
        # match_date may be None when the regex found nothing; the resulting
        # AttributeError is deliberately swallowed by this handler.
        date = datetime.datetime.strptime(match_date.group(), '%Y-%m-%d').date()
        print(date)
    except Exception as e:
        print(e)

    try:
        print("====using Chunkgrams===")
        # Chunk proper nouns followed by a cardinal number (e.g. "August 23").
        chunkGram = r"""NE:{<NNP>+<CD>}"""
        chunkParser = nltk.RegexpParser(chunkGram)
        sentences = nltk.sent_tokenize(text)
        tokenized_sentences = [nltk.word_tokenize(sentence.strip()) for sentence in sentences]
        tagged_sentences = [nltk.pos_tag(i) for i in tokenized_sentences]
        chunked_sentences = [chunkParser.parse(i) for i in tagged_sentences]
        entity_names = []
        for tree in chunked_sentences:
            # extract_entity_names is defined elsewhere in this module.
            entity_names.extend(extract_entity_names(tree))
        print(entity_names)
    except Exception as e:
        print(e)

    try:
        print("===using pydatum==")
        datum = Datum()
        print(datum.from_iso_date_string(text))
    except Exception as e:
        print(e)

    try:
        print("===using dateparser==")
        # Python 3 str has no .decode(); the original Python 2 call stripped
        # non-ASCII characters, reproduced here with an encode/decode round-trip.
        date = search_dates(text.encode('ascii', 'ignore').decode('ascii'))
        print(date)
    except Exception as e:
        print(e)