def check_dates_set(self, date_src: List[Tuple[int, int, int]]):
    """
    Verify date extraction for every valid (year, month, day) triple.

    Triples that do not form a real calendar date are skipped. Each
    remaining date is rendered in three textual forms, and every form must
    yield exactly one extracted date equal to the expected one.

    :param date_src: (year, month, day) triples to check
    """
    for year, month, day in date_src:
        try:
            expected = datetime.date(year, month, day)
        except ValueError:
            # Not a real calendar date - nothing to check for this triple.
            continue

        # Three renderings: numeric, abbreviated month name, full month name.
        variants = (
            """on {0}-{1}-{2}""".format(year, month, day),
            "by " + expected.strftime("%b %d, %Y"),
            "before " + expected.strftime("%B %d, %Y"),
        )
        for sample in variants:
            extracted = get_dates_list(sample)
            self.assertEqual(1, len(extracted))
            self.assertEqual(expected, extracted[0])
def test_date_may(self):
    """
    Check that the word " may " on its own is not parsed as a date.
    """
    phrase = "this may be a date"
    # Nothing may be returned in non-strict mode nor in strict mode.
    for strict_mode in (False, True):
        found = get_dates_list(phrase, strict=strict_mode, return_source=True)
        self.assertEqual(0, len(found))
def _extract_variants_from_text(self, field, text: str, **kwargs):
    """
    Collect candidate dates found in ``text``.

    Values whose year is 3000 or later are dropped, and ``datetime`` values
    are reduced to their ``date`` part.

    :param field: not used by this implementation
    :param text: text to search for dates
    :return: list of extracted values, or None when nothing remains
    """
    variants = []
    for candidate in get_dates_list(text) or []:
        if candidate.year >= 3000:
            continue
        if isinstance(candidate, datetime):
            candidate = candidate.date()
        variants.append(candidate)
    return variants or None
def test_should_be_fixed(self):
    """
    Pin the strict-mode result for an agreement preamble: no dates returned.

    NOTE(review): the sample contains "the 20th day of May, 2003" and the
    test name suggests this is a known gap rather than desired behavior -
    confirm before relying on it.
    """
    sample = (
        ' This Amendment to the Employment Agreement (the "Amendment") '
        'is made as of the 20th day of May, 2003 between Premcor Inc. '
        '(the "Company") and '
        "[Executive's Name - See Schedule A attached hereto] "
        '(the "Executive"). '
    )
    found = list(get_dates_list(sample, strict=True))
    self.assertEqual(0, len(found))
def test_is_it_a_date(self):
    """
    Regression check for "29MAY19 1350" in strict mode.

    Per the original note, this input somehow produced 1350-01-01, which
    did not go through validation. The test requires exactly one result,
    equal to 2019-05-29 13:50:00.
    """
    sample = "NOT RCVD BY RJ BY 29MAY19 1350 DOH LT REF"
    expected = datetime.datetime(2019, 5, 29, 13, 50, 0)

    found = list(get_dates_list(sample, strict=True))

    self.assertEqual(1, len(found))
    self.assertEqual(expected, found[0])
def test_date_may():
    """
    Check that " may " alone is not parsed as a date.

    :return:
    """
    phrase = "this may be a date"

    # Non-strict mode goes through the benchmarking helper; strict mode is
    # called directly. Neither may return any value.
    loose_outcome = lexnlp_tests.benchmark_extraction_func(
        get_dates_list, phrase, strict=False, return_source=True)
    strict_outcome = get_dates_list(phrase, strict=True, return_source=True)

    for outcome in (loose_outcome, strict_outcome):
        assert_equal(len(outcome), 0)
def test_two_dates_strict(self):
    """
    Expect exactly two dates in a maintenance-arrangement paragraph.

    NOTE(review): despite the test name, ``strict`` is not passed to
    get_dates_list here - confirm whether that is intended.
    """
    sample = (
        " This monthly maintenance and support arrangement will have an "
        "initial term of six (6) months. The arrangement will then "
        "automatically renew for an additional twelve (12) months at the "
        "above rates and conditions unless written notification to "
        "US/INTELICOM of Licensee's intent to cancel the arrangement is "
        "received no later than September 1, 1998. Unless Licensee elects "
        "to cancel this arrangement at the end of the first six months, "
        'the "initial term" of the arrangement will be through '
        "September 30, 1999. "
    )
    found = get_dates_list(sample)
    self.assertEqual(2, len(found))
def test_moar_dates(self):
    """
    Expect four dates in an interest-rate amendment paragraph.

    The sample mentions July 18, 2002 twice; the assertion counts four
    results in total.
    """
    sample = (
        " 2. Amendment to Interest Rate. Beginning on February 1, 1998, and "
        "continuing until July 18, 2002, which is the fifth anniversary of "
        "the Loan conversion date, interest shall be fixed at an annual rate "
        "of 7.38%, which rate is equal to 200 basis points above the Bank's "
        'five-year "Treasury Constant Rate" in effect on January 23, 1998. '
        "In accordance with the Agreement, the interest rate shall be "
        "adjusted again on July 18, 2002. "
    )
    found = get_dates_list(sample)
    self.assertEqual(4, len(found))
def extraction_function(self, field, possible_value, text):
    """
    Resolve a date value for ``field``.

    A ``datetime`` or ``date`` passed as ``possible_value`` is returned
    unchanged. Otherwise dates are extracted from ``str(possible_value)``
    when it is truthy, or from ``text`` when it is not.

    :param field: object whose ``item_number`` is passed on to
        ValueExtractionHint.get_value
    :param possible_value: ready date/datetime, or a value to parse
    :param text: fallback text used when ``possible_value`` is falsy
    :return: the passed-through date, the value chosen by
        ValueExtractionHint.get_value, or None when there is nothing to
        parse or no dates are found
    """
    if isinstance(possible_value, (datetime, date)):
        return possible_value

    if not (possible_value or text):
        return None

    source = str(possible_value) if possible_value else text
    found = get_dates_list(source)
    if not found:
        return None
    return ValueExtractionHint.get_value(found, field.item_number)
def test_more_more_dates(self):
    """
    Expect exactly two dates in a real-estate-tax clause.

    The sample also contains non-date numbers such as "thirty (30) days"
    and "22.08%"; the assertion counts two results in total.
    """
    sample = (
        " In the event the real estate taxes levied or assessed against the "
        "land and building of which the premises are a part in future tax "
        "years are greater than the real estate taxes for the base tax year, "
        "the TENANT, shall pay within thirty (30) days after submission of "
        "the bill to TENANT for the increase in real estate taxes, as "
        "additional rent a proportionate share of such increases, which "
        "proportionate share shall be computed at 22.08% of the increase in "
        "taxes, but shall exclude any fine, penalty, or interest charge for "
        "late or non-payment of taxes by LANDLORD. The base tax year shall "
        "be July 1, 1994 to June 30, 1995. "
    )
    found = get_dates_list(sample)
    self.assertEqual(2, len(found))
def parse(self, text, text_unit_id, _text_unit_lang, **kwargs) -> ParseResults:
    """
    Locate dates in a text unit and build one DateUsage per distinct day.

    ``datetime`` results are reduced to their ``date`` part before
    counting, so a day is counted once per occurrence regardless of
    whether the extractor returned it as a ``date`` or a ``datetime``.

    :param text: text of the unit to search
    :param text_unit_id: identifier stored on every DateUsage created
    :param _text_unit_lang: not used by this implementation
    :param kwargs: ``strict`` (default False) is forwarded to
        dates.get_dates_list
    :return: ParseResults mapping DateUsage to the created entries, or
        None when no dates are found
    """
    # Function-scope import keeps the block self-contained.
    from collections import Counter

    strict = kwargs.get('strict', False)
    found = dates.get_dates_list(text, strict=strict, return_source=False)
    if not found:
        # Kept from the original behavior: nothing is returned on no match.
        return None

    # Normalize BEFORE counting. The previous code counted normalized days
    # against the raw result list; a datetime never compares equal to a
    # plain date, so every datetime-derived day was stored with count=0.
    day_counts = Counter(
        item.date() if isinstance(item, datetime.datetime) else item
        for item in found
    )
    return ParseResults({
        DateUsage: [
            DateUsage(text_unit_id=text_unit_id, date=day, count=occurrences)
            for day, occurrences in day_counts.items()
        ]
    })
def test_one_date_this(self):
    """
    Expect a single date when the date is preceded by the word "this".
    """
    sample = (
        "made this November 16, 2009. "
        "This is a paragraph which has multiple sentences."
    )
    found = get_dates_list(sample)
    self.assertEqual(1, len(found))
def test_date_en_gb(self):
    """
    With locale 'en-GB', "09/12/2022" must give one date whose month is 12.
    """
    sample = 'Commencement Date: 09/12/2022.'
    found = get_dates_list(sample, locale='en-GB')
    self.assertEqual(1, len(found))
    first = found[0]
    self.assertEqual(12, first.month)
def test_another_may(self):
    """
    Section numbers followed by "may" on the next line must not yield a
    date in strict mode.
    """
    sample = (
        "Sections 12.1, 12.2, 12.3, 12.4, 12.6, 12.7 and 12.12\n"
        "may be amended only"
    )
    found = list(get_dates_list(sample, strict=True))
    self.assertEqual(0, len(found))
def _extract_variants_from_text(self, field, text: str, **kwargs):
    """
    Collect candidate dates found in ``text``, dropping any value whose
    year is 3000 or later.

    :param field: not used by this implementation
    :param text: text to search for dates
    :return: list of extracted values, or None when nothing remains
    """
    found = get_dates_list(text)
    if not found:
        return None
    plausible = [item for item in found if item.year < 3000]
    return plausible if plausible else None
def test_section(self):
    """
    A dotted section number followed by "may" must not yield a date in
    strict mode.
    """
    sample = "Section 7.7.10 may be made"
    found = list(get_dates_list(sample, strict=True))
    self.assertEqual(0, len(found))
def test_fp(self):
    """
    False-positive check: "Section 13.2 may" must not yield a date in
    strict mode.
    """
    sample = " this Section 13.2 may exercise all"
    found = list(get_dates_list(sample, strict=True))
    self.assertEqual(0, len(found))
def test_date_first_aug(self):
    """
    Ordinal day forms ("second of ..." and "2nd of ...") must each yield
    exactly one date.
    """
    spelled_out = list(get_dates_list("second of August 2014"))
    self.assertEqual(1, len(spelled_out))

    abbreviated = get_dates_list("2nd of August 2014")
    self.assertEqual(1, len(abbreviated))
def test_date_en_us(self):
    """
    With no locale argument, "09/12/2022" must give one date whose month
    is 9.
    """
    sample = 'Commencement Date: 09/12/2022.'
    found = get_dates_list(sample)
    self.assertEqual(1, len(found))
    first = found[0]
    self.assertEqual(9, first.month)
def test_two_ranges(self):
    """
    Expect three dates from a "... to ... through ..." phrase mixing
    written-out and numeric dates.
    """
    sample = " be July 1, 1994 to June 30, 1995 through 10/07/1998 "
    found = get_dates_list(sample)
    self.assertEqual(len(found), 3)
def test_august(self):
    """
    "August 1, 2013" must give exactly one date whose month is 8.
    """
    sample = "Commencement Date: August 1, 2013."
    found = get_dates_list(sample)
    self.assertEqual(1, len(found))
    first = found[0]
    self.assertEqual(8, first.month)
def _extract_variants_from_text(self, field, text: str):
    """
    Return whatever get_dates_list finds in ``text``, unfiltered.

    :param field: not used by this implementation
    :param text: text to search for dates
    :return: the result of get_dates_list(text), passed through as-is
    """
    variants = get_dates_list(text)
    return variants