def test_get_tokens(self):
    """Tokenizing a short date phrase yields the expected (value, kind) stream."""
    sample = "At 1997, 20 FEB here, in"
    finder = DateFinder()
    token_stream = finder.tokenize_string(sample)
    # Eleven tokens total; the last carries an empty kind tag and the
    # second is classified as a delimiter.
    self.assertEqual(11, len(token_stream))
    self.assertEqual('', token_stream[10][1])
    self.assertEqual('delimiters', token_stream[1][1])
# NOTE(review): this test feeds a paragraph of lease boilerplate through
# DateFinder.tokenize_string + merge_tokens and pins the exact merged
# match strings (joined with '#'), then cross-checks that the new
# tokenizer extracts at least as many strict date strings as the legacy
# DateFinderOld implementation. The '\n' inside the last expected
# fragment shows the triple-quoted text literal spans multiple physical
# lines in the original file — its exact whitespace is load-bearing for
# the merged-pattern assertion, so the literal must not be reflowed.
def test_compare_date_string(self): text = """ In the event the real estate taxes levied or assessed against the land and building of which the premises are a part in future tax years are greater than the real estate taxes for the base tax year, the TENANT, shall pay within thirty (30) days after submission of the bill to TENANT for the increase in real estate taxes, as additional rent a proportionate share of such increases, which proportionate share shall be computed at 22.08% of the increase in taxes, but shall exclude any fine, penalty, or interest charge for late or non-payment of taxes by LANDLORD. The base tax year shall be July 1, 1994 to June 30, 1995. """ dtok = DateFinder() tokens = dtok.tokenize_string(text) merged = dtok.merge_tokens(tokens) pattern_start = '#'.join([ 't t', 't t', ' of ', 'on of t', ' to T', ' of ', ' at 22.08', ' of t', 't of t', ' by ', ' July 1, 1994 to June 30, 1995.\n ' ]) merged_start = '#'.join([m.match_str for m in merged]) self.assertEqual(pattern_start, merged_start) dstrs = list(dtok.extract_date_strings(text, strict=True)) dold = DateFinderOld() ostrs = list(dold.extract_date_strings(text, strict=True)) # tokenizers has slightly different logic self.assertGreaterEqual(len(dstrs), len(ostrs))
def test_merge_tokens(self):
    """Adjacent date-related tokens collapse into a single merged match."""
    sample = "At 1997, 20 FEB here, in"
    finder = DateFinder()
    merged_matches = finder.merge_tokens(finder.tokenize_string(sample))
    self.assertEqual(1, len(merged_matches))
    match = merged_matches[0]
    # The merged span covers "At 1997, 20 FEB " (indices 0-16), and the
    # leading "At" lands in the extra_tokens capture group.
    self.assertEqual('At 1997, 20 FEB ', match.match_str)
    self.assertEqual((0, 16), match.indices)
    self.assertEqual('At', match.captures['extra_tokens'][0].strip())
def test_get_date_time(self):
    """A full date-plus-time phrase merges into exactly one match."""
    sample = "March 20, 2015 3:30 pm GMT "
    finder = DateFinder()
    merged_matches = finder.merge_tokens(finder.tokenize_string(sample))
    self.assertEqual(1, len(merged_matches))