def test_reporter_tokenizer(self):
    """Tokenizing keeps spaced reporter names ('U. S.', 'Vet. App.') whole."""
    # Each entry pairs an input sentence with the exact token list expected
    # back from tokenize(); cases are checked in the order listed.
    cases = (
        (
            'See Roe v. Wade, 410 U. S. 113 (1973)',
            ['See', 'Roe', 'v.', 'Wade,', '410', 'U. S.', '113', '(1973)'],
        ),
        (
            'Foo bar eats grue, 232 Vet. App. (2003)',
            ['Foo', 'bar', 'eats', 'grue,', '232', 'Vet. App.', '(2003)'],
        ),
    )
    for sentence, expected_tokens in cases:
        self.assertEqual(tokenize(sentence), expected_tokens)
def test_reporter_tokenizer(self):
    """Verify the token lists produced for two sample citation sentences."""
    # A reporter abbreviation written with an internal space ('U. S.')
    # is expected to come back as a single token.
    roe_tokens = tokenize('See Roe v. Wade, 410 U. S. 113 (1973)')
    self.assertEqual(
        roe_tokens,
        ['See', 'Roe', 'v.', 'Wade,', '410', 'U. S.', '113', '(1973)'],
    )

    # Same expectation for 'Vet. App.', here with no page number following.
    grue_tokens = tokenize('Foo bar eats grue, 232 Vet. App. (2003)')
    self.assertEqual(
        grue_tokens,
        ['Foo', 'bar', 'eats', 'grue,', '232', 'Vet. App.', '(2003)'],
    )
def get_citations(text, html=True, do_post_citation=True, do_defendant=True):
    """Find the citations contained in ``text``.

    Args:
        text: The text to scan. When ``html`` is true it is first passed
            through ``get_visible_text``.
        html: Whether ``text`` should be run through ``get_visible_text``
            before tokenizing.
        do_post_citation: When true, ``add_post_citation`` is called on each
            citation that was successfully extracted.
        do_defendant: When true, ``add_defendant`` is called on each citation
            that was successfully extracted.

    Returns:
        The list returned by ``disambiguate_reporters``, in which every
        citation that has no court set and satisfies ``is_scotus_reporter``
        has had its ``court`` attribute set to ``"scotus"``.
    """
    if html:
        text = get_visible_text(text)
    words = reporter_tokenizer.tokenize(text)
    citations = []
    # Exclude first and last tokens when looking for reporters, because valid
    # citations must have a volume before and a page number after the reporter.
    for i in range(1, len(words) - 1):
        # Find reporter. Membership is tested on the two mappings directly
        # rather than on ``EDITIONS.keys() + VARIATIONS_ONLY.keys()``: that
        # expression rebuilt and linearly scanned a combined key list for
        # every token, and fails on Python 3 where key views do not
        # support ``+``.
        if words[i] not in EDITIONS and words[i] not in VARIATIONS_ONLY:
            continue
        citation = extract_base_citation(words, i)
        if citation is None:
            # Not a valid citation; continue looking
            continue
        if do_post_citation:
            add_post_citation(citation, words, i)
        if do_defendant:
            add_defendant(citation, words, i)
        citations.append(citation)

    # Disambiguate or drop all the reporters
    citations = disambiguate_reporters(citations)

    # Fill in the court only where it is still unset.
    for citation in citations:
        if not citation.court and is_scotus_reporter(citation):
            citation.court = "scotus"
    return citations
def get_citations(text, html=True, do_post_citation=True, do_defendant=True):
    """Scan ``text`` for reporter tokens and build a citation for each match.

    The text is tokenized with ``reporter_tokenizer.tokenize`` (after
    ``get_visible_text`` when ``html`` is true). Every interior token that is
    a key of ``EDITIONS`` or ``VARIATIONS_ONLY`` is handed to
    ``extract_base_citation``; a ``None`` result is skipped.

    Args:
        text: The text to scan.
        html: If true, ``text`` is first converted with ``get_visible_text``.
        do_post_citation: If true, call ``add_post_citation`` on each
            extracted citation.
        do_defendant: If true, call ``add_defendant`` on each extracted
            citation.

    Returns:
        The citations returned by ``disambiguate_reporters``; those with no
        court for which ``is_scotus_reporter`` is true get
        ``court = 'scotus'``.
    """
    if html:
        text = get_visible_text(text)
    words = reporter_tokenizer.tokenize(text)
    citations = []
    # Exclude first and last tokens when looking for reporters, because valid
    # citations must have a volume before and a page number after the reporter.
    for i in range(1, len(words) - 1):
        token = words[i]
        # Find reporter. Look the token up in each mapping directly; the old
        # ``EDITIONS.keys() + VARIATIONS_ONLY.keys()`` allocated a new list
        # per token and raises TypeError on Python 3 key views.
        if token in EDITIONS or token in VARIATIONS_ONLY:
            citation = extract_base_citation(words, i)
            if citation is None:
                # Not a valid citation; continue looking
                continue
            if do_post_citation:
                add_post_citation(citation, words, i)
            if do_defendant:
                add_defendant(citation, words, i)
            citations.append(citation)

    # Disambiguate or drop all the reporters
    citations = disambiguate_reporters(citations)

    # Only citations that still lack a court are assigned one here.
    for citation in citations:
        if not citation.court and is_scotus_reporter(citation):
            citation.court = 'scotus'
    return citations