Пример #1
0
 def test_hash(self):
     """Do two citations that differ only in reporter_index hash the same?

     Citation.__hash__ is temporarily replaced with Citation.fuzzy_hash for
     the duration of the check. The previous state is put back afterwards,
     even when the assertion fails, so the patch does not leak into other
     tests that hash or compare Citation objects.
     """
     not_defined = object()
     # Look in the class's own namespace so that an inherited __hash__ is
     # not mistaken for one defined directly on Citation.
     saved_hash = vars(Citation).get('__hash__', not_defined)
     Citation.__hash__ = Citation.fuzzy_hash
     try:
         # NOTE(review): the reporter/volume values look swapped (2 vs.
         # "U.S."); kept as-is because both objects share them and only
         # reporter_index differs -- confirm the intent.
         citations = [
             Citation(reporter=2, volume="U.S.", page="2", reporter_index=1),
             Citation(reporter=2, volume="U.S.", page="2", reporter_index=2),
         ]
         self.assertEqual(
             hash(citations[0]), hash(citations[1]),
         )
     finally:
         # Undo the monkey patch: drop the override if Citation had no
         # __hash__ of its own, otherwise reinstate the saved one.
         if saved_hash is not_defined:
             del Citation.__hash__
         else:
             Citation.__hash__ = saved_hash
Пример #2
0
 def test_find_tc_citations(self):
     """Can we parse tax court citations properly?"""
     test_pairs = (
         # Test with atypical formatting for Tax Court Memos
         ('the 1 T.C. No. 233',
          [Citation(volume=1, reporter='T.C. No.', page=233,
                    canonical_reporter=u'T.C. No.', lookup_index=0,
                    reporter_index=2, reporter_found='T.C. No.')]),
         ('word T.C. Memo. 2019-233',
          [Citation(volume=2019, reporter='T.C. Memo.', page=233,
                    canonical_reporter=u'T.C. Memo.', lookup_index=0,
                    reporter_index=1, reporter_found='T.C. Memo.')]),
         ('something T.C. Summary Opinion 2019-233',
          [Citation(volume=2019, reporter='T.C. Summary Opinion', page=233,
                    canonical_reporter=u'T.C. Summary Opinion',
                    lookup_index=0,
                    reporter_index=1,
                    reporter_found='T.C. Summary Opinion')]),
         ('T.C. Summary Opinion 2018-133',
          [Citation(volume=2018, reporter='T.C. Summary Opinion', page=133,
                    canonical_reporter=u'T.C. Summary Opinion',
                    lookup_index=0,
                    reporter_index=0,
                    reporter_found='T.C. Summary Opinion')]),
         ('1     UNITED STATES TAX COURT REPORT   (2018)',
          [Citation(volume=1, reporter='T.C.', page=2018,
                    canonical_reporter=u'T.C.',
                    lookup_index=0,
                    reporter_index=1,
                    reporter_found='UNITED STATES TAX COURT REPORT')]),
         ('U.S. of A. 1     UNITED STATES TAX COURT REPORT   (2018)',
          [Citation(volume=1, reporter='T.C.', page=2018,
                    canonical_reporter=u'T.C.',
                    lookup_index=0,
                    reporter_index=4,
                    reporter_found='UNITED STATES TAX COURT REPORT')]),
         ('U.S. 1234 1 U.S. 1',
          [Citation(volume=1, reporter='U.S.', page=1,
                    canonical_reporter=u'U.S.',
                    lookup_index=0,
                    reporter_index=3,
                    court='scotus',
                    reporter_found='U.S.')]),
     )
     for q, a in test_pairs:
         print "Testing citation extraction for %s..." % q,
         cites_found = get_citations(q)
         self.assertEqual(
             cites_found,
             a,
             msg='%s\n%s\n\n    !=\n\n%s' % (
                 q,
                 ",\n".join([str(cite.__dict__) for cite in cites_found]),
                 ",\n".join([str(cite.__dict__) for cite in a]),
             )
         )
         print "✓"
Пример #3
0
 def test_find_citations(self):
     """Can we find and make Citation objects from strings?"""
     test_pairs = (
         # Basic test
         ('1 U.S. 1', [
             Citation(volume=1,
                      reporter='U.S.',
                      page=1,
                      canonical_reporter=u'U.S.',
                      lookup_index=0,
                      court='scotus',
                      reporter_index=1,
                      reporter_found='U.S.')
         ]),
         # Basic test of non-case name before citation (should not be found)
         ('lissner test 1 U.S. 1', [
             Citation(volume=1,
                      reporter='U.S.',
                      page=1,
                      canonical_reporter=u'U.S.',
                      lookup_index=0,
                      court='scotus',
                      reporter_index=3,
                      reporter_found='U.S.')
         ]),
         # Test with plaintiff and defendant
         ('lissner v. test 1 U.S. 1', [
             Citation(plaintiff='lissner',
                      defendant='test',
                      volume=1,
                      reporter='U.S.',
                      page=1,
                      canonical_reporter=u'U.S.',
                      lookup_index=0,
                      court='scotus',
                      reporter_index=4,
                      reporter_found='U.S.')
         ]),
         # Test with plaintiff, defendant and year
         ('lissner v. test 1 U.S. 1 (1982)', [
             Citation(plaintiff='lissner',
                      defendant='test',
                      volume=1,
                      reporter='U.S.',
                      page=1,
                      year=1982,
                      canonical_reporter=u'U.S.',
                      lookup_index=0,
                      court='scotus',
                      reporter_index=4,
                      reporter_found='U.S.')
         ]),
         # Test with different reporter than all of above.
         ('bob lissner v. test 1 F.2d 1 (1982)', [
             Citation(plaintiff='lissner',
                      defendant='test',
                      volume=1,
                      reporter='F.2d',
                      page=1,
                      year=1982,
                      canonical_reporter=u'F.',
                      lookup_index=0,
                      reporter_index=5,
                      reporter_found='F.2d')
         ]),
         # Test with court and extra information
         ('bob lissner v. test 1 U.S. 12, 347-348 (4th Cir. 1982)', [
             Citation(plaintiff='lissner',
                      defendant='test',
                      volume=1,
                      reporter='U.S.',
                      page=12,
                      year=1982,
                      extra=u'347-348',
                      court='ca4',
                      canonical_reporter=u'U.S.',
                      lookup_index=0,
                      reporter_index=5,
                      reporter_found='U.S.')
         ]),
         # Test with text before and after and a variant reporter
         ('asfd 22 U. S. 332 (1975) asdf', [
             Citation(volume=22,
                      reporter='U.S.',
                      page=332,
                      year=1975,
                      canonical_reporter=u'U.S.',
                      lookup_index=0,
                      court='scotus',
                      reporter_index=2,
                      reporter_found='U. S.')
         ]),
         # Test with finding reporter when it's a second edition
         ('asdf 22 A.2d 332 asdf', [
             Citation(volume=22,
                      reporter='A.2d',
                      page=332,
                      canonical_reporter=u'A.',
                      lookup_index=0,
                      reporter_index=2,
                      reporter_found='A.2d')
         ]),
         # Test finding a variant second edition reporter
         ('asdf 22 A. 2d 332 asdf', [
             Citation(volume=22,
                      reporter='A.2d',
                      page=332,
                      canonical_reporter=u'A.',
                      lookup_index=0,
                      reporter_index=2,
                      reporter_found='A. 2d')
         ]),
         # Test finding a variant of an edition resolvable by variant alone.
         ('171 Wn.2d 1016', [
             Citation(volume=171,
                      reporter='Wash. 2d',
                      page=1016,
                      canonical_reporter=u'Wash.',
                      lookup_index=1,
                      reporter_index=1,
                      reporter_found='Wn.2d')
         ]),
         # Test finding two citations where one of them has abutting
         # punctuation.
         ('2 U.S. 3, 4-5 (3 Atl. 33)', [
             Citation(volume=2,
                      reporter="U.S.",
                      page=3,
                      extra=u'4-5',
                      canonical_reporter=u"U.S.",
                      lookup_index=0,
                      reporter_index=1,
                      reporter_found="U.S.",
                      court='scotus'),
             Citation(volume=3,
                      reporter="A.",
                      page=33,
                      canonical_reporter=u"A.",
                      lookup_index=0,
                      reporter_index=5,
                      reporter_found="Atl.")
         ]),
         # Test with the page number as a Roman numeral
         ('12 Neb. App. lxiv (2004)', [
             Citation(volume=12,
                      reporter='Neb. Ct. App.',
                      page='lxiv',
                      year=2004,
                      canonical_reporter=u'Neb. Ct. App.',
                      lookup_index=0,
                      reporter_index=1,
                      reporter_found='Neb. App.')
         ]),
         # Test with the 'digit-REPORTER-digit' corner-case formatting
         ('2007-NMCERT-008', [
             Citation(volume=2007,
                      reporter='NMCERT',
                      page=8,
                      canonical_reporter=u'NMCERT',
                      lookup_index=0,
                      reporter_index=1,
                      reporter_found='NMCERT')
         ]),
         ('2006-Ohio-2095', [
             Citation(volume=2006,
                      reporter='Ohio',
                      page=2095,
                      canonical_reporter=u'Ohio',
                      lookup_index=0,
                      reporter_index=1,
                      reporter_found='Ohio')
         ]),
     )
     for q, a in test_pairs:
         print "Testing citation extraction for %s..." % q,
         cites_found = get_citations(q)
         self.assertEqual(
             cites_found,
             a,
             msg='%s\n%s\n\n    !=\n\n%s' % (
                 q,
                 ",\n".join([str(cite.__dict__) for cite in cites_found]),
                 ",\n".join([str(cite.__dict__) for cite in a]),
             ))
         print "✓"
Пример #4
0
 def test_disambiguate_citations(self):
     test_pairs = [
         # 1. P.R.R --> Correct abbreviation for a reporter.
         ('1 P.R.R. 1', [
             Citation(volume=1,
                      reporter='P.R.R.',
                      page=1,
                      canonical_reporter=u'P.R.R.',
                      lookup_index=0,
                      reporter_index=1,
                      reporter_found='P.R.R.')
         ]),
         # 2. U. S. --> A simple variant to resolve.
         ('1 U. S. 1', [
             Citation(volume=1,
                      reporter='U.S.',
                      page=1,
                      canonical_reporter=u'U.S.',
                      lookup_index=0,
                      court='scotus',
                      reporter_index=1,
                      reporter_found='U. S.')
         ]),
         # 3. A.2d --> Not a variant, but needs to be looked up in the
         #    EDITIONS variable.
         ('1 A.2d 1', [
             Citation(volume=1,
                      reporter='A.2d',
                      page=1,
                      canonical_reporter=u'A.',
                      lookup_index=0,
                      reporter_index=1,
                      reporter_found='A.2d')
         ]),
         # 4. A. 2d --> An unambiguous variant of an edition
         ('1 A. 2d 1', [
             Citation(volume=1,
                      reporter='A.2d',
                      page=1,
                      canonical_reporter=u'A.',
                      lookup_index=0,
                      reporter_index=1,
                      reporter_found='A. 2d')
         ]),
         # 5. P.R. --> A variant of 'Pen. & W.', 'P.R.R.', or 'P.' that's
         #    resolvable by year
         (
             '1 P.R. 1 (1831)',
             # Of the three, only Pen & W. was being published this year.
             [
                 Citation(volume=1,
                          reporter='Pen. & W.',
                          page=1,
                          canonical_reporter=u'Pen. & W.',
                          lookup_index=0,
                          year=1831,
                          reporter_index=1,
                          reporter_found='P.R.')
             ]),
         # 5.1: W.2d --> A variant of an edition that either resolves to
         #      'Wis. 2d' or 'Wash. 2d' and is resolvable by year.
         (
             '1 W.2d 1 (1854)',
             # Of the two, only Wis. 2d was being published this year.
             [
                 Citation(volume=1,
                          reporter='Wis. 2d',
                          page=1,
                          canonical_reporter=u'Wis.',
                          lookup_index=0,
                          year=1854,
                          reporter_index=1,
                          reporter_found='W.2d')
             ]),
         # 5.2: Wash. --> A non-variant that has more than one reporter for
         #      the key, but is resolvable by year
         ('1 Wash. 1 (1890)', [
             Citation(volume=1,
                      reporter='Wash.',
                      page=1,
                      canonical_reporter=u'Wash.',
                      lookup_index=1,
                      year=1890,
                      reporter_index=1,
                      reporter_found='Wash.')
         ]),
         # 6. Cr. --> A variant of Cranch, which is ambiguous, except with
         #    paired with this variation.
         ('1 Cra. 1', [
             Citation(volume=1,
                      reporter='Cranch',
                      page=1,
                      canonical_reporter=u'Cranch',
                      lookup_index=0,
                      court='scotus',
                      reporter_index=1,
                      reporter_found='Cra.')
         ]),
         # 7. Cranch. --> Not a variant, but could refer to either Cranch's
         #    Supreme Court cases or his DC ones. In this case, we cannot
         #    disambiguate. Years are not known, and we have no further
         #    clues. We must simply drop Cranch from the results.
         ('1 Cranch 1 1 U.S. 23', [
             Citation(volume=1,
                      reporter='U.S.',
                      page=23,
                      canonical_reporter=u'U.S.',
                      lookup_index=0,
                      court='scotus',
                      reporter_index=4,
                      reporter_found='U.S.')
         ]),
         # 8. Unsolved problem. In theory, we could use parallel citations
         #    to resolve this, because Rob is getting cited next to La., but
         #    we don't currently know the proximity of citations to each
         #    other, so can't use this.
         #  - Rob. --> Either:
         #                8.1: A variant of Robards (1862-1865) or
         #                8.2: Robinson's Louisiana Reports (1841-1846) or
         #                8.3: Robinson's Virgina Reports (1842-1865)
         # ('1 Rob. 1 1 La. 1',
         # [Citation(volume=1, reporter='Rob.', page=1,
         #                          canonical_reporter='Rob.',
         #                          lookup_index=0),
         #  Citation(volume=1, reporter='La.', page=1,
         #                          canonical_reporter='La.',
         #                          lookup_index=0)]),
     ]
     for pair in test_pairs:
         print "Testing disambiguation for %s..." % pair[0],
         citations = get_citations(pair[0], html=False)
         self.assertEqual(citations,
                          pair[1],
                          msg='%s\n%s != \n%s' %
                          (pair[0], [cite.__dict__ for cite in citations
                                     ], [cite.__dict__
                                         for cite in pair[1]]))
         print "✓"