Example #1
0
def find_citations_for_opinion_by_pks(self, opinion_pks, index=True):
    """Find citations for search.Opinion objects.

    :param opinion_pks: An iterable of search.Opinion PKs
    :param index: Whether to add the item to Solr
    :return: None
    """
    opinions = Opinion.objects.filter(pk__in=opinion_pks)
    for opinion in opinions:
        # Returns a list of Citation objects, i.e., something like
        # [FullCitation, FullCitation, ShortformCitation, FullCitation,
        #   SupraCitation, SupraCitation, ShortformCitation, FullCitation]
        citations = get_document_citations(opinion)

        # Match all those different Citation objects to Opinion objects, using
        # a variety of heuristics.
        try:
            citation_matches = match_citations.get_citation_matches(
                opinion, citations
            )
        except ResponseNotReady as e:
            # Threading problem in httplib, which is used in the Solr query.
            raise self.retry(exc=e, countdown=2)

        # Consolidate duplicate matches, keeping a counter of how often each
        # match appears (so we know how many times an opinion cites another).
        # keys = cited opinion
        # values = number of times that opinion is cited
        grouped_matches = Counter(citation_matches)

        # Fetch the PKs of already-cited opinions ONCE per citing opinion.
        # Testing `matched_opinion not in opinion.opinions_cited.all()` inside
        # the loop re-evaluated the queryset on every iteration (N+1 queries);
        # a set membership test against the prefetched PKs is a single query.
        cited_pks = set(
            opinion.opinions_cited.all().values_list("pk", flat=True)
        )

        for matched_opinion in grouped_matches:
            # Increase citation count for matched cluster if it hasn't
            # already been cited by this opinion.
            if matched_opinion.pk not in cited_pks:
                matched_opinion.cluster.citation_count += 1
                matched_opinion.cluster.save(index=index)

        # Only update things if we found citations
        if citations:
            opinion.html_with_citations = create_cited_html(opinion, citations)

            # Nuke existing citations
            OpinionsCited.objects.filter(citing_opinion_id=opinion.pk).delete()

            # Create the new ones.
            OpinionsCited.objects.bulk_create(
                [
                    OpinionsCited(
                        citing_opinion_id=opinion.pk,
                        cited_opinion_id=matched_opinion.pk,
                        depth=grouped_matches[matched_opinion],
                    )
                    for matched_opinion in grouped_matches
                ]
            )

        # Update Solr if requested. In some cases we do it at the end for
        # performance reasons.
        opinion.save(index=index)
Example #2
0
def find_citations_for_opinion_by_pks(self, opinion_pks, index=True):
    """Find citations for search.Opinion objects.

    :param opinion_pks: An iterable of search.Opinion PKs
    :param index: Whether to add the item to Solr
    :return: None
    """
    for opinion in Opinion.objects.filter(pk__in=opinion_pks):
        # Extract every Citation object from the opinion's text, e.g.
        # [FullCitation, FullCitation, ShortformCitation, FullCitation,
        #   SupraCitation, SupraCitation, ShortformCitation, FullCitation]
        citations = get_document_citations(opinion)

        # Opinions without any citations need no further work.
        if not citations:
            continue

        # Resolve each extracted Citation object to a cited Opinion, using
        # a variety of heuristics.
        try:
            citation_matches = match_citations.get_citation_matches(
                opinion, citations)
        except ResponseNotReady as e:
            # Threading problem in httplib, which is used in the Solr query.
            raise self.retry(exc=e, countdown=2)

        # Collapse duplicate matches into a mapping of
        #   cited opinion -> number of times this opinion cites it
        depth_by_opinion = Counter(citation_matches)

        # Determine which matched opinions are newly cited by this opinion,
        # so that only their clusters get a citation_count bump.
        already_cited = opinion.opinions_cited.all().values_list(
            "pk", flat=True)
        new_citation_pks = {
            cited.pk
            for cited in depth_by_opinion
            if cited.pk not in already_cited
        }

        # Bump citation_count for all affected clusters in one UPDATE, then
        # fire a single Solr update for them if indexing was requested.
        clusters_to_update = OpinionCluster.objects.filter(
            sub_opinions__pk__in=new_citation_pks)
        clusters_to_update.update(citation_count=F("citation_count") + 1)
        if index:
            add_items_to_solr.delay(
                clusters_to_update.values_list("pk", flat=True),
                "search.OpinionCluster",
            )

        # Regenerate the citing opinion's HTML with inline citation links
        opinion.html_with_citations = create_cited_html(opinion, citations)

        # Drop the old citation records...
        OpinionsCited.objects.filter(citing_opinion_id=opinion.pk).delete()

        # ...and insert the fresh ones in bulk.
        OpinionsCited.objects.bulk_create([
            OpinionsCited(
                citing_opinion_id=opinion.pk,
                cited_opinion_id=cited.pk,
                depth=depth,
            ) for cited, depth in depth_by_opinion.items()
        ])

        # Persist all changes to the citing opinion
        opinion.save()

    # If a Solr update was requested, do one batched update at the end with
    # all the pks of the passed opinions
    if index:
        add_items_to_solr.delay(opinion_pks, "search.Opinion")
Example #3
0
    def test_citation_resolution(self) -> None:
        """Tests whether different types of citations (i.e., full, short form,
        supra, id) resolve correctly to opinion matches.
        """

        # Opinion fixture info:
        # pk=7 is mocked with name 'Foo v. Bar' and citation '1 U.S. 1'
        # pk=8 is mocked with name 'Qwerty v. Uiop' and citation '2 F.3d 2'
        # pk=9 is mocked with name 'Lorem v. Ipsum' and citation '1 U.S. 50'
        # pk=11 is mocked with name 'Abcdef v. Ipsum' and citation '1 U.S. 999'

        def full(volume, page, reporter='U.S.', canonical_reporter='U.S.',
                 court='scotus'):
            # Build a FullCitation; defaults cover the common U.S. reporter
            # case, which all but one fixture below uses.
            return FullCitation(volume=volume,
                                reporter=reporter,
                                page=page,
                                canonical_reporter=canonical_reporter,
                                lookup_index=0,
                                court=court,
                                reporter_index=1,
                                reporter_found=reporter)

        def opinion(pk):
            # Fetch an expected Opinion fixture by primary key.
            return Opinion.objects.get(pk=pk)

        def id_cite():
            # Build a fresh Id. citation (fresh each time, since its
            # after_tokens list is mutable).
            return IdCitation(id_token='id.', after_tokens=['a', 'b', 'c'])

        # fmt: off

        test_pairs = [
            # Simple test for matching a single, full citation
            ([full(1, '1')],
             [opinion(7)]),

            # Test matching multiple full citations to different documents
            ([full(1, '1'),
              full(2, '2', reporter='F.3d', canonical_reporter='F.',
                   court='ca1')],
             [opinion(7), opinion(8)]),

            # Test resolving a supra citation
            ([full(1, '1'),
              SupraCitation(antecedent_guess='Bar', page='99', volume=1)],
             [opinion(7), opinion(7)]),

            # Test resolving a supra citation when its antecedent guess matches
            # two possible candidates. We expect the supra citation to not
            # be matched.
            ([full(1, '50'),
              full(1, '999'),
              SupraCitation(antecedent_guess='Ipsum', page='99', volume=1)],
             [opinion(9), opinion(11)]),

            # Test resolving a short form citation with a meaningful antecedent
            ([full(1, '1'),
              ShortformCitation(reporter='U.S.', page='99', volume=1,
                                antecedent_guess='Bar,')],
             [opinion(7), opinion(7)]),

            # Test resolving a short form citation when its reporter and
            # volume match two possible candidates. We expect its antecedent
            # guess to provide the correct tiebreaker.
            ([full(1, '1'),
              full(1, '50'),
              ShortformCitation(reporter='U.S.', page='99', volume=1,
                                antecedent_guess='Bar')],
             [opinion(7), opinion(9), opinion(7)]),

            # Test resolving a short form citation when its reporter and
            # volume match two possible candidates, and when it lacks a
            # meaningful antecedent.
            # We expect the short form citation to not be matched.
            ([full(1, '1'),
              full(1, '50'),
              ShortformCitation(reporter='U.S.', page='99', volume=1,
                                antecedent_guess='somethingwrong')],
             [opinion(7), opinion(9)]),

            # Test resolving a short form citation when its reporter and
            # volume match two possible candidates, and when its antecedent
            # guess also matches multiple possibilities.
            # We expect the short form citation to not be matched.
            ([full(1, '50'),
              full(1, '999'),
              ShortformCitation(reporter='U.S.', page='99', volume=1,
                                antecedent_guess='Ipsum')],
             [opinion(9), opinion(11)]),

            # Test resolving a short form citation when its reporter and
            # volume are erroneous.
            # We expect the short form citation to not be matched.
            ([full(1, '1'),
              ShortformCitation(reporter='F.3d', page='99', volume=26,
                                antecedent_guess='somethingwrong')],
             [opinion(7)]),

            # Test resolving an Id. citation
            ([full(1, '1'),
              id_cite()],
             [opinion(7), opinion(7)]),

            # Test resolving an Id. citation when the previous citation match
            # failed because there is no clear antecedent. We expect the Id.
            # citation to also not be matched.
            ([full(1, '1'),
              ShortformCitation(reporter='F.3d', page='99', volume=26,
                                antecedent_guess='somethingwrong'),
              id_cite()],
             [opinion(7)]),

            # Test resolving an Id. citation when the previous citation match
            # failed because a normal full citation lookup returned nothing.
            # We expect the Id. citation to also not be matched.
            ([full(1, '1'),
              full(99, '99'),
              id_cite()],
             [opinion(7)]),

            # Test resolving an Id. citation when the previous citation is to a
            # non-opinion document. Since we can't match those documents (yet),
            # we expect the Id. citation to also not be matched.
            ([full(1, '1'),
              NonopinionCitation(match_token='§99'),
              id_cite()],
             [opinion(7)]),

            # Test resolving an Id. citation when it is the first citation
            # found. Since there is nothing before it, we expect no matches to
            # be returned.
            ([id_cite()], [])
        ]

        # fmt: on

        # The citing opinion does not matter for this test; fetch it once
        # instead of re-querying it on every iteration.
        citing_opinion = Opinion.objects.get(pk=1)

        for citations, expected_matches in test_pairs:
            print("Testing citation matching for %s..." % citations)

            citation_matches = get_citation_matches(citing_opinion, citations)
            self.assertEqual(
                citation_matches,
                expected_matches,
                msg="\n%s\n\n    !=\n\n%s" %
                (citation_matches, expected_matches),
            )
            print("✓")