def test_make_html_from_matched_citation_objects(self) -> None:
    """Are citations that resolve to an opinion rendered as linked HTML?"""
    # Companion to the plain-text and HTML rendering tests: those hand every
    # citation over as unmatched, whereas here each citation is asserted to
    # be matched, so the rendered span must carry the match's ID and URL.
    # fmt: off
    cases = [
        # Id. citation with page number ("Id., at 123, 124")
        ('asdf, Id., at 123, 124. Lorem ipsum dolor sit amet',
         '<pre class="inline">asdf, </pre><span class="citation" data-id="'
         'MATCH_ID"><a href="MATCH_URL">Id., at 123, 124</a></span><pre '
         'class="inline">. Lorem ipsum dolor sit amet</pre>'),

        # Id. citation with complex page number ("Id. @ 123:1, ¶¶ 124")
        ('asdf, Id. @ 123:1, ¶¶ 124. Lorem ipsum dolor sit amet',
         '<pre class="inline">asdf, </pre><span class="citation" data-id='
         '"MATCH_ID"><a href="MATCH_URL">Id.</a></span><pre class='
         '"inline"> @ 123:1, ¶¶ 124. Lorem ipsum dolor sit amet</pre>'),

        # Id. citation without page number ("Id. Something else")
        ('asdf, Id. Lorem ipsum dolor sit amet',
         '<pre class="inline">asdf, </pre><span class="citation" data-id="'
         'MATCH_ID"><a href="MATCH_URL">Id.</a></span><pre class="inline">'
         ' Lorem ipsum dolor sit amet</pre>'),
    ]
    # fmt: on

    for raw_text, wanted_html in cases:
        with self.subTest(
            f"Testing object to HTML rendering for {raw_text}...",
            s=raw_text,
            expected_html=wanted_html,
        ):
            opinion = Opinion(plain_text=raw_text)
            get_and_clean_opinion_text(opinion)
            found_citations = get_citations(opinion.cleaned_text)

            # Resolution itself is not under test here, so fake what
            # do_resolve_citations() would hand to create_cited_html(): a
            # mapping from the "matched" opinion to its citations. The
            # opinion doubles as its own match, with a mocked cluster that
            # supplies the link target.
            opinion.pk = "MATCH_ID"
            opinion.cluster = Mock(spec=OpinionCluster(id=24601))
            opinion.cluster.get_absolute_url.return_value = "MATCH_URL"
            resolutions = {opinion: found_citations}

            rendered = create_cited_html(opinion, resolutions)
            failure_msg = f"\n{rendered}\n\n !=\n\n{wanted_html}"
            self.assertEqual(rendered, wanted_html, msg=failure_msg)
def test_make_html_from_html(self) -> None:
    """Can we convert the HTML of an opinion into modified HTML?

    Each pair is (input HTML, expected HTML). All citations are passed to
    create_cited_html() as unmatched, so recognised citations are expected
    to be wrapped in a ``citation no-link`` span and nothing else changes.
    """
    # fmt: off
    test_pairs = [
        # Id. citation with HTML tags
        ('<div><p>the improper views of the Legislature.\" 2 <i>id., at '
         '73.</i></p>\n<p>Nathaniel Gorham of Massachusetts</p></div>',
         '<div><p>the improper views of the Legislature." 2 <i><span '
         'class="citation no-link">id., at 73</span>.</i></p>\n<p>'
         'Nathaniel Gorham of Massachusetts</p></div>'),

        # Id. citation with an intervening HTML tag
        # (We expect the HTML to be unchanged, since it's too risky to
        # modify with another tag in the way)
        ('<div><p>the improper views of the Legislature.\" 2 <i>id.,</i> '
         'at <b>73, bolded</b>.</p>\n<p>Nathaniel Gorham of Massachusetts'
         '</p></div>',
         '<div><p>the improper views of the Legislature.\" 2 <i>id.,</i> '
         'at <b>73, bolded</b>.</p>\n<p>Nathaniel Gorham of Massachusetts'
         '</p></div>'),

        # Ibid. citation with HTML tags
        ('<div><p>possess any peculiar knowledge of the mere policy of '
         'public measures.\" <i>Ibid.</i> Gerry of Massachusetts '
         'like</p></div>',
         '<div><p>possess any peculiar knowledge of the mere policy of '
         'public measures." <i><span class="citation no-link">Ibid.'
         '</span></i> Gerry of Massachusetts like</p></div>'
        ),
    ]
    # fmt: on
    for s, expected_html in test_pairs:
        with self.subTest(
            # f-strings keep this test consistent with the sibling
            # rendering tests; the resulting text is unchanged.
            f"Testing html to html conversion for {s}...",
            s=s,
            expected_html=expected_html,
        ):
            opinion = Opinion(html=s)
            get_and_clean_opinion_text(opinion)
            citations = get_citations(opinion.cleaned_text)

            # Stub out fake output from do_resolve_citations(), since the
            # purpose of this test is not to test that. We just need
            # something that looks like what create_cited_html() expects
            # to receive.
            citation_resolutions = {NO_MATCH_RESOURCE: citations}

            created_html = create_cited_html(opinion, citation_resolutions)
            self.assertEqual(
                created_html,
                expected_html,
                msg=f"\n{created_html}\n\n !=\n\n{expected_html}",
            )
def test_make_html_from_plain_text(self) -> None:
    """Is an opinion's plain text turned into HTML with citation spans?"""
    # Every pair is (plain text, expected HTML). Citations are handed over
    # as unmatched, so each one should end up in a "citation no-link" span
    # with the surrounding text kept in inline <pre> blocks.
    # fmt: off
    cases = [
        # Simple example for full citations
        ('asdf 22 U.S. 33 asdf',
         '<pre class="inline">asdf </pre><span class="'
         'citation no-link">22 U.S. 33</span><pre class="'
         'inline"> asdf</pre>'),

        # Using a variant format for U.S. (Issue #409)
        ('asdf 22 U. S. 33 asdf',
         '<pre class="inline">asdf </pre><span class="'
         'citation no-link">22 U. S. 33</span><pre class="'
         'inline"> asdf</pre>'),

        # Full citation across line break
        ('asdf John v. Doe, 123\nU.S. 456, upholding foo bar',
         '<pre class="inline">asdf John v. Doe, </pre><span class="'
         'citation no-link">123\nU.S. 456</span><pre class="inline">, '
         'upholding foo bar</pre>'),

        # Basic short form citation
        ('existing text asdf, 515 U.S., at 240. foobar',
         '<pre class="inline">existing text asdf, </pre><span class="'
         'citation no-link">515 U.S., at 240</span><pre class="inline">. '
         'foobar</pre>'),

        # Short form citation with no comma after reporter in original
        ('existing text asdf, 1 U. S. at 2. foobar',
         '<pre class="inline">existing text asdf, </pre><span class="'
         'citation no-link">1 U. S. at 2</span><pre class="inline">. '
         'foobar</pre>'),

        # Short form citation across line break
        ('asdf.’ ” 123 \n U.S., at 456. Foo bar foobar',
         '<pre class="inline">asdf.’ ” </pre><span class="citation '
         'no-link">123 \n U.S., at 456</span><pre class="inline">. Foo '
         'bar foobar</pre>'),

        # First kind of supra citation (standard kind)
        ('existing text asdf, supra, at 2. foobar',
         '<pre class="inline">existing text asdf, </pre><span class="'
         'citation no-link">supra, at 2</span><pre class="inline">. '
         'foobar</pre>'),

        # Second kind of supra citation (with volume)
        ('existing text asdf, 123 supra, at 2. foo bar',
         '<pre class="inline">existing text asdf, 123 </pre><span class="'
         'citation no-link">supra, at 2</span><pre class="inline">. foo '
         'bar</pre>'),

        # Third kind of supra citation (sans page)
        ('existing text asdf, supra, foo bar',
         '<pre class="inline">existing text asdf, </pre><span class="'
         'citation no-link">supra,</span><pre class="inline"> foo bar'
         '</pre>'),

        # Fourth kind of supra citation (with period)
        ('existing text asdf, supra. foo bar',
         '<pre class="inline">existing text asdf, </pre><span class="'
         'citation no-link">supra.</span><pre class="inline"> foo bar'
         '</pre>'),

        # Supra citation across line break
        ('existing text asdf, supra, at\n99 (quoting foo)',
         '<pre class="inline">existing text asdf, </pre><span class="'
         'citation no-link">supra, at\n99</span><pre class="inline"> '
         '(quoting foo)</pre>'),

        # Id. citation ("Id., at 123")
        ('asdf, id., at 123. Lorem ipsum dolor sit amet',
         '<pre class="inline">asdf, </pre><span class="citation no-link">'
         'id., at 123</span><pre class="inline">. Lorem ipsum dolor sit '
         'amet</pre>'),

        # Duplicate Id. citation
        ('asd, id., at 123. Lo rem ip sum. asdf, id., at 123. Lo rem ip.',
         '<pre class="inline">asd, </pre><span class="citation no-link">'
         'id., at 123</span><pre class="inline">. Lo rem ip sum. asdf, '
         '</pre><span class="citation no-link">id., at 123</span><pre '
         'class="inline">. Lo rem ip.</pre>'),

        # Id. citation across line break
        ('asdf." Id., at 315.\n Lorem ipsum dolor sit amet',
         '<pre class="inline">asdf." </pre><span class="citation '
         'no-link">Id., at 315</span><pre class="inline">.\n Lorem '
         'ipsum dolor sit amet</pre>'),

        # Ibid. citation ("... Ibid.")
        ('asdf, Ibid. Lorem ipsum dolor sit amet',
         '<pre class="inline">asdf, </pre><span class="citation no-link">'
         'Ibid.</span><pre class="inline"> Lorem ipsum dolor sit amet'
         '</pre>'),

        # NonopinionCitation
        ('Lorem ipsum dolor sit amet. U.S. Code §3617. Foo bar.',
         '<pre class="inline">Lorem ipsum dolor sit amet. U.S. Code </pre>'
         '<span class="citation no-link">§3617.</span><pre class="inline">'
         ' Foo bar.</pre>'),
    ]
    # fmt: on

    for raw_text, wanted_html in cases:
        with self.subTest(
            f"Testing plain text to html conversion for {raw_text}...",
            s=raw_text,
            expected_html=wanted_html,
        ):
            opinion = Opinion(plain_text=raw_text)
            get_and_clean_opinion_text(opinion)
            found_citations = get_citations(opinion.cleaned_text)

            # Resolution is not what this test exercises, so fake the
            # output of do_resolve_citations(): file every citation under
            # the "no match" key, which is the shape create_cited_html()
            # expects to receive.
            resolutions = {NO_MATCH_RESOURCE: found_citations}

            rendered = create_cited_html(opinion, resolutions)
            failure_msg = f"\n{rendered}\n\n !=\n\n{wanted_html}"
            self.assertEqual(rendered, wanted_html, msg=failure_msg)
def find_citations_for_opinion_by_pks(
    self,
    opinion_pks: List[int],
    index: bool = True,
) -> None:
    """Find citations for search.Opinion objects.

    For every opinion selected by ``opinion_pks`` this extracts the
    citations from its text, resolves them, regenerates
    ``html_with_citations``, bumps ``citation_count`` on the clusters of
    opinions that were not previously cited by it, and replaces its
    ``OpinionsCited`` rows. Opinions in which no citation is found are
    skipped without being modified.

    :param opinion_pks: An iterable of search.Opinion PKs
    :param index: Whether to add the item to Solr
    :return: None
    :raises: Whatever ``self.retry`` returns when citation resolution hits
        ``ResponseNotReady`` (presumably this is a bound Celery task and the
        raise schedules a retry in 2 seconds -- confirm against the
        decorator, which is outside this block).
    """
    opinions = Opinion.objects.filter(pk__in=opinion_pks)
    for opinion in opinions:
        # Memoize parsed versions of the opinion's text
        get_and_clean_opinion_text(opinion)

        # Extract the citations from the opinion's text
        citations: List[CitationBase] = get_citations(opinion.cleaned_text)

        # If no citations are found, continue
        if not citations:
            continue

        # Resolve all those different citation objects to Opinion objects,
        # using a variety of heuristics.
        try:
            citation_resolutions: Dict[
                MatchedResourceType, List[SupportedCitationType]
            ] = do_resolve_citations(citations, opinion)
        except ResponseNotReady as e:
            # Threading problem in httplib, which is used in the Solr query.
            raise self.retry(exc=e, countdown=2)

        # Generate the citing opinion's new HTML with inline citation links.
        # This must happen before the unmatched citations are dropped below,
        # so that they are still rendered.
        opinion.html_with_citations = create_cited_html(
            opinion, citation_resolutions
        )

        # Delete the unmatched citations
        citation_resolutions.pop(NO_MATCH_RESOURCE, None)

        # Increase the citation count for the cluster of each matched opinion
        # if that cluster has not already been cited by this opinion. First,
        # calculate the IDs of every opinion whose cluster will need
        # updating. The already-cited pks are pulled into a set once so each
        # membership test is a hash lookup instead of a scan of the lazy
        # queryset's results.
        already_cited_pks = set(
            opinion.opinions_cited.all().values_list("pk", flat=True)
        )
        opinion_ids_to_update = {
            matched_opinion.pk
            for matched_opinion in citation_resolutions
            if matched_opinion.pk not in already_cited_pks
        }

        # Finally, commit these changes to the database in a single
        # transaction block. Trigger a single Solr update as well, if
        # required.
        with transaction.atomic():
            opinion_clusters_to_update = OpinionCluster.objects.filter(
                sub_opinions__pk__in=opinion_ids_to_update
            )
            opinion_clusters_to_update.update(
                citation_count=F("citation_count") + 1
            )

            if index:
                # Hand the task a concrete list of pks rather than a lazy
                # QuerySet, so the argument is plain data for the task
                # message.
                # NOTE(review): this is queued before the surrounding
                # transaction commits; confirm the indexing task cannot
                # observe pre-commit data.
                cluster_pks = list(
                    opinion_clusters_to_update.values_list("pk", flat=True)
                )
                add_items_to_solr.delay(cluster_pks, "search.OpinionCluster")

            # Nuke existing citations
            OpinionsCited.objects.filter(citing_opinion_id=opinion.pk).delete()

            # Create the new ones. The depth is the number of citations in
            # this opinion that resolved to the cited opinion.
            OpinionsCited.objects.bulk_create(
                [
                    OpinionsCited(
                        citing_opinion_id=opinion.pk,
                        cited_opinion_id=cited_opinion.pk,
                        depth=len(cited_citations),
                    )
                    for cited_opinion, cited_citations
                    in citation_resolutions.items()
                ]
            )

            # Save all the changes to the citing opinion (send to solr later)
            opinion.save(index=False)

    # If a Solr update was requested, do a single one at the end with all the
    # pks of the passed opinions
    if index:
        add_items_to_solr.delay(opinion_pks, "search.Opinion")