def test_opinionscited_creation(self) -> None:
    """Make sure that found citations are stored in the database as
    OpinionsCited objects with the appropriate references and depth.
    """
    # Opinion fixture info:
    # pk=10 is our mock citing opinion, containing a number of references
    # to other mocked opinions, mixed about. It's hard to exhaustively
    # test all combinations, but this test case is made to be deliberately
    # complex, in an effort to "trick" the algorithm. Cited opinions:
    # pk=7: 1 FullCaseCitation, 1 ShortCaseCitation, 1 SupraCitation (depth=3)
    # pk=8: 1 FullCaseCitation, 2 IdCitation (one Id. and one Ibid.),
    #       1 ShortCaseCitation, 2 SupraCitation (depth=6)
    # pk=9: 1 FullCaseCitation, 1 ShortCaseCitation (depth=2)
    remove_citations_from_imported_fixtures()
    citing = Opinion.objects.get(pk=10)
    find_citations_for_opinion_by_pks.delay([10])

    test_pairs = [
        (Opinion.objects.get(pk=7), 3),
        (Opinion.objects.get(pk=8), 6),
        (Opinion.objects.get(pk=9), 2),
    ]
    for cited, depth in test_pairs:
        with self.subTest(
            f"Testing OpinionsCited creation for {cited}...",
            cited=cited,
            depth=depth,
        ):
            self.assertEqual(
                OpinionsCited.objects.get(
                    citing_opinion=citing, cited_opinion=cited
                ).depth,
                depth,
            )
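# NOTE: These tests call remove_citations_from_imported_fixtures(), which
# is defined elsewhere in the module. A minimal sketch of what such a
# helper might look like, assuming the imported fixtures carry
# pre-computed citation links and denormalized counts that must be
# cleared before each run -- the model names below are inferred from the
# usage above, and the exact reset steps are an assumption:
def remove_citations_from_imported_fixtures():
    """Reset citation state so each test starts from a clean slate."""
    # Drop any citation edges the fixtures shipped with.
    OpinionsCited.objects.all().delete()
    # Zero out the denormalized per-cluster counters the tests assert on.
    OpinionCluster.objects.all().update(citation_count=0)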
def test_citation_increment(self) -> None:
    """Make sure that found citations increment the cited opinion's
    citation count.
    """
    remove_citations_from_imported_fixtures()

    # Updates the cited opinion's citation count in a Celery task
    find_citations_for_opinion_by_pks.delay([3])

    cited = Opinion.objects.get(pk=2)
    expected_count = 1
    self.assertEqual(
        cited.cluster.citation_count,
        expected_count,
        msg="'cited' was not updated by a citation found in 'citing', or "
        "the citation was not found. Count was: %s instead of %s"
        % (cited.cluster.citation_count, expected_count),
    )
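# For context on what test_citation_increment asserts: a denormalized
# counter like citation_count can be bumped race-safely with a Django F()
# expression. A minimal sketch, assuming a cluster_id argument; this is
# not necessarily how the real task updates the count:
from django.db.models import F

def increment_citation_count(cluster_id):
    # update() issues a single "UPDATE ... SET citation_count =
    # citation_count + 1" statement, avoiding the read-modify-write race
    # that fetching the object first would create between Celery workers.
    OpinionCluster.objects.filter(pk=cluster_id).update(
        citation_count=F('citation_count') + 1
    )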
def update_documents(self, opinion_pks, count):
    sys.stdout.write('Graph size is {0:d} nodes.\n'.format(count))
    sys.stdout.flush()

    index_during_subtask = False
    if self.index == 'concurrently':
        index_during_subtask = True

    chunk = []
    chunk_size = 100
    processed_count = 0
    throttle = CeleryThrottle(min_items=500)
    for opinion_pk in opinion_pks:
        processed_count += 1
        last_item = count == processed_count
        chunk.append(opinion_pk)
        # Dispatch a task for every full chunk, and for whatever remains
        # once the last item is reached.
        if processed_count % chunk_size == 0 or last_item:
            throttle.maybe_wait()
            find_citations_for_opinion_by_pks.delay(
                chunk, index_during_subtask
            )
            chunk = []
        self.log_progress(processed_count, opinion_pk)
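# A hedged sketch of how a command's handle() might drive
# update_documents(); the option name and queryset below are illustrative
# assumptions, not the command's actual interface:
def handle(self, *args, **options):
    self.index = options.get('index', 'concurrently')
    pks = Opinion.objects.values_list('pk', flat=True)
    count = pks.count()
    # iterator() streams primary keys without materializing the whole
    # queryset, while update_documents() chunks them into Celery tasks.
    self.update_documents(pks.iterator(), count)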