Example #1
    def test_ingest_opinions(self):
        """Can we successfully ingest opinions at a high level?"""
        site = test_opinion_scraper.Site()
        site.method = "LOCAL"
        parsed_site = site.parse()
        cl_scrape_opinions.Command().scrape_court(parsed_site, full_crawl=True)

        opinions = Opinion.objects.all()
        self.assertEqual(opinions.count(), 6, "Should have 6 test opinions.")
Example #2
    def test_ingest_opinions_from_scraper(self) -> None:
        """Can we successfully ingest opinions at a high level?"""
        site = test_opinion_scraper.Site()
        site.method = "LOCAL"
        parsed_site = site.parse()
        cl_scrape_opinions.Command().scrape_court(parsed_site,
                                                  full_crawl=True,
                                                  ocr_available=False)

        opinions = Opinion.objects.all()
        count = opinions.count()
        self.assertEqual(
            count,
            6,
            "Should have 6 test opinions, not %s" % count,
        )
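
Both versions depend on scaffolding that the snippets omit: a Django `TestCase` subclass and a local test scraper bundled with the project. A minimal sketch of that scaffolding, with the import paths given as assumptions about CourtListener's layout rather than verified facts:

# Sketch only: the module paths below are assumptions, not verified.
from django.test import TestCase

from cl.scrapers.management.commands import cl_scrape_opinions
from cl.scrapers.test_assets import test_opinion_scraper
from cl.search.models import Opinion


class ScraperIngestionTest(TestCase):
    """Hosts the ingestion tests shown above."""

Setting `site.method = "LOCAL"` appears to make the scraper read fixture files shipped alongside `test_opinion_scraper` instead of fetching pages over the network, which keeps the test hermetic.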
Example #3
    def test_abort_on_unchanged_court_website(self):
        """Similar to the above, but we create a UrlHash object before
        checking if it exists."""
        site = test_opinion_scraper.Site()
        site.hash = 'this is a dummy hash code string'
        for dup_checker in self.dup_checkers:
            UrlHash(id=site.url, sha1=site.hash).save()
            abort = dup_checker.abort_by_url_hash(site.url, site.hash)
            if dup_checker.full_crawl:
                self.assertFalse(
                    abort, "DupChecker says to abort during a full crawl.")
            else:
                self.assertTrue(
                    abort,
                    "DupChecker says not to abort on a court that's been "
                    "crawled before with the same hash")

            dup_checker.url_hash.delete()
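
This test, together with Examples #4 and #6 below, pins down a simple contract for `abort_by_url_hash`: remember the hash of every URL it sees, and abort only when the stored hash matches the new one outside a full crawl. A behavioural sketch inferred from the three tests, not the project's actual implementation:

    # Sketch of the contract the tests imply; not the real DupChecker code.
    def abort_by_url_hash(self, url, hash_):
        try:
            self.url_hash = UrlHash.objects.get(id=url)
        except UrlHash.DoesNotExist:
            # Never-seen court website: store its hash and press on (#4).
            self.url_hash = UrlHash.objects.create(id=url, sha1=hash_)
            return False
        if self.full_crawl:
            return False  # full crawls re-process everything
        if self.url_hash.sha1 == hash_:
            return True  # unchanged since the last crawl: abort (#3)
        return False  # the site changed, so crawl it (#6)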
Example #4
    def test_abort_when_new_court_website(self):
        """Tests what happens when a new website is discovered."""
        site = test_opinion_scraper.Site()
        site.hash = 'this is a dummy hash code string'

        for dup_checker in self.dup_checkers:
            abort = dup_checker.abort_by_url_hash(site.url, site.hash)
            if dup_checker.full_crawl:
                self.assertFalse(
                    abort, "DupChecker says to abort during a full crawl.")
            else:
                self.assertFalse(
                    abort,
                    "DupChecker says to abort on a court that's never been "
                    "crawled before.")

            # The checking function creates UrlHash objects, which we must
            # delete as part of cleanup.
            dup_checker.url_hash.delete()
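
Every test in this group loops over `self.dup_checkers`, which none of the snippets define. A plausible `setUp` covering both branches of the assertions; the `DupChecker` constructor signature and the "test" court fixture are assumptions:

    def setUp(self):
        # Assumed: a "test" Court fixture and a (court, full_crawl) signature.
        court = Court.objects.get(pk="test")
        self.dup_checkers = [
            DupChecker(court, full_crawl=True),
            DupChecker(court, full_crawl=False),
        ]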
Example #5
    def test_press_on_with_an_empty_database(self):
        site = test_opinion_scraper.Site()
        site.hash = 'this is a dummy hash code string'
        for dup_checker in self.dup_checkers:
            onwards = dup_checker.press_on(Opinion,
                                           now(),
                                           now() - timedelta(days=1),
                                           lookup_value='content',
                                           lookup_by='sha1')
            if dup_checker.full_crawl:
                self.assertTrue(
                    onwards,
                    "DupChecker says to abort during a full crawl. This "
                    "should never happen.")
            elif dup_checker.full_crawl is False:
                count = Opinion.objects.all().count()
                self.assertTrue(
                    onwards,
                    "DupChecker says to abort on dups when the database has "
                    "%s Opinions." % count)
Example #6
    def test_abort_on_changed_court_website(self):
        """Similar to the above, but we create a UrlHash with a different
        hash before checking if it exists.
        """
        site = test_opinion_scraper.Site()
        site.hash = 'this is a dummy hash code string'
        for dup_checker in self.dup_checkers:
            UrlHash(pk=site.url, sha1=site.hash).save()
            abort = dup_checker.abort_by_url_hash(
                site.url, "this is a *different* hash!")
            if dup_checker.full_crawl:
                self.assertFalse(
                    abort, "DupChecker says to abort during a full crawl.")
            else:
                self.assertFalse(
                    abort,
                    "DupChecker says to abort on a court where the hash has "
                    "changed.")

            dup_checker.url_hash.delete()
Example #7
    def test_parsing_xml_opinion_site_to_site_object(self):
        """Does a basic parse of a site reveal the right number of items?"""
        site = test_opinion_scraper.Site().parse()
        self.assertEqual(len(site.case_names), 6)
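
Juriscraper-style sites expose parsed data as parallel lists, so a natural companion check asserts that the other columns line up with `case_names`. A short sketch; every attribute name beyond `case_names` is an assumption about this fixture:

    def test_parsed_lists_are_parallel(self):
        site = test_opinion_scraper.Site().parse()
        # Attribute names beyond case_names are assumed, not verified.
        for attr in ("case_names", "case_dates", "download_urls"):
            self.assertEqual(len(getattr(site, attr)), 6,
                             "%s should have 6 entries" % attr)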