def test_scrape_data_and_more_no_persist_second_time_persist(
    self,
    mock_get_more: Mock,
    mock_fetch: Mock,
    mock_populate: Mock,
    mock_write: Mock,
):
    """A SCRAPE_DATA_AND_MORE task that declines to persist must forward its
    ingest_info on the follow-up task; when that follow-up task later does
    persist, exactly one write with the expected proto/metadata occurs.
    """
    # Follow-up task returned by get_more_tasks — plain SCRAPE_DATA this time,
    # so get_more_tasks is not invoked again on the second pass.
    populate_task = Task.evolve(TEST_TASK, task_type=constants.TaskType.SCRAPE_DATA)
    mock_get_more.return_value = [populate_task]
    mock_fetch.return_value = (TEST_HTML, {})
    # First pass: populate_data chooses NOT to persist.
    mock_populate.return_value = ScrapedData(
        ingest_info=self.ii,
        persist=False,
    )
    start_time = datetime.datetime.now()
    t = Task.evolve(TEST_TASK, task_type=constants.TaskType.SCRAPE_DATA_AND_MORE)
    req = QueueRequest(
        scrape_type=constants.ScrapeType.BACKGROUND,
        next_task=t,
        scraper_start_time=start_time,
    )
    scraper = FakeScraper("test")
    # Direct (non-batch) writes so mock_write would capture any persistence.
    scraper.BATCH_WRITES = False
    scraper._generic_scrape(req)

    # Should send the ii since we chose not to persist.
    expected_tasks = [
        QueueRequest(
            scrape_type=constants.ScrapeType.BACKGROUND,
            next_task=populate_task,
            scraper_start_time=start_time,
            ingest_info=self.ii,
        )
    ]

    self.assertEqual(mock_get_more.call_count, 1)
    self.assertEqual(mock_populate.call_count, 1)
    # Nothing written yet: persist was False on the first pass.
    self.assertEqual(mock_write.call_count, 0)
    mock_get_more.assert_called_once_with(TEST_HTML, t)
    self.assertCountEqual(expected_tasks, scraper.tasks)

    # Second pass: re-run the queued follow-up task, this time persisting.
    mock_populate.return_value = ScrapedData(
        ingest_info=self.ii,
        persist=True,
    )
    scraper._generic_scrape(scraper.tasks[0])
    # get_more call count unchanged (follow-up task is SCRAPE_DATA only).
    self.assertEqual(mock_get_more.call_count, 1)
    self.assertEqual(mock_populate.call_count, 2)
    self.assertEqual(mock_write.call_count, 1)

    expected_metadata = IngestMetadata(
        scraper.region.region_code,
        scraper.region.jurisdiction_id,
        start_time,
        scraper.get_enum_overrides(),
    )
    expected_proto = convert_ingest_info_to_proto(self.ii)
    mock_write.assert_called_once_with(expected_proto, expected_metadata)
def test_scrape_data_no_more_tasks_batch(
    self,
    mock_get_more: Mock,
    mock_fetch: Mock,
    mock_populate: Mock,
    mock_write: Mock,
    mock_batch_write: Mock,
) -> None:
    """With batch writes enabled (the default), a persisting SCRAPE_DATA task
    goes through the batch writer — the direct writer is never touched and no
    follow-up tasks are produced.
    """
    mock_fetch.return_value = (TEST_HTML, {})
    mock_populate.return_value = ScrapedData(
        ingest_info=self.ii,
        persist=True,
    )

    scrape_start = datetime.datetime.now()
    data_task = Task.evolve(TEST_TASK, task_type=constants.TaskType.SCRAPE_DATA)
    request = QueueRequest(
        scrape_type=constants.ScrapeType.BACKGROUND,
        next_task=data_task,
        scraper_start_time=scrape_start,
    )
    scraper = FakeScraper("test")
    scraper._generic_scrape(request)

    key = ScrapeKey("test", constants.ScrapeType.BACKGROUND)
    self.assertEqual(mock_get_more.call_count, 0)
    self.assertEqual(mock_populate.call_count, 1)
    # Direct write path must be bypassed in batch mode.
    self.assertEqual(mock_write.call_count, 0)
    mock_batch_write.assert_called_once_with(
        ingest_info=self.ii,
        task=data_task,
        scrape_key=key,
    )
    self.assertEqual(len(scraper.tasks), 0)
def test_scrape_data_no_more_tasks(
    self,
    mock_get_more: Mock,
    mock_fetch: Mock,
    mock_populate: Mock,
    mock_write: Mock,
) -> None:
    """A SCRAPE_DATA task that persists, with batch writes disabled, must
    write the converted proto directly exactly once and enqueue no follow-up
    tasks (get_more_tasks is never called for SCRAPE_DATA).
    """
    # Consistency fix: annotated the mock parameters and added the return
    # annotation to match the sibling tests in this suite.
    mock_fetch.return_value = (TEST_HTML, {})
    mock_populate.return_value = ScrapedData(
        ingest_info=self.ii,
        persist=True,
    )
    start_time = datetime.datetime.now()
    t = Task.evolve(TEST_TASK, task_type=constants.TaskType.SCRAPE_DATA)
    req = QueueRequest(
        scrape_type=constants.ScrapeType.BACKGROUND,
        next_task=t,
        scraper_start_time=start_time,
    )
    scraper = FakeScraper("test")
    # Force the direct write path so mock_write captures the persistence.
    scraper.BATCH_WRITES = False
    scraper._generic_scrape(req)

    expected_metadata = IngestMetadata(
        scraper.region.region_code,
        scraper.region.jurisdiction_id,
        start_time,
        scraper.get_enum_overrides(),
    )
    expected_proto = convert_ingest_info_to_proto(self.ii)

    self.assertEqual(mock_get_more.call_count, 0)
    self.assertEqual(mock_populate.call_count, 1)
    self.assertEqual(mock_write.call_count, 1)
    mock_write.assert_called_once_with(expected_proto, expected_metadata)
    self.assertEqual(len(scraper.tasks), 0)
def test_scrape_data_and_more_no_persist(
    self,
    mock_get_more: Mock,
    mock_fetch: Mock,
    mock_populate: Mock,
    mock_write: Mock,
) -> None:
    """A SCRAPE_DATA_AND_MORE task that declines to persist must attach the
    scraped ingest_info to the queued follow-up task instead of writing it.
    """
    # Consistency fix: modernized this test to match its siblings — double
    # quotes, trailing commas, Mock annotations, and a return annotation.
    mock_get_more.return_value = [TEST_TASK]
    mock_fetch.return_value = (TEST_HTML, {})
    # populate_data declines to persist this round.
    mock_populate.return_value = ScrapedData(
        ingest_info=self.ii,
        persist=False,
    )
    start_time = datetime.datetime.now()
    t = Task.evolve(TEST_TASK, task_type=constants.TaskType.SCRAPE_DATA_AND_MORE)
    req = QueueRequest(
        scrape_type=constants.ScrapeType.BACKGROUND,
        next_task=t,
        scraper_start_time=start_time,
    )
    scraper = FakeScraper("test")
    scraper.BATCH_WRITES = False
    scraper._generic_scrape(req)

    # Should send the ii since we chose not to persist.
    expected_tasks = [
        QueueRequest(
            scrape_type=constants.ScrapeType.BACKGROUND,
            next_task=TEST_TASK,
            scraper_start_time=start_time,
            ingest_info=self.ii,
        )
    ]

    self.assertEqual(mock_get_more.call_count, 1)
    self.assertEqual(mock_populate.call_count, 1)
    # No write: persistence was deferred to the follow-up task.
    self.assertEqual(mock_write.call_count, 0)
    mock_get_more.assert_called_once_with(TEST_HTML, t)
    self.assertCountEqual(expected_tasks, scraper.tasks)
def test_scrape_data_and_more_yes_persist(
    self,
    mock_get_more: Mock,
    mock_fetch: Mock,
    mock_populate: Mock,
    mock_write: Mock,
) -> None:
    """A SCRAPE_DATA_AND_MORE task that persists writes the proto immediately
    and queues the follow-up task WITHOUT an attached ingest_info.
    """
    mock_get_more.return_value = [TEST_TASK]
    mock_fetch.return_value = (TEST_HTML, {})
    mock_populate.return_value = ScrapedData(
        ingest_info=self.ii,
        persist=True,
    )

    launch_time = datetime.datetime.now()
    combined_task = Task.evolve(
        TEST_TASK, task_type=constants.TaskType.SCRAPE_DATA_AND_MORE
    )
    queue_request = QueueRequest(
        scrape_type=constants.ScrapeType.BACKGROUND,
        next_task=combined_task,
        scraper_start_time=launch_time,
    )
    scraper = FakeScraper("test")
    scraper.BATCH_WRITES = False
    scraper._generic_scrape(queue_request)

    # Data was persisted here, so the queued follow-up carries no ingest_info.
    expected_tasks = [
        QueueRequest(
            scrape_type=constants.ScrapeType.BACKGROUND,
            next_task=TEST_TASK,
            scraper_start_time=launch_time,
        )
    ]
    expected_metadata = IngestMetadata(
        region=scraper.region.region_code,
        jurisdiction_id=scraper.region.jurisdiction_id,
        ingest_time=launch_time,
        enum_overrides=scraper.get_enum_overrides(),
        system_level=SystemLevel.COUNTY,
        database_key=SQLAlchemyDatabaseKey.for_schema(SchemaType.JAILS),
    )
    expected_proto = convert_ingest_info_to_proto(self.ii)

    self.assertEqual(mock_get_more.call_count, 1)
    self.assertEqual(mock_populate.call_count, 1)
    self.assertEqual(mock_write.call_count, 1)
    mock_write.assert_called_once_with(expected_proto, expected_metadata)
    self.assertCountEqual(expected_tasks, scraper.tasks)
def testBoth(self):
    """ScrapedData accepts an IngestInfo and single_counts together."""
    info = IngestInfo(people=[Person(race=Race("ASIAN"))])
    counts = [SingleCount(count=123)]
    ScrapedData(ingest_info=info, single_counts=counts, persist=True)
def testSingleCount(self):
    """ScrapedData is valid with only single_counts and no IngestInfo."""
    counts = [SingleCount(count=123)]
    ScrapedData(single_counts=counts, persist=True)
def testIngestInfo(self):
    """ScrapedData is valid with only an IngestInfo and no single_counts."""
    info = IngestInfo(people=[Person(race=Race("ASIAN"))])
    ScrapedData(ingest_info=info, persist=True)
def testRaiseWithNothing(self):
    """Constructing ScrapedData with no data at all raises ScraperError."""
    with self.assertRaises(ScraperError):
        ScrapedData(persist=True, ingest_info=None)