    def test_batch_delete_over_500_ingest_infos_for_region(self):
        task_hash = hash(
            json.dumps(
                Task(
                    task_type=constants.TaskType.SCRAPE_DATA,
                    endpoint=TEST_ENDPOINT,
                    response_type=constants.ResponseType.TEXT,
                ).to_serializable(),
                sort_keys=True,
            ))
        start_time = datetime.now()

        # The Datastore limit for entity writes in one call is 500. Confirm
        # that batch delete is properly handled when more than 500 entities
        # exist for the same region.
        for i in range(600):
            datastore_ingest_info.write_ingest_info(
                region="us_state_county",
                session_start_time=start_time,
                ingest_info=sample_ingest_info(str(i)),
                task_hash=task_hash,
            )

        datastore_ingest_info.batch_delete_ingest_infos_for_region(
            "us_state_county")

        assert (datastore_ingest_info.batch_get_ingest_infos_for_region(
            "us_state_county", start_time) == [])
def write(ingest_info: IngestInfo, scrape_key: ScrapeKey, task: Task):
    session = sessions.get_current_session(scrape_key)
    if not session:
        raise DatastoreError(scrape_key.region_code, "write")
    datastore_ingest_info.write_ingest_info(
        region=scrape_key.region_code,
        session_start_time=session.start,
        ingest_info=ingest_info,
        task_hash=hash(json.dumps(task.to_serializable(), sort_keys=True)))
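
# The snippets above compute task_hash with Python's built-in hash(), which is
# only stable within one process (string hashing is salted per interpreter
# run). A minimal sketch of a deterministic alternative using hashlib; the
# helper name stable_task_hash is hypothetical and not part of this codebase.
import hashlib
import json

def stable_task_hash(task: Task) -> int:
    serialized = json.dumps(task.to_serializable(), sort_keys=True)
    # Truncate the SHA-256 digest to 64 bits so it behaves like a normal int hash.
    return int.from_bytes(hashlib.sha256(serialized.encode("utf-8")).digest()[:8], "big")
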
    def test_batch_get_ingest_infos_for_region(self):
        task_hash = hash(
            json.dumps(
                Task(
                    task_type=constants.TaskType.SCRAPE_DATA,
                    endpoint=TEST_ENDPOINT,
                    response_type=constants.ResponseType.TEXT,
                ).to_serializable(),
                sort_keys=True,
            ))

        start_time = datetime.now()
        first = datastore_ingest_info.write_ingest_info(
            region="us_state_county",
            session_start_time=start_time,
            ingest_info=sample_ingest_info("1"),
            task_hash=task_hash,
        )
        second = datastore_ingest_info.write_ingest_info(
            region="us_state_county",
            session_start_time=start_time,
            ingest_info=sample_ingest_info("2"),
            task_hash=task_hash,
        )
        third = datastore_ingest_info.write_ingest_info(
            region="us_state_county",
            session_start_time=start_time,
            ingest_info=sample_ingest_info("3"),
            task_hash=task_hash,
        )
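        # Write an entity for a different region; it should not appear in the
        # results fetched for "us_state_county" below.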
        datastore_ingest_info.write_ingest_info(
            region="unrelated",
            session_start_time=start_time,
            ingest_info=sample_ingest_info("n/a"),
            task_hash=task_hash,
        )

        results = datastore_ingest_info.batch_get_ingest_infos_for_region(
            "us_state_county", start_time)

        # The unrelated region's entity is excluded; the three writes for
        # "us_state_county" come back in the order they were written.
        assert results == [first, second, third]
        datastore_ingest_info.batch_delete_ingest_infos_for_region(
            "us_state_county")
    def test_write_single_ingest_info(self):
        task_hash = hash(
            json.dumps(Task(
                task_type=constants.TaskType.SCRAPE_DATA,
                endpoint=TEST_ENDPOINT,
                response_type=constants.ResponseType.TEXT).to_serializable(),
                       sort_keys=True))

        start_time = datetime.now()
        ingest_info = datastore_ingest_info.write_ingest_info(
            region='us_state_county',
            session_start_time=start_time,
            ingest_info=sample_ingest_info('1'),
            task_hash=task_hash)
        results = datastore_ingest_info.batch_get_ingest_infos_for_region(
            'us_state_county', start_time)
        assert results == [ingest_info]
        datastore_ingest_info.batch_delete_ingest_infos_for_region(
            'us_state_county')
    def test_batch_delete_ingest_infos_for_region(self):
        task_hash = hash(
            json.dumps(Task(
                task_type=constants.TaskType.SCRAPE_DATA,
                endpoint=TEST_ENDPOINT,
                response_type=constants.ResponseType.TEXT).to_serializable(),
                       sort_keys=True))
        start_time = datetime.now()

        datastore_ingest_info.write_ingest_info(
            region='us_state_county',
            session_start_time=start_time,
            ingest_info=sample_ingest_info('1'),
            task_hash=task_hash)
        datastore_ingest_info.write_ingest_info(
            region='us_state_county',
            session_start_time=start_time,
            ingest_info=sample_ingest_info('2'),
            task_hash=task_hash)
        datastore_ingest_info.write_ingest_info(
            region='us_state_county',
            session_start_time=start_time,
            ingest_info=sample_ingest_info('3'),
            task_hash=task_hash)
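        # This entity belongs to a different region and should survive the
        # region-scoped delete below.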
        unrelated = datastore_ingest_info.write_ingest_info(
            region='unrelated_us_state_county',
            session_start_time=start_time,
            ingest_info=sample_ingest_info('n/a'),
            task_hash=task_hash)

        datastore_ingest_info.batch_delete_ingest_infos_for_region(
            'us_state_county')

        assert datastore_ingest_info.batch_get_ingest_infos_for_region(
            'us_state_county', start_time) == []

        actual = datastore_ingest_info.batch_get_ingest_infos_for_region(
            'unrelated_us_state_county', start_time)
        assert actual == [unrelated]

        datastore_ingest_info.batch_delete_ingest_infos_for_region(
            'unrelated_us_state_county')
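
# The per-test cleanup calls above could instead live in an autouse pytest
# fixture, sketched here on the assumption that pytest runs these tests; the
# fixture name _clean_datastore and the region list are illustrative only.
import pytest

@pytest.fixture(autouse=True)
def _clean_datastore():
    yield
    for region in ("us_state_county", "unrelated_us_state_county"):
        datastore_ingest_info.batch_delete_ingest_infos_for_region(region)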