    def test_get_ingest_view_metadata_pending_export_basic(self):
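        """Tests that a newly registered export job is returned as pending export."""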
        args = GcsfsIngestViewExportArgs(
            ingest_view_name='file_tag',
            upper_bound_datetime_prev=datetime.datetime(
                2015, 1, 2, 2, 2, 2, 2),
            upper_bound_datetime_to_export=datetime.datetime(
                2015, 1, 2, 3, 3, 3, 3))
        with freeze_time('2015-01-02T03:06:06'):
            self.metadata_manager.register_ingest_file_export_job(args)

        expected_list = [
            DirectIngestIngestFileMetadata.new_with_defaults(
                region_code='US_XX',
                file_tag='file_tag',
                is_invalidated=False,
                is_file_split=False,
                job_creation_time=datetime.datetime(2015, 1, 2, 3, 6, 6),
                datetimes_contained_lower_bound_exclusive=datetime.datetime(
                    2015, 1, 2, 2, 2, 2, 2),
                datetimes_contained_upper_bound_inclusive=datetime.datetime(
                    2015, 1, 2, 3, 3, 3, 3))
        ]

        self.assertEqual(
            expected_list,
            self.metadata_manager.get_ingest_view_metadata_pending_export())

    def test_getIngestViewExportTaskArgs_happy(self) -> None:
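        """Tests that a newly discovered raw file produces new export task args."""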
        # Arrange
        region = self.create_fake_region(ingest_view_exports_enabled=True)
        export_manager = self.create_export_manager(region)
        export_manager.file_metadata_manager.get_ingest_view_metadata_for_most_recent_valid_job = Mock(  # type: ignore
            return_value=DirectIngestIngestFileMetadata(
                file_id=_ID,
                region_code=region.region_code,
                file_tag="ingest_view",
                normalized_file_name="normalized_file_name",
                processed_time=_DATE_1,
                is_invalidated=False,
                is_file_split=False,
                job_creation_time=_DATE_1,
                export_time=_DATE_1,
                datetimes_contained_lower_bound_exclusive=_DATE_1,
                datetimes_contained_upper_bound_inclusive=_DATE_1,
                discovery_time=_DATE_1,
            ))
        export_manager.file_metadata_manager.get_metadata_for_raw_files_discovered_after_datetime = Mock(  # type: ignore
            return_value=[
                DirectIngestRawFileMetadata(
                    file_id=2,
                    region_code=region.region_code,
                    file_tag="ingest_view",
                    discovery_time=_DATE_2,
                    normalized_file_name=(
                        "unprocessed_2015-01-02T03:03:03:000003_raw_file_tag.csv"),
                    processed_time=None,
                    datetimes_contained_upper_bound_inclusive=_DATE_2,
                )
            ])

        # Act
        args = export_manager.get_ingest_view_export_task_args()

        # Assert
        self.assertListEqual(
            args,
            [
                GcsfsIngestViewExportArgs(
                    ingest_view_name="ingest_view",
                    upper_bound_datetime_prev=_DATE_1,
                    upper_bound_datetime_to_export=_DATE_2,
                )
            ],
        )

    def test_getIngestViewExportTaskArgs_rawCodeTableOlderThanLastExport(
            self) -> None:
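        """Tests that a backdated code table file does not trigger a new export."""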
        # Arrange
        CODE_TABLE_TAG = "RECIDIVIZ_REFERENCE_ingest_view"
        region = self.create_fake_region(ingest_view_exports_enabled=True)
        export_manager = self.create_export_manager(
            region, controller_file_tags=[CODE_TABLE_TAG])
        export_manager.file_metadata_manager.get_ingest_view_metadata_for_most_recent_valid_job = Mock(  # type: ignore
            return_value=DirectIngestIngestFileMetadata(
                file_id=_ID,
                region_code=region.region_code,
                file_tag=CODE_TABLE_TAG,
                normalized_file_name="normalized_file_name",
                processed_time=_DATE_2,
                is_invalidated=False,
                is_file_split=False,
                job_creation_time=_DATE_2,
                export_time=_DATE_2,
                datetimes_contained_lower_bound_exclusive=(
                    _DATE_2 - datetime.timedelta(days=7)),
                datetimes_contained_upper_bound_inclusive=_DATE_2,
                discovery_time=_DATE_2,
            ))
        export_manager.file_metadata_manager.get_metadata_for_raw_files_discovered_after_datetime = Mock(  # type: ignore
            return_value=[
                DirectIngestRawFileMetadata(
                    file_id=2,
                    region_code=region.region_code,
                    file_tag=CODE_TABLE_TAG,
                    discovery_time=_DATE_1,
                    normalized_file_name=(
                        "unprocessed_2015-01-02T03:03:03:000003_raw_file_tag.csv"),
                    processed_time=None,
                    datetimes_contained_upper_bound_inclusive=_DATE_1,
                )
            ])

        # Act
        args = export_manager.get_ingest_view_export_task_args()

        # Assert
        # New code tables are backdated but don't need to be re-ingested, so ignore them.
        self.assertListEqual(args, [])

    def test_getIngestViewExportTaskArgs_rawFileOlderThanLastExport(
            self) -> None:
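        """Tests that a raw file dated before the last valid export raises a ValueError."""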
        # Arrange
        region = self.create_fake_region(ingest_view_exports_enabled=True)
        export_manager = self.create_export_manager(region)
        export_manager.file_metadata_manager.get_ingest_view_metadata_for_most_recent_valid_job = Mock(  # type: ignore
            return_value=DirectIngestIngestFileMetadata(
                file_id=_ID,
                region_code=region.region_code,
                file_tag="ingest_view",
                normalized_file_name="normalized_file_name",
                processed_time=_DATE_2,
                is_invalidated=False,
                is_file_split=False,
                job_creation_time=_DATE_2,
                export_time=_DATE_2,
                datetimes_contained_lower_bound_exclusive=_DATE_2,
                datetimes_contained_upper_bound_inclusive=_DATE_2,
                discovery_time=_DATE_2,
            ))
        export_manager.file_metadata_manager.get_metadata_for_raw_files_discovered_after_datetime = Mock(  # type: ignore
            return_value=[
                DirectIngestRawFileMetadata(
                    file_id=2,
                    region_code=region.region_code,
                    file_tag="ingest_view",
                    discovery_time=_DATE_1,
                    normalized_file_name=(
                        "unprocessed_2015-01-02T03:03:03:000003_raw_file_tag.csv"),
                    processed_time=None,
                    datetimes_contained_upper_bound_inclusive=_DATE_1,
                )
            ])

        # Act
        with pytest.raises(
                ValueError,
                match=r"upper bound date.*before the last valid export"):
            export_manager.get_ingest_view_export_task_args()

    def run_split_ingest_file_progression_pre_processing(
            self,
            metadata_manager: PostgresDirectIngestFileMetadataManager,
            original_file_metadata: DirectIngestIngestFileMetadata,
            split_file_path: GcsfsFilePath,
            discovery_before_export_recorded: bool = False):
        """Runs through the full progression of operations we expect to run on a split ingest file, up until processing.
        """
        expected_metadata = DirectIngestIngestFileMetadata.new_with_defaults(
            region_code=metadata_manager.region_code,
            file_tag=original_file_metadata.file_tag,
            is_invalidated=False,
            is_file_split=True,
            job_creation_time=datetime.datetime(2015, 1, 2, 3, 5, 5),
            datetimes_contained_lower_bound_exclusive=(
                original_file_metadata.datetimes_contained_lower_bound_exclusive),
            datetimes_contained_upper_bound_inclusive=(
                original_file_metadata.datetimes_contained_upper_bound_inclusive),
            normalized_file_name=split_file_path.file_name,
            export_time=None,
            discovery_time=None,
            processed_time=None,
        )

        with freeze_time('2015-01-02T03:05:05'):
            split_file_metadata = metadata_manager.register_ingest_file_split(
                original_file_metadata, split_file_path)

        self.assertEqual(expected_metadata, split_file_metadata)
        metadata = metadata_manager.get_file_metadata(split_file_path)
        self.assertEqual(expected_metadata, metadata)

        # ... export actually performed in here
        if discovery_before_export_recorded:
            with freeze_time('2015-01-02T03:06:07'):
                metadata_manager.mark_file_as_discovered(split_file_path)

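            # Recording discovery before the export is expected to backfill
            # export_time with the discovery time as well.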
            expected_metadata.discovery_time = datetime.datetime(
                2015, 1, 2, 3, 6, 7)
            expected_metadata.export_time = datetime.datetime(
                2015, 1, 2, 3, 6, 7)

            metadata = metadata_manager.get_file_metadata(split_file_path)
            self.assertEqual(expected_metadata, metadata)

        with freeze_time('2015-01-02T03:06:08'):
            metadata_manager.mark_ingest_view_exported(split_file_metadata)

        expected_metadata.export_time = datetime.datetime(2015, 1, 2, 3, 6, 8)

        metadata = metadata_manager.get_file_metadata(split_file_path)
        self.assertEqual(expected_metadata, metadata)

        if not discovery_before_export_recorded:
            with freeze_time('2015-01-02T03:07:07'):
                metadata_manager.mark_file_as_discovered(split_file_path)

            expected_metadata.discovery_time = datetime.datetime(
                2015, 1, 2, 3, 7, 7)
            metadata = metadata_manager.get_file_metadata(split_file_path)
            self.assertEqual(expected_metadata, metadata)

    def run_ingest_view_file_progression(
            self,
            export_args: GcsfsIngestViewExportArgs,
            metadata_manager: PostgresDirectIngestFileMetadataManager,
            ingest_view_unprocessed_path: GcsfsFilePath,
            discovery_before_export_recorded: bool = False,
            split_file: bool = False):
        """Runs through the full progression of operations we expect to run on an individual ingest view file."""
        with freeze_time('2015-01-02T03:05:05'):
            metadata_manager.register_ingest_file_export_job(export_args)

        ingest_file_metadata = metadata_manager.get_ingest_view_metadata_for_export_job(
            export_args)

        expected_metadata = DirectIngestIngestFileMetadata.new_with_defaults(
            region_code=metadata_manager.region_code,
            file_tag=export_args.ingest_view_name,
            is_invalidated=False,
            is_file_split=False,
            job_creation_time=datetime.datetime(2015, 1, 2, 3, 5, 5),
            datetimes_contained_lower_bound_exclusive=(
                export_args.upper_bound_datetime_prev),
            datetimes_contained_upper_bound_inclusive=(
                export_args.upper_bound_datetime_to_export),
            normalized_file_name=None,
            export_time=None,
            discovery_time=None,
            processed_time=None,
        )

        self.assertEqual(expected_metadata, ingest_file_metadata)

        with freeze_time('2015-01-02T03:06:06'):
            metadata_manager.register_ingest_view_export_file_name(
                ingest_file_metadata, ingest_view_unprocessed_path)
        expected_metadata.normalized_file_name = ingest_view_unprocessed_path.file_name

        metadata = metadata_manager.get_file_metadata(
            ingest_view_unprocessed_path)
        self.assertEqual(expected_metadata, metadata)

        # ... export actually performed in here
        if discovery_before_export_recorded:
            with freeze_time('2015-01-02T03:06:07'):
                metadata_manager.mark_file_as_discovered(
                    ingest_view_unprocessed_path)

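            # Recording discovery before the export is expected to backfill
            # export_time with the discovery time as well.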
            expected_metadata.discovery_time = datetime.datetime(
                2015, 1, 2, 3, 6, 7)
            expected_metadata.export_time = datetime.datetime(
                2015, 1, 2, 3, 6, 7)

            metadata = metadata_manager.get_file_metadata(
                ingest_view_unprocessed_path)
            self.assertEqual(expected_metadata, metadata)

        with freeze_time('2015-01-02T03:06:08'):
            metadata_manager.mark_ingest_view_exported(ingest_file_metadata)

        expected_metadata.export_time = datetime.datetime(2015, 1, 2, 3, 6, 8)

        metadata = metadata_manager.get_file_metadata(
            ingest_view_unprocessed_path)
        self.assertEqual(expected_metadata, metadata)

        if not discovery_before_export_recorded:
            with freeze_time('2015-01-02T03:07:07'):
                metadata_manager.mark_file_as_discovered(
                    ingest_view_unprocessed_path)

            expected_metadata.discovery_time = datetime.datetime(
                2015, 1, 2, 3, 7, 7)
            metadata = metadata_manager.get_file_metadata(
                ingest_view_unprocessed_path)
            self.assertEqual(expected_metadata, metadata)

        split_file_paths_and_metadata: List[Tuple[
            GcsfsFilePath, DirectIngestIngestFileMetadata]] = []
        if split_file:
            metadata = metadata_manager.get_file_metadata(
                ingest_view_unprocessed_path)
            if not isinstance(metadata, DirectIngestIngestFileMetadata):
                self.fail(f'Unexpected metadata type {type(metadata)}')

            for i in range(2):
                split_file_path = self._make_unprocessed_path(
                    f'bucket/split{i}.csv',
                    GcsfsDirectIngestFileType.INGEST_VIEW)
                self.run_split_ingest_file_progression_pre_processing(
                    metadata_manager, metadata, split_file_path,
                    discovery_before_export_recorded)

                split_file_metadata = metadata_manager.get_file_metadata(
                    split_file_path)

                if not isinstance(split_file_metadata,
                                  DirectIngestIngestFileMetadata):
                    self.fail(
                        f'Unexpected split_file_metadata type [{split_file_metadata}].'
                    )

                split_file_paths_and_metadata.append(
                    (split_file_path, split_file_metadata))

        with freeze_time('2015-01-02T03:08:08'):
            metadata_manager.mark_file_as_processed(
                ingest_view_unprocessed_path)

        expected_metadata.processed_time = datetime.datetime(
            2015, 1, 2, 3, 8, 8)

        metadata = metadata_manager.get_file_metadata(
            ingest_view_unprocessed_path)

        self.assertEqual(expected_metadata, metadata)

        for split_file_path, split_file_metadata in split_file_paths_and_metadata:
            expected_metadata = split_file_metadata
            with freeze_time('2015-01-02T03:09:09'):
                metadata_manager.mark_file_as_processed(split_file_path)

            expected_metadata.processed_time = datetime.datetime(
                2015, 1, 2, 3, 9, 9)

            metadata = metadata_manager.get_file_metadata(split_file_path)

            self.assertEqual(expected_metadata, metadata)