def test_raw_file_metadata_normalized_file_name_unique_constraint(self) -> None:
    """Committing two rows with the same normalized_file_name must fail.

    Two DirectIngestRawFileMetadata rows that share region_code, file_tag and
    normalized_file_name are added to a single session. The commit is
    expected to raise IntegrityError, and a fresh session is then expected to
    find no DirectIngestRawFileMetadata rows at all.
    """
    session = SessionFactory.for_schema_base(OperationsBase)
    try:
        raw_metadata_1 = schema.DirectIngestRawFileMetadata(
            region_code='us_xx_yyyy',
            file_tag='file_tag',
            discovery_time=datetime.datetime(2019, 10, 11),
            normalized_file_name='foo.txt',
            datetimes_contained_upper_bound_inclusive=datetime.datetime(
                2019, 10, 10),
        )
        raw_metadata_2 = schema.DirectIngestRawFileMetadata(
            region_code='us_xx_yyyy',
            file_tag='file_tag',
            discovery_time=datetime.datetime(2019, 11, 12),
            normalized_file_name='foo.txt',
            datetimes_contained_upper_bound_inclusive=datetime.datetime(
                2019, 11, 11),
        )

        session.add(raw_metadata_1)
        session.add(raw_metadata_2)

        with self.assertRaises(IntegrityError):
            session.commit()
    finally:
        # Always release the session, including after the expected commit
        # failure, so it does not linger for the rest of the test run.
        session.close()

    # Use a brand-new session to check what was actually persisted.
    session = SessionFactory.for_schema_base(OperationsBase)
    try:
        self.assertEqual(
            [], session.query(schema.DirectIngestRawFileMetadata).all())
    finally:
        session.close()
def test_raw_file_metadata_normalized_file_name_unique_constraint(
        self) -> None:
    """Verify that duplicate normalized_file_name rows cannot be committed.

    Adds two DirectIngestRawFileMetadata rows with identical region_code,
    file_tag and normalized_file_name, expects the commit to raise
    IntegrityError, then confirms through a second session that the table
    holds no rows.
    """
    # Column values that are the same on both rows.
    shared_columns = {
        "region_code": "us_xx_yyyy",
        "file_tag": "file_tag",
        "normalized_file_name": "foo.txt",
    }

    with SessionFactory.using_database(
            self.database_key, autocommit=False) as session:
        first_row = schema.DirectIngestRawFileMetadata(
            discovery_time=datetime.datetime(2019, 10, 11),
            datetimes_contained_upper_bound_inclusive=datetime.datetime(
                2019, 10, 10),
            **shared_columns,
        )
        duplicate_row = schema.DirectIngestRawFileMetadata(
            discovery_time=datetime.datetime(2019, 11, 12),
            datetimes_contained_upper_bound_inclusive=datetime.datetime(
                2019, 11, 11),
            **shared_columns,
        )

        for row in (first_row, duplicate_row):
            session.add(row)

        with self.assertRaises(IntegrityError):
            session.commit()

    # A separate session shows what, if anything, was persisted.
    with SessionFactory.using_database(
            self.database_key, autocommit=False) as session:
        persisted_rows = session.query(
            schema.DirectIngestRawFileMetadata).all()
        self.assertEqual([], persisted_rows)
def mark_file_as_discovered(self, path: GcsfsFilePath) -> None:
    """Record that the unprocessed file at |path| has been discovered.

    For ingest view files, the metadata row returned by
    dao.get_file_metadata_row_for_path gets its discovery_time set to the
    current UTC time; export_time is set to the same timestamp only when it
    is not already set. For raw data files, a new
    DirectIngestRawFileMetadata row is added to the session.

    The session is committed on success, rolled back before re-raising on
    any exception, and closed in every case.

    Raises:
        ValueError: if the file name does not start with
            DIRECT_INGEST_UNPROCESSED_PREFIX, or if the file type parsed
            from the path is neither INGEST_VIEW nor RAW_DATA.
    """
    is_unprocessed = path.file_name.startswith(DIRECT_INGEST_UNPROCESSED_PREFIX)
    if not is_unprocessed:
        raise ValueError("Expect only unprocessed paths in this function.")

    path_parts = filename_parts_from_path(path)
    session = SessionFactory.for_schema_base(OperationsBase)

    try:
        if path_parts.file_type == GcsfsDirectIngestFileType.INGEST_VIEW:
            existing_row = dao.get_file_metadata_row_for_path(
                session, self.region_code, path
            )
            now = datetime.datetime.utcnow()
            # Only fill in export_time when nothing has been recorded yet.
            if not existing_row.export_time:
                existing_row.export_time = now
            existing_row.discovery_time = now
        elif path_parts.file_type == GcsfsDirectIngestFileType.RAW_DATA:
            new_row = schema.DirectIngestRawFileMetadata(
                region_code=self.region_code,
                file_tag=path_parts.file_tag,
                normalized_file_name=path.file_name,
                discovery_time=datetime.datetime.utcnow(),
                processed_time=None,
                datetimes_contained_upper_bound_inclusive=(
                    path_parts.utc_upload_datetime
                ),
            )
            session.add(new_row)
        else:
            raise ValueError(f"Unexpected path type: {path_parts.file_type}")

        session.commit()
    except Exception as error:
        # Undo any pending changes before handing the error to the caller.
        session.rollback()
        raise error
    finally:
        session.close()
def test_raw_file_metadata(self) -> None:
    """Round-trip a single DirectIngestRawFileMetadata row.

    Adds and commits one row, reads back the only row in the table, and
    checks that it equals the object that was added and that its file_id
    has been populated.
    """
    session = SessionFactory.for_schema_base(OperationsBase)
    try:
        raw_metadata = schema.DirectIngestRawFileMetadata(
            region_code='us_xx_yyyy',
            file_tag='file_tag',
            discovery_time=datetime.datetime.now(),
            normalized_file_name='foo.txt',
            datetimes_contained_upper_bound_inclusive=datetime.datetime(
                2019, 10, 11),
        )
        session.add(raw_metadata)
        session.commit()

        # one() fails unless the query returns exactly one row.
        result_metadata = one(
            session.query(schema.DirectIngestRawFileMetadata).all())

        self.assertEqual(result_metadata, raw_metadata)
        self.assertIsNotNone(result_metadata.file_id)
    finally:
        # Release the session whether or not the assertions passed.
        session.close()
def mark_raw_file_as_discovered(self, path: GcsfsFilePath) -> None:
    """Add a DirectIngestRawFileMetadata row for the raw file at |path|.

    The path is first validated with self._check_is_raw_file_path, then its
    file name must start with DIRECT_INGEST_UNPROCESSED_PREFIX. The new row
    takes its file_tag and upper-bound datetime from the parsed file name,
    gets a timezone-aware UTC discovery_time, and has no processed_time.

    Raises:
        ValueError: if the file name does not start with
            DIRECT_INGEST_UNPROCESSED_PREFIX.
    """
    self._check_is_raw_file_path(path)

    file_name = path.file_name
    if not file_name.startswith(DIRECT_INGEST_UNPROCESSED_PREFIX):
        raise ValueError("Expect only unprocessed paths in this function.")

    path_parts = filename_parts_from_path(path)

    with SessionFactory.using_database(self.database_key) as session:
        discovered_row = schema.DirectIngestRawFileMetadata(
            region_code=self.region_code,
            file_tag=path_parts.file_tag,
            normalized_file_name=path.file_name,
            discovery_time=datetime.datetime.now(tz=pytz.UTC),
            processed_time=None,
            datetimes_contained_upper_bound_inclusive=(
                path_parts.utc_upload_datetime
            ),
        )
        session.add(discovered_row)
def test_raw_file_metadata(self) -> None:
    """Persist one DirectIngestRawFileMetadata row and read it back.

    After committing the row, the table must contain exactly one entry that
    equals the object that was added and carries a non-None file_id.
    """
    with SessionFactory.using_database(
            self.database_key, autocommit=False) as session:
        upper_bound = datetime.datetime(2019, 10, 11)
        stored_row = schema.DirectIngestRawFileMetadata(
            region_code="us_xx_yyyy",
            file_tag="file_tag",
            discovery_time=datetime.datetime.now(),
            normalized_file_name="foo.txt",
            datetimes_contained_upper_bound_inclusive=upper_bound,
        )
        session.add(stored_row)
        session.commit()

        # one() fails unless the query returns exactly one row.
        all_rows = session.query(schema.DirectIngestRawFileMetadata).all()
        fetched_row = one(all_rows)

        self.assertEqual(fetched_row, stored_row)
        self.assertIsNotNone(fetched_row.file_id)