def test_do_upload_graceful_failures(self, mock_fs_factory: Mock) -> None:
    mock_fs = FakeGCSFileSystem()
    mock_fs.test_add_path(
        path=GcsfsFilePath.from_bucket_and_blob_name(
            "test-project-direct-ingest-state-us-xx", "raw_data/test_file.txt"
        ),
        local_path=None,
    )
    mock_fs_factory.return_value = mock_fs

    controller = UploadStateFilesToIngestBucketController(
        paths_with_timestamps=[
            (
                "test-project-direct-ingest-state-us-xx/raw_data/test_file.txt",
                TODAY,
            ),
            (
                "test-project-direct-ingest-state-us-xx/raw_data/non_existent_file.txt",
                TODAY,
            ),
        ],
        project_id="test-project",
        region="us_xx",
    )

    uploaded_files, unable_to_upload_files = controller.do_upload()
    self.assertEqual(
        uploaded_files,
        ["test-project-direct-ingest-state-us-xx/raw_data/test_file.txt"],
    )
    self.assertEqual(
        unable_to_upload_files,
        ["test-project-direct-ingest-state-us-xx/raw_data/non_existent_file.txt"],
    )

def main(
    sandbox_dataset_prefix: str,
    schema_type: SchemaType,
    direct_ingest_instance: Optional[DirectIngestInstance],
) -> None:
    """Defines the main function responsible for moving data from Postgres to BQ."""
    # Note: the flattened original referenced `known_args.sandbox_dataset_prefix`,
    # but `known_args` is not in scope here; the function parameter is used instead.
    logging.info("Prefixing all output datasets with [%s_].", sandbox_dataset_prefix)

    fake_gcs = FakeGCSFileSystem()
    # We mock the export config to a version that does not have any paused regions.
    with mock.patch(
        f"{cloud_sql_to_bq_refresh_config.__name__}.GcsfsFactory.build",
        return_value=fake_gcs,
    ):
        fake_gcs.upload_from_string(
            path=CloudSqlToBQConfig.default_config_path(),
            contents=STANDARD_YAML_CONTENTS,
            content_type="text/yaml",
        )
        federated_bq_schema_refresh(
            schema_type=schema_type,
            direct_ingest_instance=direct_ingest_instance,
            dataset_override_prefix=sandbox_dataset_prefix,
        )
        config = CloudSqlToBQConfig.for_schema_type(schema_type)
        final_destination_dataset = config.unioned_multi_region_dataset(
            dataset_override_prefix=sandbox_dataset_prefix
        )

        logging.info(
            "Load complete. Data loaded to dataset [%s].", final_destination_dataset
        )

def test_do_upload_succeeds(self, mock_fs_factory: Mock) -> None:
    mock_fs = FakeGCSFileSystem()
    mock_fs.test_add_path(
        path=GcsfsFilePath.from_bucket_and_blob_name(
            "recidiviz-456-direct-ingest-state-us-xx", "raw_data/test_file.txt"
        ),
        local_path=None,
    )
    mock_fs_factory.return_value = mock_fs

    controller = UploadStateFilesToIngestBucketController(
        paths_with_timestamps=[
            (
                "recidiviz-456-direct-ingest-state-us-xx/raw_data/test_file.txt",
                TODAY,
            )
        ],
        project_id="recidiviz-456",
        region="us_xx",
    )

    expected_result = [
        "recidiviz-456-direct-ingest-state-us-xx/raw_data/test_file.txt"
    ]
    result: MultiRequestResultWithSkipped[str, str, str] = controller.do_upload()
    self.assertEqual(result.successes, expected_result)
    self.assertEqual(len(result.failures), 0)
    self.assertEqual(len(controller.skipped_files), 0)
    self.assertFalse(self.us_xx_manager.is_instance_paused())

def setUp(self) -> None:
    self.test_app = Flask(__name__)
    blueprint = Blueprint("data_discovery_test", __name__)
    self.test_client = self.test_app.test_client()
    self.fakeredis = fakeredis.FakeRedis()

    self.fs = FakeGCSFileSystem()
    self.gcs_factory_patcher = patch(
        "recidiviz.admin_panel.routes.data_discovery.GcsfsFactory.build"
    )
    self.gcs_factory_patcher.start().return_value = self.fs

    self.project_number_patcher = patch(
        "recidiviz.utils.metadata.project_number", return_value=999
    )
    self.requires_gae_auth_patcher = patch(
        "recidiviz.admin_panel.routes.data_discovery.requires_gae_auth",
        side_effect=lambda route: route,
    )
    self.redis_patcher = patch("redis.Redis", return_value=self.fakeredis)

    self.project_number_patcher.start()
    self.redis_patcher.start()
    self.requires_gae_auth_patcher.start()

    add_data_discovery_routes(blueprint)
    self.test_app.register_blueprint(blueprint)

def setUp(self) -> None:
    self.metadata_patcher = mock.patch("recidiviz.utils.metadata.project_id")
    self.mock_project_id_fn = self.metadata_patcher.start()
    self.mock_project_id_fn.return_value = "recidiviz-staging"

    test_secrets = {
        # pylint: disable=protected-access
        SQLAlchemyEngineManager._get_cloudsql_instance_id_key(
            schema_type
        ): f"test-project:us-east2:{schema_type.value}-data"
        for schema_type in SchemaType
    }
    self.get_secret_patcher = mock.patch("recidiviz.utils.secrets.get_secret")
    self.get_secret_patcher.start().side_effect = test_secrets.get

    self.gcs_factory_patcher = mock.patch(
        "recidiviz.admin_panel.dataset_metadata_store.GcsfsFactory.build"
    )
    self.fake_fs = FakeGCSFileSystem()
    self.gcs_factory_patcher.start().return_value = self.fake_fs
    self.fake_config_path = GcsfsFilePath.from_absolute_path(
        "gs://recidiviz-staging-configs/cloud_sql_to_bq_config.yaml"
    )

def setUp(self) -> None:
    self.schema_types: List[SchemaType] = list(SchemaType)
    self.enabled_schema_types = [
        schema_type
        for schema_type in self.schema_types
        if CloudSqlToBQConfig.is_valid_schema_type(schema_type)
    ]
    self.mock_project_id = "fake-recidiviz-project"
    self.metadata_patcher = mock.patch(
        "recidiviz.persistence.database.bq_refresh.cloud_sql_to_bq_refresh_config.metadata"
    )
    self.mock_metadata = self.metadata_patcher.start()
    self.mock_metadata.project_id.return_value = self.mock_project_id

    self.gcs_factory_patcher = mock.patch(
        "recidiviz.persistence.database.bq_refresh.cloud_sql_to_bq_refresh_config.GcsfsFactory.build"
    )
    self.fake_gcs = FakeGCSFileSystem()
    self.gcs_factory_patcher.start().return_value = self.fake_gcs

    self.set_config_yaml(
        """
region_codes_to_exclude:
  - US_ND
state_history_tables_to_include:
  - state_person_history
county_columns_to_exclude:
  person:
    - full_name
    - birthdate_inferred_from_age
"""
    )

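# The set_config_yaml helper called above is not shown in this section. A
# minimal sketch of what it plausibly does, mirroring the inline
# upload_from_string calls in the other setUp methods here (the config path
# is assumed from those setups, not confirmed for this class):
def set_config_yaml(self, contents: str) -> None:
    path = GcsfsFilePath.from_absolute_path(
        f"gs://{self.mock_project_id}-configs/cloud_sql_to_bq_config.yaml"
    )
    self.fake_gcs.upload_from_string(
        path=path, contents=contents, content_type="text/yaml"
    )
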
def test_normalize_file_path(
    self,
    mock_fs_factory: mock.MagicMock,
    mock_environment: mock.MagicMock,
) -> None:
    mock_environment.return_value = "production"
    mock_fs = FakeGCSFileSystem()
    mock_fs_factory.return_value = mock_fs

    path = GcsfsFilePath.from_absolute_path("bucket-us-xx/file-tag.csv")
    mock_fs.test_add_path(path, local_path=None)

    request_args = {
        "bucket": path.bucket_name,
        "relative_file_path": path.blob_name,
    }
    headers = {"X-Appengine-Cron": "test-cron"}
    response = self.client.get(
        "/normalize_raw_file_path", query_string=request_args, headers=headers
    )

    self.assertEqual(200, response.status_code)
    self.assertEqual(1, len(mock_fs.all_paths))

    registered_path = mock_fs.all_paths[0]
    if not isinstance(registered_path, GcsfsFilePath):
        self.fail(f"Unexpected type for path [{type(registered_path)}]")
    self.assertTrue(
        DirectIngestGCSFileSystem.is_normalized_file_path(registered_path)
    )

def setUp(self) -> None:
    self.client = app.test_client()
    self.fs = FakeGCSFileSystem()
    self.gcs_factory_patcher = patch(
        "recidiviz.ingest.aggregate.scrape_aggregate_reports.GcsfsFactory.build"
    )
    self.gcs_factory_patcher.start().return_value = self.fs

def add_direct_ingest_path(
    fs: FakeGCSFileSystem,
    path: Union[GcsfsFilePath, GcsfsDirectoryPath],
    has_fixture: bool = True,
    fail_handle_file_call: bool = False,
) -> None:
    local_path = None
    if has_fixture and isinstance(path, GcsfsFilePath):
        local_path = _get_fixture_for_direct_ingest_path(path)
    fs.test_add_path(path, local_path, fail_handle_file_call)

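# Hedged usage sketch for the helper above; the bucket and file names are
# hypothetical, and has_fixture=False avoids requiring a fixture file on disk:
def _example_seed_fake_fs() -> FakeGCSFileSystem:
    fs = FakeGCSFileSystem()
    add_direct_ingest_path(
        fs,
        GcsfsFilePath.from_absolute_path(
            "gs://example-ingest-bucket/raw_data/tagA.csv"  # hypothetical path
        ),
        has_fixture=False,
    )
    return fs
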
def prepare_files(fs: FakeGCSFileSystem, manifest_filepath: str) -> GcsfsFilePath:
    """Makes the file system aware of all files for the report and returns the
    manifest filepath."""
    directory = os.path.dirname(manifest_filepath)
    for file_name in os.listdir(directory):
        path = os.path.join(directory, file_name)
        fs.test_add_path(gcs_path(path), path)
    return gcs_path(manifest_filepath)

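# Hedged sketch of how prepare_files is typically wired up, mirroring the
# run_justice_counts_ingest_locally flow further down in this section:
def _example_ingest_manifest(manifest_file: str) -> None:
    fs = FakeGCSFileSystem()
    manual_upload.ingest(fs, prepare_files(fs, manifest_file))
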
def setUp(self) -> None:
    self.project_id_patcher = patch("recidiviz.utils.metadata.project_id")
    self.email_generation_patcher = patch(
        "recidiviz.reporting.email_generation.generate"
    )
    self.gcs_file_system_patcher = patch(
        "recidiviz.cloud_storage.gcsfs_factory.GcsfsFactory.build"
    )
    self.project_id_patcher.start().return_value = "recidiviz-test"
    self.mock_email_generation = self.email_generation_patcher.start()
    self.gcs_file_system = FakeGCSFileSystem()
    self.mock_gcs_file_system = self.gcs_file_system_patcher.start()
    self.mock_gcs_file_system.return_value = self.gcs_file_system
    self.state_code = "US_ID"
    self.report_type = "po_monthly_report"

def setUp(self) -> None:
    # Ensures StateCode.US_XX is properly loaded
    importlib.reload(states)

    self.mock_project_id = "recidiviz-staging"
    self.metadata_patcher = mock.patch("recidiviz.utils.metadata.project_id")
    self.mock_metadata = self.metadata_patcher.start()
    self.mock_metadata.return_value = self.mock_project_id

    self.gcs_factory_patcher = mock.patch(
        "recidiviz.persistence.database.bq_refresh.cloud_sql_to_bq_refresh_config.GcsfsFactory.build"
    )
    self.fake_gcs = FakeGCSFileSystem()
    self.gcs_factory_patcher.start().return_value = self.fake_gcs

    yaml_contents = """
region_codes_to_exclude:
  - US_ND
state_history_tables_to_include:
  - state_person_history
county_columns_to_exclude:
  person:
    - full_name
    - birthdate_inferred_from_age
"""
    path = GcsfsFilePath.from_absolute_path(
        f"gs://{self.mock_project_id}-configs/cloud_sql_to_bq_config.yaml"
    )
    self.fake_gcs.upload_from_string(
        path=path, contents=yaml_contents, content_type="text/yaml"
    )

    self.mock_bq_client = create_autospec(BigQueryClientImpl)
    self.client_patcher = mock.patch(
        f"{FEDERATED_REFRESH_PACKAGE_NAME}.BigQueryClientImpl"
    )
    self.client_patcher.start().return_value = self.mock_bq_client
    self.view_update_client_patcher = mock.patch(
        "recidiviz.big_query.view_update_manager.BigQueryClientImpl"
    )
    self.view_update_client_patcher.start().return_value = self.mock_bq_client

    test_secrets = {
        # pylint: disable=protected-access
        SQLAlchemyEngineManager._get_cloudsql_instance_id_key(
            schema_type
        ): f"test-project:us-east2:{schema_type.value}-data"
        for schema_type in SchemaType
    }
    self.get_secret_patcher = mock.patch("recidiviz.utils.secrets.get_secret")
    self.get_secret_patcher.start().side_effect = test_secrets.get

def test_upload_from_sftp(
    self,
    _mock_upload_controller: mock.MagicMock,
    _mock_download_controller: mock.MagicMock,
    mock_fs_factory: mock.MagicMock,
    mock_download_delegate_factory: mock.MagicMock,
    mock_sftp_auth: mock.MagicMock,
    mock_environment: mock.MagicMock,
) -> None:
    region_code = "us_xx"
    mock_environment.return_value = "staging"
    request_args = {"region": region_code, "date": "2021-01-01"}
    headers = {"X-Appengine-Cron": "test-cron"}

    mock_fs_factory.return_value = FakeGCSFileSystem()
    mock_download_delegate_factory.return_value = Mock(
        spec=BaseSftpDownloadDelegate,
        root_directory=lambda _, candidate_paths: ".",
        filter_paths=lambda _, candidate_paths: candidate_paths,
        post_process_downloads=lambda _, download_directory_path: None,
    )
    mock_sftp_auth.return_value = SftpAuth("host", "username", "password", CnOpts())

    response = self.client.post(
        "/upload_from_sftp", query_string=request_args, headers=headers
    )
    self.assertEqual(200, response.status_code)

def create_export_manager(
    self,
    region: Region,
    is_detect_row_deletion_view: bool = False,
    materialize_raw_data_table_views: bool = False,
    controller_file_tags: Optional[List[str]] = None,
) -> DirectIngestIngestViewExportManager:
    metadata_manager = PostgresDirectIngestFileMetadataManager(region.region_code)
    controller_file_tags = (
        ["ingest_view"] if controller_file_tags is None else controller_file_tags
    )
    return DirectIngestIngestViewExportManager(
        region=region,
        fs=FakeGCSFileSystem(),
        ingest_directory_path=GcsfsDirectoryPath.from_absolute_path("ingest_bucket"),
        big_query_client=self.mock_client,
        file_metadata_manager=metadata_manager,
        view_collector=_ViewCollector(  # type: ignore[arg-type]
            region,
            controller_file_tags=controller_file_tags,
            is_detect_row_deletion_view=is_detect_row_deletion_view,
            materialize_raw_data_table_views=materialize_raw_data_table_views,
        ),
        launched_file_tags=controller_file_tags,
    )

def setUp(self) -> None:
    self.fake_fs = FakeGCSFileSystem()
    self.project_id_patcher = patch("recidiviz.utils.metadata.project_id")
    self.project_id_patcher.start().return_value = "recidiviz-456"
    self.blocking_locks = ["blocking_lock1", "blocking_lock2"]
    with patch(
        "recidiviz.cloud_storage.gcs_pseudo_lock_manager.GcsfsFactory.build",
        Mock(return_value=self.fake_fs),
    ):
        self.lock_manager = DirectIngestRegionLockManager(
            region_code=StateCode.US_XX.value,
            blocking_locks=self.blocking_locks,
            ingest_instance=DirectIngestInstance.PRIMARY,
        )
        self.lock_manager_secondary = DirectIngestRegionLockManager(
            region_code=StateCode.US_XX.value,
            blocking_locks=self.blocking_locks,
            ingest_instance=DirectIngestInstance.SECONDARY,
        )
        self.lock_manager_other_region = DirectIngestRegionLockManager(
            region_code=StateCode.US_WW.value,
            blocking_locks=[],
            ingest_instance=DirectIngestInstance.PRIMARY,
        )

def setUp(self) -> None:
    self.fs = DirectIngestGCSFileSystem(FakeGCSFileSystem())
    self.prioritizer = GcsfsDirectIngestJobPrioritizer(
        self.fs,
        self._INGEST_BUCKET_PATH,
        ["tagA", "tagB"],
        file_type_filter=GcsfsDirectIngestFileType.INGEST_VIEW,
    )

def setUp(self) -> None:
    self.project_id_patcher = mock.patch(
        "recidiviz.cloud_storage.gcs_pseudo_lock_manager.metadata"
    )
    self.project_id_patcher.start().return_value = "recidiviz-123"
    self.gcs_factory_patcher = mock.patch(
        "recidiviz.cloud_storage.gcs_pseudo_lock_manager.GcsfsFactory.build"
    )
    fake_gcs = FakeGCSFileSystem()
    self.gcs_factory_patcher.start().return_value = fake_gcs
    self.fs = fake_gcs

def setUp(self) -> None:
    self.project_id_patcher = patch(
        "recidiviz.admin_panel.admin_stores.metadata.project_id"
    )
    self.project_id_patcher.start().return_value = "recidiviz-staging"
    self.gcs_factory_patcher = patch(
        "recidiviz.admin_panel.admin_stores.GcsfsFactory.build"
    )
    fake_gcs = FakeGCSFileSystem()
    self.gcs_factory_patcher.start().return_value = fake_gcs
    self.fs = fake_gcs
    self.admin_stores = AdminStores()

def setUp(self) -> None:
    self.gcs_factory_patcher = mock.patch(
        "recidiviz.admin_panel.dataset_metadata_store.GcsfsFactory.build"
    )
    fake_gcs = FakeGCSFileSystem()
    fake_gcs.upload_from_string(
        path=GcsfsFilePath.from_absolute_path(
            "gs://recidiviz-456-configs/cloud_sql_to_bq_config.yaml"
        ),
        contents="""
region_codes_to_exclude:
  - US_ND
state_history_tables_to_include:
  - state_person_history
county_columns_to_exclude:
  person:
    - full_name
    - birthdate_inferred_from_age
""",
        content_type="text/yaml",
    )
    fake_gcs.upload_from_string(
        path=GcsfsFilePath.from_absolute_path(
            "gs://recidiviz-456-ingest-metadata/ingest_metadata_latest_ingested_upper_bounds.json"
        ),
        contents="""
{"state_code":"US_PA","processed_date":"2020-11-25"}
{"state_code":"US_ID","processed_date":"2021-01-04"}
{"state_code":"US_MO","processed_date":"2020-12-21"}
{"state_code":"US_ND","processed_date":"2020-12-16"}
""",
        content_type="text/text",
    )
    fixture_folder = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "fixtures",
    )
    self.table_column_map: Dict[str, List[str]] = defaultdict(list)
    for f in os.listdir(fixture_folder):
        _, table, col = f.split("__")
        self.table_column_map[table].append(col[: -len(".json")])
        path = GcsfsFilePath.from_absolute_path(
            f"gs://recidiviz-456-ingest-metadata/{f}"
        )
        fake_gcs.test_add_path(path, local_path=os.path.join(fixture_folder, f))
    self.gcs_factory_patcher.start().return_value = fake_gcs
    self.store = DatasetMetadataCountsStore(
        dataset_nickname="ingest",
        metadata_file_prefix="ingest_state_metadata",
        override_project_id="recidiviz-456",
    )
    self.store.recalculate_store()

def setUp(self) -> None: self.project_id_patcher = patch("recidiviz.utils.metadata.project_id") self.get_secret_patcher = patch("recidiviz.utils.secrets.get_secret") self.gcs_file_system_patcher = patch( "recidiviz.reporting.email_generation.GcsfsFactory.build" ) test_secrets = {"po_report_cdn_static_IP": "123.456.7.8"} self.get_secret_patcher.start().side_effect = test_secrets.get self.project_id_patcher.start().return_value = "recidiviz-test" self.gcs_file_system = FakeGCSFileSystem() self.mock_gcs_file_system = self.gcs_file_system_patcher.start() self.mock_gcs_file_system.return_value = self.gcs_file_system with open(self.fixture_file_path()) as fixture_file: self.recipient = Recipient.from_report_json(json.loads(fixture_file.read())) self.state_code = StateCode.US_ID self.mock_batch_id = "1" self.recipient.data["batch_id"] = self.mock_batch_id self.report_context = self.report_context_type(self.state_code, self.recipient)
def run_justice_counts_ingest_locally(manifest_file: str, clean_up_db: bool) -> None:
    tmp_db_dir = local_postgres_helpers.start_on_disk_postgresql_database()
    local_postgres_helpers.use_on_disk_postgresql_database(
        SQLAlchemyDatabaseKey.for_schema(SchemaType.JUSTICE_COUNTS)
    )
    fs = FakeGCSFileSystem()
    try:
        manual_upload.ingest(fs, test_utils.prepare_files(fs, manifest_file))
    finally:
        cleanup_run(tmp_db_dir, clean_up_db)

def test_load_files_from_storage(self, mock_gcs_factory: MagicMock) -> None:
    """Test that load_files_from_storage returns files for the current batch
    and bucket name."""
    bucket_name = "bucket-name"
    self.mock_utils.get_email_content_bucket_name.return_value = bucket_name
    email_path = GcsfsFilePath.from_absolute_path(
        f"gs://{bucket_name}/{self.state_code}/{self.batch_id}/{self.to_address}.html"
    )
    other_path = GcsfsFilePath.from_absolute_path(
        f"gs://{bucket_name}/excluded/exclude.json"
    )

    fake_gcs_file_system = FakeGCSFileSystem()
    fake_gcs_file_system.upload_from_string(
        path=email_path, contents="<html>", content_type="text/html"
    )
    fake_gcs_file_system.upload_from_string(
        path=other_path, contents="{}", content_type="text/json"
    )
    mock_gcs_factory.return_value = fake_gcs_file_system

    files = email_delivery.load_files_from_storage(
        bucket_name, f"{self.state_code}/{self.batch_id}"
    )
    self.assertEqual(files, {f"{self.to_address}": "<html>"})

def build_gcsfs_controller_for_tests(
    controller_cls,
    fixture_path_prefix: str,
    run_async: bool,
    fake_fs: Optional[FakeGCSFileSystem] = None,
    can_start_ingest: bool = True,
    **kwargs,
) -> GcsfsDirectIngestController:
    """Builds an instance of |controller_cls| for use in tests with several
    internal classes mocked properly.
    """
    # Only build a fresh fake file system when the caller did not supply one;
    # the original unconditionally overwrote the fake_fs argument.
    if fake_fs is None:
        fake_fs = FakeGCSFileSystem()

    def mock_build_fs() -> FakeGCSFileSystem:
        return fake_fs

    if "TestGcsfsDirectIngestController" in controller_cls.__name__:
        view_collector_cls: Type[BigQueryViewCollector] = (
            FakeDirectIngestPreProcessedIngestViewCollector
        )
    else:
        view_collector_cls = DirectIngestPreProcessedIngestViewCollector

    with patch(
        f"{BaseDirectIngestController.__module__}.DirectIngestCloudTaskManagerImpl"
    ) as mock_task_factory_cls, patch(
        f"{GcsfsDirectIngestController.__module__}.BigQueryClientImpl"
    ) as mock_big_query_client_cls, patch(
        f"{GcsfsDirectIngestController.__module__}.DirectIngestRawFileImportManager",
        FakeDirectIngestRawFileImportManager,
    ), patch(
        f"{GcsfsDirectIngestController.__module__}.DirectIngestPreProcessedIngestViewCollector",
        view_collector_cls,
    ):
        task_manager = (
            FakeAsyncDirectIngestCloudTaskManager()
            if run_async
            else FakeSynchronousDirectIngestCloudTaskManager()
        )
        mock_task_factory_cls.return_value = task_manager
        mock_big_query_client_cls.return_value = FakeDirectIngestBigQueryClient(
            project_id=metadata.project_id(), fs=fake_fs
        )
        with patch.object(GcsfsFactory, "build", new=mock_build_fs):
            controller = controller_cls(
                ingest_directory_path=f"{fixture_path_prefix}/fixtures",
                storage_directory_path="storage/path",
                **kwargs,
            )
            controller.csv_reader = TestSafeGcsCsvReader(fake_fs)
            controller.raw_file_import_manager.csv_reader = controller.csv_reader
            task_manager.set_controller(controller)
            fake_fs.test_set_delegate(
                DirectIngestFakeGCSFileSystemDelegate(
                    controller, can_start_ingest=can_start_ingest
                )
            )
            return controller

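# Hedged usage sketch for the builder above (the fixture prefix here is
# hypothetical; run_async=False keeps task execution synchronous in tests):
def _example_build_controller(controller_cls) -> GcsfsDirectIngestController:
    return build_gcsfs_controller_for_tests(
        controller_cls,
        fixture_path_prefix="us_xx",  # hypothetical fixture directory prefix
        run_async=False,
    )
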
def setUp(self) -> None:
    self.project_id = "recidiviz-456"
    self.project_id_patcher = patch("recidiviz.utils.metadata.project_id")
    self.project_id_patcher.start().return_value = self.project_id
    self.test_region = fake_region(
        region_code="us_xx", are_raw_data_bq_imports_enabled_in_env=True
    )
    self.region_module_patcher = patch.object(
        direct_ingest_raw_table_migration_collector,
        "regions",
        new=controller_fixtures,
    )
    self.region_module_patcher.start()

    self.fs = DirectIngestGCSFileSystem(FakeGCSFileSystem())
    self.ingest_directory_path = GcsfsDirectoryPath(
        bucket_name="direct/controllers/fixtures"
    )
    self.temp_output_path = GcsfsDirectoryPath(bucket_name="temp_bucket")
    self.region_raw_file_config = DirectIngestRegionRawFileConfig(
        region_code="us_xx",
        yaml_config_file_dir=fixtures.as_filepath("us_xx"),
    )
    self.mock_big_query_client = create_autospec(BigQueryClient)
    self.num_lines_uploaded = 0
    self.mock_big_query_client.insert_into_table_from_cloud_storage_async.side_effect = (
        self.mock_import_raw_file_to_big_query
    )
    self.import_manager = DirectIngestRawFileImportManager(
        region=self.test_region,
        fs=self.fs,
        ingest_directory_path=self.ingest_directory_path,
        temp_output_directory_path=self.temp_output_path,
        region_raw_file_config=self.region_raw_file_config,
        big_query_client=self.mock_big_query_client,
    )
    self.import_manager.csv_reader = _TestSafeGcsCsvReader(self.fs.gcs_file_system)

    self.time_patcher = patch(
        "recidiviz.ingest.direct.controllers.direct_ingest_raw_file_import_manager.time"
    )
    self.mock_time = self.time_patcher.start()

    def fake_get_dataset_ref(dataset_id: str) -> bigquery.DatasetReference:
        return bigquery.DatasetReference(
            project=self.project_id, dataset_id=dataset_id
        )

    self.mock_big_query_client.dataset_ref_for_id = fake_get_dataset_ref

def setUp(self) -> None:
    self.gcs_file_system_patcher = patch(
        "recidiviz.cloud_storage.gcsfs_factory.GcsfsFactory.build"
    )
    self.requires_gae_auth_patcher = patch(
        "recidiviz.admin_panel.routes.case_triage.requires_gae_auth",
        side_effect=lambda route: route,
    )
    self.requires_gae_auth_patcher.start()
    self.gcs_file_system = FakeGCSFileSystem()
    self.mock_gcs_file_system = self.gcs_file_system_patcher.start()
    self.mock_gcs_file_system.return_value = self.gcs_file_system

    self.app = Flask(__name__)
    blueprint = Blueprint("email_reporting_test", __name__)
    self.app.config["TESTING"] = True
    self.client = self.app.test_client()

    add_case_triage_routes(blueprint, admin_stores)
    self.app.register_blueprint(blueprint)

    with self.app.test_request_context():
        self.state_code = StateCode.US_ID
        self.generate_emails_url = flask.url_for(
            "email_reporting_test._generate_emails",
            state_code_str=self.state_code.value,
        )
        self.send_emails_url = flask.url_for(
            "email_reporting_test._send_emails",
            state_code_str=self.state_code.value,
        )

    self.review_year = 2021
    self.review_month = 5

def setUp(self) -> None:
    self.project_id_patcher = patch("recidiviz.utils.metadata.project_id")
    self.get_secret_patcher = patch("recidiviz.utils.secrets.get_secret")
    self.gcs_file_system_patcher = patch(
        "recidiviz.reporting.email_generation.GcsfsFactory.build"
    )
    test_secrets = {"po_report_cdn_static_IP": "123.456.7.8"}
    self.get_secret_patcher.start().side_effect = test_secrets.get
    self.project_id_patcher.start().return_value = "recidiviz-test"
    self.gcs_file_system = FakeGCSFileSystem()
    self.mock_gcs_file_system = self.gcs_file_system_patcher.start()
    self.mock_gcs_file_system.return_value = self.gcs_file_system

    with open(
        os.path.join(
            f"{os.path.dirname(__file__)}/context/po_monthly_report", FIXTURE_FILE
        )
    ) as fixture_file:
        self.recipient = Recipient.from_report_json(json.loads(fixture_file.read()))

    self.state_code = "US_ID"
    self.mock_batch_id = "1"
    self.recipient.data["batch_id"] = self.mock_batch_id
    self.report_context = PoMonthlyReportContext(self.state_code, self.recipient)

def setUp(self) -> None:
    self.schema_types: List[SchemaType] = list(SchemaType)
    self.disabled_schema_types = {
        SchemaType.JUSTICE_COUNTS,
        SchemaType.CASE_TRIAGE,
    }
    self.enabled_schema_types = [
        schema_type
        for schema_type in self.schema_types
        if schema_type not in self.disabled_schema_types
    ]
    self.mock_project_id = "fake-recidiviz-project"
    self.environment_patcher = mock.patch(
        "recidiviz.persistence.database.bq_refresh.cloud_sql_to_bq_refresh_config.environment"
    )
    self.metadata_patcher = mock.patch(
        "recidiviz.persistence.database.bq_refresh.cloud_sql_to_bq_refresh_config.metadata"
    )
    self.mock_metadata = self.metadata_patcher.start()
    self.mock_metadata.project_id.return_value = self.mock_project_id
    self.mock_environment = self.environment_patcher.start()
    self.mock_environment.GCP_PROJECT_STAGING = self.mock_project_id

    self.gcs_factory_patcher = mock.patch(
        "recidiviz.persistence.database.bq_refresh.cloud_sql_to_bq_refresh_config.GcsfsFactory.build"
    )
    self.fake_gcs = FakeGCSFileSystem()
    self.gcs_factory_patcher.start().return_value = self.fake_gcs

    self.set_config_yaml(
        """
region_codes_to_exclude:
  - US_ND
state_history_tables_to_include:
  - state_person_history
county_columns_to_exclude:
  person:
    - full_name
    - birthdate_inferred_from_age
"""
    )

def main(
    repo_directory: str,
    system: schema.System,
    base_drive_folder_id: str,
    credentials_directory: str,
    app_url: Optional[str],
    filter_type: Optional[FilterType],
    regions: Optional[List[str]],
) -> None:
    """Downloads, tests, and ingests specified regions."""
    regions_to_ingest = _get_list_of_regions(filter_type, regions)
    logging.info("Starting ingest of regions...")
    logging.info(regions_to_ingest)

    tmp_db_dir = local_postgres_helpers.start_on_disk_postgresql_database()
    local_postgres_helpers.use_on_disk_postgresql_database(
        SQLAlchemyDatabaseKey.for_schema(SchemaType.JUSTICE_COUNTS)
    )
    fs = FakeGCSFileSystem()
    region_ingest_summary = []
    try:
        for region in regions_to_ingest:
            region_ingest_summary.append(
                _full_ingest_region(
                    fs,
                    region,
                    repo_directory,
                    system,
                    base_drive_folder_id,
                    credentials_directory,
                    app_url,
                )
            )
    finally:
        cleanup_run(tmp_db_dir, True)

    for ingest_result in region_ingest_summary:
        if ingest_result.success:
            logging.info("%s: success", ingest_result.region_code)
        else:
            logging.error(
                "%s: failed - %s", ingest_result.region_code, ingest_result.error
            )

def test_get_paths_to_upload_is_correct(self, mock_fs_factory: Mock) -> None:
    mock_fs = FakeGCSFileSystem()
    mock_fs.test_add_path(
        path=GcsfsFilePath.from_bucket_and_blob_name(
            "recidiviz-456-direct-ingest-state-us-xx", "raw_data/test_file.txt"
        ),
        local_path=None,
    )
    mock_fs.test_add_path(
        path=GcsfsFilePath.from_bucket_and_blob_name(
            "recidiviz-456-direct-ingest-state-us-xx",
            "raw_data/subdir1/test_file.txt",
        ),
        local_path=None,
    )
    mock_fs.test_add_path(
        path=GcsfsDirectoryPath.from_bucket_and_blob_name(
            "recidiviz-456-direct-ingest-state-us-xx", "raw_data/subdir2/"
        ),
        local_path=None,
    )
    mock_fs_factory.return_value = mock_fs

    controller = UploadStateFilesToIngestBucketController(
        paths_with_timestamps=[
            ("recidiviz-456-direct-ingest-state-us-xx/raw_data/", TODAY),
        ],
        project_id="recidiviz-456",
        region="us_xx",
    )

    result = [
        ("recidiviz-456-direct-ingest-state-us-xx/raw_data/test_file.txt", TODAY),
        (
            "recidiviz-456-direct-ingest-state-us-xx/raw_data/subdir1/test_file.txt",
            TODAY,
        ),
    ]
    self.assertListEqual(result, controller.get_paths_to_upload())
    self.assertFalse(self.us_xx_manager.is_instance_paused())

def setUp(self) -> None:
    self.fake_fs = FakeGCSFileSystem()
    self.project_id_patcher = patch("recidiviz.utils.metadata.project_id")
    self.project_id_patcher.start().return_value = "recidiviz-456"
    with patch(
        "recidiviz.cloud_storage.gcs_pseudo_lock_manager.GcsfsFactory.build",
        Mock(return_value=self.fake_fs),
    ):
        self.lock_manager = CloudSqlToBQLockManager()
        self.lock_bucket = self.lock_manager.lock_manager.bucket_name
        self.state_ingest_lock_manager = DirectIngestRegionLockManager(
            region_code=StateCode.US_XX.value,
            blocking_locks=[],
            ingest_instance=DirectIngestInstance.PRIMARY,
        )
        self.county_ingest_lock_manager = DirectIngestRegionLockManager(
            region_code="US_XX_YYYYY",
            blocking_locks=[],
            ingest_instance=DirectIngestInstance.PRIMARY,
        )