def test_is_task_queued_has_tasks(self):
    """is_task_queued() finds a queued task whose name embeds the args' tag."""
    # Arrange
    file_path = to_normalized_unprocessed_file_path(
        'bucket/file_path.csv', GcsfsDirectIngestFileType.INGEST_VIEW)
    gcsfs_args = GcsfsIngestArgs(
        ingest_time=datetime.datetime.now(),
        file_path=GcsfsFilePath.from_absolute_path(file_path))

    full_task_name = _build_task_id(_REGION.region_code,
                                    gcsfs_args.task_id_tag())
    info = ProcessIngestJobCloudTaskQueueInfo(
        queue_name='queue_name',
        task_names=[
            'projects/path/to/random_task',
            f'projects/path/to/{full_task_name}',
        ])

    # Rebuild equivalent args from scratch so the lookup must match on the
    # task id tag rather than on object identity.
    file_path = to_normalized_unprocessed_file_path(
        'bucket/file_path.csv', GcsfsDirectIngestFileType.INGEST_VIEW)
    gcsfs_args = GcsfsIngestArgs(
        ingest_time=datetime.datetime.now(),
        file_path=GcsfsFilePath.from_absolute_path(file_path))

    # Act
    gcsfs_args_queued = info.is_task_queued(_REGION, gcsfs_args)

    # Assert
    self.assertTrue(gcsfs_args_queued)
def create_direct_ingest_raw_data_import_task(
        self, region: Region,
        data_import_args: GcsfsRawDataBQImportArgs):
    """Records a fake raw-data-import task (with its args) without running it."""
    if not self.controller:
        raise ValueError(
            "Controller is null - did you call set_controller()?")
    base_id = _build_task_id(self.controller.region.region_code, None)
    queued_entry = (f'projects/path/to/{base_id}-raw_data_import',
                    data_import_args)
    self.bq_import_export_tasks.append(queued_entry)
def test_info_single_task(self) -> None:
    """A queued PRIMARY task is found for matching args; SECONDARY has none."""
    # Arrange
    gcsfs_args = GcsfsIngestArgs(
        ingest_time=datetime.datetime.now(),
        file_path=self.ingest_view_file_path,
    )
    primary_task_name = _build_task_id(_REGION.region_code,
                                       DirectIngestInstance.PRIMARY,
                                       gcsfs_args.task_id_tag())
    info = ProcessIngestJobCloudTaskQueueInfo(
        queue_name="queue_name",
        task_names=[
            "projects/path/to/random_task",
            f"projects/path/to/{primary_task_name}",
        ],
    )
    # Fresh, equivalent args: matching must go through the task id tag, not
    # object identity.
    gcsfs_args = GcsfsIngestArgs(
        ingest_time=datetime.datetime.now(),
        file_path=self.ingest_view_file_path,
    )

    # Act
    is_queued = info.is_task_queued(_REGION, gcsfs_args)

    # Assert
    self.assertTrue(is_queued)
    self.assertTrue(
        info.tasks_for_instance(_REGION.region_code,
                                DirectIngestInstance.PRIMARY))
    self.assertFalse(
        info.tasks_for_instance(_REGION.region_code,
                                DirectIngestInstance.SECONDARY))
def create_direct_ingest_sftp_download_task(self, region: Region) -> None:
    """Records a fake SFTP download task instead of scheduling a real one."""
    if not self.controller:
        raise ValueError(
            "Controller is null - did you call set_controller()?")
    base_id = _build_task_id(self.controller.region.region_code,
                             DirectIngestInstance.PRIMARY, None)
    self.sftp_tasks.append(
        f"projects/path/to/{base_id}-handle_sftp_download")
def create_direct_ingest_handle_new_files_task(self, region: Region,
                                               can_start_ingest: bool):
    """Records a fake handle_new_files task paired with its can-start flag."""
    if not self.controller:
        raise ValueError(
            "Controller is null - did you call set_controller()?")
    base_id = _build_task_id(self.controller.region.region_code, None)
    queued_entry = (f'projects/path/to/{base_id}-handle_new_files',
                    can_start_ingest)
    self.scheduler_tasks.append(queued_entry)
def create_direct_ingest_ingest_view_export_task(
    self, region: Region, ingest_view_export_args: GcsfsIngestViewExportArgs
) -> None:
    """Records a fake ingest-view-export task (with its args) without running it."""
    if not self.controller:
        raise ValueError("Controller is null - did you call set_controller()?")
    base_id = _build_task_id(self.controller.region.region_code, None)
    queued_entry = (f"projects/path/to/{base_id}-ingest_view_export",
                    ingest_view_export_args)
    self.bq_import_export_tasks.append(queued_entry)
def create_direct_ingest_scheduler_queue_task(
    self, region: Region, just_finished_job: bool, delay_sec: int
) -> None:
    """Queues *but does not run* a scheduler task."""
    if not self.controller:
        raise ValueError("Controller is null - did you call set_controller()?")
    base_id = _build_task_id(self.controller.region.region_code, None)
    queued_entry = (f"projects/path/to/{base_id}-schedule", just_finished_job)
    self.scheduler_tasks.append(queued_entry)
def create_direct_ingest_process_job_task(
    self, region: Region, ingest_args: IngestArgs
) -> None:
    """Queues *but does not run* a process job task."""
    if not self.controller:
        raise ValueError("Controller is null - did you call set_controller()?")
    full_id = _build_task_id(
        self.controller.region.region_code, ingest_args.task_id_tag()
    )
    self.process_job_tasks.append(
        (f"projects/path/to/{full_id}", ingest_args))
def create_direct_ingest_handle_new_files_task(
    self,
    region: Region,
    ingest_instance: DirectIngestInstance,
    ingest_bucket: GcsfsBucketPath,
    can_start_ingest: bool,
) -> None:
    """Records a fake handle_new_files task for the given instance and bucket."""
    if not self.controller:
        raise ValueError(
            "Controller is null - did you call set_controller()?")
    base_id = _build_task_id(self.controller.region.region_code,
                             ingest_instance, None)
    queued_entry = (
        f"projects/path/to/{base_id}-handle_new_files",
        ingest_bucket,
        can_start_ingest,
    )
    self.scheduler_tasks.append(queued_entry)
def test_is_task_queued_has_tasks(self):
    """is_task_queued() returns True when a queued task name embeds the args' tag.

    Fix: the 'random_task' entry was an f-string with no placeholders
    (ruff F541); it is now a plain string literal with the identical value.
    """
    # Arrange
    file_path = to_normalized_unprocessed_file_path('file_path.csv')
    gcsfs_args = GcsfsIngestArgs(ingest_time=datetime.datetime.now(),
                                 file_path=file_path)
    full_task_name = _build_task_id(_REGION.region_code,
                                    gcsfs_args.task_id_tag())
    info = CloudTaskQueueInfo(
        queue_name='queue_name',
        task_names=[
            'projects/path/to/random_task',
            f'projects/path/to/{full_task_name}',
        ])

    # Rebuild equivalent args from scratch so the lookup must match on the
    # task id tag rather than on object identity.
    file_path = to_normalized_unprocessed_file_path('file_path.csv')
    gcsfs_args = GcsfsIngestArgs(ingest_time=datetime.datetime.now(),
                                 file_path=file_path)

    # Act
    gcsfs_args_queued = info.is_task_queued(_REGION, gcsfs_args)

    # Assert
    self.assertTrue(gcsfs_args_queued)
def test_info_tasks_both_instances(self) -> None:
    """Tasks queued for every DirectIngestInstance are all discoverable."""
    # Arrange
    gcsfs_args = GcsfsIngestArgs(
        ingest_time=datetime.datetime.now(),
        file_path=self.ingest_view_file_path,
    )
    per_instance_names = [
        _build_task_id(
            _REGION.region_code,
            instance,
            gcsfs_args.task_id_tag(),
        )
        for instance in DirectIngestInstance
    ]
    info = ProcessIngestJobCloudTaskQueueInfo(
        queue_name="queue_name",
        task_names=["projects/path/to/random_task"]
        + [f"projects/path/to/{name}" for name in per_instance_names],
    )
    # Fresh, equivalent args: matching must go through the task id tag, not
    # object identity.
    gcsfs_args = GcsfsIngestArgs(
        ingest_time=datetime.datetime.now(),
        file_path=self.ingest_view_file_path,
    )

    # Act
    is_queued = info.is_task_queued(_REGION, gcsfs_args)

    # Assert
    self.assertTrue(is_queued)
    for instance in DirectIngestInstance:
        self.assertTrue(
            info.tasks_for_instance(_REGION.region_code, instance))