Example #1
0
def datafiles_to_db_by_source(**kwargs):
    """
    Register and run the pipelines that load source data files into the DB.

    With no flags supplied at all, print the command's help text and do
    nothing else.
    """
    if not any(kwargs.values()):
        # Nothing was requested: show usage and bail out.
        context = click.get_current_context()
        click.echo(context.get_help())
        return

    pipeline_manager = PipelineManager(storage=get_source_folder(), dbi=app.dbi)
    run_everything = kwargs['all']
    for raw_arg, configured_pipelines in arg_to_pipeline_config_list.items():
        flag_name = raw_arg.replace(".", "__")
        if not (run_everything or kwargs[flag_name]):
            continue
        for pipeline, sub_dir, options in configured_pipelines:
            # Some pipelines are gated behind an additional CLI flag.
            required_flag = options.get('required_flag')
            if required_flag and not kwargs.get(required_flag, False):
                continue
            pipeline_manager.pipeline_register(
                pipeline=pipeline,
                sub_directory=sub_dir,
                force=kwargs['force'],
                continue_transform=kwargs['continue'],
                products=kwargs['products'],
                **options,
            )
    pipeline_manager.pipeline_process_all()
Example #2
0
 def test_pipeline_register(self):
     """Registering a pipeline stores exactly one configured entry."""
     mgr = Manager()
     mgr.pipeline_register(
         'test_pipeline',
         pipeline_id='fake_pipeline',
         force=True,
         unpack=True,
         trigger_dataflow_dag=True,
         custom_parameter=True,
     )
     assert len(mgr._pipelines) == 1
     self.assert_pipeline_config(
         mgr, 'fake_pipeline', True, 'test_pipeline', None, True, True)
Example #3
0
 def test_transform_pipeline_process(self, app_with_db):
     """Processing a pipeline records one PROCESSED datafile registry row."""
     mgr = Manager(dbi=app_with_db.dbi)
     mgr._pipelines['fake_pipeline'] = PipelineConfig(
         pipeline=FakePipeline(1234),
         sub_directory=None,
         force=False,
         unpack=False,
         trigger_dataflow_dag=False,
     )
     mgr.pipeline_process('fake_pipeline')

     registry_entries = DatafileRegistryModel.query.all()
     assert len(registry_entries) == 1
     entry = registry_entries[0]
     assert entry.state == DatafileState.PROCESSED.value
     assert entry.error_message is None
     assert entry.file_name is None
     # NOTE(review): exact equality with utcnow() only holds if time is
     # frozen (e.g. freezegun) somewhere in the fixtures — confirm.
     assert entry.created_timestamp == datetime.utcnow()
     assert entry.updated_timestamp == datetime.utcnow()
     assert entry.source == '1234'
Example #4
0
 def test_pipeline_register_when_already_registered(self):
     """A second registration under the same pipeline id raises ValueError."""
     mgr = Manager()
     mgr.pipeline_register('test_pipeline', pipeline_id='fake_pipeline')
     with pytest.raises(ValueError):
         # The duplicate id must be rejected, not silently overwritten.
         mgr.pipeline_register('test_pipeline', pipeline_id='fake_pipeline')
     self.assert_pipeline_config(
         mgr, 'fake_pipeline', False, 'test_pipeline', None, True, False)
Example #5
0
    def test_pipeline_process(
        self,
        mock_read_files,
        mock_get_file_names,
        app_with_db,
        raise_exception,
        expected_state,
        expected_error_message,
    ):
        """Processing a single discovered file records its state and error."""

        def fake_reader(*args, **kwargs):
            # Mimic the real reader: one batch holding a single file.
            yield ['fake_file.txt']

        mock_get_file_names.return_value = ['fake_file.txt']
        mock_read_files.side_effect = fake_reader

        s3_config = app_with_db.config['s3']
        source_folder = os.path.join(
            s3_config['bucket_url'], s3_config['datasets_folder'])

        mgr = Manager(storage=source_folder, dbi=app_with_db.dbi)
        mgr._pipelines['fake_pipeline'] = PipelineConfig(
            pipeline=FakePipeline(
                1234, raise_processing_exception=raise_exception),
            sub_directory='/tmp/fake_pipeline',
            force=False,
            unpack=False,
            trigger_dataflow_dag=False,
        )
        mgr.pipeline_process('fake_pipeline')

        entries = DatafileRegistryModel.query.all()
        assert len(entries) == 1
        entry = entries[0]
        assert entry.state == expected_state
        assert entry.error_message == expected_error_message
        assert entry.file_name == 'fake_file.txt'
        # NOTE(review): exact utcnow() equality presumes frozen time in
        # the fixtures — confirm.
        assert entry.created_timestamp == datetime.utcnow()
        assert entry.updated_timestamp == datetime.utcnow()
        assert entry.source == '1234'
Example #6
0
 def test_pipeline_process_all(self, mock_pipeline_process):
     """pipeline_process_all() processes every registered pipeline in order."""
     mock_pipeline_process.return_value = None
     mgr = Manager()
     mgr._pipelines['fake_pipeline'] = None
     mgr._pipelines['fake_pipeline_2'] = None
     mgr.pipeline_process_all()
     # First positional argument of each recorded call is the pipeline id.
     processed_ids = [
         call[0][0] for call in mock_pipeline_process.call_args_list]
     assert processed_ids[0] == 'fake_pipeline'
     assert processed_ids[1] == 'fake_pipeline_2'
Example #7
0
 def test_pipeline_remove(self, pipeline):
     """pipeline_remove() accepts either a pipeline object or its id."""
     mgr = Manager()
     stub = FakePipeline(0)
     # Key by the pipeline's id when given an object, else by the raw value.
     key = getattr(pipeline, 'id', pipeline)
     mgr._pipelines[key] = stub
     mgr.pipeline_remove(pipeline)
     assert pipeline not in mgr._pipelines
Example #8
0
 def test_pipeline_get(self, pipeline, expected_result):
     """pipeline_get() looks a pipeline up by object or by id."""
     mgr = Manager()
     stub = FakePipeline(0)
     mgr._pipelines[getattr(pipeline, 'id', pipeline)] = stub
     assert mgr.pipeline_get(pipeline) == stub
Example #9
0
 def test_to_pipeline_id(self, pipeline, expected_result):
     """_to_pipeline_id() maps a pipeline (or raw id) to the expected id."""
     assert Manager()._to_pipeline_id(pipeline) == expected_result