Exemplo n.º 1
0
def run_csv_job(file_loc, job_name, model):
    """Build a CSV -> MySQL databuilder job and launch it right away.

    The task wires a ``CsvExtractor``, a ``NoopTransformer`` and an
    ``FSMySQLCSVLoader``; the job is published with ``MySQLCSVPublisher``.
    The loader's record directory and the publisher's record directory are
    both set to ``/var/tmp/amundsen/<job_name>/records``.

    :param file_loc: value passed as ``extractor.csv.file_location``.
    :param job_name: used to build the per-job temporary folder.
    :param model: value passed as ``extractor.csv.model_class``.
    """
    tmp_folder = f'/var/tmp/amundsen/{job_name}'
    records_dir = f'{tmp_folder}/records'

    extractor = CsvExtractor()
    loader = FSMySQLCSVLoader()
    task = DefaultTask(
        extractor=extractor,
        loader=loader,
        transformer=NoopTransformer(),
    )

    # The same directory is handed to both the loader and the publisher.
    # ``mysql_conn_string`` is not defined in this function; it is resolved
    # from the enclosing scope.
    settings = {
        'extractor.csv.file_location': file_loc,
        'extractor.csv.model_class': model,
        'loader.mysql_filesystem_csv.record_dir_path': records_dir,
        'loader.mysql_filesystem_csv.delete_created_directories': True,
        'publisher.mysql.record_files_directory': records_dir,
        'publisher.mysql.conn_string': mysql_conn_string,
        'publisher.mysql.job_publish_tag': 'unique_tag',
    }
    job_config = ConfigFactory.from_dict(settings)

    publisher = MySQLCSVPublisher()
    job = DefaultJob(conf=job_config, task=task, publisher=publisher)
    job.launch()
Exemplo n.º 2
0
def run_table_column_job(table_path, column_path):
    """Build the table/column CSV -> MySQL job and launch it right away.

    Uses ``CsvTableColumnExtractor`` with ``NoopTransformer`` and
    ``FSMySQLCSVLoader``; the job is published with ``MySQLCSVPublisher``.

    :param table_path: value passed as
        ``extractor.csvtablecolumn.table_file_location``.
    :param column_path: value passed as
        ``extractor.csvtablecolumn.column_file_location``.
    """
    tmp_folder = '/var/tmp/amundsen/table_column'
    records_dir = f'{tmp_folder}/records'

    table_column_extractor = CsvTableColumnExtractor()
    loader = FSMySQLCSVLoader()
    task = DefaultTask(
        table_column_extractor,
        loader=loader,
        transformer=NoopTransformer(),
    )

    # The same directory is handed to both the loader and the publisher.
    # ``mysql_conn_string`` is resolved from the enclosing scope.
    settings = {
        'extractor.csvtablecolumn.table_file_location': table_path,
        'extractor.csvtablecolumn.column_file_location': column_path,
        'loader.mysql_filesystem_csv.record_dir_path': records_dir,
        'loader.mysql_filesystem_csv.delete_created_directories': True,
        'publisher.mysql.record_files_directory': records_dir,
        'publisher.mysql.conn_string': mysql_conn_string,
        'publisher.mysql.job_publish_tag': 'unique_tag',
    }
    job_config = ConfigFactory.from_dict(settings)

    publisher = MySQLCSVPublisher()
    DefaultJob(conf=job_config, task=task, publisher=publisher).launch()
Exemplo n.º 3
0
def create_last_updated_job():
    """Assemble (without launching) the "last updated" job.

    The task pairs ``EsLastUpdatedExtractor`` with ``FSMySQLCSVLoader``; no
    transformer is passed explicitly. The job is configured to publish
    through ``MySQLCSVPublisher``.

    :return: the configured ``DefaultJob``; the caller is expected to
        launch it.
    """
    # Loader writes records into this folder; publisher reads them back.
    tmp_folder = '/var/tmp/amundsen/last_updated_data'
    records_dir = f'{tmp_folder}/records'

    extractor = EsLastUpdatedExtractor()
    loader = FSMySQLCSVLoader()
    task = DefaultTask(extractor=extractor, loader=loader)

    # ``mysql_conn_string`` is resolved from the enclosing scope.
    settings = {
        'extractor.es_last_updated.model_class':
            'databuilder.models.es_last_updated.ESLastUpdated',
        'loader.mysql_filesystem_csv.record_dir_path': records_dir,
        'loader.mysql_filesystem_csv.delete_created_directories': True,
        'publisher.mysql.record_files_directory': records_dir,
        'publisher.mysql.conn_string': mysql_conn_string,
        'publisher.mysql.job_publish_tag': 'unique_tag',
    }
    job_config = ConfigFactory.from_dict(settings)

    publisher = MySQLCSVPublisher()
    job = DefaultJob(conf=job_config, task=task, publisher=publisher)
    return job
    def test_load(self) -> None:
        """Load a sample movie and compare the written CSV rows to fixtures."""
        cast = [Actor('Tom Cruise'), Actor('Meg Ryan')]
        top_gun = Movie('Top Gun', cast)

        csv_loader = FSMySQLCSVLoader()
        csv_loader.init(self._conf)
        csv_loader.load(top_gun)
        csv_loader.close()

        # Fixture directory is addressed relative to this test module.
        module_dir = os.path.join(os.path.dirname(__file__))
        fixture_dir = '{}/../resources/fs_mysql_csv_loader/records'.format(
            module_dir)
        expected_records = self._get_csv_rows(fixture_dir)

        # Directory the loader was configured to write into.
        output_dir = self._conf.get_string(FSMySQLCSVLoader.RECORD_DIR_PATH)
        actual_records = self._get_csv_rows(output_dir)

        # Show the full diff on failure instead of a truncated one.
        self.maxDiff = None
        self.assertDictEqual(expected_records, actual_records)
Exemplo n.º 5
0
def create_dashboard_tables_job():
    """Assemble (without launching) the dashboard-table CSV -> MySQL job.

    Rows come from ``example/sample_data/sample_dashboard_table.csv`` via
    ``CsvExtractor`` and pass through a ``ChainedTransformer`` made of a
    ``GenericTransformer`` followed by ``DictToModel``. The generic
    transformer is configured with field ``table_ids`` and callback
    ``_str_to_list``; ``DictToModel`` is configured with the
    ``DashboardTable`` model class path.

    :return: the configured ``DefaultJob``; the caller is expected to
        launch it.
    """
    # Loader writes records into this folder; publisher reads them back.
    tmp_folder = '/var/tmp/amundsen/dashboard_table'
    records_dir = f'{tmp_folder}/records'
    model_class = 'databuilder.models.dashboard.dashboard_table.DashboardTable'

    extractor = CsvExtractor()
    loader = FSMySQLCSVLoader()

    # Transformers are applied in list order: generic first, then DictToModel.
    chained = ChainedTransformer(
        transformers=[GenericTransformer(), DictToModel()],
        is_init_transformers=True,
    )

    task = DefaultTask(extractor=extractor, loader=loader, transformer=chained)
    publisher = MySQLCSVPublisher()

    # ``mysql_conn_string`` and ``_str_to_list`` are resolved from the
    # enclosing scope.
    settings = {
        'extractor.csv.file_location':
            'example/sample_data/sample_dashboard_table.csv',
        'transformer.chained.transformer.generic.field_name': 'table_ids',
        'transformer.chained.transformer.generic.callback_function':
            _str_to_list,
        'transformer.chained.transformer.dict_to_model.model_class':
            model_class,
        'loader.mysql_filesystem_csv.record_dir_path': records_dir,
        'loader.mysql_filesystem_csv.delete_created_directories': True,
        'publisher.mysql.record_files_directory': records_dir,
        'publisher.mysql.conn_string': mysql_conn_string,
        'publisher.mysql.job_publish_tag': 'unique_tag',
    }
    job_config = ConfigFactory.from_dict(settings)

    return DefaultJob(conf=job_config, task=task, publisher=publisher)