Example #1
from dagster import ExpectationResult, Materialization, Output

def materialization_and_expectation(_context):
    # Emit two file materializations and two expectation results,
    # then the actual output value.
    yield Materialization.file(path='/path/to/foo',
                               description='This is a table.')
    yield Materialization.file(path='/path/to/bar')
    yield ExpectationResult(success=True,
                            label='row_count',
                            description='passed')
    yield ExpectationResult(True)
    yield Output(True)
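A generator like this is normally attached to a solid and executed; the following is a minimal sketch of that wiring, assuming dagster's legacy @solid and execute_solid APIs (the wrapping itself is not part of the original snippet):

from dagster import execute_solid, solid

# Hypothetical wiring: wrap the generator above as a solid and run it standalone.
events_solid = solid(name='materialization_and_expectation')(materialization_and_expectation)
result = execute_solid(events_solid)
assert result.success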
Example #2
import csv

from dagster import Materialization

def df_output_schema(_context, path, value):
    # value is a list of dicts; write it out as CSV at the configured path.
    with open(path, 'w') as fd:
        writer = csv.DictWriter(fd, fieldnames=value[0].keys())
        writer.writeheader()
        writer.writerows(rowdicts=value)

    return Materialization.file(path)
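A function with this (context, config, value) shape is typically registered as a type materializer; a hedged sketch of that registration, assuming dagster's legacy dagster_type_materializer decorator and a plain string config:

from dagster import String, dagster_type_materializer

@dagster_type_materializer(String)  # config is just the destination path
def materialize_list_of_dicts(context, path, value):
    # Delegate to df_output_schema above; the decorator supplies the
    # resolved config value as the second argument.
    return df_output_schema(context, path, value)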
Example #3
from dagster import Materialization, check

def spark_df_output_schema(_context, file_type, file_options, spark_df):
    if file_type == 'csv':
        spark_df.write.csv(file_options['path'],
                           header=file_options.get('header'),
                           sep=file_options.get('sep'))
        return Materialization.file(file_options['path'])
    else:
        check.failed('Unsupported file type: {}'.format(file_type))
Example #4
from dagster import Materialization, check

def spark_df_materializer(_context, config, spark_df):
    # config is a one-entry dict (a Selector branch): unpack the chosen
    # file type and its writer options.
    file_type, file_options = list(config.items())[0]

    if file_type == 'csv':
        spark_df.write.csv(**file_options)
        return Materialization.file(file_options['path'])
    elif file_type == 'parquet':
        spark_df.write.parquet(**file_options)
        return Materialization.file(file_options['path'])
    elif file_type == 'json':
        spark_df.write.json(**file_options)
        return Materialization.file(file_options['path'])
    elif file_type == 'jdbc':
        spark_df.write.jdbc(**file_options)
        return Materialization.file(file_options['url'])
    elif file_type == 'orc':
        spark_df.write.orc(**file_options)
        return Materialization.file(file_options['path'])
    elif file_type == 'saveAsTable':
        spark_df.write.saveAsTable(**file_options)
        return Materialization.file(file_options['name'])
    elif file_type == 'text':
        spark_df.write.text(**file_options)
        return Materialization.file(file_options['path'])
    else:
        check.failed('Unsupported file type: {}'.format(file_type))
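The list(config.items())[0] unpacking works because the config schema admits exactly one branch at a time; a hedged sketch of such a schema, assuming dagster's Selector config type (the field layout is illustrative):

from dagster import Field, Permissive, Selector, String

# Selector guarantees the caller supplies exactly one of these keys, so the
# resolved config is a one-entry dict of {file_type: file_options}.
spark_write_selector = Selector({
    'csv': Permissive({'path': Field(String)}),
    'parquet': Permissive({'path': Field(String)}),
    'json': Permissive({'path': Field(String)}),
})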
Example #5
from dagster import Materialization, check
from pandas import DataFrame

def dict_without_keys(ddict, *keys):
    # Helper assumed by this snippet: drop the given keys from a dict.
    return {k: v for k, v in ddict.items() if k not in set(keys)}

def dataframe_output_schema(_context, file_type, file_options, pandas_df):
    check.str_param(file_type, 'file_type')
    check.dict_param(file_options, 'file_options')
    check.inst_param(pandas_df, 'pandas_df', DataFrame)

    if file_type == 'csv':
        path = file_options['path']
        pandas_df.to_csv(path, index=False, **dict_without_keys(file_options, 'path'))
    elif file_type == 'parquet':
        pandas_df.to_parquet(file_options['path'])
    elif file_type == 'table':
        pandas_df.to_csv(file_options['path'], sep='\t', index=False)
    else:
        check.failed('Unsupported file_type {file_type}'.format(file_type=file_type))

    return Materialization.file(file_options['path'])
Example #6
import pandas as pd

from dagster import Materialization, check

def dataframe_materializer(_context, config, pandas_df):
    check.inst_param(pandas_df, 'pandas_df', pd.DataFrame)
    # config is a one-entry dict (a Selector branch); dict_without_keys is
    # the helper shown in Example #5.
    file_type, file_options = list(config.items())[0]

    if file_type == 'csv':
        path = file_options['path']
        pandas_df.to_csv(path,
                         index=False,
                         **dict_without_keys(file_options, 'path'))
    elif file_type == 'parquet':
        pandas_df.to_parquet(file_options['path'])
    elif file_type == 'table':
        pandas_df.to_csv(file_options['path'], sep='\t', index=False)
    else:
        check.failed(
            'Unsupported file_type {file_type}'.format(file_type=file_type))

    return Materialization.file(file_options['path'])
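At execution time the branch is chosen via run config; a hedged sketch of selecting the csv branch, assuming dagster's legacy output-materialization run config shape (the solid and output names here are made up):

run_config = {
    'solids': {
        'load_frame': {
            'outputs': [{'result': {'csv': {'path': '/tmp/out.csv'}}}],
        },
    },
}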
Example #7
from dagster import Materialization, check

def spark_df_output_schema(_context, file_type, file_options, spark_df):
    if file_type == 'csv':
        spark_df.write.csv(**file_options)
        return Materialization.file(file_options['path'])
    elif file_type == 'parquet':
        spark_df.write.parquet(**file_options)
        return Materialization.file(file_options['path'])
    elif file_type == 'json':
        spark_df.write.json(**file_options)
        return Materialization.file(file_options['path'])
    elif file_type == 'jdbc':
        spark_df.write.jdbc(**file_options)
        return Materialization.file(file_options['url'])
    elif file_type == 'orc':
        spark_df.write.orc(**file_options)
        return Materialization.file(file_options['path'])
    elif file_type == 'saveAsTable':
        spark_df.write.saveAsTable(**file_options)
        return Materialization.file(file_options['name'])
    elif file_type == 'text':
        spark_df.write.text(**file_options)
        return Materialization.file(file_options['path'])
    else:
        check.failed('Unsupported file type: {}'.format(file_type))
Example #8
def materialize(self, _context, table_type, table_metadata, value):
    # Method of a storage-plugin class in the source codebase;
    # self._path_for_table builds the target location for the table.
    path = self._path_for_table(table_type, table_metadata)
    value.write.parquet(path=path, mode='overwrite')
    return Materialization.file(path), None
Example #9
from dagster import Materialization

def emit_nothing(_context):
    yield Materialization.file(path='/path/')
Example #10
from dagster import Materialization

def yield_stuff(_context):
    yield Materialization.file('/path/to/nowhere')
Example #11
def materialize(self, _context, table_type, _table_metadata, value):
    # As in Example #8, a method of a storage-plugin class; here the
    # table is written as CSV with a header row.
    path = self._path_for_table(table_type)
    value.write.csv(path=path, header=True, mode='overwrite')
    return Materialization.file(path), None
Example #12
from dagster import Materialization

def write_sauce(_context, path, sauce):
    # Persist the flavor attribute of the sauce object and report the file.
    with open(path, 'w+') as fd:
        fd.write(sauce.flavor)
    return Materialization.file(path)
Example #13
def test_out_of_pipeline_manager_yield_materialization():
    # Manager is defined in the surrounding test module. Materialization is
    # a namedtuple-based event, so two identically constructed instances
    # compare equal, which makes this round-trip assertion valid.
    manager = Manager()
    assert manager.yield_event(
        Materialization.file('/path/to/artifact', 'artifact')
    ) == Materialization.file('/path/to/artifact', 'artifact')