def materialization_and_expectation(_context):
    yield AssetMaterialization(
        asset_key="all_types",
        description="a materialization with all metadata types",
        metadata_entries=[
            EventMetadataEntry.text("text is cool", "text"),
            EventMetadataEntry.url("https://bigty.pe/neato", "url"),
            EventMetadataEntry.fspath("/tmp/awesome", "path"),
            EventMetadataEntry.json({"is_dope": True}, "json"),
        ],
    )
    yield ExpectationResult(success=True, label="row_count", description="passed")
    yield ExpectationResult(True)
    yield Output(True)

def dbt_cli_test(context) -> DbtCliStatsResult:
    """This solid executes ``dbt test`` via the dbt CLI."""
    logs, raw_output, return_code = execute_dbt(
        context.solid_config["dbt_executable"],
        command=("test",),
        flags_dict=passthrough_flags_only(
            context.solid_config,
            ("data", "schema", "fail-fast", "threads", "models", "exclude"),
        ),
        log=context.log,
        warn_error=context.solid_config["warn-error"],
        ignore_handled_error=context.solid_config["ignore_handled_error"],
    )
    run_results = get_run_results(logs)

    yield AssetMaterialization(
        asset_key="dbt_cli_test-shell_output",  # TODO: Perhaps derive asset key from CLI flags?
        description="The output of a shell execution of `dbt test`.",
        metadata_entries=[
            EventMetadataEntry.float(
                label="return_code",
                value=float(return_code),
                description="The return code of a shell execution of `dbt test`.",
            ),
            EventMetadataEntry.json(
                label="run_results",
                data=run_results,
                description="The summarized results of a shell execution of `dbt test`.",
            ),
            EventMetadataEntry.text(
                label="raw_output",
                text=raw_output,
                description="The raw output of a shell execution of `dbt test`.",
            ),
        ],
    )
    yield Output(
        DbtCliStatsResult(
            logs=logs, raw_output=raw_output, return_code=return_code, **run_results
        )
    )

def _materialization_event_record(run_id, asset_key):
    return DagsterEventRecord(
        None,
        "",
        "debug",
        "",
        run_id,
        time.time() - 25,
        step_key="my_step_key",
        pipeline_name="my_pipeline",
        dagster_event=DagsterEvent(
            DagsterEventType.STEP_MATERIALIZATION.value,
            "my_pipeline",
            step_key="my_step_key",
            event_specific_data=StepMaterializationData(AssetMaterialization(asset_key=asset_key)),
        ),
    )

def materialize(_):
    yield AssetMaterialization(
        asset_key="all_types",
        description="a materialization with all metadata types",
        metadata_entries=[
            EventMetadataEntry.text("text is cool", "text"),
            EventMetadataEntry.url("https://bigty.pe/neato", "url"),
            EventMetadataEntry.fspath("/tmp/awesome", "path"),
            EventMetadataEntry.json({"is_dope": True}, "json"),
            EventMetadataEntry.python_artifact(EventMetadataEntry, "python class"),
            EventMetadataEntry.python_artifact(file_relative_path, "python function"),
            EventMetadataEntry.float(1.2, "float"),
        ],
    )
    yield Output(None)

def dataframe_materializer(_context, config, pandas_df):
    check.inst_param(pandas_df, "pandas_df", pd.DataFrame)
    file_type, file_options = list(config.items())[0]

    if file_type == "csv":
        path = file_options["path"]
        pandas_df.to_csv(path, index=False, **dict_without_keys(file_options, "path"))
    elif file_type == "parquet":
        pandas_df.to_parquet(file_options["path"])
    elif file_type == "table":
        pandas_df.to_csv(file_options["path"], sep="\t", index=False)
    elif file_type == "pickle":
        pandas_df.to_pickle(file_options["path"])
    else:
        check.failed("Unsupported file_type {file_type}".format(file_type=file_type))

    return AssetMaterialization.file(file_options["path"])

def file_handle_to_s3(context, file_handle):
    bucket = context.solid_config['Bucket']
    key = context.solid_config['Key']

    with context.file_manager.read(file_handle, 'rb') as fileobj:
        context.resources.s3.upload_fileobj(fileobj, bucket, key)

    s3_file_handle = S3FileHandle(bucket, key)

    yield AssetMaterialization(
        asset_key=s3_file_handle.s3_path,
        metadata_entries=[EventMetadataEntry.path(s3_file_handle.s3_path, label=last_key(key))],
    )

    yield Output(value=s3_file_handle, output_name='s3_file_handle')

def materialize(_):
    yield AssetMaterialization(
        asset_key='all_types',
        description='a materialization with all metadata types',
        metadata_entries=[
            EventMetadataEntry.text('text is cool', 'text'),
            EventMetadataEntry.url('https://bigty.pe/neato', 'url'),
            EventMetadataEntry.fspath('/tmp/awesome', 'path'),
            EventMetadataEntry.json({'is_dope': True}, 'json'),
            EventMetadataEntry.python_artifact(EventMetadataEntry, 'python class'),
            EventMetadataEntry.python_artifact(file_relative_path, 'python function'),
            EventMetadataEntry.float(1.2, 'float'),
        ],
    )
    yield Output(None)

def dbt_cli_compile(context) -> DbtCliResult:
    """This solid executes ``dbt compile`` via the dbt CLI."""
    logs, raw_output, return_code = execute_dbt(
        context.solid_config["dbt_executable"],
        command=("compile",),
        flags_dict=passthrough_flags_only(
            context.solid_config,
            (
                "parse-only",
                "threads",
                "no-version-check",
                "models",
                "exclude",
                "selector",
                "state",
                "full-refresh",
            ),
        ),
        log=context.log,
        warn_error=context.solid_config["warn-error"],
        ignore_handled_error=context.solid_config["ignore_handled_error"],
    )

    yield AssetMaterialization(
        asset_key="dbt_cli_compile-shell_output",  # TODO: Perhaps derive asset key from CLI flags?
        description="The output of a shell execution of `dbt compile`.",
        metadata_entries=[
            EventMetadataEntry.float(
                label="return_code",
                value=float(return_code),
                description="The return code of a shell execution of `dbt compile`.",
            ),
            EventMetadataEntry.text(
                label="raw_output",
                text=raw_output,
                description="The raw output of a shell execution of `dbt compile`.",
            ),
        ],
    )
    yield Output(DbtCliResult(logs=logs, raw_output=raw_output, return_code=return_code))

def dataframe_materializer(_context, config, dask_df):
    check.inst_param(dask_df, "dask_df", dd.DataFrame)

    if "to" in config:
        to_specs = config["to"]  # https://github.com/dagster-io/dagster/issues/2872
    else:
        to_specs = {
            to_type: to_options
            for to_type, to_options in config.items()
            if to_type in DataFrameToTypes
        }
        for key in to_specs.keys():
            warnings.warn(
                "Specifying {key}: is deprecated. Use to:{key}: instead.".format(key=key)
            )

    for to_type, to_options in to_specs.items():

        if to_type not in DataFrameToTypes:
            check.failed("Unsupported to_type {to_type}".format(to_type=to_type))

        # Get the metadata entry for the to_type in order to know which method
        # to call and whether it uses path as the first argument. And, make
        # to_options mutable if we need to pop off a path argument.
        to_meta = DataFrameToTypes[to_type]
        to_options = dict(to_options)

        # Get the to function and prepare its arguments.
        to_function = to_meta["function"]
        to_path = to_options.pop("path") if to_meta.get("is_path_based", False) else None
        to_args = [to_path] if to_path else []
        to_kwargs = to_options

        # Get the Dask client from the dask resource, if available.
        client_context = (
            _context.resources.dask.client.as_current()
            if hasattr(_context.resources, "dask")
            else contextlib.suppress()
        )
        with client_context:
            to_function(dask_df, *to_args, **to_kwargs)

        if to_path:
            yield AssetMaterialization.file(to_path)

def materialize(_):
    yield AssetMaterialization(
        asset_key="all_types",
        description="a materialization with all metadata types",
        metadata_entries=[
            EventMetadataEntry.text("text is cool", "text"),
            EventMetadataEntry.url("https://bigty.pe/neato", "url"),
            EventMetadataEntry.fspath("/tmp/awesome", "path"),
            EventMetadataEntry.json({"is_dope": True}, "json"),
            EventMetadataEntry.python_artifact(EventMetadataEntry, "python class"),
            EventMetadataEntry.python_artifact(file_relative_path, "python function"),
            EventMetadataEntry.float(1.2, "float"),
            EventMetadataEntry.int(1, "int"),
            EventMetadataEntry.float(float("nan"), "float NaN"),
            EventMetadataEntry.int(LONG_INT, "long int"),
            EventMetadataEntry.pipeline_run("fake_run_id", "pipeline run"),
            EventMetadataEntry.asset(AssetKey("my_asset"), "my asset"),
            EventMetadataEntry.table(
                label="table",
                records=[
                    TableRecord(foo=1, bar=2),
                    TableRecord(foo=3, bar=4),
                ],
            ),
            EventMetadataEntry.table_schema(
                label="table_schema",
                schema=TableSchema(
                    columns=[
                        TableColumn(
                            name="foo",
                            type="integer",
                            constraints=TableColumnConstraints(unique=True),
                        ),
                        TableColumn(name="bar", type="string"),
                    ],
                    constraints=TableConstraints(other=["some constraint"]),
                ),
            ),
        ],
    )
    yield Output(None)

def load_data_to_database_from_spark(context, data_frame: DataFrame):
    context.resources.db_info.load_table(data_frame, context.solid_config['table_name'])
    table_name = context.solid_config['table_name']
    yield AssetMaterialization(
        asset_key='table:{table_name}'.format(table_name=table_name),
        description=(
            'Persisted table {table_name} in database configured in the db_info resource.'
        ).format(table_name=table_name),
        metadata_entries=[
            EventMetadataEntry.text(label='Host', text=context.resources.db_info.host),
            EventMetadataEntry.text(label='Db', text=context.resources.db_info.db_name),
        ],
    )
    yield Output(value=table_name, output_name='table_name')

def many_table_materializations(_context):
    with open(file_relative_path(__file__, MARKDOWN_EXAMPLE), 'r') as f:
        md_str = f.read()
        for table in raw_tables:
            yield AssetMaterialization(
                asset_key='table_info',
                metadata_entries=[
                    EventMetadataEntry.text(text=table, label='table_name'),
                    EventMetadataEntry.fspath(path='/path/to/{}'.format(table), label='table_path'),
                    EventMetadataEntry.json(data={'name': table}, label='table_data'),
                    EventMetadataEntry.url(
                        url='https://bigty.pe/{}'.format(table), label='table_name_big'
                    ),
                    EventMetadataEntry.md(md_str=md_str, label='table_blurb'),
                ],
            )

def dbt_cli_snapshot_freshness(context) -> Dict:
    """This solid executes ``dbt source snapshot-freshness`` via the dbt CLI."""
    cli_output = execute_cli(
        context.solid_config["dbt_executable"],
        command=("source", "snapshot-freshness"),
        flags_dict=passthrough_flags_only(context.solid_config, ("select", "output", "threads")),
        log=context.log,
        warn_error=context.solid_config["warn-error"],
        ignore_handled_error=context.solid_config["ignore_handled_error"],
    )

    yield AssetMaterialization(
        asset_key="dbt_source_snapshot-freshness_cli_output",
        description="Output from the CLI execution of `dbt source snapshot-freshness`.",
        metadata_entries=[EventMetadataEntry.json(cli_output, label="CLI Output")],
    )

    yield Output(cli_output)

def dbt_cli_run_operation(context) -> Dict:
    """This solid executes ``dbt run-operation`` via the dbt CLI."""
    cli_output = execute_cli(
        context.solid_config["dbt_executable"],
        command=("run-operation", context.solid_config["macro"]),
        flags_dict=passthrough_flags_only(context.solid_config, ("args",)),
        log=context.log,
        warn_error=context.solid_config["warn-error"],
        ignore_handled_error=context.solid_config["ignore_handled_error"],
    )

    yield AssetMaterialization(
        asset_key="dbt_run_operation_cli_output",
        description="Output from the CLI execution of `dbt run-operation`.",
        metadata_entries=[EventMetadataEntry.json(cli_output, label="CLI Output")],
    )

    yield Output(cli_output)

def many_table_materializations(_context):
    with open(file_relative_path(__file__, MARKDOWN_EXAMPLE), "r") as f:
        md_str = f.read()
        for table in raw_tables:
            yield AssetMaterialization(
                asset_key="table_info",
                metadata_entries=[
                    EventMetadataEntry.text(text=table, label="table_name"),
                    EventMetadataEntry.fspath(path="/path/to/{}".format(table), label="table_path"),
                    EventMetadataEntry.json(data={"name": table}, label="table_data"),
                    EventMetadataEntry.url(
                        url="https://bigty.pe/{}".format(table), label="table_name_big"
                    ),
                    EventMetadataEntry.md(md_str=md_str, label="table_blurb"),
                ],
            )

def dataframe_materializer(_context, config, pandas_df):
    check.inst_param(pandas_df, 'pandas_df', pd.DataFrame)
    file_type, file_options = list(config.items())[0]

    if file_type == 'csv':
        path = file_options['path']
        pandas_df.to_csv(path, index=False, **dict_without_keys(file_options, 'path'))
    elif file_type == 'parquet':
        pandas_df.to_parquet(file_options['path'])
    elif file_type == 'table':
        pandas_df.to_csv(file_options['path'], sep='\t', index=False)
    else:
        check.failed('Unsupported file_type {file_type}'.format(file_type=file_type))

    return AssetMaterialization.file(file_options['path'])

def my_metadata_materialization_solid(context):
    df = read_df()
    remote_storage_path = persist_to_storage(df)
    yield AssetMaterialization(
        asset_key="my_dataset",
        description="Persisted result to storage",
        metadata={
            "text_metadata": "Text-based metadata for this event",
            "path": EventMetadata.path(remote_storage_path),
            "dashboard_url": EventMetadata.url("http://mycoolsite.com/url_for_my_data"),
            "size (bytes)": calculate_bytes(df),
        },
    )
    yield Output(remote_storage_path)

def load_data_to_database_from_spark(context, data_frame):
    context.resources.db_info.load_table(data_frame, context.solid_config["table_name"])
    table_name = context.solid_config["table_name"]
    yield AssetMaterialization(
        asset_key="table:{table_name}".format(table_name=table_name),
        description=(
            "Persisted table {table_name} in database configured in the db_info resource."
        ).format(table_name=table_name),
        metadata_entries=[
            EventMetadataEntry.text(label="Host", text=context.resources.db_info.host),
            EventMetadataEntry.text(label="Db", text=context.resources.db_info.db_name),
        ],
    )
    yield Output(value=table_name, output_name="table_name")

def test_access_partition_keys_from_context_only_one_asset_partitioned():
    upstream_partitions_def = StaticPartitionsDefinition(["a", "b", "c"])

    class MyIOManager(IOManager):
        def handle_output(self, context, obj):
            if context.op_def.name == "upstream_asset":
                assert context.asset_partition_key == "b"
            elif context.op_def.name in ["downstream_asset", "double_downstream_asset"]:
                assert not context.has_asset_partitions
                with pytest.raises(Exception):  # TODO: better error message
                    assert context.asset_partition_key_range
            else:
                assert False

        def load_input(self, context):
            assert not context.has_asset_partitions

    @asset(partitions_def=upstream_partitions_def)
    def upstream_asset(context):
        assert context.output_asset_partition_key() == "b"

    @asset
    def downstream_asset(upstream_asset):
        assert upstream_asset is None

    @asset
    def double_downstream_asset(downstream_asset):
        assert downstream_asset is None

    my_job = build_assets_job(
        "my_job",
        assets=[upstream_asset, downstream_asset, double_downstream_asset],
        resource_defs={"io_manager": IOManagerDefinition.hardcoded_io_manager(MyIOManager())},
    )
    result = my_job.execute_in_process(partition_key="b")
    assert result.asset_materializations_for_node("upstream_asset") == [
        AssetMaterialization(asset_key=AssetKey(["upstream_asset"]), partition="b")
    ]

def observes_dataset_op(context):
    df = read_df()
    remote_storage_path = persist_to_storage(df)
    context.log_event(
        AssetObservation(
            asset_key="my_dataset",
            metadata={
                "text_metadata": "Text-based metadata for this event",
                "path": EventMetadata.path(remote_storage_path),
                "dashboard_url": EventMetadata.url("http://mycoolsite.com/url_for_my_data"),
                "size (bytes)": calculate_bytes(df),
            },
        )
    )
    context.log_event(AssetMaterialization(asset_key="my_dataset"))
    return remote_storage_path

def dbt_cli_snapshot(context) -> Dict:
    """This solid executes ``dbt snapshot`` via the dbt CLI."""
    cli_output = execute_cli(
        context.solid_config["dbt_executable"],
        command=("snapshot",),
        flags_dict=passthrough_flags_only(context.solid_config, ("threads", "models", "exclude")),
        log=context.log,
        warn_error=context.solid_config["warn-error"],
        ignore_handled_error=context.solid_config["ignore_handled_error"],
    )

    if context.solid_config["yield_materializations"]:
        yield AssetMaterialization(
            asset_key="dbt_snapshot_cli_output",
            description="Output from the CLI execution of `dbt snapshot`.",
            metadata_entries=[EventMetadataEntry.json(cli_output, label="CLI Output")],
        )

    yield Output(cli_output)

def materialize_gdelt_mining_asset(context, gdelt_mined_events_filename):
    # Extract which file we're materializing
    filename = gdelt_mined_events_filename.splitlines()[-1]

    # Fetch the CSV file from S3 and load it into a pandas dataframe
    s3 = boto3.resource('s3')
    obj = s3.Object('discursus-io', filename)
    df_gdelt_events = pd.read_csv(StringIO(obj.get()['Body'].read().decode('utf-8')), sep='\t')

    # Materialize asset
    yield AssetMaterialization(
        asset_key="gdelt_events",
        description="List of events mined on GDELT",
        metadata={
            "path": "s3://discursus-io/" + filename,
            "rows": df_gdelt_events.index.size,
        },
    )
    yield Output(df_gdelt_events)

def materialize(_):
    yield AssetMaterialization(
        asset_key="all_types",
        description="a materialization with all metadata types",
        metadata_entries=[
            EventMetadataEntry.text("text is cool", "text"),
            EventMetadataEntry.url("https://bigty.pe/neato", "url"),
            EventMetadataEntry.fspath("/tmp/awesome", "path"),
            EventMetadataEntry.json({"is_dope": True}, "json"),
            EventMetadataEntry.python_artifact(EventMetadataEntry, "python class"),
            EventMetadataEntry.python_artifact(file_relative_path, "python function"),
            EventMetadataEntry.float(1.2, "float"),
            EventMetadataEntry.int(1, "int"),
            EventMetadataEntry.float(float("nan"), "float NaN"),
            EventMetadataEntry.int(LONG_INT, "long int"),
            EventMetadataEntry.pipeline_run("fake_run_id", "pipeline run"),
            EventMetadataEntry.asset(AssetKey("my_asset"), "my asset"),
        ],
    )
    yield Output(None)

def dataframe_materializer(_context, config, dask_df):
    check.inst_param(dask_df, "dask_df", dd.DataFrame)
    file_type, file_options = list(config.items())[0]
    path = file_options.get("path")

    if file_type == "csv":
        dask_df.to_csv(path, **dict_without_keys(file_options, "path"))
    elif file_type == "parquet":
        dask_df.to_parquet(path, **dict_without_keys(file_options, "path"))
    elif file_type == "hdf":
        dask_df.to_hdf(path, **dict_without_keys(file_options, "path"))
    elif file_type == "json":
        dask_df.to_json(path, **dict_without_keys(file_options, "path"))
    elif file_type == "sql":
        dask_df.to_sql(**file_options)
    else:
        check.failed("Unsupported file_type {file_type}".format(file_type=file_type))

    return AssetMaterialization.file(path)

def made_op(context):
    partition_date = datetime.strptime(context.op_config["partition"], DEFAULT_DATE_FORMAT)
    if data_size_fn:
        data_size = data_size_fn(partition_date)
        sleep_time = sleep_factor * data_size
        time.sleep(sleep_time)

    rand = random()
    if error_rate and rand < error_rate:
        raise IntentionalRandomFailure(f"random {rand} < error rate {error_rate}")

    if asset_key:
        metadata = {"Data size (bytes)": data_size} if data_size_fn else None
        yield AssetMaterialization(
            asset_key=asset_key,
            metadata=metadata,
            partition=context.op_config.get("partition"),
        )

def dataframe_materializer(_context, config, dask_df):
    check.inst_param(dask_df, 'dask_df', dd.DataFrame)
    file_type, file_options = list(config.items())[0]
    path = file_options.get('path')

    if file_type == 'csv':
        dask_df.to_csv(path, **dict_without_keys(file_options, 'path'))
    elif file_type == 'parquet':
        dask_df.to_parquet(path, **dict_without_keys(file_options, 'path'))
    elif file_type == 'hdf':
        dask_df.to_hdf(path, **dict_without_keys(file_options, 'path'))
    elif file_type == 'json':
        dask_df.to_json(path, **dict_without_keys(file_options, 'path'))
    elif file_type == 'sql':
        dask_df.to_sql(**file_options)
    else:
        check.failed('Unsupported file_type {file_type}'.format(file_type=file_type))

    return AssetMaterialization.file(path)

def compare_calories(context, cereals, least_hot, least_cold):
    cereals_df = pd.DataFrame(cereals)

    def get_calories(name):
        return cereals_df[cereals_df["name"] == name]["calories"].iloc[0]

    cereal_choice = (
        least_hot if get_calories(least_hot) > get_calories(least_cold) else least_cold
    )
    context.log.info(
        f"Compare the calories of hot and cold cereals: {cereal_choice} is healthier"
    )
    yield AssetMaterialization(
        asset_key="cereal_choice",
        description="Which cereal is healthiest",
        metadata_entries=[EventMetadataEntry.text(cereal_choice, "Cereal Choice")],
    )
    yield Output(cereal_choice)

def download_zipfile_from_url(context, file_name: str, base_url: str):
    url = "/".join([base_url, file_name])
    # mount dirs onto volume
    target = os.path.join(context.resources.volume, file_name)
    if not os.path.exists(target):
        _download_zipfile_from_url(
            url,
            target,
            context.solid_config["chunk_size"],
        )
    yield AssetMaterialization(
        asset_key=file_name,
        metadata_entries=[
            EventMetadataEntry.text(url, "zipfile url source"),
            EventMetadataEntry.text(target, "zipfile filepath"),
            EventMetadataEntry.text(str(os.path.getsize(target)), "size of zipfile (bytes)"),
        ],
    )
    yield Output(target)

def read_file(context):
    relative_filename = context.op_config["filename"]
    directory = context.op_config["directory"]
    filename = os.path.join(directory, relative_filename)
    try:
        fstats = os.stat(filename)
        context.log.info("Found file {}".format(relative_filename))
        yield AssetMaterialization(
            asset_key=AssetKey(["log_file", relative_filename]),
            metadata={
                "path": MetadataValue.path(filename),
                "File status": {
                    "size": fstats.st_size,
                    "ctime": fstats.st_ctime,
                    "mtime": fstats.st_mtime,
                },
            },
        )
        yield Output(relative_filename)
    except FileNotFoundError:
        context.log.error("No file found: {}".format(relative_filename))

def upload_pickled_object_to_gcs_bucket(context, value: Any, bucket_name: str, file_name: str):
    gcs_bucket = context.resources.gcs_client.get_bucket(bucket_name)
    key = "{}-{}".format(file_name, uuid.uuid4())
    with tempfile.TemporaryFile("w+b") as fp:
        pickle.dump(value, fp, PICKLE_PROTOCOL)
        # Done because you can't upload the contents of a file outside the context manager if it's a tempfile.
        fp.seek(0)
        gcs_bucket.blob(key).upload_from_file(fp)

    gcs_url = "gs://{bucket_name}/{key}".format(bucket_name=bucket_name, key=key)

    yield AssetMaterialization(
        asset_key=gcs_url,
        description="Serialized object to Google Cloud Storage Bucket",
        metadata_entries=[
            EventMetadataEntry.text(gcs_url, "google cloud storage URI"),
        ],
    )

    yield Output(value)