示例#1
0
def my_metadata_expectation_solid(context, df):
    """Emit a row-count expectation for ``df`` and then pass ``df`` along.

    Calls ``do_some_transform(df)``, yields an ``ExpectationResult`` whose
    ``success`` is ``len(df) > 0`` (carrying text, URL, row-count and
    byte-size metadata), and finally yields ``Output(df)``.
    """
    do_some_transform(df)

    has_rows = len(df) > 0
    entries = [
        EventMetadataEntry.text(
            'Text-based metadata for this event', label='text_metadata'
        ),
        EventMetadataEntry.url(
            'http://mycoolsite.com/url_for_my_data', label='dashboard_url'
        ),
        # The integer row count is multiplied by 1.0 so a float is recorded.
        EventMetadataEntry.float(1.0 * len(df), 'row count'),
        EventMetadataEntry.float(calculate_bytes(df), 'size (bytes)'),
    ]

    yield ExpectationResult(
        success=has_rows,
        description='ensure dataframe has rows',
        metadata_entries=entries,
    )
    yield Output(df)
示例#2
0
def my_metadata_expectation_solid(context, df):
    """Check that ``df`` is non-empty and forward it as the output.

    After calling ``do_some_transform(df)``, an ``ExpectationResult`` is
    yielded (successful when ``len(df) > 0``) with text, URL, row-count and
    byte-size metadata attached; ``Output(df)`` is yielded last.
    """
    do_some_transform(df)

    non_empty = len(df) > 0
    metadata = [
        EventMetadataEntry.text(
            "Text-based metadata for this event", label="text_metadata"
        ),
        EventMetadataEntry.url(
            "http://mycoolsite.com/url_for_my_data", label="dashboard_url"
        ),
        # Multiplying by 1.0 turns the integer row count into a float.
        EventMetadataEntry.float(1.0 * len(df), "row count"),
        EventMetadataEntry.float(calculate_bytes(df), "size (bytes)"),
    ]

    yield ExpectationResult(
        success=non_empty,
        description="ensure dataframe has rows",
        metadata_entries=metadata,
    )
    yield Output(df)
示例#3
0
def dbt_cli_run_operation(context) -> DbtCliResult:
    """This solid executes ``dbt run-operation`` via the dbt CLI.

    The executable, macro name and flags are read from
    ``context.solid_config``. An ``AssetMaterialization`` carrying the return
    code and the raw output is yielded first, followed by an ``Output``
    wrapping a ``DbtCliResult``.
    """
    config = context.solid_config
    logs, raw_output, return_code = execute_dbt(
        config["dbt_executable"],
        command=("run-operation", config["macro"]),
        flags_dict=passthrough_flags_only(config, ("args", )),
        log=context.log,
        warn_error=config["warn-error"],
        ignore_handled_error=config["ignore_handled_error"],
    )

    yield AssetMaterialization(
        asset_key=
        "dbt_cli_run_operation-shell_output",  # TODO: Perhaps derive asset key from CLI flags?
        description="The output of a shell execution of `dbt run-operation`.",
        metadata_entries=[
            EventMetadataEntry.float(
                label="return_code",
                value=float(return_code),
                # Spelling fixed: this text previously read "exeuction".
                description=
                "The return code of a shell execution of `dbt run-operation`.",
            ),
            EventMetadataEntry.text(
                label="raw_output",
                text=raw_output,
                description=
                "The raw output of a shell execution of `dbt run-operation`.",
            ),
        ],
    )

    yield Output(
        DbtCliResult(logs=logs, raw_output=raw_output,
                     return_code=return_code))
示例#4
0
def _timing_to_metadata(timings: List[Dict[str, Any]]) -> List[EventMetadataEntry]:
    """Convert timing records into start/end/duration metadata entries.

    Only records whose ``"name"`` is ``"execute"`` or ``"compile"`` are used;
    all others are skipped. Each record must provide ISO-8601 strings under
    ``"started_at"`` and ``"completed_at"``.

    Args:
        timings: Timing dictionaries with ``name``, ``started_at`` and
            ``completed_at`` keys.

    Returns:
        Three entries per recognised record: "<phase> Started At",
        "<phase> Completed At" and "<phase> Duration" (in seconds).
    """
    phase_labels = {"execute": "Execution", "compile": "Compilation"}

    metadata = []
    for timing in timings:
        desc = phase_labels.get(timing["name"])
        if desc is None:
            continue

        started_at = dateutil.parser.isoparse(timing["started_at"])
        completed_at = dateutil.parser.isoparse(timing["completed_at"])
        duration = completed_at - started_at
        metadata.extend(
            [
                EventMetadataEntry.text(
                    text=started_at.isoformat(timespec="seconds"), label=f"{desc} Started At"
                ),
                # Fixed: this entry previously repeated ``started_at``.
                EventMetadataEntry.text(
                    text=completed_at.isoformat(timespec="seconds"), label=f"{desc} Completed At"
                ),
                EventMetadataEntry.float(value=duration.total_seconds(), label=f"{desc} Duration"),
            ]
        )
    return metadata
示例#5
0
    def made_solid(context):
        """Simulate work for one partition and optionally emit a materialization.

        ``data_size_fn``, ``sleep_factor``, ``error_rate``, ``asset_key`` and
        ``materialization_metadata_entries`` are not defined in this function;
        they are expected to come from the enclosing scope.

        The partition date is parsed from ``context.solid_config["partition"]``.
        When ``data_size_fn`` is set, the function sleeps for
        ``sleep_factor * data_size``. When ``error_rate`` is set, an
        ``Exception`` is raised if ``random()`` falls below it. When
        ``asset_key`` is set, an ``AssetMaterialization`` for the partition is
        yielded.
        """
        partition_date = datetime.strptime(context.solid_config["partition"],
                                           DEFAULT_DATE_FORMAT)
        if data_size_fn:
            data_size = data_size_fn(partition_date)
            sleep_time = sleep_factor * data_size

            time.sleep(sleep_time)

        if error_rate and random() < error_rate:
            raise Exception("blah")

        if asset_key:
            # Work on a copy: appending to the captured list itself would
            # make it grow by one entry on every execution of this solid.
            metadata_entries = list(materialization_metadata_entries or [])
            if data_size_fn:
                metadata_entries.append(
                    EventMetadataEntry.float(data_size, "Data size (bytes)"))

            yield AssetMaterialization(
                asset_key=asset_key,
                # An empty list is passed as None, as before.
                metadata_entries=metadata_entries or None,
                partition=context.solid_config.get("partition"),
            )
示例#6
0
def _base_compute(context):
    """Sleep, maybe fail, optionally emit a materialization, then output ``1``.

    Values read from ``context.solid_config``:

    * ``sleep`` -- passed to ``time.sleep``.
    * ``error_rate`` -- an ``Exception`` is raised when ``random()`` is below it.
    * ``materialization_key_list`` / ``materialization_key`` -- when either is
      set (the list form takes precedence) an ``AssetMaterialization`` is
      yielded for the resulting ``AssetKey``.
    * ``materialization_text`` / ``materialization_url`` /
      ``materialization_path`` / ``materialization_json`` /
      ``materialization_value`` -- optional metadata, each labelled with
      ``context.solid.name``.
    """
    config = context.solid_config
    time.sleep(config["sleep"])

    if random() < config["error_rate"]:
        raise Exception("blah")

    asset_key = None
    if config.get("materialization_key_list") is not None:
        asset_key = AssetKey(config.get("materialization_key_list"))
    elif config.get("materialization_key") is not None:
        asset_key = AssetKey(config.get("materialization_key"))

    if asset_key:
        label = context.solid.name
        metadata_entries = []
        if config.get("materialization_text") is not None:
            metadata_entries.append(
                EventMetadataEntry.text(config.get("materialization_text"), label))

        if config.get("materialization_url") is not None:
            metadata_entries.append(
                EventMetadataEntry.url(config.get("materialization_url"), label))

        if config.get("materialization_path") is not None:
            # Fixed: this branch previously read "materialization_url", so the
            # configured path was never used.
            metadata_entries.append(
                EventMetadataEntry.path(config.get("materialization_path"), label))

        if config.get("materialization_json") is not None:
            metadata_entries.append(
                EventMetadataEntry.json(config.get("materialization_json"), label))

        if config.get("materialization_value") is not None:
            # NOTE(review): this replaces every entry collected above rather
            # than appending. Kept as-is; confirm whether that is intended.
            metadata_entries = [
                EventMetadataEntry.float(config.get("materialization_value"), label)
            ]

        yield AssetMaterialization(
            asset_key=asset_key,
            # An empty list is passed as None, as before.
            metadata_entries=metadata_entries or None,
        )

    yield Output(1)
    def handle_output(self, context, obj):
        """Write ``obj`` to CSV and yield metadata entries describing it.

        The destination is ``my_base_dir/<context.step_key>/<context.name>``.
        After ``obj.to_csv`` is called, an int entry with ``obj.shape[0]`` and
        a float entry with the mean of ``obj["some_column"]`` are yielded.
        """
        destination = os.path.join("my_base_dir", context.step_key, context.name)
        obj.to_csv(destination)

        row_count = obj.shape[0]
        yield EventMetadataEntry.int(row_count, label="number of rows")

        column_mean = obj["some_column"].mean()
        yield EventMetadataEntry.float(column_mean, "some_column mean")
示例#8
0
 def materialize(_):
     """Yield one ``all_types`` materialization covering many entry kinds.

     The metadata list contains text, url, fspath, json, python_artifact,
     float (including NaN), int (including ``LONG_INT``), pipeline_run,
     asset, table and table_schema entries. ``Output(None)`` is yielded
     afterwards.
     """
     entries = [
         EventMetadataEntry.text("text is cool", "text"),
         EventMetadataEntry.url("https://bigty.pe/neato", "url"),
         EventMetadataEntry.fspath("/tmp/awesome", "path"),
         EventMetadataEntry.json({"is_dope": True}, "json"),
         EventMetadataEntry.python_artifact(EventMetadataEntry, "python class"),
         EventMetadataEntry.python_artifact(file_relative_path, "python function"),
         EventMetadataEntry.float(1.2, "float"),
         EventMetadataEntry.int(1, "int"),
         EventMetadataEntry.float(float("nan"), "float NaN"),
         EventMetadataEntry.int(LONG_INT, "long int"),
         EventMetadataEntry.pipeline_run("fake_run_id", "pipeline run"),
         EventMetadataEntry.asset(AssetKey("my_asset"), "my asset"),
     ]

     # Table entry built from two sample records.
     sample_rows = [TableRecord(foo=1, bar=2), TableRecord(foo=3, bar=4)]
     entries.append(EventMetadataEntry.table(label="table", records=sample_rows))

     # Table-schema entry: two columns plus one table-level constraint.
     foo_column = TableColumn(
         name="foo",
         type="integer",
         constraints=TableColumnConstraints(unique=True),
     )
     bar_column = TableColumn(name="bar", type="string")
     schema = TableSchema(
         columns=[foo_column, bar_column],
         constraints=TableConstraints(other=["some constraint"]),
     )
     entries.append(
         EventMetadataEntry.table_schema(label="table_schema", schema=schema))

     yield AssetMaterialization(
         asset_key="all_types",
         description="a materialization with all metadata types",
         metadata_entries=entries,
     )
     yield Output(None)
示例#9
0
def result_to_materialization(
        result: Dict[str, Any],
        asset_key_prefix: List[str] = None,
        docs_url: str = None) -> Optional[AssetMaterialization]:
    """
    Build an ``AssetMaterialization`` from a single dbt node result, if applicable.

    This is a best-effort parser that tries to cope with several of the result formats dbt
    can produce. It is known to work for CLI outputs of dbt 0.18+ and for RPC responses of a
    similar vintage; because the RPC response schema is neither documented nor enforced, it
    can easily become out of date.

    Returns ``None`` when the result is not successful or when the node is not a model.
    """

    prefix = check.opt_list_param(asset_key_prefix,
                                  "asset_key_prefix",
                                  of_type=str)

    # Some formats report status through fail/skip/error flags rather than "status".
    if "fail" in result:
        succeeded = not (result.get("fail") or result.get("skip")
                         or result.get("error"))
    else:
        succeeded = result["status"] == "success"

    if not succeeded:
        return None

    # Timing information is represented the same way in every version.
    entries = [
        EventMetadataEntry.float(value=result["execution_time"],
                                 label="Execution Time (seconds)")
    ]
    entries.extend(_timing_to_metadata(result["timing"]))

    # Responses containing a node block (RPC and CLI 0.18.x) carry the id inside it.
    if "node" in result:
        node = result["node"]
        unique_id = node["unique_id"]
        entries.extend(_node_result_to_metadata(node))
    else:
        unique_id = result["unique_id"]

    id_parts = unique_id.split(".")

    # Materializations are only generated for models.
    if id_parts[0] != "model":
        return None

    if docs_url:
        # The docs link goes first in the metadata list.
        entries.insert(
            0,
            EventMetadataEntry.url(url=f"{docs_url}#!/model/{unique_id}",
                                   label="docs_url"),
        )

    return AssetMaterialization(
        description=f"dbt node: {unique_id}",
        metadata_entries=entries,
        asset_key=prefix + id_parts,
    )
示例#10
0
 def materialize(_):
     """Yield an ``all_types`` materialization followed by ``Output(None)``.

     The metadata covers text, url, fspath, json, python_artifact, float
     (including NaN) and int entries.
     """
     entries = [
         EventMetadataEntry.text("text is cool", "text"),
         EventMetadataEntry.url("https://bigty.pe/neato", "url"),
         EventMetadataEntry.fspath("/tmp/awesome", "path"),
         EventMetadataEntry.json({"is_dope": True}, "json"),
         EventMetadataEntry.python_artifact(EventMetadataEntry, "python class"),
         EventMetadataEntry.python_artifact(file_relative_path, "python function"),
         EventMetadataEntry.float(1.2, "float"),
         EventMetadataEntry.int(1, "int"),
         EventMetadataEntry.float(float("nan"), "float NaN"),
     ]
     yield AssetMaterialization(
         asset_key="all_types",
         description="a materialization with all metadata types",
         metadata_entries=entries,
     )
     yield Output(None)
示例#11
0
 def materialize_one(_):
     """Yield a materialization for ``asset_key`` and then ``Output(1)``.

     ``asset_key`` is not defined here; it is expected to come from the
     enclosing scope. Text, json and float metadata entries are attached.
     """
     materialization = AssetMaterialization(
         asset_key=asset_key,
         metadata_entries=[
             EventMetadataEntry.text("hello", "text"),
             EventMetadataEntry.json({"hello": "world"}, "json"),
             EventMetadataEntry.float(1.0, "one"),
         ],
     )
     yield materialization
     yield Output(1)
示例#12
0
 def materialize_one(_):
     """Yield a materialization for ``asset_key`` and then ``Output(1)``.

     ``asset_key`` is not defined here; it is expected to come from the
     enclosing scope. Text, json and float metadata entries are attached.
     """
     event = AssetMaterialization(
         asset_key=asset_key,
         metadata_entries=[
             EventMetadataEntry.text('hello', 'text'),
             EventMetadataEntry.json({'hello': 'world'}, 'json'),
             EventMetadataEntry.float(1.0, 'one'),
         ],
     )
     yield event
     yield Output(1)
示例#13
0
def my_metadata_output(context):
    """Fetch data via ``get_some_data`` and yield it as an ``Output`` with metadata.

    The output carries text, URL, row-count (int) and byte-size (float)
    metadata entries.
    """
    df = get_some_data()

    entries = [
        EventMetadataEntry.text("Text-based metadata for this event", label="text_metadata"),
        EventMetadataEntry.url("http://mycoolsite.com/url_for_my_data", label="dashboard_url"),
        EventMetadataEntry.int(len(df), "row count"),
        EventMetadataEntry.float(calculate_bytes(df), "size (bytes)"),
    ]
    yield Output(df, metadata_entries=entries)
示例#14
0
 def backcompat_materialize(_):
     """Yield an ``all_types`` ``Materialization`` followed by ``Output(None)``.

     The metadata covers text, url, fspath, json, python_artifact, float
     (including NaN), int (including ``LONG_INT``), pipeline_run and asset
     entries.
     """
     entries = [
         EventMetadataEntry.text("text is cool", "text"),
         EventMetadataEntry.url("https://bigty.pe/neato", "url"),
         EventMetadataEntry.fspath("/tmp/awesome", "path"),
         EventMetadataEntry.json({"is_dope": True}, "json"),
         EventMetadataEntry.python_artifact(EventMetadataEntry, "python class"),
         EventMetadataEntry.python_artifact(file_relative_path, "python function"),
         EventMetadataEntry.float(1.2, "float"),
         EventMetadataEntry.int(1, "int"),
         EventMetadataEntry.float(float("nan"), "float NaN"),
         EventMetadataEntry.int(LONG_INT, "long int"),
         EventMetadataEntry.pipeline_run("fake_run_id", "pipeline run"),
         EventMetadataEntry.asset(AssetKey("my_asset"), "my asset"),
     ]
     yield Materialization(
         asset_key="all_types",
         description="a materialization with all metadata types",
         metadata_entries=entries,
     )
     yield Output(None)
示例#15
0
def _base_compute(context):
    """Sleep, maybe fail, optionally emit a materialization, then output ``1``.

    Values read from ``context.solid_config``:

    * ``sleep`` -- passed to ``time.sleep``.
    * ``error_rate`` -- an ``Exception`` is raised when ``random()`` is below it.
    * ``materialization_key`` -- when set, a ``Materialization`` is yielded
      with this value as its asset key.
    * ``materialization_text`` / ``materialization_url`` /
      ``materialization_path`` / ``materialization_json`` /
      ``materialization_value`` -- optional metadata, each labelled with
      ``context.solid.name``.
    """
    config = context.solid_config
    time.sleep(config['sleep'])

    if random() < config['error_rate']:
        raise Exception('blah')

    if config.get('materialization_key') is not None:
        label = context.solid.name
        metadata_entries = []
        if config.get('materialization_text') is not None:
            metadata_entries.append(
                EventMetadataEntry.text(config.get('materialization_text'), label)
            )

        if config.get('materialization_url') is not None:
            metadata_entries.append(
                EventMetadataEntry.url(config.get('materialization_url'), label)
            )

        if config.get('materialization_path') is not None:
            # Fixed: this branch previously read 'materialization_url', so the
            # configured path was never used.
            metadata_entries.append(
                EventMetadataEntry.path(config.get('materialization_path'), label)
            )

        if config.get('materialization_json') is not None:
            metadata_entries.append(
                EventMetadataEntry.json(config.get('materialization_json'), label)
            )

        if config.get('materialization_value') is not None:
            # NOTE(review): this replaces every entry collected above rather
            # than appending. Kept as-is; confirm whether that is intended.
            metadata_entries = [
                EventMetadataEntry.float(config.get('materialization_value'), label)
            ]

        yield Materialization(
            label=label,
            asset_key=config.get('materialization_key'),
            # An empty list is passed as None, as before.
            metadata_entries=metadata_entries or None,
        )

    yield Output(1)
示例#16
0
def my_asset_key_materialization_solid(context, df):
    """Transform and persist ``df``, record a materialization, then emit ``df``.

    The ``AssetMaterialization`` uses the asset key
    ``["dashboard", "my_cool_site"]`` and carries a dashboard URL and a
    byte-size entry.
    """
    do_some_transform(df)
    persist_to_storage(df)

    dashboard_key = AssetKey(["dashboard", "my_cool_site"])
    entries = [
        EventMetadataEntry.url("http://mycoolsite.com/dashboard", label="dashboard_url"),
        EventMetadataEntry.float(calculate_bytes(df), "size (bytes)"),
    ]
    yield AssetMaterialization(
        asset_key=dashboard_key,
        description="Persisted result to storage",
        metadata_entries=entries,
    )
    yield Output(df)
    def handle_output(self, context, obj):
        """Write ``obj`` to CSV and yield an ``AssetMaterialization`` for it.

        The destination is
        ``my_base_dir/<context.step_key>/<context.output_name>``. The
        materialization is keyed by that path and records ``obj.shape[0]``
        and the mean of ``obj["some_column"]``.
        """
        # Fixed: os.path.join takes the components as separate arguments;
        # passing a single list raises TypeError.
        file_path = os.path.join("my_base_dir", context.step_key, context.output_name)

        obj.to_csv(file_path)

        yield AssetMaterialization(
            asset_key=AssetKey(file_path),
            description="Persisted result to storage.",
            metadata_entries=[
                EventMetadataEntry.int(obj.shape[0], label="number of rows"),
                EventMetadataEntry.float(obj["some_column"].mean(), "some_column mean"),
            ],
        )
示例#18
0
def my_asset_key_materialization_solid(context, df):
    """Transform and persist ``df``, record a materialization, then emit ``df``.

    The ``Materialization`` uses the asset key
    ``['dashboard', 'my_cool_site']`` and carries a dashboard URL and a
    byte-size entry.
    """
    do_some_transform(df)
    persist_to_storage(df)

    dashboard_key = AssetKey(['dashboard', 'my_cool_site'])
    entries = [
        EventMetadataEntry.url('http://mycoolsite.com/dashboard', label='dashboard_url'),
        EventMetadataEntry.float(calculate_bytes(df), 'size (bytes)'),
    ]
    yield Materialization(
        asset_key=dashboard_key,
        description='Persisted result to storage',
        metadata_entries=entries,
    )
    yield Output(df)
示例#19
0
def my_metadata_materialization_solid(context):
    """Read a dataframe, persist it, and report where it was stored.

    ``persist_to_storage(df)`` supplies the path that is recorded as an
    fspath entry on the ``my_dataset`` materialization and then yielded as
    the output value.
    """
    df = read_df()
    remote_storage_path = persist_to_storage(df)

    entries = [
        EventMetadataEntry.text("Text-based metadata for this event", label="text_metadata"),
        EventMetadataEntry.fspath(remote_storage_path),
        EventMetadataEntry.url("http://mycoolsite.com/url_for_my_data", label="dashboard_url"),
        EventMetadataEntry.float(calculate_bytes(df), "size (bytes)"),
    ]
    yield AssetMaterialization(
        asset_key="my_dataset",
        description="Persisted result to storage",
        metadata_entries=entries,
    )
    yield Output(remote_storage_path)
示例#20
0
def my_metadata_materialization_solid(context, df):
    """Transform and persist ``df``, record a ``my_dataset`` materialization, emit ``df``.

    The materialization carries text, fspath, URL and byte-size metadata.
    """
    do_some_transform(df)
    persist_to_storage(df)

    entries = [
        EventMetadataEntry.text("Text-based metadata for this event", label="text_metadata"),
        EventMetadataEntry.fspath("/path/to/data/on/filesystem"),
        EventMetadataEntry.url("http://mycoolsite.com/url_for_my_data", label="dashboard_url"),
        EventMetadataEntry.float(calculate_bytes(df), "size (bytes)"),
    ]
    yield AssetMaterialization(
        asset_key="my_dataset",
        description="Persisted result to storage",
        metadata_entries=entries,
    )
    yield Output(df)
示例#21
0
 def materialize(_):
     """Yield an ``all_types`` ``Materialization`` followed by ``Output(None)``.

     The metadata covers text, url, fspath, json, python_artifact and float
     entries.
     """
     entries = [
         EventMetadataEntry.text('text is cool', 'text'),
         EventMetadataEntry.url('https://bigty.pe/neato', 'url'),
         EventMetadataEntry.fspath('/tmp/awesome', 'path'),
         EventMetadataEntry.json({'is_dope': True}, 'json'),
         EventMetadataEntry.python_artifact(EventMetadataEntry, 'python class'),
         EventMetadataEntry.python_artifact(file_relative_path, 'python function'),
         EventMetadataEntry.float(1.2, 'float'),
     ]
     yield Materialization(
         label='all_types',
         description='a materialization with all metadata types',
         metadata_entries=entries,
     )
     yield Output(None)
示例#22
0
def add_one_and_materialize(_, num):
    """Compute ``num + 1``, record it on a dashboard materialization, and output it."""
    result = num + 1

    dashboard_key = AssetKey(["dashboards", "analytics_dashboard"])
    entries = [
        EventMetadataEntry.url(
            "http://mycoolwebsite.com/dashboards/analytics", "dashboard url"
        ),
        EventMetadataEntry.float(result, "numeric value"),
    ]
    yield AssetMaterialization(
        description="Analytics dashboard for example pipeline",
        asset_key=dashboard_key,
        metadata_entries=entries,
    )

    # A materialization event is yielded in addition to the output, so the
    # output must be yielded explicitly as an `Output` rather than returned.
    yield Output(result)
示例#23
0
def my_metadata_materialization_solid(context, df):
    """Transform and persist ``df``, record a ``my_dataset`` ``Materialization``, emit ``df``.

    The materialization carries text, fspath, URL and byte-size metadata.
    """
    do_some_transform(df)
    persist_to_storage(df)

    entries = [
        EventMetadataEntry.text(
            'Text-based metadata for this event', label='text_metadata'
        ),
        EventMetadataEntry.fspath('/path/to/data/on/filesystem'),
        EventMetadataEntry.url(
            'http://mycoolsite.com/url_for_my_data', label='dashboard_url'
        ),
        EventMetadataEntry.float(calculate_bytes(df), 'size (bytes)'),
    ]
    yield Materialization(
        label='my_dataset',
        description='Persisted result to storage',
        metadata_entries=entries,
    )
    yield Output(df)
示例#24
0
def dbt_cli_compile(context) -> DbtCliResult:
    """This solid executes ``dbt compile`` via the dbt CLI.

    The executable and flags are read from ``context.solid_config``. An
    ``AssetMaterialization`` carrying the return code and the raw output is
    yielded first, followed by an ``Output`` wrapping a ``DbtCliResult``.
    """
    config = context.solid_config
    # Config keys that are forwarded to dbt as CLI flags.
    passthrough_keys = (
        "parse-only",
        "threads",
        "no-version-check",
        "models",
        "exclude",
        "selector",
        "state",
        "full-refresh",
    )
    logs, raw_output, return_code = execute_dbt(
        config["dbt_executable"],
        command=("compile", ),
        flags_dict=passthrough_flags_only(config, passthrough_keys),
        log=context.log,
        warn_error=config["warn-error"],
        ignore_handled_error=config["ignore_handled_error"],
    )

    yield AssetMaterialization(
        asset_key=
        "dbt_cli_compile-shell_output",  # TODO: Perhaps derive asset key from CLI flags?
        description="The output of a shell execution of `dbt compile`.",
        metadata_entries=[
            EventMetadataEntry.float(
                label="return_code",
                value=float(return_code),
                # Spelling fixed: this text previously read "exeuction".
                description=
                "The return code of a shell execution of `dbt compile`.",
            ),
            EventMetadataEntry.text(
                label="raw_output",
                text=raw_output,
                description=
                "The raw output of a shell execution of `dbt compile`.",
            ),
        ],
    )

    yield Output(
        DbtCliResult(logs=logs, raw_output=raw_output,
                     return_code=return_code))
示例#25
0
def dbt_cli_test(context) -> DbtCliStatsResult:
    """This solid executes ``dbt test`` via the dbt CLI.

    The executable and flags are read from ``context.solid_config``. Run
    results are extracted from the logs with ``get_run_results``. An
    ``AssetMaterialization`` carrying the return code, the run results and
    the raw output is yielded first, followed by an ``Output`` wrapping a
    ``DbtCliStatsResult``.
    """
    config = context.solid_config
    logs, raw_output, return_code = execute_dbt(
        config["dbt_executable"],
        command=("test", ),
        flags_dict=passthrough_flags_only(
            config,
            ("data", "schema", "fail-fast", "threads", "models", "exclude")),
        log=context.log,
        warn_error=config["warn-error"],
        ignore_handled_error=config["ignore_handled_error"],
    )

    run_results = get_run_results(logs)

    yield AssetMaterialization(
        asset_key=
        "dbt_cli_test-shell_output",  # TODO: Perhaps derive asset key from CLI flags?
        description="The output of a shell execution of `dbt test`.",
        metadata_entries=[
            EventMetadataEntry.float(
                label="return_code",
                value=float(return_code),
                # Spelling fixed: this text previously read "exeuction".
                description=
                "The return code of a shell execution of `dbt test`.",
            ),
            EventMetadataEntry.json(
                label="run_results",
                data=run_results,
                description=
                "The summarized results of a shell execution of `dbt test`.",
            ),
            EventMetadataEntry.text(
                label="raw_output",
                text=raw_output,
                description=
                "The raw output of a shell execution of `dbt test`.",
            ),
        ],
    )

    # The run results are also expanded into keyword arguments of the result.
    yield Output(
        DbtCliStatsResult(logs=logs,
                          raw_output=raw_output,
                          return_code=return_code,
                          **run_results))
示例#26
0
def many_table_materializations(_context):
    """Yield one ``table_info`` materialization per entry in ``raw_tables``.

    The markdown file at ``MARKDOWN_EXAMPLE`` (resolved relative to this
    file) is read once and attached to every materialization, alongside
    text, fspath, json, url, int and NaN-float entries derived from the
    table name.
    """
    # Read the markdown up front so the file handle is closed before the
    # first yield; previously it stayed open while the generator was
    # suspended between materializations.
    with open(file_relative_path(__file__, MARKDOWN_EXAMPLE), "r") as f:
        md_str = f.read()

    for table in raw_tables:
        yield AssetMaterialization(
            asset_key="table_info",
            metadata_entries=[
                EventMetadataEntry.text(text=table, label="table_name"),
                EventMetadataEntry.fspath(path="/path/to/{}".format(table),
                                          label="table_path"),
                EventMetadataEntry.json(data={"name": table},
                                        label="table_data"),
                EventMetadataEntry.url(
                    url="https://bigty.pe/{}".format(table),
                    label="table_name_big"),
                EventMetadataEntry.md(md_str=md_str, label="table_blurb"),
                EventMetadataEntry.int(29119888133298982934829348,
                                       label="big_int"),
                EventMetadataEntry.float(float("nan"), label="float_nan"),
            ],
        )
示例#27
0
def generate_materializations(
    dbt_output: Union[DbtRpcOutput, DbtCliOutput],
    asset_key_prefix: Optional[List[str]] = None
) -> Iterator[AssetMaterialization]:
    """Yield an ``AssetMaterialization`` for each successful model or snapshot result.

    Results are read from ``dbt_output.result.results``. A result is used
    when the first segment of its ``unique_id`` is ``model`` or ``snapshot``
    and none of ``fail``, ``skip`` or ``error`` is set. The asset key is
    ``asset_key_prefix`` followed by the dot-separated parts of the id.
    """

    prefix = check.opt_list_param(asset_key_prefix,
                                  "asset_key_prefix",
                                  of_type=str)

    # Step-timing names that produce metadata, mapped to their label prefix.
    timing_labels = {"execute": "Execution", "compile": "Compilation"}

    for node_result in dbt_output.result.results:
        # unique_id in run results is structured as <resource_type>.<package>.<resource_name>
        unique_id = node_result.unique_id
        resource_type = unique_id.split(".")[0] if unique_id else None
        if resource_type not in ("model", "snapshot"):
            continue
        if node_result.fail or node_result.skip or node_result.error:
            continue

        entries = [
            EventMetadataEntry.text(text=str(node_result.status),
                                    label="Status"),
            EventMetadataEntry.float(
                value=node_result.execution_time,
                label="Execution Time (seconds)",
            ),
        ]

        # For users of dbt 0.18.x, preserve the node-level metadata.
        if node_result.node:
            node = node_result.node
            entries.extend([
                EventMetadataEntry.text(
                    text=node["config"]["materialized"],
                    label="Materialization Strategy",
                ),
                EventMetadataEntry.text(text=node["database"],
                                        label="Database"),
                EventMetadataEntry.text(text=node["schema"], label="Schema"),
                EventMetadataEntry.text(text=node["alias"], label="Alias"),
                EventMetadataEntry.text(text=node["description"],
                                        label="Description"),
            ])

        for step_timing in node_result.step_timings:
            phase = timing_labels.get(step_timing.name)
            if phase is None:
                continue
            entries.extend([
                EventMetadataEntry.text(
                    text=step_timing.started_at.isoformat(timespec="seconds"),
                    label="{} Started At".format(phase),
                ),
                EventMetadataEntry.text(
                    text=step_timing.completed_at.isoformat(
                        timespec="seconds"),
                    label="{} Completed At".format(phase),
                ),
                EventMetadataEntry.float(
                    # this is a value like datetime.timedelta(microseconds=51484)
                    value=step_timing.duration.total_seconds(),
                    label="{} Duration".format(phase),
                ),
            ])

        yield AssetMaterialization(
            description="dbt node: {unique_id}".format(unique_id=unique_id),
            metadata_entries=entries,
            asset_key=prefix + unique_id.split("."),
        )
示例#28
0
def generate_materializations(
    dbt_output: Union[DbtRpcOutput, DbtCliOutput]
) -> Iterator[AssetMaterialization]:
    """Yield an ``AssetMaterialization`` for each successful model or snapshot result.

    Results are read from ``dbt_output.result.results``. A result is used
    when its node's ``resource_type`` is ``model`` or ``snapshot`` and none
    of ``fail``, ``skip`` or ``error`` is set. The node's ``unique_id`` is
    the asset key.
    """
    # Step-timing names that produce metadata, mapped to their label prefix.
    timing_labels = {"execute": "Execution", "compile": "Compilation"}

    for node_result in dbt_output.result.results:
        if node_result.node["resource_type"] not in ("model", "snapshot"):
            continue
        if node_result.fail or node_result.skip or node_result.error:
            continue

        node = node_result.node
        entries = [
            EventMetadataEntry.json(data=node, label="Node"),
            EventMetadataEntry.text(text=str(node_result.status),
                                    label="Status"),
            EventMetadataEntry.float(
                value=node_result.execution_time,
                label="Execution Time (seconds)",
            ),
            EventMetadataEntry.text(
                text=node["config"]["materialized"],
                label="Materialization Strategy",
            ),
            EventMetadataEntry.text(text=node["database"], label="Database"),
            EventMetadataEntry.text(text=node["schema"], label="Schema"),
            EventMetadataEntry.text(text=node["alias"], label="Alias"),
            EventMetadataEntry.text(text=node["description"],
                                    label="Description"),
        ]

        for step_timing in node_result.step_timings:
            phase = timing_labels.get(step_timing.name)
            if phase is None:
                continue
            entries.extend([
                EventMetadataEntry.text(
                    text=step_timing.started_at.isoformat(timespec="seconds"),
                    label="{} Started At".format(phase),
                ),
                EventMetadataEntry.text(
                    text=step_timing.completed_at.isoformat(
                        timespec="seconds"),
                    label="{} Completed At".format(phase),
                ),
                EventMetadataEntry.float(
                    # this is a value like datetime.timedelta(microseconds=51484)
                    value=step_timing.duration.total_seconds(),
                    label="{} Duration".format(phase),
                ),
            ])

        unique_id = node["unique_id"]
        yield AssetMaterialization(
            description="dbt node: {unique_id}".format(unique_id=unique_id),
            metadata_entries=entries,
            asset_key=unique_id,
        )
示例#29
0
 def _get_metadata(self, result: Dict[str,
                                      Any]) -> List[EventMetadataEntry]:
     """Return a one-element list holding the result's execution time.

     ``result["execution_time"]`` is wrapped in a float entry labelled
     "Execution Time (seconds)".
     """
     execution_time_entry = EventMetadataEntry.float(
         value=result["execution_time"],
         label="Execution Time (seconds)",
     )
     return [execution_time_entry]