Exemplo n.º 1
0
 def raw_file_op(_context):
     """Emit a materialization, an expectation event and an output for ``name``.

     ``name`` is not a parameter of this function; it is resolved from the
     enclosing scope, which is outside the visible snippet.

     Yields, in order:
         * an ``AssetMaterialization`` for the ``table_info`` asset whose
           ``table_path`` metadata points at ``/path/to/<name>.raw``;
         * whatever ``do_expectation(_context, name)`` returns;
         * an ``Output`` wrapping ``name``.
     """
     raw_path = f"/path/to/{name}.raw"
     table_metadata = {"table_path": EventMetadata.path(raw_path)}
     yield AssetMaterialization(asset_key="table_info", metadata=table_metadata)
     yield do_expectation(_context, name)
     yield Output(name)
Exemplo n.º 2
0
def sort_by_calories(context, cereals):
    """Sort cereals by calories, write them to a CSV file and record the asset.

    Args:
        context: Execution context; only ``context.run_id`` is read, to build
            a per-run output file name.
        cereals: Iterable of dict-like rows. Each row must have a ``"name"``
            key and a ``"calories"`` key whose value is accepted by ``int()``.

    Yields:
        An ``AssetMaterialization`` for ``sorted_cereals_csv`` carrying the
        absolute path of the written file, followed by ``Output(None)``.

    Raises:
        IndexError: If ``cereals`` is empty (the first and last rows are
            accessed unconditionally).
    """
    sorted_cereals = sorted(cereals,
                            key=lambda cereal: int(cereal["calories"]))
    least_caloric = sorted_cereals[0]["name"]
    most_caloric = sorted_cereals[-1]["name"]

    logger = get_dagster_logger()
    logger.info(f"Least caloric cereal: {least_caloric}")
    logger.info(f"Most caloric cereal: {most_caloric}")

    # The CSV header is taken from the keys of the first sorted row.
    fieldnames = list(sorted_cereals[0].keys())
    sorted_cereals_csv_path = os.path.abspath(
        f"output/calories_sorted_{context.run_id}.csv")
    os.makedirs(os.path.dirname(sorted_cereals_csv_path), exist_ok=True)

    # newline="" lets the csv module control line endings itself; without it,
    # platforms that translate "\n" on write produce an extra blank line
    # after every row.
    with open(sorted_cereals_csv_path, "w", newline="") as fd:
        writer = csv.DictWriter(fd, fieldnames)
        writer.writeheader()
        writer.writerows(sorted_cereals)

    yield AssetMaterialization(
        asset_key="sorted_cereals_csv",
        description="Cereals data frame sorted by caloric content",
        metadata={
            "sorted_cereals_csv_path":
            EventMetadata.path(sorted_cereals_csv_path)
        },
    )
    yield Output(None)
Exemplo n.º 3
0
def my_failure_metadata_solid():
    """Process the files under a fixed path, failing with metadata if none exist.

    Returns:
        The result of ``some_calculation`` applied to the value returned by
        ``get_files("/path/to/files")``.

    Raises:
        Failure: When ``get_files`` returns something of length zero. The
            failure carries the searched path and a dashboard URL as metadata.
    """
    source_dir = "/path/to/files"
    found_files = get_files(source_dir)

    # Happy path first: anything with a non-zero length is processed.
    if len(found_files) != 0:
        return some_calculation(found_files)

    failure_metadata = {
        "filepath": EventMetadata.path(source_dir),
        "dashboard_url": EventMetadata.url("http://mycoolsite.com/failures"),
    }
    raise Failure(description="No files to process", metadata=failure_metadata)
Exemplo n.º 4
0
def my_metadata_materialization_solid(context):
    """Persist the result of ``read_df()`` and report it as ``my_dataset``.

    Args:
        context: Execution context; not used by this function.

    Yields:
        An ``AssetMaterialization`` for ``my_dataset`` with text, path, URL
        and size metadata, then an ``Output`` wrapping the value returned by
        ``persist_to_storage``.
    """
    frame = read_df()
    stored_at = persist_to_storage(frame)

    # Entries are built in the same order as they appear in the event.
    event_metadata = {
        "text_metadata": "Text-based metadata for this event",
        "path": EventMetadata.path(stored_at),
        "dashboard_url": EventMetadata.url("http://mycoolsite.com/url_for_my_data"),
        "size (bytes)": calculate_bytes(frame),
    }
    materialization = AssetMaterialization(
        asset_key="my_dataset",
        description="Persisted result to storage",
        metadata=event_metadata,
    )

    yield materialization
    yield Output(stored_at)
Exemplo n.º 5
0
def many_table_materializations(_context):
    """Yield one ``table_info`` materialization per entry in ``raw_tables``.

    The markdown file named by ``MARKDOWN_EXAMPLE`` (resolved relative to this
    module via ``file_relative_path``) is read once and attached to every
    event as the ``table_blurb`` metadata entry.

    Args:
        _context: Execution context; not used by this function.

    Yields:
        An ``AssetMaterialization`` for each ``table`` in ``raw_tables``.
    """
    # Read the markdown and close the file BEFORE the first yield. Yielding
    # inside the ``with`` block would keep the handle open for as long as the
    # generator stays suspended (or forever, if it is never exhausted).
    with open(file_relative_path(__file__, MARKDOWN_EXAMPLE), "r") as f:
        md_str = f.read()

    for table in raw_tables:
        yield AssetMaterialization(
            asset_key="table_info",
            metadata={
                "table_name": table,
                "table_path": EventMetadata.path(f"/path/to/{table}"),
                "table_data": {"name": table},
                "table_name_big": EventMetadata.url(f"https://bigty.pe/{table}"),
                "table_blurb": EventMetadata.md(md_str),
                "big_int": 29119888133298982934829348,
                "float_nan": float("nan"),
            },
        )
Exemplo n.º 6
0
def observes_dataset_op(context):
    """Persist the result of ``read_df()`` and log events for ``my_dataset``.

    Two events are logged through ``context.log_event``: first an
    ``AssetObservation`` carrying text, path, URL and size metadata, then a
    bare ``AssetMaterialization`` for the same asset key.

    Args:
        context: Execution context providing ``log_event``.

    Returns:
        The value returned by ``persist_to_storage``.
    """
    frame = read_df()
    stored_at = persist_to_storage(frame)

    observation_metadata = {
        "text_metadata": "Text-based metadata for this event",
        "path": EventMetadata.path(stored_at),
        "dashboard_url": EventMetadata.url("http://mycoolsite.com/url_for_my_data"),
        "size (bytes)": calculate_bytes(frame),
    }
    observation = AssetObservation(
        asset_key="my_dataset",
        metadata=observation_metadata,
    )
    materialization = AssetMaterialization(asset_key="my_dataset")

    # Observation first, materialization second.
    for event in (observation, materialization):
        context.log_event(event)

    return stored_at
Exemplo n.º 7
0
def read_file(context):
    """Stat a configured file and, if it exists, materialize it as an asset.

    Args:
        context: Execution context. ``context.solid_config`` must provide
            ``"filename"`` and ``"directory"``; ``context.log`` is used for
            info and error messages.

    Yields:
        When the file exists: an ``AssetMaterialization`` keyed by
        ``["log_file", <filename>]`` with the joined path and the file's
        size, ctime and mtime, followed by ``Output(<filename>)``.
        When the file does not exist: nothing. An error is logged and the
        generator finishes without yielding.
    """
    relative_filename = context.solid_config["filename"]
    directory = context.solid_config["directory"]
    filename = os.path.join(directory, relative_filename)

    # Guard only the stat call. Wrapping the logging and the yields as well
    # would report any FileNotFoundError raised there (or thrown into the
    # generator at a yield) as a misleading "No file found" message.
    try:
        fstats = os.stat(filename)
    except FileNotFoundError:
        context.log.error(f"No file found: {relative_filename}")
        return

    context.log.info(f"Found file {relative_filename}")
    yield AssetMaterialization(
        asset_key=AssetKey(["log_file", relative_filename]),
        metadata={
            "path": EventMetadata.path(filename),
            "File status": {
                "size": fstats.st_size,
                "ctime": fstats.st_ctime,
                "mtime": fstats.st_mtime,
            },
        },
    )
    yield Output(relative_filename)
Exemplo n.º 8
0
def many_materializations_and_passing_expectations(_context):
    """Yield a materialization and a passing expectation for each fixed table.

    Args:
        _context: Execution context; not used by this function.

    Yields:
        For every table name, in order: an ``AssetMaterialization`` for the
        ``table_info`` asset whose ``table_path`` is ``/path/to/<table>.raw``,
        then an ``ExpectationResult`` with ``success=True`` labelled
        ``<table>.row_count``.
    """
    table_names = (
        "users",
        "groups",
        "events",
        "friends",
        "pages",
        "fans",
        "event_admins",
        "group_admins",
    )

    for table_name in table_names:
        raw_path = EventMetadata.path(f"/path/to/{table_name}.raw")
        yield AssetMaterialization(
            asset_key="table_info",
            metadata={"table_path": raw_path},
        )
        yield ExpectationResult(
            success=True,
            label=f"{table_name}.row_count",
            description=f"Row count passed for {table_name}",
        )