示例#1
0
def make_list_of_source_assets():
    """Return a list holding the ``buddy_holly`` and ``jerry_lee_lewis`` source assets."""
    # The source assets are deliberately created inside this function rather
    # than at module level, so the only way to discover them is through the
    # returned list.
    asset_names = ("buddy_holly", "jerry_lee_lewis")
    return [SourceAsset(key=AssetKey(asset_name)) for asset_name in asset_names]
示例#2
0
def test_source_assets():
    """Check that a repository exposes an asset group's source assets by key."""
    foo, bar = (SourceAsset(key=AssetKey(name)) for name in ("foo", "bar"))

    @repository
    def my_repo():
        # A group with no computed assets, only the two source assets.
        group = AssetGroup(assets=[], source_assets=[foo, bar])
        return [group]

    # Fresh AssetKey instances are built for the expected mapping, so the
    # comparison relies on key equality rather than on object identity.
    expected_by_key = {AssetKey("foo"): foo, AssetKey("bar"): bar}
    assert my_repo.source_assets_by_key == expected_by_key
def test_source_asset_partitions():
    """Run a daily-partitioned asset that reads an hourly-partitioned source asset.

    The job is executed for partition ``2021-06-06``; the custom IO manager
    asserts that the input is loaded with the key range ``2021-06-06-00:00``
    through ``2021-06-06-23:00``.
    """
    hourly_partitions = HourlyPartitionsDefinition(start_date="2021-05-05-00:00")
    hourly_asset = SourceAsset(AssetKey("hourly_asset"), partitions_def=hourly_partitions)

    daily_partitions = DailyPartitionsDefinition(start_date="2021-05-05")

    @asset(partitions_def=daily_partitions)
    def daily_asset(hourly_asset):
        # load_input below returns nothing, so None is the expected value here.
        assert hourly_asset is None

    class CustomIOManager(IOManager):
        def handle_output(self, context, obj):
            # Outputs are intentionally dropped.
            return None

        def load_input(self, context):
            # Only the partition key range handed to the IO manager is checked.
            partition_range = context.asset_partition_key_range
            assert partition_range.start == "2021-06-06-00:00"
            assert partition_range.end == "2021-06-06-23:00"

    io_manager_def = IOManagerDefinition.hardcoded_io_manager(CustomIOManager())
    daily_job = build_assets_job(
        name="daily_job",
        assets=[daily_asset],
        source_assets=[hourly_asset],
        resource_defs={"io_manager": io_manager_def},
    )

    result = daily_job.execute_in_process(partition_key="2021-06-06")
    assert result.success
示例#4
0
# pylint: disable=redefined-outer-name
from dagster import AssetKey, SourceAsset, asset

# Source asset keyed "source_asset"; asset1 below takes a parameter of the same name.
source_asset = SourceAsset(AssetKey("source_asset"))


# Asset that takes `source_asset` as its only parameter. The body just asserts
# that the received value is truthy and returns nothing.
@asset
def asset1(source_asset):
    assert source_asset


# Asset with no parameters and an empty body (returns None).
@asset
def asset2():
    pass
示例#5
0
# pylint: disable=redefined-outer-name
# start_marker
from dagster import AssetGroup, AssetKey, SourceAsset, asset

# Source asset keyed "my_source_asset"; my_derived_asset below takes a
# parameter of the same name.
my_source_asset = SourceAsset(key=AssetKey("my_source_asset"))


@asset
def my_derived_asset(my_source_asset):
    # Return the received value with the one-element list [4] added via `+`.
    extra_items = [4]
    return my_source_asset + extra_items


# Group containing my_derived_asset, with my_source_asset registered as a
# source asset.
asset_group = AssetGroup(assets=[my_derived_asset],
                         source_assets=[my_source_asset])

# end_marker
示例#6
0
# pylint: disable=redefined-outer-name

# start_marker
import pandas as pd
from dagster import AssetKey, SourceAsset, asset
from pandas import DataFrame

# Source asset keyed "sfo_q2_weather_sample"; daily_temperature_highs below
# takes a parameter of the same name.
sfo_q2_weather_sample = SourceAsset(
    key=AssetKey("sfo_q2_weather_sample"),
    description="Weather samples, taken every five minutes at SFO",
)


# Builds one row per calendar day holding the column-wise maximum of that day's
# samples, with the "tmpf" column renamed to "max_tmpf". The input must have a
# "valid" column that pandas can parse as timestamps; the result is indexed by
# "valid_date" (the timestamp truncated to midnight).
@asset
def daily_temperature_highs(sfo_q2_weather_sample: DataFrame) -> DataFrame:
    """Computes the temperature high for each day"""
    # The samples are taken every five minutes, so the parsed timestamps have to
    # be truncated to midnight: grouping on the raw timestamps would produce one
    # group per sample instead of one per day.
    valid_date = pd.to_datetime(sfo_q2_weather_sample["valid"]).dt.normalize()
    # `assign` returns a new frame, so the caller's DataFrame is not mutated.
    daily_groups = sfo_q2_weather_sample.assign(valid_date=valid_date).groupby("valid_date")
    return daily_groups.max().rename(columns={"tmpf": "max_tmpf"})


@asset
def hottest_dates(daily_temperature_highs: DataFrame) -> DataFrame:
    """Computes the 10 hottest dates"""
    # Keep the ten rows with the largest values in the "max_tmpf" column.
    row_count = 10
    ranking_column = "max_tmpf"
    return daily_temperature_highs.nlargest(row_count, ranking_column)


# end_marker
示例#7
0
from dagster import AssetKey, SourceAsset, asset

# Module-level source asset keyed "patsy_cline".
patsy_cline = SourceAsset(key=AssetKey("patsy_cline"))


# Asset with no parameters and an empty body (returns None).
@asset
def miles_davis():
    pass
示例#8
0

# Asset with no parameters that returns the constant 5.
@asset
def upstream_asset():
    return 5


# Asset group containing only upstream_asset.
upstream_asset_group = AssetGroup([upstream_asset])


@repository
def upstream_assets_repository():
    # The repository's only definition is the upstream asset group.
    repository_contents = [upstream_asset_group]
    return repository_contents


# One-element list of source assets, keyed "upstream_asset" like the asset
# defined above; passed to downstream_asset_group1 below.
source_assets = [SourceAsset(AssetKey("upstream_asset"))]


# Asset that takes `upstream_asset` as its only parameter. The body just
# asserts that the received value is truthy and returns nothing.
@asset
def downstream_asset1(upstream_asset):
    assert upstream_asset


# Second asset with the same shape as downstream_asset1: takes `upstream_asset`
# and only asserts that it is truthy.
@asset
def downstream_asset2(upstream_asset):
    assert upstream_asset


# Group containing downstream_asset1, with the source_assets list above
# registered as its source assets.
downstream_asset_group1 = AssetGroup(assets=[downstream_asset1],
                                     source_assets=source_assets)
downstream_asset_group2 = AssetGroup(assets=[downstream_asset2],
示例#9
0
# pylint: disable=redefined-outer-name
import time

from dagster import AssetGroup, AssetKey, IOManager, IOManagerDefinition, SourceAsset, asset

# Source asset keyed "sfo_q2_weather_sample"; daily_temperature_highs below
# takes a parameter of the same name.
sfo_q2_weather_sample = SourceAsset(key=AssetKey("sfo_q2_weather_sample"))


class DataFrame:
    # Empty placeholder type: it has no attributes or methods of its own and is
    # only instantiated and used in annotations within this module.
    ...


class DummyIOManager(IOManager):
    """IO manager stub: discards outputs and loads a fresh, empty ``DataFrame``."""

    def handle_output(self, context, obj: DataFrame):
        """Assert that ``context`` and ``obj`` are both truthy; nothing is stored."""
        assert context and obj

    def load_input(self, context):
        """Assert that ``context`` is truthy and return a new ``DataFrame``."""
        assert context
        loaded_frame = DataFrame()
        return loaded_frame


@asset
def daily_temperature_highs(sfo_q2_weather_sample: DataFrame) -> DataFrame:
    """Computes the temperature high for each day"""
    # Placeholder implementation: check the input is truthy, pause for three
    # seconds, then hand back a new empty DataFrame.
    assert sfo_q2_weather_sample
    pause_seconds = 3
    time.sleep(pause_seconds)
    placeholder_frame = DataFrame()
    return placeholder_frame


@asset
示例#10
0
# pylint: disable=redefined-outer-name

# start_marker
import pandas as pd
from pandas import DataFrame

from dagster import AssetKey, SourceAsset, asset

# Source asset keyed "sfo_q2_weather_sample", carrying a description and a
# "format" metadata entry; daily_temperature_highs below takes a parameter of
# the same name.
sfo_q2_weather_sample = SourceAsset(
    key=AssetKey("sfo_q2_weather_sample"),
    description="Weather samples, taken every five minutes at SFO",
    metadata={"format": "csv"},
)


# Builds one row per calendar day holding the column-wise maximum of that day's
# samples, with the "tmpf" column renamed to "max_tmpf". The input must have a
# "valid" column that pandas can parse as timestamps; the result is indexed by
# "valid_date" (the timestamp truncated to midnight).
@asset
def daily_temperature_highs(sfo_q2_weather_sample: DataFrame) -> DataFrame:
    """Computes the temperature high for each day"""
    # The samples are taken every five minutes, so the parsed timestamps have
    # to be truncated to midnight: grouping on the raw timestamps would
    # produce one group per sample instead of one per day.
    valid_date = pd.to_datetime(
        sfo_q2_weather_sample["valid"]).dt.normalize()
    # `assign` returns a new frame, so the caller's DataFrame is not mutated.
    daily_groups = sfo_q2_weather_sample.assign(
        valid_date=valid_date).groupby("valid_date")
    return daily_groups.max().rename(columns={"tmpf": "max_tmpf"})


@asset
def hottest_dates(daily_temperature_highs: DataFrame) -> DataFrame:
    """Computes the 10 hottest dates"""
    # Keep the ten rows with the largest values in the "max_tmpf" column.
    row_count = 10
    ranking_column = "max_tmpf"
    return daily_temperature_highs.nlargest(row_count, ranking_column)


# end_marker
示例#11
0
from dagster import AssetKey, SourceAsset, asset

# importing this makes it show up twice when we collect everything
from .asset_subpackage.another_module_with_assets import miles_davis

# References the imported name so the import above is actually used; presumably
# this keeps linters from flagging it as unused — TODO confirm.
assert miles_davis

# Module-level source asset keyed "elvis_presley".
elvis_presley = SourceAsset(key=AssetKey("elvis_presley"))


# Asset with no parameters and an empty body (returns None).
@asset
def chuck_berry():
    pass