def make_list_of_source_assets():
    """Return two source assets that are only reachable through the returned list.

    The assets are created inside this function instead of being bound to
    module-level names, so that they need to be discovered through a list.
    """
    key_names = ("buddy_holly", "jerry_lee_lewis")
    return [SourceAsset(key=AssetKey(key_name)) for key_name in key_names]
def test_source_assets():
    """Check that a repository exposes its asset group's source assets by key."""
    foo_source = SourceAsset(key=AssetKey("foo"))
    bar_source = SourceAsset(key=AssetKey("bar"))
    group = AssetGroup(assets=[], source_assets=[foo_source, bar_source])

    @repository
    def my_repo():
        return [group]

    expected_by_key = {
        AssetKey("foo"): foo_source,
        AssetKey("bar"): bar_source,
    }
    assert my_repo.source_assets_by_key == expected_by_key
def test_source_asset_partitions():
    """Run a daily-partitioned asset downstream of an hourly-partitioned source asset.

    The job is executed for partition "2021-06-06"; the IO manager's
    ``load_input`` asserts the partition key range it is asked for.
    """
    hourly_source = SourceAsset(
        key=AssetKey("hourly_asset"),
        partitions_def=HourlyPartitionsDefinition(start_date="2021-05-05-00:00"),
    )

    @asset(partitions_def=DailyPartitionsDefinition(start_date="2021-05-05"))
    def daily_asset(hourly_asset):
        # CustomIOManager.load_input below returns nothing for this input.
        assert hourly_asset is None

    class CustomIOManager(IOManager):
        """IO manager that stores nothing and checks the requested input range."""

        def handle_output(self, context, obj):
            """Discard the output."""

        def load_input(self, context):
            """Assert the requested partition key range; implicitly return None."""
            requested_range = context.asset_partition_key_range
            assert requested_range.start == "2021-06-06-00:00"
            assert requested_range.end == "2021-06-06-23:00"

    io_manager_def = IOManagerDefinition.hardcoded_io_manager(CustomIOManager())
    daily_job = build_assets_job(
        name="daily_job",
        assets=[daily_asset],
        source_assets=[hourly_source],
        resource_defs={"io_manager": io_manager_def},
    )

    result = daily_job.execute_in_process(partition_key="2021-06-06")
    assert result.success
# pylint: disable=redefined-outer-name
from dagster import AssetKey, SourceAsset, asset

# Source asset whose name is reused as the parameter of `asset1` below
# (hence the redefined-outer-name suppression above).
source_asset = SourceAsset(key=AssetKey("source_asset"))


@asset
def asset1(source_asset):
    # Only checks that the value received for `source_asset` is truthy.
    assert source_asset


@asset
def asset2():
    # Takes no inputs and produces no value.
    pass
# pylint: disable=redefined-outer-name
# start_marker
from dagster import AssetGroup, AssetKey, SourceAsset, asset

my_source_asset = SourceAsset(AssetKey("my_source_asset"))


@asset
def my_derived_asset(my_source_asset):
    # Appends a one-element list to the value received for `my_source_asset`.
    return my_source_asset + [4]


asset_group = AssetGroup(
    assets=[my_derived_asset],
    source_assets=[my_source_asset],
)

# end_marker
# pylint: disable=redefined-outer-name
# start_marker
import pandas as pd
from dagster import AssetKey, SourceAsset, asset
from pandas import DataFrame

sfo_q2_weather_sample = SourceAsset(
    key=AssetKey("sfo_q2_weather_sample"),
    description="Weather samples, taken every five minutes at SFO",
)


@asset
def daily_temperature_highs(sfo_q2_weather_sample: DataFrame) -> DataFrame:
    """Computes the temperature high for each day"""
    # The samples are taken every five minutes, so each timestamp is truncated
    # to its calendar date before grouping. Grouping on the full timestamp
    # would produce one group per sample rather than one per day.
    sfo_q2_weather_sample["valid_date"] = pd.to_datetime(sfo_q2_weather_sample["valid"]).dt.date
    return sfo_q2_weather_sample.groupby("valid_date").max().rename(columns={"tmpf": "max_tmpf"})


@asset
def hottest_dates(daily_temperature_highs: DataFrame) -> DataFrame:
    """Computes the 10 hottest dates"""
    # Rows are ranked by the "max_tmpf" column produced by daily_temperature_highs.
    return daily_temperature_highs.nlargest(10, "max_tmpf")


# end_marker
from dagster import AssetKey, SourceAsset, asset

# Module-level source asset.
patsy_cline = SourceAsset(AssetKey("patsy_cline"))


@asset
def miles_davis():
    # Takes no inputs and produces no value.
    pass
@asset def upstream_asset(): return 5 upstream_asset_group = AssetGroup([upstream_asset]) @repository def upstream_assets_repository(): return [upstream_asset_group] source_assets = [SourceAsset(AssetKey("upstream_asset"))] @asset def downstream_asset1(upstream_asset): assert upstream_asset @asset def downstream_asset2(upstream_asset): assert upstream_asset downstream_asset_group1 = AssetGroup(assets=[downstream_asset1], source_assets=source_assets) downstream_asset_group2 = AssetGroup(assets=[downstream_asset2],
# pylint: disable=redefined-outer-name import time from dagster import AssetGroup, AssetKey, IOManager, IOManagerDefinition, SourceAsset, asset sfo_q2_weather_sample = SourceAsset(key=AssetKey("sfo_q2_weather_sample")) class DataFrame: pass class DummyIOManager(IOManager): def handle_output(self, context, obj: DataFrame): assert context assert obj def load_input(self, context): assert context return DataFrame() @asset def daily_temperature_highs(sfo_q2_weather_sample: DataFrame) -> DataFrame: """Computes the temperature high for each day""" assert sfo_q2_weather_sample time.sleep(3) return DataFrame() @asset
# pylint: disable=redefined-outer-name
# start_marker
import pandas as pd
from pandas import DataFrame

from dagster import AssetKey, SourceAsset, asset

sfo_q2_weather_sample = SourceAsset(
    key=AssetKey("sfo_q2_weather_sample"),
    description="Weather samples, taken every five minutes at SFO",
    metadata={"format": "csv"},
)


@asset
def daily_temperature_highs(sfo_q2_weather_sample: DataFrame) -> DataFrame:
    """Computes the temperature high for each day"""
    # The samples are taken every five minutes, so each timestamp is truncated
    # to its calendar date before grouping. Grouping on the full timestamp
    # would produce one group per sample rather than one per day.
    sfo_q2_weather_sample["valid_date"] = pd.to_datetime(sfo_q2_weather_sample["valid"]).dt.date
    return sfo_q2_weather_sample.groupby("valid_date").max().rename(columns={"tmpf": "max_tmpf"})


@asset
def hottest_dates(daily_temperature_highs: DataFrame) -> DataFrame:
    """Computes the 10 hottest dates"""
    # Rows are ranked by the "max_tmpf" column produced by daily_temperature_highs.
    return daily_temperature_highs.nlargest(10, "max_tmpf")


# end_marker
from dagster import AssetKey, SourceAsset, asset

# importing this makes it show up twice when we collect everything
from .asset_subpackage.another_module_with_assets import miles_davis

# References the imported name so the import above is not flagged as unused.
assert miles_davis

elvis_presley = SourceAsset(AssetKey("elvis_presley"))


@asset
def chuck_berry():
    # Takes no inputs and produces no value.
    pass