from pandas import DataFrame, Series from dagster import AssetIn, asset @asset( ins={ "stories": AssetIn(metadata={"columns": ["id"]}), "comments": AssetIn(metadata={"columns": ["id", "user_id", "parent"]}), }, io_manager_key="warehouse_io_manager", ) def comment_stories(stories: DataFrame, comments: DataFrame) -> DataFrame: """ Comments linked to their root stories. Owners: [email protected], [email protected] """ comments.rename(columns={ "user_id": "commenter_id", "id": "comment_id" }, inplace=True) comments = comments.set_index("comment_id")[["commenter_id", "parent"]] stories = stories.set_index("id")[[]] full_comment_stories = DataFrame( index=Series(name="comment_id", dtype="int"), data={ "story_id": Series(dtype="int"), "commenter_id": Series(dtype="object")
svd = TruncatedSVD(n_components=n_components) svd.fit(user_story_matrix.matrix) total_explained_variance = svd.explained_variance_ratio_.sum() yield Output( svd, metadata={ "Total explained variance ratio": total_explained_variance, "Number of components": n_components, }, ) @asset( ins={"stories": AssetIn(metadata={"columns": ["id", "title"]})}, io_manager_key="warehouse_io_manager", ) def component_top_stories(recommender_model: TruncatedSVD, user_story_matrix: IndexedCooMatrix, stories: DataFrame): """ For each component in the collaborative filtering model, the titles of the top stories it's associated with. """ n_stories = 10 components_column = [] titles_column = [] story_titles = stories.set_index("id")
# pylint: disable=redefined-outer-name
# start_marker
from dagster import AssetIn, asset


# Asset declared under the namespace ["one", "two", "three"]; it takes no
# inputs and produces a fixed three-element list.
@asset(
    namespace=["one", "two", "three"],
)
def upstream_asset():
    values = [1, 2, 3]
    return values


# The parameter name alone does not carry the namespace, so the input is
# declared explicitly through AssetIn with the same namespace as the asset
# above.
@asset(
    ins={
        "upstream_asset": AssetIn(namespace=["one", "two", "three"]),
    },
)
def downstream_asset(upstream_asset):
    # Concatenation yields a new list; the loaded input is left untouched.
    extended = upstream_asset + [4]
    return extended


# end_marker
from dagster import AssetIn, asset


# Source asset with no inputs; produces a fixed three-element list.
@asset
def upstream_asset():
    values = [1, 2, 3]
    return values


# The function argument is called `upstream`, which differs from the name of
# the asset defined above, so the `ins` mapping ties that argument to
# AssetIn("upstream_asset") explicitly.
@asset(
    ins={
        "upstream": AssetIn("upstream_asset"),
    },
)
def downstream_asset(upstream):
    # Concatenation yields a new list; the loaded input is left untouched.
    extended = upstream + [4]
    return extended
# pylint: disable=redefined-outer-name
from dagster import AssetGroup, AssetIn, asset

# Four-component namespace shared by asset1's definition and by the input
# declaration on asset2.
namespace1 = [
    "s3",
    "superdomain_1",
    "subdomain_1",
    "subsubdomain_1",
]


# Placeholder asset: performs no work and yields None.
@asset(namespace=namespace1)
def asset1():
    return None


# Lives under its own four-component namespace and depends on asset1, whose
# namespace is supplied through AssetIn.
@asset(
    namespace=[
        "s3",
        "superdomain_2",
        "subdomain_2",
        "subsubdomain_2",
    ],
    ins={
        "asset1": AssetIn(namespace=namespace1),
    },
)
def asset2(asset1):
    # asset1 returns nothing, so the value received here is expected to be None.
    assert asset1 is None


# Group containing both assets defined above.
long_asset_keys_group = AssetGroup([asset1, asset2])