if not rows_with_unexpected_buckets.empty: raise ColumnConstraintViolationException( constraint_name=self.name, constraint_description=self.error_description, column_name=column_name, offending_rows=rows_with_unexpected_buckets, ) CustomTripDataFrame = create_dagster_pandas_dataframe_type( name="CustomTripDataFrame", columns=[ PandasColumn( "amount_paid", constraints=[ ColumnDTypeInSetConstraint({"int64"}), DivisibleByFiveConstraint() ], ) ], ) # end_custom_col @solid( output_defs=[ OutputDefinition(name="custom_trip_dataframe", dagster_type=CustomTripDataFrame) ], ) def load_custom_trip_dataframe(_) -> DataFrame: return read_csv(
def validate(self, dataframe, column_name):
    """Check that every value in ``column_name`` is a multiple of five.

    Args:
        dataframe: The pandas DataFrame being validated.
        column_name: Name of the column whose values are checked.

    Raises:
        ColumnConstraintViolationException: If any value in ``column_name``
            leaves a non-zero remainder when divided by 5. The offending
            rows are attached to the exception.
    """
    # Vectorised remainder test: a single Series operation that always yields
    # a boolean mask, instead of invoking a Python lambda once per row.
    not_multiple_of_five = dataframe[column_name] % 5 != 0
    rows_with_unexpected_buckets = dataframe[not_multiple_of_five]
    if not rows_with_unexpected_buckets.empty:
        raise ColumnConstraintViolationException(
            constraint_name=self.name,
            constraint_description=self.error_description,
            column_name=column_name,
            offending_rows=rows_with_unexpected_buckets,
        )


# DataFrame type whose 'amount_paid' column must have dtype int64 and pass
# the divisible-by-five constraint.
CustomTripDataFrame = create_dagster_pandas_dataframe_type(
    name='CustomTripDataFrame',
    columns=[
        PandasColumn(
            'amount_paid',
            constraints=[ColumnDTypeInSetConstraint({'int64'}), DivisibleByFiveConstraint()],
        )
    ],
)


@solid(
    output_defs=[OutputDefinition(name='custom_trip_dataframe', dagster_type=CustomTripDataFrame)],
)
def load_custom_trip_dataframe(_) -> DataFrame:
    # Reads ebike_trips.csv (resolved relative to this script) and parses the
    # two timestamp columns with an explicit microsecond-precision format.
    return read_csv(
        script_relative_path('./ebike_trips.csv'),
        parse_dates=['start_time', 'end_time'],
        date_parser=lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S.%f'),
    )
from datetime import datetime

from dagster_pandas import PandasColumn, create_dagster_pandas_dataframe_type
from pandas import DataFrame, read_csv

from dagster import OutputDefinition, pipeline, solid
from dagster.utils import script_relative_path

# Column-level schema for the trip data: each entry below attaches the
# dagster_pandas column check named by its factory method.
TripDataFrame = create_dagster_pandas_dataframe_type(
    name="TripDataFrame",
    columns=[
        PandasColumn.integer_column("bike_id", min_value=0),
        PandasColumn.categorical_column("color", categories={"red", "green", "blue"}),
        # Both timestamps share the same lower bound of 2020-02-10.
        PandasColumn.datetime_column(
            "start_time",
            min_datetime=datetime(year=2020, month=2, day=10),
        ),
        PandasColumn.datetime_column(
            "end_time",
            min_datetime=datetime(year=2020, month=2, day=10),
        ),
        PandasColumn.string_column("station"),
        PandasColumn.exists("amount_paid"),
        PandasColumn.boolean_column("was_member"),
    ],
)


@solid(output_defs=[OutputDefinition(name="trip_dataframe", dagster_type=TripDataFrame)])
def load_trip_dataframe(_) -> DataFrame:
    # Loads ebike_trips.csv from beside this script; the single output is
    # declared with the TripDataFrame type above.
    def parse_timestamp(raw):
        # Timestamps in the CSV carry fractional seconds.
        return datetime.strptime(raw, "%Y-%m-%d %H:%M:%S.%f")

    csv_path = script_relative_path("./ebike_trips.csv")
    timestamp_columns = ["start_time", "end_time"]
    return read_csv(
        csv_path,
        parse_dates=timestamp_columns,
        date_parser=parse_timestamp,
    )
from dagster import solid, SolidExecutionContext, Field, Array, String from dagster_pandas import PandasColumn, create_dagster_pandas_dataframe_type from pandas import DataFrame from typing import Any, Optional, List, TYPE_CHECKING from azmeta.access.resource_graph import query_dataframe if TYPE_CHECKING: ResourcesDataFrame = Any # DataFrame # Pandas has no type info yet. else: ResourcesDataFrame = create_dagster_pandas_dataframe_type( name='ResourcesDataFrame', columns=[ PandasColumn.string_column('resource_id'), PandasColumn.string_column('subscription_id'), ], ) @solid( config_schema={ 'subscriptions': Field(Array(String), description='The subscriptions to query in the Resource Graph.'), 'filters': Field(String, is_required=False, description='Conditions for a KQL where operator.'), 'custom_projections': Field(String, is_required=False, description='Assignments for a KQL project operator.'),
import functools from azmeta.access.monitor_logs import (PerformanceCounterSpec, query_dataframe_by_workspace_chunk, build_perf_counter_percentile_query, build_disk_percentile_query) from azmeta.access.utils.chunking import build_grouped_chunk_list from .resources import ResourcesDataFrame from .specifications import AzureComputeSpecifications if TYPE_CHECKING: UtilizationDataFrame = Any # DataFrame # Pandas has no type info yet. else: UtilizationDataFrame = create_dagster_pandas_dataframe_type( name='UtilizationDataFrame', columns=[ PandasColumn.string_column('resource_id'), PandasColumn.float_column('percentile_50th'), PandasColumn.float_column('percentile_80th'), PandasColumn.float_column('percentile_90th'), PandasColumn.float_column('percentile_95th'), PandasColumn.float_column('percentile_99th'), PandasColumn.float_column('max'), PandasColumn.integer_column('samples'), ], ) @solid(required_resource_keys={'azure_monitor'}) def query_cpu_utilization( context: SolidExecutionContext, resources: ResourcesDataFrame) -> UtilizationDataFrame:
from datetime import datetime from dagster_pandas import PandasColumn, create_dagster_pandas_dataframe_type from pandas import DataFrame, read_csv from dagster import OutputDefinition, pipeline, solid from dagster.utils import script_relative_path TripDataFrame = create_dagster_pandas_dataframe_type( name='TripDataFrame', columns=[ PandasColumn.integer_column('bike_id', min_value=0), PandasColumn.categorical_column('color', categories={'red', 'green', 'blue'}), PandasColumn.datetime_column('start_time', min_datetime=datetime(year=2020, month=2, day=10)), PandasColumn.datetime_column('end_time', min_datetime=datetime(year=2020, month=2, day=10)), PandasColumn.string_column('station'), PandasColumn.exists('amount_paid'), PandasColumn.boolean_column('was_member'), ], ) @solid(output_defs=[ OutputDefinition(name='trip_dataframe', dagster_type=TripDataFrame)
EventMetadataEntry.text( max(dataframe["day"]), "max_day", "Maximum date of exchange rates", ), EventMetadataEntry.text( str(dataframe["day"].nunique()), "num_unique_day", "Total unique dates of exchange rates", ), EventMetadataEntry.text( str(dataframe["currency"].nunique()), "num_unique_currency", "Total unique currencies of exchange rates", ), EventMetadataEntry.text(str(len(dataframe)), "n_rows", "Number of rows seen in the dataframe"), ] ExchangeRateDataFrame = create_dagster_pandas_dataframe_type( name="ExchangeRateDataFrame", columns=[ PandasColumn.string_column("id"), PandasColumn.string_column("day"), PandasColumn.string_column("currency"), PandasColumn.numeric_column("rate"), ], event_metadata_fn=compute_exchange_rate_dataframe_summary_statistics, )
lambda x: x % 5 != 0)] if not rows_with_unexpected_buckets.empty: raise ColumnConstraintViolationException( constraint_name=self.name, constraint_description=self.error_description, column_name=column_name, offending_rows=rows_with_unexpected_buckets, ) CustomTripDataFrame = create_dagster_pandas_dataframe_type( name='CustomTripDataFrame', columns=[ PandasColumn('amount_paid', constraints=[ ColumnTypeConstraint('int64'), DivisibleByFiveConstraint() ]) ], ) @solid( output_defs=[ OutputDefinition(name='custom_trip_dataframe', dagster_type=CustomTripDataFrame) ], ) def load_custom_trip_dataframe(_) -> DataFrame: return read_csv( script_relative_path('./ebike_trips.csv'), parse_dates=['start_time', 'end_time'],