def test_failing_type_eval_column():
    """A column failing both validators surfaces expected/offending/actual metadata."""
    ntype = create_structured_dataframe_type(
        "NumericType",
        columns_validator=column_validator,
        columns_aggregate_validator=aggregate_validator,
        dataframe_validator=dataframe_validator,
    )

    @op(out={"basic_dataframe": Out(dagster_type=ntype)})
    def create_dataframe(_):
        # "a" at row 1 violates both the dtype and in-range validators on "foo".
        yield Output(
            DataFrame({"foo": [1, "a", 7], "bar": [9, 10, 11]}),
            output_name="basic_dataframe",
        )

    @graph
    def basic_graph():
        return create_dataframe()

    result = basic_graph.execute_in_process(raise_on_error=False)

    # The first successful-output event carries the type-check metadata.
    output = next(
        event for event in result.all_node_events if event.is_successful_output
    )
    type_check = output.event_specific_data.type_check_data
    entries = type_check.metadata_entries
    assert len(entries) == 1

    column_const = entries[0]
    assert column_const.label == "columns-constraint-metadata"
    payload = column_const.entry_data.data
    assert payload["expected"] == {
        "foo": {
            "in_range_validation_fn": in_range_validator.__doc__.strip(),
            "dtype_in_set_validation_fn": dtype_is_num_validator.__doc__.strip(),
        }
    }
    assert payload["offending"] == {
        "foo": {
            "dtype_in_set_validation_fn": ["row 1"],
            "in_range_validation_fn": ["row 1", "row 2"],
        }
    }
    assert payload["actual"] == {
        "foo": {
            "dtype_in_set_validation_fn": ["a"],
            "in_range_validation_fn": ["a", 7],
        }
    }
def test_failing_type_eval_aggregate():
    """An aggregate-validator failure reports the offending aggregate metadata."""
    ntype = create_structured_dataframe_type(
        "NumericType",
        columns_validator=column_validator,
        columns_aggregate_validator=aggregate_validator,
        dataframe_validator=dataframe_validator,
    )

    @solid(output_defs=[OutputDefinition(name="basic_dataframe", dagster_type=ntype)])
    def create_dataframe(_):
        # Duplicate 10s in "bar" trip the all_unique aggregate validator.
        yield Output(
            DataFrame({"foo": [1, 2, 3], "bar": [9, 10, 10]}),
            output_name="basic_dataframe",
        )

    @pipeline
    def basic_pipeline():
        return create_dataframe()

    result = execute_pipeline(basic_pipeline, raise_on_error=False)
    successful = [e for e in result.step_event_list if e.is_successful_output]
    type_check = successful[0].event_specific_data.type_check_data
    entries = type_check.metadata_entries
    assert len(entries) == 1

    agg_entry = entries[0]
    assert agg_entry.label == "column-aggregates-constraint-metadata"
    payload = agg_entry.entry_data.data
    assert payload["expected"] == {
        "bar": {"all_unique_validator": all_unique_validator.__doc__.strip()}
    }
    assert payload["offending"] == {"bar": {"all_unique_validator": "a violation"}}
    assert payload["actual"] == {"bar": {"all_unique_validator": [10.0]}}
def test_failing_type_eval_dataframe_legacy():
    """Dataframe-constraint failure via the legacy solid/pipeline APIs.

    Renamed from ``test_failing_type_eval_dataframe``: a later definition in
    this module reuses that exact name (the op/graph variant), which silently
    shadowed this function so pytest never collected or ran it.
    """
    ntype = create_structured_dataframe_type(
        "NumericType",
        columns_validator=column_validator,
        columns_aggregate_validator=aggregate_validator,
        dataframe_validator=dataframe_validator,
    )

    @solid(output_defs=[OutputDefinition(name="basic_dataframe", dagster_type=ntype)])
    def create_dataframe(_):
        # "baz" is extra and "bar" is missing relative to the expected schema.
        yield Output(
            DataFrame({"foo": [1, 2, 3], "baz": [9, 10, 10]}),
            output_name="basic_dataframe",
        )

    @pipeline
    def basic_pipeline():
        return create_dataframe()

    result = execute_pipeline(basic_pipeline, raise_on_error=False)
    output = [
        item for item in result.step_event_list if item.is_successful_output
    ][0]
    output_data = output.event_specific_data.type_check_data
    output_metadata = output_data.metadata_entries
    assert len(output_metadata) == 1
    column_const = output_metadata[0]
    assert column_const.label == "dataframe-constraint-metadata"
    column_const_data = column_const.entry_data.data
    assert column_const_data["expected"] == ["foo", "bar"]
    assert column_const_data["actual"] == {
        "extra_columns": ["baz"],
        "missing_columns": ["bar"],
    }
def test_failing_type_eval_dataframe():
    """Schema mismatch (extra + missing columns) is reported in dataframe metadata."""
    ntype = create_structured_dataframe_type(
        "NumericType",
        columns_validator=column_validator,
        columns_aggregate_validator=aggregate_validator,
        dataframe_validator=dataframe_validator,
    )

    @op(out={"basic_dataframe": Out(dagster_type=ntype)})
    def create_dataframe(_):
        # "baz" is extra and "bar" is missing relative to the expected schema.
        frame = DataFrame({"foo": [1, 2, 3], "baz": [9, 10, 10]})
        yield Output(frame, output_name="basic_dataframe")

    @graph
    def basic_graph():
        return create_dataframe()

    result = basic_graph.execute_in_process(raise_on_error=False)
    output = next(
        event for event in result.all_node_events if event.is_successful_output
    )
    entries = output.event_specific_data.type_check_data.metadata_entries
    assert len(entries) == 1

    df_entry = entries[0]
    assert df_entry.label == "dataframe-constraint-metadata"
    payload = df_entry.entry_data.data
    assert payload["expected"] == ["foo", "bar"]
    assert payload["actual"] == {
        "extra_columns": ["baz"],
        "missing_columns": ["bar"],
    }
def test_successful_type_eval():
    """A dataframe that satisfies every validator passes the type check."""
    ntype = create_structured_dataframe_type(
        "NumericType",
        columns_validator=column_validator,
        columns_aggregate_validator=aggregate_validator,
        dataframe_validator=dataframe_validator,
    )

    @op(out={"basic_dataframe": Out(dagster_type=ntype)})
    def create_dataframe(_):
        frame = DataFrame({"foo": [1, 2, 3], "bar": [9, 10, 11]})
        yield Output(frame, output_name="basic_dataframe")

    @graph
    def basic_graph():
        return create_dataframe()

    assert basic_graph.execute_in_process().success
def test_failing_type_eval_multi_error_legacy():
    """All three constraint categories failing at once, via legacy solid/pipeline APIs.

    Renamed from ``test_failing_type_eval_multi_error``: a later definition in
    this module reuses that exact name (the op/graph variant), which silently
    shadowed this function so pytest never collected or ran it.
    """
    ntype = create_structured_dataframe_type(
        "NumericType",
        columns_validator=column_validator,
        columns_aggregate_validator=aggregate_validator,
        dataframe_validator=dataframe_validator,
    )

    @solid(output_defs=[OutputDefinition(name="basic_dataframe", dagster_type=ntype)])
    def create_dataframe(_):
        # "a" breaks the column validators, duplicate 10s break the aggregate
        # validator, and the extra "baz" column breaks the dataframe validator.
        yield Output(
            DataFrame({"foo": [1, "a", 7], "baz": [9, 10, 10], "bar": [9, 10, 10]}),
            output_name="basic_dataframe",
        )

    @pipeline
    def basic_pipeline():
        return create_dataframe()

    result = execute_pipeline(basic_pipeline, raise_on_error=False)
    output = [
        item for item in result.step_event_list if item.is_successful_output
    ][0]
    output_data = output.event_specific_data.type_check_data
    output_metadata = output_data.metadata_entries
    assert len(output_metadata) == 3

    agg_data = output_metadata[0]
    assert agg_data.label == "column-aggregates-constraint-metadata"
    agg_metadata = agg_data.entry_data.data
    assert agg_metadata["expected"] == {
        "bar": {"all_unique_validator": all_unique_validator.__doc__.strip()}
    }
    assert agg_metadata["offending"] == {
        "bar": {"all_unique_validator": "a violation"}
    }
    assert agg_metadata["actual"] == {"bar": {"all_unique_validator": [10.0]}}

    column_const = output_metadata[1]
    assert column_const.label == "columns-constraint-metadata"
    column_const_data = column_const.entry_data.data
    assert column_const_data["expected"] == {
        "foo": {
            "in_range_validation_fn": in_range_validator.__doc__.strip(),
            "dtype_in_set_validation_fn": dtype_is_num_validator.__doc__.strip(),
        }
    }
    assert column_const_data["offending"] == {
        "foo": {
            "dtype_in_set_validation_fn": ["row 1"],
            "in_range_validation_fn": ["row 1", "row 2"],
        }
    }
    assert column_const_data["actual"] == {
        "foo": {
            "dtype_in_set_validation_fn": ["a"],
            "in_range_validation_fn": ["a", 7],
        }
    }

    df_data = output_metadata[2]
    assert df_data.label == "dataframe-constraint-metadata"
    df_metadata = df_data.entry_data.data
    assert df_metadata["expected"] == ["foo", "bar"]
    assert df_metadata["actual"] == {
        "extra_columns": ["baz"],
        "missing_columns": [],
    }
def test_failing_type_eval_multi_error():
    """All three constraint categories failing at once emit three metadata entries."""
    ntype = create_structured_dataframe_type(
        "NumericType",
        columns_validator=column_validator,
        columns_aggregate_validator=aggregate_validator,
        dataframe_validator=dataframe_validator,
    )

    @solid(output_defs=[OutputDefinition(name="basic_dataframe", dagster_type=ntype)])
    def create_dataframe(_):
        # "a" breaks the column validators, duplicate 10s break the aggregate
        # validator, and the extra "baz" column breaks the dataframe validator.
        frame = DataFrame(
            {"foo": [1, "a", 7], "baz": [9, 10, 10], "bar": [9, 10, 10]}
        )
        yield Output(frame, output_name="basic_dataframe")

    @pipeline
    def basic_pipeline():
        return create_dataframe()

    result = execute_pipeline(basic_pipeline, raise_on_error=False)
    output = next(
        event for event in result.step_event_list if event.is_successful_output
    )
    entries = output.event_specific_data.type_check_data.metadata_entries
    assert len(entries) == 3

    aggregate_entry, column_entry, dataframe_entry = entries

    assert aggregate_entry.label == "column-aggregates-constraint-metadata"
    aggregate_payload = aggregate_entry.entry_data.data
    assert aggregate_payload["expected"] == {
        "bar": {"all_unique_validator": all_unique_validator.__doc__.strip()}
    }
    assert aggregate_payload["offending"] == {
        "bar": {"all_unique_validator": "a violation"}
    }
    assert aggregate_payload["actual"] == {"bar": {"all_unique_validator": [10.0]}}

    assert column_entry.label == "columns-constraint-metadata"
    column_payload = column_entry.entry_data.data
    assert column_payload["expected"] == {
        "foo": {
            "in_range_validation_fn": in_range_validator.__doc__.strip(),
            "dtype_in_set_validation_fn": dtype_is_num_validator.__doc__.strip(),
        }
    }
    assert column_payload["offending"] == {
        "foo": {
            "dtype_in_set_validation_fn": ["row 1"],
            "in_range_validation_fn": ["row 1", "row 2"],
        }
    }
    assert column_payload["actual"] == {
        "foo": {
            "dtype_in_set_validation_fn": ["a"],
            "in_range_validation_fn": ["a", 7],
        }
    }

    assert dataframe_entry.label == "dataframe-constraint-metadata"
    dataframe_payload = dataframe_entry.entry_data.data
    assert dataframe_payload["expected"] == ["foo", "bar"]
    assert dataframe_payload["actual"] == {
        "extra_columns": ["baz"],
        "missing_columns": [],
    }