Exemplo n.º 1
0
def test_custom_dagster_dataframe_hydration_ok():
    """Round-trip a custom dagster-pandas dataframe type through CSV config.

    A frame with a ``foo`` column is written to a temporary CSV, fed to a
    solid through run config, given a ``bar`` column inside the solid, and
    written to a second temporary CSV whose ``bar`` column is then checked.
    """
    seed_frame = DataFrame({'foo': [1, 2, 3]})
    with safe_tempfile_path() as input_csv_fp, safe_tempfile_path() as output_csv_fp:
        seed_frame.to_csv(input_csv_fp)

        # Custom type that only requires the 'foo' column to be present.
        frame_type = create_dagster_pandas_dataframe_type(
            name='TestDataFrame', columns=[PandasColumn.exists('foo')]
        )

        # No docstring on the solid: only comments, so its definition is untouched.
        @solid(
            input_defs=[InputDefinition('test_df', frame_type)],
            output_defs=[OutputDefinition(frame_type)],
        )
        def use_test_dataframe(_, test_df):
            test_df['bar'] = [2, 4, 6]
            return test_df

        # Input is read from one CSV path, output is written to the other.
        per_solid_config = {
            'inputs': {'test_df': {'csv': {'path': input_csv_fp}}},
            'outputs': [{'result': {'csv': {'path': output_csv_fp}}}],
        }
        solid_result = execute_solid(
            use_test_dataframe,
            run_config={'solids': {'use_test_dataframe': per_solid_config}},
        )

        assert solid_result.success
        written_frame = read_csv(output_csv_fp)
        bar_matches = written_frame['bar'] == [2, 4, 6]
        assert all(bar_matches)
Exemplo n.º 2
0
def test_custom_dagster_dataframe_parametrizable_input():
    """Exercise a dataframe type built with a custom hydrator and materializer.

    The input is selected with the ``door_a`` config key and the output with
    ``devnull``; the test checks the solid's output frame and the single
    materialization event reported during compute.
    """
    door_selector = Selector(
        {'door_a': Field(str), 'door_b': Field(str), 'door_c': Field(str)}
    )
    sink_selector = Selector({'devnull': Field(str), 'nothing': Field(str)})

    @input_selector_schema(door_selector)
    def silly_hydrator(_, which_door, _field):
        # Doors a and c both yield the goat frame; door b yields the car frame.
        if which_door in ('door_a', 'door_c'):
            return DataFrame({'foo': ['goat']})
        if which_door == 'door_b':
            return DataFrame({'foo': ['car']})
        raise DagsterInvariantViolationError(
            'You did not pick a door. You chose: {which_door}'.format(which_door=which_door)
        )

    @output_selector_schema(sink_selector)
    def silly_materializer(_, _location, _field, _value):
        # Ignores the value and only reports a fixed materialization.
        return Materialization(label='did nothing', description='just one of those days')

    frame_type = create_dagster_pandas_dataframe_type(
        name='TestDataFrame',
        columns=[PandasColumn.exists('foo')],
        input_hydration_config=silly_hydrator,
        output_materialization_config=silly_materializer,
    )

    @solid(
        input_defs=[InputDefinition('df', frame_type)],
        output_defs=[OutputDefinition(frame_type)],
    )
    def did_i_win(_, df):
        return df

    per_solid_config = {
        'inputs': {'df': {'door_a': 'bar'}},
        'outputs': [{'result': {'devnull': 'baz'}}],
    }
    solid_result = execute_solid(
        did_i_win, run_config={'solids': {'did_i_win': per_solid_config}}
    )
    assert solid_result.success

    produced = solid_result.output_value()
    assert isinstance(produced, DataFrame)
    assert produced['foo'].tolist() == ['goat']

    events = solid_result.materialization_events_during_compute
    assert len(events) == 1
    reported = events[0].event_specific_data.materialization
    assert reported.label == 'did nothing'
Exemplo n.º 3
0
def test_custom_dagster_dataframe_loading_ok():
    """Round-trip a custom dataframe type through CSV config using an op.

    A frame with a ``foo`` column is written to a temporary CSV, loaded by
    the op through run config, given a ``bar`` column, and written to a
    second temporary CSV whose ``bar`` column is then checked.
    """
    seed_frame = DataFrame({"foo": [1, 2, 3]})
    with safe_tempfile_path() as input_csv_fp, safe_tempfile_path() as output_csv_fp:
        seed_frame.to_csv(input_csv_fp)

        # Custom type that only requires the "foo" column to be present.
        frame_type = create_dagster_pandas_dataframe_type(
            name="TestDataFrame",
            columns=[PandasColumn.exists("foo")],
        )

        @op(ins={"test_df": In(frame_type)}, out=Out(frame_type))
        def use_test_dataframe(_, test_df):
            test_df["bar"] = [2, 4, 6]
            return test_df

        # The op takes no upstream argument here; its input comes from config.
        @graph
        def basic_graph():
            use_test_dataframe()

        per_op_config = {
            "inputs": {"test_df": {"csv": {"path": input_csv_fp}}},
            "outputs": [{"result": {"csv": {"path": output_csv_fp}}}],
        }
        result = basic_graph.execute_in_process(
            run_config={"ops": {"use_test_dataframe": per_op_config}}
        )
        assert result.success

        written_frame = read_csv(output_csv_fp)
        bar_matches = written_frame["bar"] == [2, 4, 6]
        assert all(bar_matches)
Exemplo n.º 4
0
def test_custom_dagster_dataframe_loading_ok():
    """Round-trip a custom dataframe type through CSV config using a solid.

    A frame with a ``foo`` column is written to a temporary CSV, loaded by
    the solid through run config, given a ``bar`` column, and written to a
    second temporary CSV whose ``bar`` column is then checked.
    """
    seed_frame = DataFrame({"foo": [1, 2, 3]})
    with safe_tempfile_path() as input_csv_fp, safe_tempfile_path() as output_csv_fp:
        seed_frame.to_csv(input_csv_fp)

        # Custom type that only requires the "foo" column to be present.
        frame_type = create_dagster_pandas_dataframe_type(
            name="TestDataFrame",
            columns=[PandasColumn.exists("foo")],
        )

        @solid(
            input_defs=[InputDefinition("test_df", frame_type)],
            output_defs=[OutputDefinition(frame_type)],
        )
        def use_test_dataframe(_, test_df):
            test_df["bar"] = [2, 4, 6]
            return test_df

        # Input is read from one CSV path, output is written to the other.
        per_solid_config = {
            "inputs": {"test_df": {"csv": {"path": input_csv_fp}}},
            "outputs": [{"result": {"csv": {"path": output_csv_fp}}}],
        }
        solid_result = execute_solid(
            use_test_dataframe,
            run_config={"solids": {"use_test_dataframe": per_solid_config}},
        )

        assert solid_result.success
        written_frame = read_csv(output_csv_fp)
        bar_matches = written_frame["bar"] == [2, 4, 6]
        assert all(bar_matches)
Exemplo n.º 5
0
def test_custom_dagster_dataframe_parametrizable_input():
    """Exercise a dataframe type built with a custom loader and materializer.

    The input is selected with the ``door_a`` config key and the output with
    ``devnull``; the test checks the solid's output frame and the single
    materialization event reported during compute.
    """
    door_selector = Selector(
        {
            "door_a": Field(str),
            "door_b": Field(str),
            "door_c": Field(str),
        }
    )
    sink_selector = Selector({"devnull": Field(str), "nothing": Field(str)})

    @dagster_type_loader(door_selector)
    def silly_loader(_, config):
        # The first key of the config names the chosen door.
        which_door = list(config.keys())[0]
        # Doors a and c both yield the goat frame; door b yields the car frame.
        if which_door in ("door_a", "door_c"):
            return DataFrame({"foo": ["goat"]})
        if which_door == "door_b":
            return DataFrame({"foo": ["car"]})
        raise DagsterInvariantViolationError(
            "You did not pick a door. You chose: {which_door}".format(
                which_door=which_door
            )
        )

    @dagster_type_materializer(sink_selector)
    def silly_materializer(_, _config, _value):
        # Ignores the value and only reports a fixed asset materialization.
        return AssetMaterialization(
            asset_key="nothing", description="just one of those days"
        )

    frame_type = create_dagster_pandas_dataframe_type(
        name="TestDataFrame",
        columns=[PandasColumn.exists("foo")],
        loader=silly_loader,
        materializer=silly_materializer,
    )

    @solid(
        input_defs=[InputDefinition("df", frame_type)],
        output_defs=[OutputDefinition(frame_type)],
    )
    def did_i_win(_, df):
        return df

    per_solid_config = {
        "inputs": {"df": {"door_a": "bar"}},
        "outputs": [{"result": {"devnull": "baz"}}],
    }
    solid_result = execute_solid(
        did_i_win, run_config={"solids": {"did_i_win": per_solid_config}}
    )
    assert solid_result.success

    produced = solid_result.output_value()
    assert isinstance(produced, DataFrame)
    assert produced["foo"].tolist() == ["goat"]

    events = solid_result.materialization_events_during_compute
    assert len(events) == 1
    reported = events[0].event_specific_data.materialization
    assert reported.label == "nothing"
Exemplo n.º 6
0
def test_custom_dagster_dataframe_parametrizable_input():
    """Exercise a custom loader and materializer through an op in a graph.

    The input is selected with the ``door_a`` config key and the output with
    ``devnull``; the test checks the op's output frame and that exactly one
    step-materialization event appears among the run's node events.
    """
    door_selector = Selector(
        {
            "door_a": Field(str),
            "door_b": Field(str),
            "door_c": Field(str),
        }
    )
    sink_selector = Selector({"devnull": Field(str), "nothing": Field(str)})

    @dagster_type_loader(door_selector)
    def silly_loader(_, config):
        # The first key of the config names the chosen door.
        which_door = list(config.keys())[0]
        # Doors a and c both yield the goat frame; door b yields the car frame.
        if which_door in ("door_a", "door_c"):
            return DataFrame({"foo": ["goat"]})
        if which_door == "door_b":
            return DataFrame({"foo": ["car"]})
        raise DagsterInvariantViolationError(
            "You did not pick a door. You chose: {which_door}".format(
                which_door=which_door
            )
        )

    @dagster_type_materializer(sink_selector)
    def silly_materializer(_, _config, _value):
        # Ignores the value and only reports a fixed asset materialization.
        return AssetMaterialization(
            asset_key="nothing", description="just one of those days"
        )

    frame_type = create_dagster_pandas_dataframe_type(
        name="TestDataFrame",
        columns=[PandasColumn.exists("foo")],
        loader=silly_loader,
        materializer=silly_materializer,
    )

    @op(ins={"df": In(frame_type)}, out=Out(frame_type))
    def did_i_win(_, df):
        return df

    # The op takes no upstream argument here; its input comes from config.
    @graph
    def basic_graph():
        did_i_win()

    per_op_config = {
        "inputs": {"df": {"door_a": "bar"}},
        "outputs": [{"result": {"devnull": "baz"}}],
    }
    result = basic_graph.execute_in_process(
        run_config={"ops": {"did_i_win": per_op_config}}
    )
    assert result.success

    produced = result.output_for_node("did_i_win")
    assert isinstance(produced, DataFrame)
    assert produced["foo"].tolist() == ["goat"]

    # Keep only the events flagged as step materializations.
    events = [
        node_event
        for node_event in result.all_node_events
        if node_event.is_step_materialization
    ]
    assert len(events) == 1
    reported = events[0].event_specific_data.materialization
    assert reported.label == "nothing"
Exemplo n.º 7
0
def test_exists_column_composition():
    """Check that ``PandasColumn.exists`` yields one existence constraint."""
    column = PandasColumn.exists('foo')
    assert isinstance(column, PandasColumn)

    constraints = column.constraints
    assert len(constraints) == 1
    assert isinstance(constraints[0], ColumnExistsConstraint)