Exemplo n.º 1
0
def test_get_domain_records_with_column_domain():
    """Check ``get_domain_records`` for a single-column domain with a row condition.

    A five-row frame is loaded as batch ``"1234"`` and records are requested
    for column ``"a"`` with the pandas condition ``b<5``.  The test expects
    the first three rows of the loaded frame (all columns) to come back.
    """
    mismatch_message = "Data does not match after getting full access compute domain"

    execution_engine = PandasExecutionEngine()
    frame = pd.DataFrame(
        {
            "a": [1, 2, 3, 4, 5],
            "b": [2, 3, 4, 5, None],
            "c": [1, 2, 3, 4, None],
        }
    )
    # Register the frame as the engine's batch before querying it.
    execution_engine.load_batch_data(batch_data=frame, batch_id="1234")

    column_domain = {
        "column": "a",
        "row_condition": "b<5",
        "condition_parser": "pandas",
    }
    records = execution_engine.get_domain_records(domain_kwargs=column_domain)

    # Only the rows with b in {2, 3, 4} satisfy the condition.
    expected_rows = frame.iloc[:3]

    assert records.equals(expected_rows), mismatch_message
Exemplo n.º 2
0
def test_get_domain_records_with_multicolumn_domain():
    """Check ``get_domain_records`` for ``column_list`` (multicolumn) domains.

    Three scenarios are covered, one per ``ignore_row_if`` value used here:
    ``"all_values_are_missing"``, ``"any_value_is_missing"`` and ``"never"``.
    The first two share one engine; the last runs against a freshly built
    engine loaded with the same data.
    """
    mismatch_message = "Data does not match after getting full access compute domain"

    def _loaded_engine():
        # Build a new engine and load the six-row frame used by every scenario.
        fresh_engine = PandasExecutionEngine()
        frame = pd.DataFrame(
            {
                "a": [1, 2, 3, 4, None, 5],
                "b": [2, 3, 4, 5, 6, 7],
                "c": [1, 2, 3, 4, None, 6],
            }
        )
        fresh_engine.load_batch_data(batch_data=frame, batch_id="1234")
        return fresh_engine

    engine = _loaded_engine()

    # Scenario 1: columns a/c, condition b>2, ignore rows where all are missing.
    all_missing_domain = {
        "column_list": ["a", "c"],
        "row_condition": "b>2",
        "condition_parser": "pandas",
        "ignore_row_if": "all_values_are_missing",
    }
    # Cast to int so the result can be compared with integer-valued expectations.
    records = engine.get_domain_records(domain_kwargs=all_missing_domain).astype(int)
    expected = pd.DataFrame(
        {"a": [2, 3, 4, 5], "b": [3, 4, 5, 7], "c": [2, 3, 4, 6]},
        index=[1, 2, 3, 5],
    )
    assert records.equals(expected), mismatch_message

    # Scenario 2: columns b/c, condition a<5, ignore rows where any is missing.
    any_missing_domain = {
        "column_list": ["b", "c"],
        "row_condition": "a<5",
        "condition_parser": "pandas",
        "ignore_row_if": "any_value_is_missing",
    }
    records = engine.get_domain_records(domain_kwargs=any_missing_domain).astype(int)
    expected = pd.DataFrame(
        {"a": [1, 2, 3, 4], "b": [2, 3, 4, 5], "c": [1, 2, 3, 4]},
        index=[0, 1, 2, 3],
    )
    assert records.equals(expected), mismatch_message

    # Scenario 3: new engine, no row condition, never ignore rows; every
    # row of the loaded frame is expected back, missing values included.
    engine = _loaded_engine()
    never_ignore_domain = {
        "column_list": ["b", "c"],
        "ignore_row_if": "never",
    }
    records = engine.get_domain_records(domain_kwargs=never_ignore_domain)
    expected = pd.DataFrame(
        {
            "a": [1, 2, 3, 4, None, 5],
            "b": [2, 3, 4, 5, 6, 7],
            "c": [1, 2, 3, 4, None, 6],
        },
        index=[0, 1, 2, 3, 4, 5],
    )
    assert records.equals(expected), mismatch_message
Exemplo n.º 3
0
def test_get_domain_records_with_column_pair_domain():
    """Check ``get_domain_records`` for ``column_A``/``column_B`` pair domains.

    One engine is loaded with a six-row frame and queried three times, once
    per ``ignore_row_if`` value used here: ``"both_values_are_missing"``,
    ``"either_value_is_missing"`` and ``"neither"``.
    """
    mismatch_message = "Data does not match after getting full access compute domain"

    execution_engine = PandasExecutionEngine()
    frame = pd.DataFrame(
        {
            "a": [1, 2, 3, 4, 5, 6],
            "b": [2, 3, 4, 5, None, 6],
            "c": [1, 2, 3, 4, 5, None],
        }
    )
    # Register the frame as the engine's batch before querying it.
    execution_engine.load_batch_data(batch_data=frame, batch_id="1234")

    # Scenario 1: pair a/b, condition b>2, ignore rows where both are missing.
    both_missing_domain = {
        "column_A": "a",
        "column_B": "b",
        "row_condition": "b>2",
        "condition_parser": "pandas",
        "ignore_row_if": "both_values_are_missing",
    }
    records = execution_engine.get_domain_records(domain_kwargs=both_missing_domain)
    # Column c is outside the pair, so its missing value at index 5 is expected.
    expected = pd.DataFrame(
        {
            "a": [2, 3, 4, 6],
            "b": [3.0, 4.0, 5.0, 6.0],
            "c": [2.0, 3.0, 4.0, None],
        },
        index=[1, 2, 3, 5],
    )
    assert records.equals(expected), mismatch_message

    # Scenario 2: pair b/c, condition b>2, ignore rows where either is missing.
    either_missing_domain = {
        "column_A": "b",
        "column_B": "c",
        "row_condition": "b>2",
        "condition_parser": "pandas",
        "ignore_row_if": "either_value_is_missing",
    }
    # Cast to int so the result can be compared with integer-valued expectations.
    records = execution_engine.get_domain_records(
        domain_kwargs=either_missing_domain
    ).astype(int)
    expected = pd.DataFrame(
        {"a": [2, 3, 4], "b": [3, 4, 5], "c": [2, 3, 4]},
        index=[1, 2, 3],
    )
    assert records.equals(expected), mismatch_message

    # Scenario 3: pair b/c, condition a<6, "neither"; the row with a missing
    # b value (index 4) is expected to remain in the result.
    neither_domain = {
        "column_A": "b",
        "column_B": "c",
        "row_condition": "a<6",
        "condition_parser": "pandas",
        "ignore_row_if": "neither",
    }
    records = execution_engine.get_domain_records(domain_kwargs=neither_domain)
    expected = pd.DataFrame(
        {
            "a": [1, 2, 3, 4, 5],
            "b": [2.0, 3.0, 4.0, 5.0, None],
            "c": [1.0, 2.0, 3.0, 4.0, 5.0],
        }
    )
    assert records.equals(expected), mismatch_message