示例#1
0
def test_bin_transform_simple(data):
    """Bin field ``x`` and check the column(s) named by ``as`` are present."""
    # A single string alias should show up as a column of the result.
    single = apply(data, {"bin": True, "field": "x", "as": "xbin"})
    assert "xbin" in single.columns

    # A two-element alias list should show up as two columns.
    paired = apply(
        data, {"bin": True, "field": "x", "as": ["xbin1", "xbin2"]}
    )
    for alias in ("xbin1", "xbin2"):
        assert alias in paired.columns
示例#2
0
def test_bin_transform(data):
    """Check that a bin transform adds the column(s) requested via 'as'."""
    # Same bin/field settings are used for both alias forms.
    common = {'bin': True, 'field': 'x'}

    scalar_alias = apply(data, {**common, 'as': 'xbin'})
    assert 'xbin' in scalar_alias.columns

    list_alias = apply(data, {**common, 'as': ['xbin1', 'xbin2']})
    columns = list_alias.columns
    assert 'xbin1' in columns
    assert 'xbin2' in columns
示例#3
0
def test_bin_transform_simple(data: pd.DataFrame) -> None:
    """Apply a bin transform with string and list ``as`` values.

    Each case only verifies that the requested output column names appear
    in the transformed frame.
    """
    cases = (
        ("xbin", ["xbin"]),
        (["xbin1", "xbin2"], ["xbin1", "xbin2"]),
    )
    for alias, expected_columns in cases:
        transform = {"bin": True, "field": "x", "as": alias}
        out = altair_transform.apply(data, transform)
        for column in expected_columns:
            assert column in out.columns
示例#4
0
def test_quantile_transform_groupby(data: pd.DataFrame) -> None:
    """Grouped quantile output should match running each group separately."""
    group = "c"
    transform = {"quantile": "x", "step": 0.1, "groupby": [group]}
    combined = altair_transform.apply(data, transform)
    assert list(combined.columns) == ["c", "prob", "value"]

    for key in data[group].unique():
        # Run the same transform on just this group's rows ...
        subset = data[data[group] == key]
        from_subset = altair_transform.apply(subset, transform)

        # ... and compare with the matching slice of the combined output,
        # restricted to the columns the per-group run produced.
        matching_rows = combined[combined[group] == key]
        from_combined = matching_rows[from_subset.columns].reset_index(drop=True)
        assert_frame_equal(from_subset, from_combined)
示例#5
0
def test_flatten_transform(data: pd.DataFrame) -> None:
    """Flatten ``x`` alone, then ``x`` and ``y`` together, and check results."""
    expected_columns = ["x", "y", "cat"]
    expected_cats = list("AAABBBBCC")

    # Flattening only "x".
    flat_x = altair_transform.apply(data, {"flatten": ["x"]})
    assert flat_x.shape == (9, 3)
    assert flat_x.columns.tolist() == expected_columns
    assert_equal(flat_x.x.values, range(1, 10))
    assert_equal(flat_x.cat.values, expected_cats)

    # Flattening "x" and "y" together; the expected "y" values contain NaN
    # entries at some positions.
    flat_xy = altair_transform.apply(data, {"flatten": ["x", "y"]})
    assert flat_xy.shape == (9, 3)
    assert flat_xy.columns.tolist() == expected_columns
    assert_equal(flat_xy.x.values, range(1, 10))
    assert_equal(
        flat_xy.y.values, [1, 2, np.nan, 3, 4, np.nan, np.nan, 5, 6]
    )
    assert_equal(flat_xy.cat.values, expected_cats)
示例#6
0
def test_fold_transform(data, as_: Optional[List[str]]):
    """Fold columns ``y1`` and ``y2`` and compare with a hand-built frame.

    Args:
        data: Input frame; this test reads its ``x``, ``y1`` and ``y2``
            columns.
        as_: Output names for the key and value columns, or ``None`` to omit
            ``as`` from the transform and expect ``["key", "value"]``.
    """
    if as_ is None:
        out = altair_transform.apply(data, {"fold": ["y1", "y2"]})
        as_ = ["key", "value"]
    else:
        out = altair_transform.apply(data, {"fold": ["y1", "y2"], "as": as_})

    # Derive the repeat count from the input rather than hard-coding the
    # fixture's row count, so the expectation tracks the fixture size.
    n_rows = len(data)
    expected = pd.DataFrame({
        "x": np.repeat(data["x"], 2),
        as_[0]: n_rows * ["y1", "y2"],
        # Column-major ravel interleaves y1/y2 values row by row.
        as_[1]: np.ravel((data["y1"], data["y2"]), "F"),
        "y1": np.repeat(data["y1"], 2),
        "y2": np.repeat(data["y2"], 2),
    }).reset_index(drop=True)
    assert_frame_equal(out, expected)
示例#7
0
def test_flatten_transform_with_as(data: pd.DataFrame):
    """Flatten with ``as`` aliases and check shape, columns and values."""
    # One flattened field with one alias.
    single = altair_transform.apply(
        data, {"flatten": ["y"], "as": ["yflat"]}
    )
    assert single.shape == (6, 4)
    assert single.columns.tolist() == ["yflat", "x", "y", "cat"]
    assert_equal(single.yflat.values, range(1, 7))
    assert_equal(single.cat.values, list("AABBCC"))

    # Two flattened fields with two aliases.
    transform = {"flatten": ["x", "y"], "as": ["xflat", "yflat"]}
    double = altair_transform.apply(data, transform)
    assert double.shape == (9, 5)
    assert double.columns.tolist() == ["xflat", "yflat", "x", "y", "cat"]
    assert_equal(double.xflat.values, range(1, 10))
    expected_yflat = [1, 2, np.nan, 3, 4, np.nan, np.nan, 5, 6]
    assert_equal(double.yflat.values, expected_yflat)
    assert_equal(double.cat.values, list("AAABBBBCC"))
示例#8
0
def test_pivot_transform_limit(data: pd.DataFrame) -> None:
    """Pivot on ``c`` with ``limit=2`` and compare with per-key sums of ``x``."""
    transform = {"pivot": "c", "value": "x", "limit": 2}

    # Expected: one row holding the sum of x for the first two sorted keys.
    kept_keys = sorted(data.c.unique())[:2]
    sums = {}
    for key in kept_keys:
        sums[key] = [data.x[data.c == key].sum()]
    expected = pd.DataFrame(sums)

    out = altair_transform.apply(data, transform)
    assert_frame_equal(out, expected)
示例#9
0
def test_window_against_js(
    driver,
    data: pd.DataFrame,
    groupby: Optional[List[str]],
    sort: Optional[str],
    frame: Optional[List[Optional[int]]],
) -> None:
    """Compare a windowed sum of ``x`` from ``altair_transform`` and ``driver``.

    ``groupby``, ``sort`` and ``frame`` are added to the transform only when
    they are not ``None``.
    """
    window_op = {"op": "sum", "field": "x", "as": "xsum"}
    transform: Dict[str, Any] = {"window": [window_op], "ignorePeers": False}

    optional_entries = (
        ("groupby", groupby),
        ("sort", None if sort is None else [{"field": sort}]),
        ("frame", frame),
    )
    for name, value in optional_entries:
        if value is not None:
            transform[name] = value

    got = altair_transform.apply(data, transform)
    want = driver.apply(data, transform)

    # Compare with columns in sorted order so column ordering is ignored.
    got_sorted = got[sorted(got.columns)]
    want_sorted = want[sorted(want.columns)]
    # NOTE(review): check_less_precise is deprecated in pandas >= 1.1 in
    # favour of rtol/atol; confirm the pandas version this suite targets.
    assert_frame_equal(
        got_sorted,
        want_sorted,
        check_dtype=False,
        check_index_type=False,
        check_less_precise=True,
    )
示例#10
0
def test_regression_against_js(
    driver, data: pd.DataFrame, method: str, params: str, groupby: Optional[List[str]],
) -> None:
    """Compare regression output from ``altair_transform`` and ``driver``.

    The transform regresses ``y`` on ``x`` with the given ``method`` and
    ``params``; ``groupby`` is included only when truthy.
    """
    transform: Dict[str, Any] = {
        "regression": "y", "on": "x", "method": method, "params": params,
    }
    if groupby:
        transform["groupby"] = groupby

    got = altair_transform.apply(data, transform)
    want = driver.apply(data, transform)

    # Account for differences in handling of undefined between browsers:
    # when the shapes disagree for an ungrouped params run, add a "keys"
    # column holding None to our result before comparing.
    ungrouped_params = params and not groupby
    if ungrouped_params and got.shape != want.shape:
        got["keys"] = [None]

    got_sorted = got[sorted(got.columns)]
    want_sorted = want[sorted(want.columns)]
    # NOTE(review): check_less_precise is deprecated in pandas >= 1.1 in
    # favour of rtol/atol; confirm the pandas version this suite targets.
    assert_frame_equal(
        got_sorted,
        want_sorted,
        check_dtype=False,
        check_index_type=False,
        check_less_precise=True,
    )
示例#11
0
def test_quantile_against_js(
    driver,
    data: pd.DataFrame,
    step: Optional[float],
    groupby: Optional[List[str]],
    probs: Optional[List[float]],
    as_: Optional[List[str]],
) -> None:
    """Compare quantile output on ``x`` from ``altair_transform`` and ``driver``.

    Each of ``step``, ``groupby``, ``probs`` and ``as_`` is forwarded to the
    transform only when it is not ``None``.
    """
    transform: Dict[str, Any] = {"quantile": "x"}
    optional_entries = (
        ("step", step),
        ("groupby", groupby),
        ("probs", probs),
        ("as", as_),
    )
    transform.update(
        {name: value for name, value in optional_entries if value is not None}
    )

    got = altair_transform.apply(data, transform)
    want = driver.apply(data, transform)

    # Sort columns on both sides so column ordering does not matter.
    got_sorted = got[sorted(got.columns)]
    want_sorted = want[sorted(want.columns)]
    # NOTE(review): check_less_precise is deprecated in pandas >= 1.1 in
    # favour of rtol/atol; confirm the pandas version this suite targets.
    assert_frame_equal(
        got_sorted,
        want_sorted,
        check_dtype=False,
        check_index_type=False,
        check_less_precise=True,
    )
示例#12
0
def test_poly_vs_linear(groupby: List[str], method: str, order: int) -> None:
    """Regression with ``method`` should equal ``poly`` with the given order."""
    data = pd.DataFrame(
        {
            "x": [0, 1, 2, 3, 4, 1, 2, 3],
            "y": [2, 4, 6, 8, 10, 2, 3, 4],
            "g": [0, 0, 0, 0, 0, 1, 1, 1],
        }
    )
    # Only pass "groupby" through when a non-empty grouping was requested.
    kwds = {"groupby": groupby} if groupby else {}
    common = {"regression": "y", "on": "x"}

    by_method = altair_transform.apply(
        data, {**common, "method": method, **kwds}
    )
    by_poly = altair_transform.apply(
        data, {**common, "method": "poly", "order": order, **kwds}
    )
    assert_frame_equal(by_method, by_poly, check_dtype=False)
示例#13
0
def test_linear() -> None:
    """Default regression of ``y`` on ``x`` for points lying on y = 2x + 2."""
    xs = [0, 1, 2, 3, 4]
    ys = [2, 4, 6, 8, 10]
    data = pd.DataFrame({"x": xs, "y": ys})

    out = altair_transform.apply(data, {"regression": "y", "on": "x"})

    # The expected frame holds just the two endpoints of the fitted line.
    expected = pd.DataFrame({"x": [0.0, 4.0], "y": [2.0, 10.0]})
    assert_frame_equal(out, expected, check_dtype=False)
示例#14
0
def test_calculate_transform(data):
    """A calculate transform should add ``z`` equal to ``x + y``."""
    got = apply(data, {"calculate": "datum.x + datum.y", "as": "z"})

    # Build the expectation on a copy so the input frame is left untouched.
    expected = data.copy()
    expected["z"] = data["x"] + data["y"]

    assert got.equals(expected)
示例#15
0
def test_calculate_transform(data):
    """Check the calculated column ``z`` against ``x + y`` computed in pandas."""
    expression = "datum.x + datum.y"
    got = altair_transform.apply(data, {"calculate": expression, "as": "z"})

    # Expected frame: a copy of the input with the extra column appended.
    expected = data.copy()
    expected["z"] = data["x"] + data["y"]

    assert_frame_equal(got, expected)
示例#16
0
def test_filter_transform(
    data: pd.DataFrame,
    filter: Union[str, Dict[str, Any]],
    calc: Callable[[pd.DataFrame], pd.DataFrame],
):
    """Compare a filter transform with the equivalent pandas computation.

    Args:
        data: Input frame.
        filter: Filter specification passed under the ``"filter"`` key.
        calc: Callable producing the expected rows from ``data``; its result
            is re-indexed from zero before comparison.
    """
    transform = {"filter": filter}
    got = altair_transform.apply(data, transform)
    expected = calc(data).reset_index(drop=True)
    assert_frame_equal(got, expected)
示例#17
0
def test_calculate_transform(data):
    """The 'calculate' transform should append z = x + y to the frame."""
    got = apply(data, {'calculate': 'datum.x + datum.y', 'as': 'z'})

    # Expected result: untouched copy of the input plus the summed column.
    expected = data.copy()
    expected['z'] = data['x'] + data['y']

    assert got.equals(expected)
示例#18
0
def test_fold_transform(as_):
    """Fold ``y1``/``y2`` of a small frame and compare with the stacked result.

    When ``as_`` is ``None`` the transform is given no ``as`` entry and the
    output columns are expected to be named ``key`` and ``value``.
    """
    data = pd.DataFrame({
        "x": [1, 2, 3],
        "y1": ["A", "B", "C"],
        "y2": ["D", "E", "F"]
    })

    transform = {"fold": ["y1", "y2"]}
    if as_ is not None:
        transform["as"] = as_
    else:
        as_ = ["key", "value"]
    out = apply(data, transform)

    key_name, value_name = as_[0], as_[1]
    x_values = data["x"].tolist()
    expected = pd.DataFrame({
        "x": x_values + x_values,
        key_name: ["y1"] * 3 + ["y2"] * 3,
        value_name: data["y1"].tolist() + data["y2"].tolist(),
    })
    assert out.equals(expected)
示例#19
0
def test_sample_transform(data, N):
    """Sampling N rows should give at most N rows, all taken from ``data``."""
    out = apply(data, {'sample': N})

    # Ensure the shape is correct: row count is capped by the input size.
    n_rows, n_cols = data.shape[0], data.shape[1]
    assert out.shape == (min(N, n_rows), n_cols)

    # Ensure the content is correct: rows match the input at the same
    # index positions.
    source_rows = data.iloc[out.index]
    assert out.equals(source_rows)
示例#20
0
def test_sample_transform(data, N):
    """Check the shape and contents of a ``sample`` transform result."""
    sampled = altair_transform.apply(data, {"sample": N})

    # Shape: no more than N rows, and never more than the input has.
    expected_rows = min(N, data.shape[0])
    expected_shape = (expected_rows, data.shape[1])
    assert sampled.shape == expected_shape

    # Contents: each sampled row equals the input row at that index position.
    source_rows = data.iloc[sampled.index]
    assert_frame_equal(sampled, source_rows)
示例#21
0
def test_flatten_transform_with_as():
    """Flatten list-valued columns with ``as`` aliases and check the output."""
    data = pd.DataFrame({
        "x": [[1, 2, 3], [4, 5, 6, 7], [8, 9]],
        "y": [[1, 2], [3, 4], [5, 6]],
        "cat": list("ABC"),
    })

    # Flatten "y" only, renamed to "yflat".
    flat_y = apply(data, {"flatten": ["y"], "as": ["yflat"]})
    assert flat_y.shape == (6, 3)
    assert flat_y.columns.tolist() == ["yflat", "x", "cat"]
    assert_equal(flat_y.yflat.values, range(1, 7))
    assert_equal(flat_y.cat.values, list("AABBCC"))

    # Flatten "x" and "y" together; the expected "yflat" values contain NaN
    # at some positions.
    transform = {"flatten": ["x", "y"], "as": ["xflat", "yflat"]}
    flat_xy = apply(data, transform)
    assert flat_xy.shape == (9, 3)
    assert flat_xy.columns.tolist() == ["xflat", "yflat", "cat"]
    assert_equal(flat_xy.xflat.values, range(1, 10))
    expected_yflat = [1, 2, np.nan, 3, 4, np.nan, np.nan, 5, 6]
    assert_equal(flat_xy.yflat.values, expected_yflat)
    assert_equal(flat_xy.cat.values, list("AAABBBBCC"))
示例#22
0
def test_flatten_transform_with_as():
    """Check 'flatten' with 'as' aliases on a frame of list-valued columns."""
    x_lists = [[1, 2, 3], [4, 5, 6, 7], [8, 9]]
    y_lists = [[1, 2], [3, 4], [5, 6]]
    data = pd.DataFrame({'x': x_lists, 'y': y_lists, 'cat': list('ABC')})

    # Case 1: flatten a single field under a single alias.
    one_field = apply(data, {'flatten': ['y'], 'as': ['yflat']})
    assert one_field.shape == (6, 3)
    assert one_field.columns.tolist() == ['yflat', 'x', 'cat']
    assert_equal(one_field.yflat.values, range(1, 7))
    assert_equal(one_field.cat.values, list('AABBCC'))

    # Case 2: flatten two fields at once under two aliases.
    two_fields = apply(
        data, {'flatten': ['x', 'y'], 'as': ['xflat', 'yflat']}
    )
    assert two_fields.shape == (9, 3)
    assert two_fields.columns.tolist() == ['xflat', 'yflat', 'cat']
    assert_equal(two_fields.xflat.values, range(1, 10))
    assert_equal(
        two_fields.yflat.values,
        [1, 2, np.nan, 3, 4, np.nan, np.nan, 5, 6],
    )
    assert_equal(two_fields.cat.values, list('AAABBBBCC'))
示例#23
0
def test_window_transform_basic(data):
    """A windowed sum of 'x' with no other options should equal its cumsum."""
    window_op = {'op': 'sum', 'field': 'x', 'as': 'xsum'}
    out = apply(data, {'window': [window_op]})

    # The comparison is made against the cumulative sum cast to float.
    running_total = data['x'].cumsum().astype(float)
    assert out['xsum'].equals(running_total)
示例#24
0
def test_bin_against_js(driver, data: pd.DataFrame,
                        transform: Dict[str, Any]) -> None:
    """Compare a bin transform's output from ``altair_transform`` and ``driver``."""
    got = altair_transform.apply(data, transform)
    want = driver.apply(data, transform)

    # Put both frames in sorted column order before comparing.
    got_sorted = got[sorted(got.columns)]
    want_sorted = want[sorted(want.columns)]
    # NOTE(review): check_less_precise is deprecated in pandas >= 1.1 in
    # favour of rtol/atol; confirm the pandas version this suite targets.
    comparison_options = {
        "check_dtype": False,
        "check_index_type": False,
        "check_less_precise": True,
    }
    assert_frame_equal(got_sorted, want_sorted, **comparison_options)
示例#25
0
def test_lookup_transform(data: pd.DataFrame, lookup_key: str) -> None:
    """Compare a lookup transform on ``c`` with the equivalent ``pd.merge``.

    The secondary table maps keys ``A``-``D`` (stored under ``lookup_key``)
    to a ``z`` value.
    """
    lookup = pd.DataFrame({lookup_key: list("ABCD"), "z": [3, 1, 4, 5]})
    source = {"data": to_values(lookup), "key": lookup_key, "fields": ["z"]}
    got = altair_transform.apply(data, {"lookup": "c", "from": source})

    expected = pd.merge(data, lookup, left_on="c", right_on=lookup_key)
    # When the key column has a different name than "c", the merge carries
    # it along as an extra column; remove it before comparing.
    if lookup_key != "c":
        expected = expected.drop(lookup_key, axis=1)
    assert_frame_equal(got, expected)
示例#26
0
def test_multiple_transforms(data):
    """Apply a calculate step followed by a filter that uses its output."""
    calculate_step = {
        'calculate': '0.5 * (datum.x + datum.y)',
        'as': 'xy_mean'
    }
    filter_step = {'filter': 'datum.x < datum.xy_mean'}
    got = apply(data, [calculate_step, filter_step])

    # Reproduce both steps directly in pandas.
    expected = data.copy()
    expected['xy_mean'] = 0.5 * (data.x + data.y)
    below_mean = expected.x < expected.xy_mean
    expected = expected[below_mean]

    assert got.equals(expected)
示例#27
0
def test_window_transform_grouped(data):
    """A windowed sum of 'x' grouped by 'y' should match a per-group rolling sum."""
    window_op = {'op': 'sum', 'field': 'x', 'as': 'xsum'}
    out = apply(data, {'window': [window_op], 'groupby': ['y']})

    # A rolling window as long as the whole frame, with min_periods=1,
    # yields a running sum within each group.
    rolling = data.groupby('y').rolling(len(data), min_periods=1)
    per_group = rolling['x'].sum()
    # Drop the group level from the index and restore index order.
    expected = per_group.reset_index('y', drop=True).sort_index()
    assert out['xsum'].equals(expected)
示例#28
0
def test_window_transform_grouped(data):
    """Check a grouped window sum against pandas' grouped rolling sum."""
    transform = {
        "window": [{"op": "sum", "field": "x", "as": "xsum"}],
        "groupby": ["y"],
    }
    out = apply(data, transform)

    # Window length equals the frame length, so every earlier row of a group
    # contributes to that group's sum.
    window_size = len(data)
    grouped_rolling = data.groupby("y").rolling(window_size, min_periods=1)
    sums = grouped_rolling["x"].sum()
    expected = sums.reset_index("y", drop=True).sort_index()
    assert out["xsum"].equals(expected)
示例#29
0
def test_window_transform_sorted(data):
    """A window sum sorted by 'x' should equal the cumsum taken in sorted order."""
    window_op = {'op': 'sum', 'field': 'x', 'as': 'xsum'}
    sort_spec = [{'field': 'x'}]
    out = apply(data, {'window': [window_op], 'sort': sort_spec})

    # Accumulate in ascending x order, then restore the original index order.
    ascending = data['x'].sort_values()
    expected = ascending.cumsum().sort_index()
    assert out['xsum'].equals(expected.astype(float))
示例#30
0
def test_bin_transform_step(nice: bool, step: int) -> None:
    """Binning 0..99 with an explicit step should space bin values by ``step``."""
    data = pd.DataFrame({"x": np.arange(100)})
    bin_options = {"step": step, "nice": nice}
    transform = {"bin": bin_options, "field": "x", "as": "xbin"}

    out = altair_transform.apply(data, transform)

    # Consecutive distinct bin values must differ by the requested step.
    edges = np.sort(out.xbin.unique())
    gaps = edges[1:] - edges[:-1]
    assert np.allclose(gaps, step)

    # Every input row must have been assigned a bin.
    has_missing = out.xbin.isnull().any()
    assert not has_missing