示例#1
0
def test_arideda_numfeature():
    """
    Check that the first object returned by arid_eda has eight rows and one
    column per requested feature
    """
    selected = ["sepalLength", "sepalWidth"]
    summary, _chart = aa.arid_eda(
        data.iris(), "species", "categorical", selected
    )
    expected_shape = (8, len(selected))
    assert summary.shape == expected_shape
示例#2
0
def test_multiscatterplot_args_alternate():
    """Call multiscatterplot with column data passed directly, not by name."""
    iris = data.iris()
    picked = [
        iris[label] for label in ("sepalLength", "sepalWidth", "petalLength")
    ]
    return ar.multiscatterplot(columns=picked, color=iris["species"])
示例#3
0
def test_arideda_return():
    """
    Verify that the second item returned by arid_eda is an Altair
    HConcatChart
    """
    features = ["sepalLength", "sepalWidth"]
    result = aa.arid_eda(data.iris(), "species", "categorical", features)
    _, chart = result
    assert isinstance(chart, alt.HConcatChart)
示例#4
0
def test_arideda_features():
    """
    Verify that the first item returned by arid_eda is a pandas DataFrame
    when a valid features list is supplied
    """
    features = ["sepalLength", "sepalWidth"]
    result = aa.arid_eda(data.iris(), "species", "categorical", features)
    frame, _ = result
    assert isinstance(frame, pd.core.frame.DataFrame)
def test_iris_column_names():
    """Check the types and column names produced by the iris loader.

    ``data.iris()`` must return a plain ``pd.DataFrame`` holding exactly the
    five iris columns, and ``data.iris.raw()`` must return ``bytes``.
    """
    iris = data.iris()
    assert type(iris) is pd.DataFrame
    # Compare sorted names so the check does not hinge on the order in which
    # the columns happen to be laid out in the loaded frame.
    assert sorted(iris.columns) == [
        "petalLength",
        "petalWidth",
        "sepalLength",
        "sepalWidth",
        "species",
    ]

    iris = data.iris.raw()
    assert type(iris) is bytes
示例#6
0
def test_download_iris():
    """Check the iris loader's results when called with ``use_local=False``.

    ``data.iris(use_local=False)`` must return a plain ``pd.DataFrame``
    holding exactly the five iris columns, and
    ``data.iris.raw(use_local=False)`` must return ``bytes``.
    """
    iris = data.iris(use_local=False)
    assert type(iris) is pd.DataFrame
    # Compare sorted names so the check does not hinge on the order in which
    # the columns happen to be laid out in the loaded frame.
    assert sorted(iris.columns) == [
        "petalLength",
        "petalWidth",
        "sepalLength",
        "sepalWidth",
        "species",
    ]

    iris = data.iris.raw(use_local=False)
    assert type(iris) is bytes
示例#7
0
def test_scatterplot():
    """Build a scatterplot from the iris frame, naming a column per channel."""
    channels = {
        "x": "petalWidth",
        "y": "petalLength",
        "color": "sepalWidth",
        "tooltip": "species",
    }
    return ar.scatterplot(data.iris(), **channels)
示例#8
0
def test_scatterplot_alternate_data():
    """Build a scatterplot by passing iris columns directly for each channel."""
    iris = data.iris()
    channels = {
        "x": iris["petalWidth"],
        "y": iris["petalLength"],
        "color": iris["sepalWidth"],
        "tooltip": iris["species"],
    }
    return ar.scatterplot(**channels)
示例#9
0
def make_static_chart():
    '''
    Return a circle-mark Altair chart of the iris data: petalLength on x,
    petalWidth on y, colored by species, with sepalWidth as the tooltip.
    The chart is passed through ``.interactive()`` before being returned.
    '''
    circles = alt.Chart(data=data.iris()).mark_circle(size=60)
    encoded = circles.encode(
        x='petalLength:Q',
        y='petalWidth:Q',
        color='species:N',
        tooltip='sepalWidth:Q',
    )
    return encoded.interactive()
示例#10
0
def test_arideda_empty_df():
    """
    Expect an AssertionError when arid_eda is given the response type
    "ORDINAL"
    """
    features = ["sepalLength", "sepalWidth"]
    with pytest.raises(AssertionError):
        aa.arid_eda(data.iris(), "species", "ORDINAL", features)
示例#11
0
def test_response_type_incorrect():
    """
    Expect an AssertionError when the "petalLength" column is declared as a
    "categorical" response
    """
    features = ["sepalLength", "sepalWidth"]
    with pytest.raises(AssertionError):
        aa.arid_eda(data.iris(), "petalLength", "categorical", features)
示例#12
0
def example_scatterplot():
    """Return a 200x200 Altair circle chart of iris petal vs. sepal length.

    petalLength is encoded on x, sepalLength on y, and species as color.
    """
    import altair as alt
    from vega_datasets import data

    iris = data.iris()

    circles = alt.Chart(iris).mark_circle()
    encoded = circles.encode(
        x='petalLength:Q',
        y='sepalLength:Q',
        color='species:N',
    )
    return encoded.properties(width=200, height=200)
def test_iris_column_names():
    """Check the iris loader's return types and its sorted column names."""
    expected_columns = [
        "petalLength",
        "petalWidth",
        "sepalLength",
        "sepalWidth",
        "species",
    ]

    frame = data.iris()
    assert type(frame) is pd.DataFrame
    assert sorted(frame.columns) == expected_columns

    raw = data.iris.raw()
    assert type(raw) is bytes
示例#14
0
def test_download_iris():
    """Check the iris loader's types and sorted columns with use_local=False."""
    expected_columns = [
        "petalLength",
        "petalWidth",
        "sepalLength",
        "sepalWidth",
        "species",
    ]

    frame = data.iris(use_local=False)
    assert type(frame) is pd.DataFrame
    assert sorted(frame.columns) == expected_columns

    raw = data.iris.raw(use_local=False)
    assert type(raw) is bytes
示例#15
0
def example_scatterplot_matrix():
    """Return an interactive Altair scatterplot matrix of the iris data.

    Each 150x150 cell plots one of the four measurement columns against
    another (repeated over rows and columns), colored by species.
    """
    import altair as alt
    from vega_datasets import data

    iris = data.iris()
    measurements = ('sepalLength', 'sepalWidth', 'petalLength', 'petalWidth')

    cell = alt.Chart(iris).mark_circle().encode(
        x=alt.X(alt.repeat('column'), type='quantitative'),
        y=alt.Y(alt.repeat('row'), type='quantitative'),
        color='species:N',
    ).properties(width=150, height=150)

    # Separate list objects for rows and columns, as in a literal spelling.
    matrix = cell.repeat(row=list(measurements), column=list(measurements))
    return matrix.interactive()
示例#16
0
def test_arideda_returns_tuple():
    """
    Check that arid_eda returns exactly two items
    """
    features = ["sepalLength", "sepalWidth"]
    result = aa.arid_eda(data.iris(), "species", "categorical", features)
    assert len(result) == 2
示例#17
0
def test_boxplot_cast():
    """Build a boxplot over every iris column except the last one."""
    frame = data.iris()
    all_but_last = list(frame.columns[:-1])
    return ar.boxplot(frame, columns=all_but_last)
示例#18
0
def load_data():
    """Return whatever ``data.iris()`` produces, unmodified."""
    return data.iris()
示例#19
0
def test_multiscatterplot_args():
    """Test multiscatterplot with explicit ``columns`` and ``color`` arguments.

    Passes the iris frame, all of its column labels except the last one, and
    "species" as the color column.
    """
    # Load the dataset once and reuse it, rather than loading a second copy
    # only to read its column labels.
    iris = data.iris()
    return ar.multiscatterplot(
        iris, columns=iris.columns[:-1], color="species"
    )
示例#20
0
def test_multiscatterplot_defaults():
    """Call multiscatterplot with only the iris frame, leaving defaults."""
    iris = data.iris()
    return ar.multiscatterplot(iris)
示例#21
0
from __future__ import annotations
import typing as t
from vega_datasets import data

if t.TYPE_CHECKING:
    from pandas.core.frame import DataFrame

# Statistics computed for every aggregated column in both reports below.
_AGG_STATS = ["max", "min", "mean", "std", "count"]

# --- cars: preview, describe() output, column labels, Horsepower by Year ---
df: DataFrame = data.cars()
print(df.head())
print(df.describe())
print(df.columns)
grouped_df = df.groupby(by=["Year"]).agg({"Horsepower": list(_AGG_STATS)})
print(grouped_df)

# Visual separator between the two reports.
print("")
print("----------------------------------------")
print("")

# --- iris: same report, with sepal measurements grouped by species ---
df: DataFrame = data.iris()
print(df.head())
print(df.describe())
print(df.columns)
grouped_df = df.groupby(by=["species"]).agg(
    {column: list(_AGG_STATS) for column in ("sepalLength", "sepalWidth")}
)
print(grouped_df)
In [70]:
# Scatter plot of Miles_per_Gallon against Weight_in_lbs.
# NOTE(review): `cars`, `figure` and `show` are not defined in this cell;
# presumably they come from earlier notebook cells (Bokeh plotting) — confirm.

# Data: x and y values read from the `cars` object.
x_scatter = cars.Weight_in_lbs
y_scatter = cars.Miles_per_Gallon

# Plot: 500x300 figure with labelled axes and semi-transparent orange circles
# outlined in navy.
scatter_plot = figure(plot_width=500, plot_height=300, x_axis_label='Weight_in_lbs', y_axis_label='Miles_per_Gallon')
scatter_plot.circle(x_scatter, y_scatter, size=15, line_color='navy', fill_color='orange', fill_alpha=0.5)
show(scatter_plot)
Other scatter plot variations include: cross, x, diamond, diamond_cross, circle_x, circle_cross, triangle, inverted_triangle, square, square_x, square_cross, asterisk

In [71]:
# Load the iris data via `vds` and show its last rows.
# NOTE(review): `vds` is presumably the vega_datasets `data` object bound in
# an earlier cell — confirm.
iris = vds.iris()
iris.tail()
Out[71]:
petalLength	petalWidth	sepalLength	sepalWidth	species
145	5.2	2.3	6.7	3.0	virginica
146	5.0	1.9	6.3	2.5	virginica
147	5.2	2.0	6.5	3.0	virginica
148	5.4	2.3	6.2	3.4	virginica
149	5.1	1.8	5.9	3.0	virginica
In [72]:
# scatter plot subgroups using iris data

from bokeh.transform import factor_cmap, factor_mark

# data
# use vega_datasets iris data
示例#23
0
def test_boxplot_melted():
    """Build a boxplot of the petalLength column grouped by species."""
    iris = data.iris()
    return ar.boxplot(iris, columns="petalLength", group_by="species")
示例#24
0
"""
Parallel Coordinates Example
----------------------------
A `Parallel Coordinates <https://en.wikipedia.org/wiki/Parallel_coordinates>`_
chart is a chart that lets you visualize the individual data points by drawing
a single line for each of them.
Such a chart can be created in Altair, but requires some data preprocessing
to transform the data into a suitable representation.
This example shows a parallel coordinates chart with the Iris dataset.
"""
# category: other charts

import altair as alt
from vega_datasets import data

source = data.iris()

# Reshape to long form: the former row index and the species stay as
# identifier columns, every other column becomes a (variable, value) pair.
_indexed = source.reset_index()
source_transformed = _indexed.melt(id_vars=['species', 'index'])

# One line per original row (detail='index:N'), drawn across the variables.
_line_encodings = dict(
    x='variable:N',
    y='value:Q',
    color='species:N',
    detail='index:N',
    opacity=alt.value(0.5),
)

alt.Chart(source_transformed).mark_line().encode(
    **_line_encodings
).properties(width=500)
示例#25
0
from vega_datasets import data

# Load the iris data and print the output of its describe() method.
df = data.iris()
_summary = df.describe()
print(_summary)
示例#26
0
import altair as alt
from vega_datasets import data

# Select the 'notebook' renderer before any chart is built.
alt.renderers.enable('notebook')

iris = data.iris()

# Point chart: petalLength on x, petalWidth on y, colored by species.
_channels = {'x': 'petalLength', 'y': 'petalWidth', 'color': 'species'}
chart = alt.Chart(iris).mark_point().encode(**_channels)

# Display in the Chrome browser
chart.show()

# pip install altair vega_datasets vega

# pip install altair_viewer
# alt.renderers.enable('altair_viewer')
# Rendering

# https://altair-viz.github.io/user_guide/display_frontends.html#display-general
示例#27
0
import altair as alt
from vega_datasets import data

source = data.iris()

# Every layer below is derived from this one chart object.
base = alt.Chart(source)

# Fixed axis domains for the sepal length (x) and sepal width (y) scales.
xscale = alt.Scale(domain=(4.0, 8.0))
yscale = alt.Scale(domain=(1.9, 4.55))

# Mark options applied to the histogram area.
area_args = dict(opacity=.3, interpolate='step')

# Scatter of sepal width against sepal length, colored by species.
_points_x = alt.X('sepalLength', scale=xscale)
_points_y = alt.Y('sepalWidth', scale=yscale)
points = base.mark_circle().encode(_points_x, _points_y, color='species')

# When using bins, the axis scale is set through the bin extent, so no
# scale is specified on this channel (it would be ignored anyway).
_hist_x = alt.X(
    'sepalLength:Q',
    bin=alt.Bin(maxbins=20, extent=xscale.domain),
    stack=None,
    title='',
)
_hist_y = alt.Y('count()', stack=None, title='')

# Unstacked per-species count of binned sepal length, 60 px tall.
top_hist = base.mark_area(**area_args).encode(
    _hist_x,
    _hist_y,
    alt.Color('species:N'),
).properties(height=60)
示例#28
0
def test_multiscatter_defaults():
    """Call multiscatter with only the iris frame, leaving defaults."""
    iris = data.iris()
    return ar.multiscatter(iris)