示例#1
0
def andrews_curve(
    data: pd.DataFrame,
    xvar: str = "t",
    yvar: str = "curve_value",
    targetvar: str = "target",
    samplevar: str = "sample",
    w: int = 450,
    h: int = 300,
) -> alt.LayerChart:
    """Build a two-layer line chart with legend-driven highlighting.

    The bottom layer draws one light-gray line per ``samplevar`` value. The
    top layer reuses the same encodings but colors each line by
    ``targetvar`` when it matches the legend selection and makes it
    transparent otherwise, so the gray layer shows through.

    Parameters
    ----------
    data : pd.DataFrame
        Long-form frame holding the ``xvar``, ``yvar``, ``targetvar`` and
        ``samplevar`` columns.
    xvar, yvar : str
        Quantitative columns used for the x and y positions.
    targetvar : str
        Nominal column used for color and for the legend selection.
    samplevar : str
        Nominal column identifying individual lines.
    w, h : int
        Chart width and height in pixels.

    Returns
    -------
    alt.LayerChart
        The gray background layer with the highlight layer on top.
    """
    legend_pick = alt.selection_single(fields=[targetvar], bind="legend")

    lines = alt.Chart(data).properties(width=w, height=h).mark_line()

    # Shared positional encodings; both axes are drawn without a title.
    x_channel = alt.X(
        f"{xvar}:Q",
        axis=alt.Axis(title=None),
        scale=alt.Scale(nice=False),
    )
    y_channel = alt.Y(f"{yvar}:Q", axis=alt.Axis(title=None))
    per_sample = alt.Detail(f"{samplevar}:N")

    gray_layer = lines.encode(
        x=x_channel,
        y=y_channel,
        detail=per_sample,
        color=alt.value(COLORS["light_gray"]),
    )

    # Overriding only the color keeps x/y/detail identical to the gray layer.
    legend = alt.Legend(title=f"{targetvar.title()} (click to highlight)")
    highlight_color = alt.condition(
        legend_pick,
        f"{targetvar}:N",
        alt.value("transparent"),
        legend=legend,
    )
    highlight_layer = gray_layer.encode(
        color=highlight_color).add_selection(legend_pick)

    return gray_layer + highlight_layer
示例#2
0
def parallel_coordinates(data,
                         class_column,
                         cols=None,
                         alpha=None,
                         width=450,
                         height=300,
                         interactive=True,
                         var_name="variable",
                         value_name="value",
                         **kwds):
    """
    Parallel coordinates plotting.

    The frame is melted to long form so that every original row becomes one
    line across the selected columns, colored by ``class_column``.

    Parameters
    ----------
    data: DataFrame
        The wide-form data to plot
    class_column: str
        Column name containing class names
    cols: list, optional
        A list of column names to use; ``class_column`` is always included
        and need not be listed
    alpha: float, optional
        The transparency of the lines, within [0, 1]. When omitted, 0.3 is
        used if ``class_column`` has more than 20 distinct values; otherwise
        no opacity encoding is added.
    interactive : bool, optional
        Accepted for API compatibility; this function does not read it.
    width : int, optional
        the width of the plot in pixels
    height : int, optional
        the height of the plot in pixels
    var_name : string, optional
        name of the melted "variable" column, used as the x field
    value_name : string, optional
        name of the melted "value" column, used as the y field

    Returns
    -------
    chart: alt.Chart object
        The altair representation of the plot.

    Raises
    ------
    ValueError
        If ``alpha`` is given and lies outside [0, 1].

    See Also
    --------
    pandas.plotting.parallel_coordinates : matplotlib version of this routine
    """
    if kwds:
        warnings.warn(
            "Unrecognized keywords in pdvega.parallel_coordinates(): {0}"
            "".format(list(kwds.keys())))

    # Validate before doing any work; an explicit raise also survives
    # ``python -O``, which strips assert statements.
    if alpha is not None and not 0 <= alpha <= 1:
        raise ValueError(
            "alpha must be between 0 and 1, got {0!r}".format(alpha))

    # Transform the dataframe to be used in Vega-Lite
    if cols is not None:
        # Skip class_column if the caller listed it, otherwise it would be
        # selected twice and produce a duplicated column label.
        wanted = [c for c in cols if c != class_column]
        data = data[wanted + [class_column]]
    original_columns = set(data.columns)
    df = data.reset_index()
    # reset_index() inserts the former index as leading column(s); take the
    # first inserted one as the per-row identifier.
    index = next(c for c in df.columns if c not in original_columns)
    df = df.melt([index, class_column],
                 var_name=var_name,
                 value_name=value_name)

    chart = alt.Chart(df).properties(width=width, height=height)
    chart = chart.mark_line().encode(
        x=alt.X(field=var_name, type=infer_vegalite_type(df[var_name])),
        y=alt.Y(field=value_name, type=infer_vegalite_type(df[value_name])),
        color=alt.Color(field=class_column,
                        type=infer_vegalite_type(df[class_column])),
        # One line per original row.
        detail=alt.Detail(field=index, type=infer_vegalite_type(df[index])))

    # With many classes, fall back to a translucent default.
    if alpha is None and df[class_column].nunique() > 20:
        alpha = 0.3

    if alpha is not None:
        return chart.encode(opacity=alt.value(alpha))
    return chart
示例#3
0
# Two-point helper frame, (0, 0) and (14000, 14000), used only to draw a
# diagonal y = x line.
line = pd.DataFrame({       # fake df so the line can be drawn
    'line_x': [0,14000],
    'line_y': [0,14000],
    })

# Faint (30% opacity) line through the two helper points.
line_plot = alt.Chart(line).mark_line(
    opacity=0.3).encode(  # draw the helper line
        x='line_x', y='line_y')

# Layer the helper line with the usage_2019_vs_2020 chart and render the
# result in Streamlit at the container's width.
st.altair_chart(usage_2019_vs_2020 + line_plot, use_container_width=True)

st.subheader("Bring in 1figr Tiers from 2018")
figr_Tier_hist = alt.Chart(df[filt].reset_index()).mark_bar().encode(
    alt.X('1figr Tier:O'),
    alt.Y('count()'),
    alt.Detail('index:Q'),
    tooltip=['1figr Tier', 'Title Name', 'CPU_2020'],
    color=alt.Color('Decision:N',
                    scale=subscribed_colorscale)  #Nominal data type
).interactive().properties(
    height=400,
    title={
        "text": ["Histogram of 1figr Tiers"],
        "subtitle": [
            "Titles stacked by Tier",
            "NaN and 0 mean Tier was not available for that title"
        ],
        "color":
        "black",
        "subtitleColor":
        "gray"
示例#4
0
def andrews_curves(data,
                   class_column,
                   samples=200,
                   alpha=None,
                   width=450,
                   height=300,
                   **kwds):
    """
    Generates an Andrews curves visualization for visualising clusters of
    multivariate data.

    Andrews curves have the functional form:

    f(t) = x_1/sqrt(2) + x_2 sin(t) + x_3 cos(t) +
           x_4 sin(2t) + x_5 cos(2t) + ...

    Where x coefficients correspond to the values of each dimension and t is
    linearly spaced between -pi and +pi. Each row of data then corresponds to
    a single curve.

    Parameters:
    -----------
    data : DataFrame
        Data to be plotted, preferably normalized to (0.0, 1.0)
    class_column : string
        Name of the column containing class names
    samples : integer
        Number of points to plot in each curve
    alpha: float, optional
        The transparency of the lines, within [0, 1]. When omitted, 0.5 is
        used if class_column has more than 20 distinct values; otherwise no
        opacity encoding is added.
    width : int, optional
        the width of the plot in pixels
    height : int, optional
        the height of the plot in pixels
    **kwds: keywords
        Additional options; unrecognized, they only trigger a warning

    Returns:
    --------
    chart: alt.Chart object

    Raises:
    -------
    ValueError
        If alpha is given and lies outside [0, 1], or if data has no column
        other than class_column.

    """
    if kwds:
        warnings.warn("Unrecognized keywords in pdvega.andrews_curves(): {0}"
                      "".format(list(kwds.keys())))

    # Validate before doing any work; an explicit raise also survives
    # ``python -O``, which strips assert statements.
    if alpha is not None and not 0 <= alpha <= 1:
        raise ValueError(
            "alpha must be between 0 and 1, got {0!r}".format(alpha))

    t = np.linspace(-np.pi, np.pi, samples)
    # One row per dimension, one column per observation.
    vals = data.drop(class_column, axis=1).values.T
    if len(vals) == 0:
        raise ValueError(
            "data must contain at least one column besides class_column")

    # Constant term x_1 / sqrt(2), broadcast over every sample of t.
    curves = np.outer(vals[0] / np.sqrt(2.0), np.ones_like(t))
    # Remaining dimensions alternate sin/cos, with the frequency rising by
    # one after every sin/cos pair: sin(t), cos(t), sin(2t), cos(2t), ...
    for i in range(1, len(vals)):
        ft = ((i + 1) // 2) * t
        harmonic = np.sin(ft) if i % 2 == 1 else np.cos(ft)
        curves += np.outer(vals[i], harmonic)

    # Long-form frame: one record per (curve, t) pair. The curve values are
    # stored under the column name " " (a single space), presumably so the
    # y-axis title renders blank.
    df = pd.DataFrame({
        "t":
        np.tile(t, curves.shape[0]),
        "sample":
        np.repeat(np.arange(curves.shape[0]), curves.shape[1]),
        " ":
        curves.ravel(),
        class_column:
        np.repeat(data[class_column], samples),
    })

    chart = alt.Chart(df).properties(width=width, height=height).mark_line()
    chart = chart.encode(
        x=alt.X(field="t", type="quantitative"),
        y=alt.Y(field=" ", type="quantitative"),
        color=alt.Color(field=class_column,
                        type=infer_vegalite_type(df[class_column])),
        # One line per original row.
        detail=alt.Detail(field='sample', type="quantitative"))

    # With many classes, fall back to a translucent default.
    if alpha is None and df[class_column].nunique() > 20:
        alpha = 0.5

    if alpha is not None:
        return chart.encode(opacity=alt.value(alpha))

    return chart
示例#5
0
                    timeUnit='year',
                    scale=alt.Scale(range=['green', 'yellow', 'blue']))).facet(
                        'primary', columns=2)

# Bare name: a no-op when run as a script. Presumably this code was exported
# from a notebook, where it displays the chart built above.
chart
"""## <b>V2</b>
### Graph with details and tooltips
#### <b>Pros</b>
All the necessary data is present
#### <b>Cons</b>
Too much data, hard to follow and understand
"""

# V2: circles with x = record count and y = mean of `score`, colored by
# `primary`. The year of `date` is supplied both as a detail channel and as
# the tooltip.
chart = base.mark_circle(size=80).encode(y=alt.Y('average(score)'),
                                         x=alt.X('count()'),
                                         detail=alt.Detail('date:T',
                                                           timeUnit='year'),
                                         tooltip=alt.Tooltip('date:T',
                                                             timeUnit='year'),
                                         color=alt.Color('primary'))

# Bare name again; see the note on the first bare `chart` above.
chart
"""## <b>V3</b>
### Interactive graphs - click on upper's graph genre to get data by year in the lower graph (both graphs have tooltips)
#### <b>Pros</b>
Flexibility, not too much data
#### <b>Cons</b>
Takes time to get used to, hard to compare particular years
"""

# Single-item selection keyed on the `primary` field.
selector = alt.selection_single(fields=['primary'])
示例#6
0
 def _alt_detail(self):
     """Return an Altair detail channel for ``self._detailby``, typed nominal."""
     # The ':N' suffix is Altair shorthand for a nominal field.
     shorthand = f'{self._detailby}:N'
     return alt.Detail(shorthand)
示例#7
0
            "High Cost-per-Use rank (least economical) journals show up in darker colors"
        ],
        "color":
        "black",
        "subtitleColor":
        "gray"
    }).add_selection(selection2)
# Render the weighted_vs_cost2 chart in Streamlit at the container's width.
st.altair_chart(weighted_vs_cost2, use_container_width=True)

st.subheader(
    'Look into the authorships, citations, and downloads of each journal')
# Bar chart of record counts over `authorships`, colored by `subscribed`.
# reset_index() turns the filtered frame's index into a column; the 'index'
# detail channel presumably keeps every title as its own bar segment so the
# tooltip can show per-title fields (assumes the index is unnamed, so the new
# column is called 'index' - TODO confirm).
auth_hist = alt.Chart(df[filt].reset_index()).mark_bar(width=10).encode(
    alt.X('authorships:Q',
          title="Authorships (average per year over the next five years)"),
    alt.Y('count()', axis=alt.Axis(grid=False)),
    alt.Detail('index'),
    tooltip=['title', 'authorships', 'subscription_cost', 'subscribed'],
    color=alt.Color('subscribed:N', scale=subscribed_colorscale)
).interactive().properties(
    height=400,
    title={
        "text": ["Authorships Distribution"],
        "subtitle": [
            "What do the range of Authorships look like?",
            "Use this graph to help set the Authorships slider filter and narrow down titles of interest"
        ],
        "color":
        "black",
        "subtitleColor":
        "gray"
    })
示例#8
0
############################
# Line chart of price deviation per weekday, one line per species
############################

# `source` is an alias of df_per_weekday, not a copy, so the column added
# below is visible through it as well.
source = df_per_weekday


# Tag every row as 'other species' first...
df_per_weekday['option_chosen']='other species'

# ...then overwrite the tag with `option` on the rows of the selected species.
df_per_weekday.loc[df_per_weekday['species']==option, 'option_chosen'] = option

# One line (with point markers) per species. Color depends only on the
# two-valued `option_chosen` tag: light blue for 'other species', dark blue
# for `option`. The legend is disabled.
line=alt.Chart(source).mark_line(point=True).encode(
            x='weekday_char',
            # '+%' is a d3-format spec: always show the sign, render as a percentage.
            y=alt.Y('dev_pct_from_avg_price', axis=alt.Axis(format='+%')),
            detail=alt.Detail('species'),
            tooltip='species',
            color=alt.Color('option_chosen', scale=alt.Scale(domain=['other species', option]
                                                        , range = ['#cfebfd','#0C266A']), legend =None)
            ).interactive().properties(
    height=450
)

# layer that accomplishes the highlighting: only the rows of the selected species
source_highlight = df_per_weekday[df_per_weekday["species"] == option]
line_highlight = alt.Chart(source_highlight).mark_line(point=True, size =3).encode(
                    x=alt.X('weekday_char', title = 'Dia de la Semana'),
                    y=alt.Y('dev_pct_from_avg_price', title = 'Desviacion del precio medio'),
                    detail=alt.Detail('species'),
                    tooltip='species',
                    color=alt.Color('option_chosen', scale=alt.Scale(domain=['other species', option]