def andrews_curve(
    data: pd.DataFrame,
    xvar: str = "t",
    yvar: str = "curve_value",
    targetvar: str = "target",
    samplevar: str = "sample",
    w: int = 450,
    h: int = 300,
) -> alt.LayerChart:
    """Draw per-sample line curves with a legend-driven highlight layer.

    Two line layers are built from the same data and encodings: a
    background layer coloured with ``COLORS["light_gray"]`` and a
    foreground layer coloured by ``targetvar``. The foreground layer
    carries a single selection on ``targetvar`` bound to the legend;
    lines outside the selection are drawn with the colour
    ``"transparent"``.

    Parameters
    ----------
    data : pd.DataFrame
        Data handed to ``alt.Chart``.
    xvar : str
        Field encoded on x as quantitative.
    yvar : str
        Field encoded on y as quantitative.
    targetvar : str
        Field used for the nominal colour encoding and the legend selection.
    samplevar : str
        Field used as the nominal detail channel (one line per value).
    w, h : int
        Chart width and height passed to ``properties``.

    Returns
    -------
    alt.LayerChart
        The background layer with the highlight layer on top.
    """
    legend_pick = alt.selection_single(fields=[targetvar], bind="legend")
    lines = alt.Chart(data).properties(width=w, height=h).mark_line()

    # Positional channels shared by both layers; axis titles are suppressed.
    x_channel = alt.X(
        f"{xvar}:Q",
        axis=alt.Axis(title=None),
        scale=alt.Scale(nice=False),
    )
    y_channel = alt.Y(f"{yvar}:Q", axis=alt.Axis(title=None))
    per_sample = alt.Detail(f"{samplevar}:N")

    background_chart = lines.encode(
        x=x_channel,
        y=y_channel,
        detail=per_sample,
        color=alt.value(COLORS["light_gray"]),
    )

    # Foreground layer: same encodings, colour overridden by the selection.
    legend_title = f"{targetvar.title()} (click to highlight)"
    highlight_color = alt.condition(
        legend_pick,
        f"{targetvar}:N",
        alt.value("transparent"),
        legend=alt.Legend(title=legend_title),
    )
    foreground = background_chart.encode(color=highlight_color)
    foreground = foreground.add_selection(legend_pick)

    return background_chart + foreground
def parallel_coordinates(data, class_column, cols=None, alpha=None,
                         width=450, height=300, interactive=True,
                         var_name="variable", value_name="value", **kwds):
    """
    Parallel coordinates plotting.

    Parameters
    ----------
    data: DataFrame
        Wide-form data; its index is reset and used as the per-line
        detail field.
    class_column: str
        Column name containing class names
    cols: list, optional
        A list of column names to use. ``class_column`` is always kept
        and is not duplicated if it is also listed here.
    alpha: float, optional
        The transparency of the lines. When omitted and there are more
        than 20 distinct classes, 0.3 is used.
    interactive : bool, optional
        Accepted for signature compatibility; this function does not
        currently read it.
    width : int, optional
        the width of the plot in pixels
    height : int, optional
        the height of the plot in pixels
    var_name : string, optional
        name of the melted column holding the original column names
        (encoded on the x axis)
    value_name : string, optional
        name of the melted column holding the values
        (encoded on the y axis)

    Returns
    -------
    chart: alt.Chart object
        The altair representation of the plot.

    See Also
    --------
    pandas.plotting.parallel_coordinates : matplotlib version of this routine
    """
    if kwds:
        warnings.warn(
            "Unrecognized keywords in pdvega.parallel_coordinates(): {0}"
            "".format(list(kwds.keys()))
        )

    # Transform the dataframe to be used in Vega-Lite
    if cols is not None:
        # Drop class_column from the requested list so it is selected
        # exactly once; a duplicated label would break the melt below.
        selected = [col for col in cols if col != class_column]
        data = data[selected + [class_column]]
    cols = data.columns
    df = data.reset_index()
    # Whatever reset_index added that was not a data column is the index field.
    index = (set(df.columns) - set(cols)).pop()
    assert index in df.columns
    df = df.melt([index, class_column],
                 var_name=var_name, value_name=value_name)

    chart = alt.Chart(df).properties(width=width, height=height)
    chart = chart.mark_line().encode(
        x=alt.X(field=var_name,
                type=infer_vegalite_type(df[var_name])),
        y=alt.Y(field=value_name,
                type=infer_vegalite_type(df[value_name])),
        color=alt.Color(field=class_column,
                        type=infer_vegalite_type(df[class_column])),
        detail=alt.Detail(field=index,
                          type=infer_vegalite_type(df[index])),
    )

    # With many classes, default to semi-transparent lines.
    if alpha is None and df[class_column].nunique() > 20:
        alpha = 0.3

    if alpha is not None:
        assert 0 <= alpha <= 1
        return chart.encode(opacity=alt.value(alpha))
    return chart
line = pd.DataFrame({ #create fake df so line can get drawn 'line_x': [0,14000], 'line_y': [0,14000], }) line_plot = alt.Chart(line).mark_line( opacity=0.3).encode( #draw fake line plot x='line_x', y='line_y') st.altair_chart(usage_2019_vs_2020 + line_plot, use_container_width=True) st.subheader("Bring in 1figr Tiers from 2018") figr_Tier_hist = alt.Chart(df[filt].reset_index()).mark_bar().encode( alt.X('1figr Tier:O'), alt.Y('count()'), alt.Detail('index:Q'), tooltip=['1figr Tier', 'Title Name', 'CPU_2020'], color=alt.Color('Decision:N', scale=subscribed_colorscale) #Nominal data type ).interactive().properties( height=400, title={ "text": ["Histogram of 1figr Tiers"], "subtitle": [ "Titles stacked by Tier", "NaN and 0 mean Tier was not available for that title" ], "color": "black", "subtitleColor": "gray"
def andrews_curves(data, class_column, samples=200, alpha=None,
                   width=450, height=300, **kwds):
    """
    Generates an Andrews curves visualization for visualising clusters of
    multivariate data.

    Andrews curves have the functional form:

    f(t) = x_1/sqrt(2) + x_2 sin(t) + x_3 cos(t) +
           x_4 sin(2t) + x_5 cos(2t) + ...

    Where x coefficients correspond to the values of each dimension and t is
    linearly spaced between -pi and +pi. Each row of data then corresponds to
    a single curve.

    Parameters:
    -----------
    data : DataFrame
        Data to be plotted, preferably normalized to (0.0, 1.0)
    class_column : string
        Name of the column containing class names
    samples : integer
        Number of points to plot in each curve
    alpha: float, optional
        The transparency of the lines. When omitted and there are more
        than 20 distinct classes, 0.5 is used.
    width : int, optional
        the width of the plot in pixels
    height : int, optional
        the height of the plot in pixels
    **kwds: keywords
        Additional options; any that are passed only trigger a warning.

    Returns:
    --------
    chart: alt.Chart object
    """
    if kwds:
        warnings.warn("Unrecognized keywords in pdvega.andrews_curves(): {0}"
                      "".format(list(kwds.keys())))

    t = np.linspace(-np.pi, np.pi, samples)
    # One row per dimension, one column per data row.
    vals = data.drop(class_column, axis=1).values.T

    # Constant term x_1 / sqrt(2), broadcast across every t.
    curves = np.outer(vals[0] / np.sqrt(2), np.ones_like(t))
    # Remaining dimensions alternate sin/cos with frequency 1, 1, 2, 2, ...
    for i, coeffs in enumerate(vals[1:], start=1):
        ft = ((i + 1) // 2) * t
        if i % 2 == 1:
            curves += np.outer(coeffs, np.sin(ft))
        else:
            curves += np.outer(coeffs, np.cos(ft))

    n_curves, n_points = curves.shape
    df = pd.DataFrame({
        "t": np.tile(t, n_curves),
        "sample": np.repeat(np.arange(n_curves), n_points),
        # The blank column name is used as the y field below.
        " ": curves.ravel(),
        # Repeat the underlying array rather than the Series so the
        # long-form frame gets a fresh default index instead of
        # inheriting data's index repeated `samples` times.
        class_column: np.repeat(data[class_column].values, samples),
    })

    chart = alt.Chart(df).properties(width=width, height=height).mark_line()
    chart = chart.encode(
        x=alt.X(field="t", type="quantitative"),
        y=alt.Y(field=" ", type="quantitative"),
        color=alt.Color(field=class_column,
                        type=infer_vegalite_type(df[class_column])),
        detail=alt.Detail(field='sample', type="quantitative"),
    )

    # With many classes, default to semi-transparent lines.
    if alpha is None and df[class_column].nunique() > 20:
        alpha = 0.5

    if alpha is not None:
        assert 0 <= alpha <= 1
        return chart.encode(opacity=alt.value(alpha))
    return chart
timeUnit='year', scale=alt.Scale(range=['green', 'yellow', 'blue']))).facet( 'primary', columns=2) chart """## <b>V2</b> ### Graph with details and tooltips #### <b>Pros</b> All the necessary data is present #### <b>Cons</b> Too much data, hard to follow and understand """ chart = base.mark_circle(size=80).encode(y=alt.Y('average(score)'), x=alt.X('count()'), detail=alt.Detail('date:T', timeUnit='year'), tooltip=alt.Tooltip('date:T', timeUnit='year'), color=alt.Color('primary')) chart """## <b>V3</b> ### Interactive graphs - click on upper's graph genre to get data by year in the lower graph (both graphs have tooltips) #### <b>Pros</b> Flexibility, not too much data #### <b>Cons</b> Takes time to get used to, hard to compare particular years """ selector = alt.selection_single(fields=['primary'])
def _alt_detail(self):
    """Return an Altair detail channel for ``self._detailby``, typed nominal."""
    shorthand = f'{self._detailby}:N'
    return alt.Detail(shorthand)
"High Cost-per-Use rank (least economical) journals show up in darker colors" ], "color": "black", "subtitleColor": "gray" }).add_selection(selection2) st.altair_chart(weighted_vs_cost2, use_container_width=True) st.subheader( 'Look into the authorships, citations, and downloads of each journal') auth_hist = alt.Chart(df[filt].reset_index()).mark_bar(width=10).encode( alt.X('authorships:Q', title="Authorships (average per year over the next five years)"), alt.Y('count()', axis=alt.Axis(grid=False)), alt.Detail('index'), tooltip=['title', 'authorships', 'subscription_cost', 'subscribed'], color=alt.Color('subscribed:N', scale=subscribed_colorscale) ).interactive().properties( height=400, title={ "text": ["Authorships Distribution"], "subtitle": [ "What do the range of Authorships look like?", "Use this graph to help set the Authorships slider filter and narrow down titles of interest" ], "color": "black", "subtitleColor": "gray" })
############################ #line weekday chart ############################ source = df_per_weekday df_per_weekday['option_chosen']='other species' df_per_weekday.loc[df_per_weekday['species']==option, 'option_chosen'] = option line=alt.Chart(source).mark_line(point=True).encode( x='weekday_char', y=alt.Y('dev_pct_from_avg_price', axis=alt.Axis(format='+%')), detail=alt.Detail('species'), tooltip='species', color=alt.Color('option_chosen', scale=alt.Scale(domain=['other species', option] , range = ['#cfebfd','#0C266A']), legend =None) ).interactive().properties( height=450 ) # layer that accomplishes the highlighting source_highlight = df_per_weekday[df_per_weekday["species"] == option] line_highlight = alt.Chart(source_highlight).mark_line(point=True, size =3).encode( x=alt.X('weekday_char', title = 'Dia de la Semana'), y=alt.Y('dev_pct_from_avg_price', title = 'Desviacion del precio medio'), detail=alt.Detail('species'), tooltip='species', color=alt.Color('option_chosen', scale=alt.Scale(domain=['other species', option]