def plot_planning(planning, need, timeline):
    """Plot the optimized schedule above the requirement and publish it.

    Two bar charts faceted by date are stacked vertically, saved to
    ``planning_time_model2.html`` and published as a Datapane report.

    Args:
        planning: DataFrame of results. Only rows whose index label contains
            "Total hours" are plotted; each label is parsed as
            ``<prefix>[<date>,<line>]`` and the hours are read from the
            "Solution" column.
        need: DataFrame whose column ``0`` holds the required hours; its
            index supplies the dates.
        timeline: Sized collection of planning periods. Only
            ``len(timeline)`` is used, to derive the width of a date facet.
    """
    # Share the available width between the date facets.
    facet_width = 550 / len(timeline) - 22

    # Plot graph - Requirement
    requirement = need.copy()
    requirement = requirement.rename(columns={0: "Hours"})
    requirement["Date"] = requirement.index
    bars_need = (
        alt.Chart(requirement)
        .mark_bar()
        .encode(
            y="Hours:Q",
            column=alt.Column("Date:N"),
            tooltip=["Date", "Hours"],
        )
        .interactive()
        .properties(width=facet_width, height=75, title="Requirement")
    )

    # Plot graph - Optimized planning
    schedule = planning.filter(like="Total hours", axis=0).copy()
    schedule["Date"] = list(schedule.index.values)
    schedule = schedule.rename(columns={"Solution": "Hours"}).reset_index()
    # Index labels look like "<prefix>[<date>,<line>]": split on the comma,
    # then drop everything up to "[" and everything from "]" onwards.
    schedule[["Date", "Line"]] = schedule["Date"].str.split(",", expand=True)
    schedule["Date"] = schedule["Date"].str.split("[").str[1]
    schedule["Line"] = schedule["Line"].str.split("]").str[0]
    schedule["Min capacity"] = 7
    schedule["Max capacity"] = 12
    schedule = schedule.round({"Hours": 1})
    # The load percentage is expressed relative to 8 hours.
    schedule["Load%"] = pd.Series(
        [f"{hours / 8 * 100:.0f}%" for hours in schedule["Hours"]],
        index=schedule.index,
    )
    bars = (
        alt.Chart(schedule)
        .mark_bar()
        .encode(
            x="Line:N",
            y="Hours:Q",
            column=alt.Column("Date:N"),
            color="Line:N",
            tooltip=["Date", "Line", "Hours", "Load%"],
        )
        .interactive()
        .properties(
            width=facet_width,
            height=150,
            title="Optimized Production Schedule",
        )
    )

    chart = alt.vconcat(bars, bars_need)
    chart.save("planning_time_model2.html")

    report = dp.Report(
        dp.Plot(chart, caption="Production schedule model 2 - Time"))
    report.publish(
        name="Optimized production schedule model 2 - Time",
        description="Optimized production schedule model 2 - Time",
        open=True,
        # Was misspelled "visibily", so the requested visibility was never
        # passed under the keyword publish() looks for.
        visibility="PUBLIC",
    )
def plot_classes(counters, state):
    """Build a faceted bar chart of class counts plus matching text labels.

    Args:
        counters: Data providing the "class", "n_count", "sample" and
            "state" fields used by the encodings.
        state: Not used by this function.

    Returns:
        A ``(bars, text)`` tuple. ``text`` is derived from ``bars`` and shows
        ``n_count`` next to each bar.
    """
    base = alt.Chart(counters).mark_bar()
    bars = base.encode(
        y="class",
        x="n_count",
        row=alt.Row("sample:N"),
        column=alt.Column("state:N", sort="descending"),
    )

    # dx shifts the label to the right so it does not sit on top of the bar.
    labels = bars.mark_text(align="left", baseline="middle", dx=3)
    text = labels.encode(
        text="n_count",
        row=alt.Row("sample:N"),
        column=alt.Column("state:N"),
    )
    return bars, text
def main():
    """Chart species populations per year and save the chart spec as JSON.

    Reads ``Endangered_Species.csv`` and writes
    ``Endangered_Species_Population.json``.
    """
    species = pd.read_csv('Endangered_Species.csv')
    channels = (
        alt.X('Species:N'),
        alt.Y('Population:Q'),
        alt.Column('Year'),
        alt.Color('Species:N'),
    )
    bars = alt.Chart(species).mark_bar().encode(*channels)
    chart = bars.properties(title="Endangered Species Population")
    chart.save('Endangered_Species_Population.json')
def summarize_repeat_classes(prefix, alignment_methods, output_class,
                             map_methods):
    """Tabulate per-repeat-class counts and fractions for each method.

    For every alignment method, ``get_variant_sets_and_fractions`` is queried
    for the five repeat classes; the results are written to ``output_class``
    as a tab-separated table and, when the module-level ``PLOT`` flag is
    truthy, a faceted bar chart is constructed.

    Args:
        prefix: Forwarded to ``get_variant_sets_and_fractions``.
        alignment_methods: Iterable of method keys; also used as the x sort
            order of the chart.
        output_class: Destination path of the TSV table.
        map_methods: Mapping from method key to the label stored in the
            "Alignment Method" column.
    """
    repeat_classes = ['SINE', 'LINE', 'LTR', 'Simple_repeat', 'Satellite']
    # Labels written to the table: same classes, with the underscore in
    # "Simple_repeat" replaced by a space.
    class_labels = ['SINE', 'LINE', 'LTR', 'Simple repeat', 'Satellite']

    numbers, fractions, rclasses, methods = [], [], [], []
    for method in alignment_methods:
        repeat_sets, fractions_of_each_class = get_variant_sets_and_fractions(
            method=method, repeats=repeat_classes, prefix=prefix)
        class_fractions = list(fractions_of_each_class.values())
        fractions.extend(class_fractions)
        numbers.extend(len(variants) for variants in repeat_sets.values())
        rclasses.extend(class_labels)
        methods.extend([map_methods[method]] * len(class_fractions))

    df_repeat_classes = pd.DataFrame({
        'Number': numbers,
        'Fraction': fractions,
        'Repeat Class': rclasses,
        'Alignment Method': methods,
    })
    df_repeat_classes.to_csv(output_class, sep='\t', index=None)

    if not PLOT:
        return

    # Y: number of biased HETs.
    # NOTE(review): the chart is built but neither returned, saved nor
    # displayed here - confirm whether that is intended.
    facet_order = ['SINE', 'LINE', 'LTR', 'Satellite', 'Simple repeat']
    alt.Chart(df_repeat_classes).mark_bar().encode(
        x=alt.X('Alignment Method:N', sort=alignment_methods, title=None),
        y='Number',
        color='Alignment Method:N',
        column=alt.Column('Repeat Class:N', sort=facet_order),
    )
def scatter_matrix(df):
    """Scatter matrix plot.

    Every column of ``df`` is plotted against every column, one facet per
    pair, with a red degree-4 polynomial regression line layered on top of
    the points.

    Returns:
        The faceted chart, with independent x and y scales per facet.
    """
    names = list(df.columns)

    # Fold the frame twice so each row carries an (x, y) column pair.
    folded = (
        alt.Chart(df)
        .transform_fold(names, as_=['key_x', 'value_x'])
        .transform_fold(list(df.columns), as_=['key_y', 'value_y'])
    )
    base = folded.encode(
        x=alt.X('value_y:Q', title=None, scale=alt.Scale(zero=False)),
        y=alt.Y('value_x:Q', title=None, scale=alt.Scale(zero=False)),
    ).properties(width=150, height=150)

    points = base.mark_circle()
    trend = base.transform_regression(
        'value_y', 'value_x', method='poly', order=4,
    ).mark_line(color='red')

    grid = alt.layer(points, trend).facet(
        column=alt.Column('key_x:N', sort=list(df.columns), title=None),
        row=alt.Row('key_y:N', sort=names[::-1], title=None),
    )
    plot = grid.resolve_scale(x='independent', y='independent')
    return plot.configure_header(labelFontStyle='bold')
def plot_leaderboard_twist(data):
    """Plot the per-day twistiness ranking as a bar chart and as a heatmap.

    Twistiness is the ratio of summed ``two_stars`` to summed ``one_star``
    for each (year, day). The charts are written to ``out/twist.html`` and
    ``out/twist.heat.html``.
    """
    print('leaderboard_twist')

    per_day = data.groupby(level=('year', 'day')).sum()
    ratio = per_day['two_stars'] / per_day['one_star']
    twist = pd.DataFrame({'twist': ratio}).reset_index()

    # Bar chart: one facet per contest day, one bar per year.
    bars = alt.Chart(twist).encode(
        x='year:O',
        y=alt.Y('twist:Q', title='Twistiness'),
        color='year:N',
        column=alt.Column('day:O', title='Day of contest'),
    ).mark_bar()
    bars.configure_scale(bandPaddingInner=0.2).save('out/twist.html')

    # Heatmap: day by year, coloured on a log scale.
    color_scale = alt.Scale(scheme='yelloworangered', type='log')
    heat = alt.Chart(twist).encode(
        x=alt.X('day:O', title='Day of contest'),
        y='year:O',
        color=alt.Color('twist:Q', title='Twistiness (log)',
                        scale=color_scale),
    ).mark_bar()
    heat.configure_scale(bandPaddingInner=0.1).save('out/twist.heat.html')
def plot_leaderboard_time(data, series, file_name):
    """Plots the per-day time distribution for the leaderboard rankings.

    Two files are written: ``<file_name>.html`` (rule from rank 1 to rank 100
    with a bar from rank 25 to rank 75, faceted by day) and
    ``<file_name>.byday.html`` (rank 25..75 rules on one continuous day
    axis, with each year nudged sideways so the years do not overlap).

    Args:
      data: Leaderboard data frame. Its third index level is sliced at ranks
        1, 25, 50, 75 and 100.
      series: Which series to use; 'one_star' or 'two_stars'.
      file_name: Output file base name.
    """
    print(f'leaderboard_time:{series}')

    rank_slice = (slice(None), slice(None), [1, 25, 50, 75, 100])
    quantiles = data.loc[rank_slice].unstack()
    # Values are divided by 60 (the axis title labels them as minutes) and
    # the rank columns are renamed to r1, r25, ...
    points = (quantiles[series] / 60).rename(
        columns=lambda r: f'r{r}').reset_index()

    y_title = f'Time to get {series.replace("_", " ")} (ranks 25..75, min)'
    y_scale = alt.Scale(type='log')

    base = alt.Chart(points).encode(x='year:O', color='year:N')
    rule = base.mark_rule().encode(
        alt.Y('r1:Q', title=y_title, scale=y_scale), alt.Y2('r100:Q'))
    bar = base.mark_bar().encode(alt.Y('r25:Q'), alt.Y2('r75:Q'))
    faceted = (rule + bar).facet(
        column=alt.Column('day:O', title='Day of contest'))
    faceted.configure_scale(bandPaddingInner=0.4).save(file_name + '.html')

    # Offset each year around its day so that the years sit side by side.
    # With a single year the span is zero; dividing by it would turn every
    # x position into NaN, so no offset is applied in that case.
    year_span = points.year.max() - points.year.min()
    if year_span:
        year_offset = (points.year - points.year.mean()) / (1.5 * year_span)
    else:
        year_offset = 0
    points['yday'] = points.day + year_offset

    by_day = (
        alt.Chart(points)
        .encode(x=alt.X('yday:Q', title='day'), color='year:N')
        .mark_rule()
        .encode(alt.Y('r25:Q', title=y_title, scale=y_scale),
                alt.Y2('r75:Q'))
        .properties(width=1000, height=600)
    )
    by_day.save(file_name + '.byday.html')
def plot_forces(variables: pd.DataFrame) -> None:
    """Render KDE facets of the force variables in Streamlit.

    "Height" and "Weight" are dropped, the remaining columns are melted and
    each column name is split on whitespace into a ``type`` and a
    ``variable`` part. Rows of type "Imb" are drawn in a separate facet
    group placed to the right of the others.
    """
    forces = variables.drop(["Height", "Weight"], axis=1).melt()
    forces[["type", "variable"]] = forces["variable"].str.split(expand=True)
    tables.describe_table(forces,
                          groupby=["variable", "type"],
                          description="variables")

    row_kwargs = dict(shorthand="variable", title=None, sort=forces_order)
    column = alt.Column("type", title=None)

    def faceted_kde(type_filter, row_header):
        """Facet the KDE plot over the rows matching ``type_filter``."""
        grid = plot_kde().facet(
            data=forces.query(type_filter),
            row=alt.Row(header=row_header, **row_kwargs),
            column=column,
        )
        return grid.resolve_scale(y="independent").properties(bounds="flush")

    forces_plot = faceted_kde(
        "type != 'Imb'", alt.Header(labelAngle=0, labelAlign="left"))
    # Row labels of the second group are given font size 0.
    imb_plot = faceted_kde("type == 'Imb'", alt.Header(labelFontSize=0))

    plots = (forces_plot | imb_plot).configure_facet(spacing=5)
    st.altair_chart(plots)
def player_roll_chart(self):
    """Build the "Roll Count by Player" bar chart.

    Reads ``self.player_count`` (rounded to two decimals), colours the bars
    by player using ``self.player_names`` / ``self.player_colors`` and
    facets them by roll. The facet width is ``self.screen_width / 45``.

    Returns:
        The configured Altair chart.
    """
    # Make Altair bar chart
    plt_df = self.player_count.round(2)

    hidden_axis = alt.Axis(title=None, labels=False, ticks=False)
    player_scale = alt.Scale(domain=self.player_names,
                             range=self.player_colors)
    roll_header = alt.Header(title=None, labelOrient="bottom",
                             labelFontSize=22)

    bars = alt.Chart(plt_df).mark_bar(strokeWidth=0.5, stroke="black")
    roll_chart = bars.encode(
        x=alt.X("Player:O", axis=hidden_axis),
        y='Count:Q',
        color=alt.Color('Player:N', scale=player_scale,
                        legend=alt.Legend()),
        column=alt.Column("Roll:N", header=roll_header),
        tooltip=list(self.player_count.columns),
    )

    roll_chart = roll_chart.configure_view(strokeWidth=0)
    roll_chart = roll_chart.configure_title(
        fontSize=32,
        limit=800,
        dx=45,
        dy=-50,
        font="Arial",
        align="center",
        anchor="middle",
    )
    roll_chart = roll_chart.configure_legend(
        strokeColor="black",
        padding=10,
        orient="bottom",
        cornerRadius=10,
        direction="horizontal",
        labelFontSize=10,
    )
    roll_chart = roll_chart.properties(
        title="Roll Count by Player",
        width=self.screen_width / 45,
    )
    roll_chart = roll_chart.configure_axis(
        grid=False, labelFontSize=14, titleFontSize=16)
    return roll_chart
def dur_dist_plot(dur_dist, to_json_for_lab=None):
    """Draw one mirrored density area of ``duration`` per cluster.

    Args:
        dur_dist: Data with "duration" and "cluster" fields.
        to_json_for_lab: Optional data transformer. When given it is
            registered under the name "json" and enabled before the chart is
            built.

    Returns:
        The chart, faceted by cluster with no spacing between facets.
    """
    if to_json_for_lab is not None:
        alt.data_transformers.register("json", to_json_for_lab)
        alt.data_transformers.enable("json")

    densities = alt.Chart(dur_dist).transform_density(
        "duration",
        as_=["duration", "density"],
        extent=[0, 70],
        groupby=["cluster"],
    )

    # Centre-stacking the density makes each area symmetric about its axis.
    density_x = alt.X(
        "density:Q",
        stack="center",
        impute=None,
        title=None,
        axis=alt.Axis(labels=False, values=[0], grid=False, ticks=True),
    )
    cluster_header = alt.Header(
        titleOrient="bottom",
        labelOrient="bottom",
        labelPadding=0,
    )

    violins = densities.mark_area(orient="horizontal").encode(
        y="duration:Q",
        color="cluster:N",
        x=density_x,
        column=alt.Column("cluster:N", header=cluster_header),
    )
    violins = violins.properties(width=100)
    return violins.configure_facet(spacing=0).configure_view(stroke=None)
def punchcode():
    """Build the workshop punch-card chart and return it as a JSON string.

    Works on a copy of the module-level ``df``. Every workshop category other
    than 'Corporate' is relabelled 'Public', rows named 'Capstone' are
    excluded, and circle size encodes the summed ``workshop_hours *
    class_size`` per month and workshop name.
    """
    def _corporate_or_public(category):
        if category == 'Corporate':
            return 'Corporate'
        return 'Public'

    muted = '#bbc6cbe6'

    dat = df.copy()
    dat['mnth_yr'] = dat['workshop_start'].dt.to_period('M').astype(str)
    dat['workshop_category'] = dat['workshop_category'].apply(
        _corporate_or_public)
    dat['contrib'] = dat['workshop_hours'] * dat['class_size']

    category_header = alt.Header(
        titleColor='#bbc6cbe6',
        labelColor='#bbc6cbe6',
        labelAngle=30,
        titleFontSize=40,
        titleAngle=30,
    )
    without_capstone = dat[dat.name != 'Capstone']
    circles = alt.Chart(without_capstone).mark_circle(color=muted).encode(
        x=alt.X('mnth_yr:T', axis=alt.Axis(title='')),
        y='name:O',
        size=alt.Size('sum(contrib):Q', legend=None),
        column=alt.Column(
            'workshop_category:O',
            title=None,
            sort="descending",
            header=category_header,
        ),
    )
    chart = circles.properties(width=300, height=320).configure_axis(
        labelColor='#bbc6cbe6', titleColor='#bbc6cbe6', grid=False)
    return chart.to_json()
def cbo_bar_chart(cbo_data, var, title, bar_width=30, width=600, height=250):
    """
    Build a bar chart comparing the current and new CBO projections

    Parameters
    ----------
    cbo_data: data containing both current and new CBO projections
        concatenated together; must provide "index" and "Projections"
    var: Y-axis variable
    title: title of the chart
    bar_width: width of the bars in the plot
    width: total width of the chart, shared by all facets
    height: height of the chart
    """
    # One facet per distinct "index" value; split the total width evenly.
    n_facets = len(cbo_data["index"].value_counts())
    _width = width / n_facets

    hidden_x_axis = alt.Axis(title=None, labels=False, ticks=False,
                             labelFontSize=15)
    y_axis = alt.Axis(labelFontSize=10, titleFontSize=15)
    facet_header = alt.Header(title=None, labelOrient="bottom")

    bars = alt.Chart(cbo_data, title=title).mark_bar(width=bar_width).encode(
        x=alt.X("Projections", axis=hidden_x_axis),
        y=alt.Y(var, axis=y_axis),
        color=alt.Color("Projections"),
        column=alt.Column("index", header=facet_header),
    )
    chart = bars.properties(height=height, width=_width)
    chart = chart.configure_view(stroke="transparent")
    chart = chart.configure_facet(spacing=0)
    return chart.configure_title(fontSize=20)
def price_subplot(df,
                  color='Category',
                  color_sort_order=['Clothes', 'Accessory'],
                  color_scale='tableau10',
                  price_scale=alt.Scale()):
    """Build a jittered price strip plot per year with a median rule.

    Args:
        df: Data with "Price", "Price-2019", "Product" and "Year" fields plus
            the field named by ``color``.
        color: Field used (as nominal) for the point colour.
        color_sort_order: Sort order for the colour encoding; skipped when
            falsy.
        color_scale: Name of the colour scheme.
        price_scale: Scale applied to the price axis.

    Returns:
        A chart faceted by year: a red rule at the median of "Price-2019"
        layered under the jittered points.
    """
    color_options = dict(scale=alt.Scale(scheme=color_scale))
    if color_sort_order:
        color_options.update(sort=color_sort_order)

    chart = alt.Chart(df, width=50)

    jitter_x = alt.X(
        'jitter:Q',
        title=None,
        axis=alt.Axis(values=[0], ticks=True, grid=False, labels=False),
        scale=alt.Scale(),
    )
    points = chart.mark_circle(size=12, opacity=0.7).encode(
        x=jitter_x,
        y=alt.Y("Price:Q", axis=alt.Axis(), scale=price_scale),
        color=alt.Color(f'{color}:N', **color_options),
        tooltip=['Product', 'Price', 'Price-2019'],
    )
    # Generate Gaussian jitter with a Box-Muller transform
    points = points.transform_calculate(
        jitter='sqrt(-2*log(random()))*cos(2*PI*random())')

    median_rule = chart.mark_rule(color='red', size=2).encode(
        y=alt.Y("median(Price-2019):Q"))

    year_header = alt.Header(
        labelAngle=-90,
        titleOrient='top',
        labelOrient='bottom',
        labelAlign='right',
        labelPadding=3,
    )
    layered = median_rule + points
    return layered.facet(column=alt.Column('Year:O', header=year_header))
def _build_base_change_chart(data):
    """Create the base change chart.

    ``data`` is loaded into a frame with the columns ``ref``, ``alt`` and
    ``count``. When the frame is empty a placeholder chart is returned;
    otherwise labelled bars are faceted by reference base.
    """
    width = 100
    height = 200
    placeholder_width = (4 * width) + 80  # 4 charts, plus constant spacing
    title = 'Biallelic base changes from reference'

    base_change_data = pd.DataFrame(data, columns=['ref', 'alt', 'count'])
    placeholder = _placeholder_for_empty_chart(
        'No biallelic SNPs',
        width=placeholder_width,
        height=height,
        title=title)
    if base_change_data.empty:
        return placeholder

    alt_colors = alt.Color(
        'alt',
        legend=None,
        sort=BASES,
        scale=alt.Scale(scheme='category20', domain=BASES))
    bars = alt.Chart(base_change_data).mark_bar().encode(
        x=alt.X('alt', title='to alt'),
        y=alt.Y('count', title='Count', axis=alt.Axis(format='s')),
        color=alt_colors,
        tooltip=alt.Tooltip('count', format='.4s'))
    # Print the alt base just above each bar.
    labels = bars.mark_text(dy=-5, fontWeight='bold').encode(text='alt')

    sized = (bars + labels).properties(width=width, height=height)
    return sized.facet(column=alt.Column('ref', title=title, sort=BASES))
def view_annotation_summary(df):
    """Chart annotation durations per label.

    Args:
        df: Frame with "DURATION_IN_SECONDS" and "LABEL_NAME" columns, and
            optionally a "pid" or "annotator" column.

    Returns:
        * with a "pid" column: one bar chart per pid, stacked vertically;
        * otherwise, with an "annotator" column: bars per annotator,
          faceted by label;
        * otherwise: a single bar chart of duration per label. (Previously
          this case failed because no chart was ever assigned.)
    """
    def _duration_x():
        return alt.X('DURATION_IN_SECONDS:Q',
                     axis=alt.Axis(title='Duration (seconds)'))

    def _label_bars(frame, **chart_kwargs):
        """Bar chart of duration per annotation label for ``frame``."""
        base = alt.Chart(frame, width=1200, height=800, **chart_kwargs)
        return base.mark_bar().encode(
            x=_duration_x(),
            y=alt.Y('LABEL_NAME:N', axis=alt.Axis(title='Annotations')))

    if 'pid' in df.columns:
        charts = [
            _label_bars(df[df['pid'] == pid], title=pid)
            for pid in df['pid'].unique()
        ]
        return alt.vconcat(*charts).configure_axis(labelFontSize=14)

    if 'annotator' in df.columns:
        return alt.Chart(df, width=1200, height=800).mark_bar().encode(
            x=_duration_x(),
            # Facet channels are titled through `header`; they do not accept
            # an `axis` property.
            column=alt.Column('LABEL_NAME:N',
                              header=alt.Header(title='Annotations')),
            y=alt.Y('annotator:N', axis=alt.Axis(title='Annotator')),
            color='annotator:N')

    return _label_bars(df)
def bv_violinPlot(data, engine, xlabel, ylabel1, ylabel2):
    """Draw violin plots of the ``plotY`` and ``plotX1`` columns.

    The input frame is not modified; the two columns are renamed to
    ``ylabel1`` / ``ylabel2`` on a copy and every other column is dropped.

    Args:
        data: Frame containing ``plotY`` and ``plotX1``.
        engine: 'Static' for a seaborn/Matplotlib figure wrapped in a Panel
            pane, 'Interactive' for an Altair chart.
        xlabel: Not used by this function.
        ylabel1: Display name for ``plotY``.
        ylabel2: Display name for ``plotX1``.

    Returns:
        A ``pn.pane.Matplotlib`` pane, an Altair chart, or ``None`` when
        ``engine`` is neither of the two supported values.
    """
    data = data.copy()
    data.rename(columns={'plotY': ylabel1, 'plotX1': ylabel2}, inplace=True)
    data = data[[ylabel1, ylabel2]].copy()

    if engine == 'Static':
        plt.rcParams['figure.figsize'] = (9, 6)
        axes = sns.violinplot(x='variable', y='value', data=data.melt())
        # The visibility flag is passed positionally: its keyword used to be
        # `b` and was later renamed, so the positional form is the one that
        # works across Matplotlib versions.
        axes.grid(True, which='major', color='k', linewidth=0.25)
        axes.grid(True, which='minor', color='k', linewidth=0.125)
        plt.close()
        return pn.pane.Matplotlib(axes.figure, tight=True)

    if engine == 'Interactive':
        p = alt.Chart(data.dropna().melt())
        p = p.transform_density(
            'value', as_=['value', 'density'], groupby=['variable'])
        # Centre-stacking the density makes each area symmetric.
        p = p.mark_area(orient='horizontal').encode(
            y=alt.Y('value:Q', axis=alt.Axis(format='~s')),
            color='variable:N',
            x=alt.X(
                'density:Q',
                stack='center',
                impute=None,
                title=None,
                axis=alt.Axis(labels=False, values=[0], grid=False,
                              ticks=True)),
            column=alt.Column(
                'variable:N',
                header=alt.Header(titleOrient='bottom',
                                  labelOrient='bottom',
                                  labelPadding=0)))
        p = p.properties(width=200, height=280)
        p = p.configure_facet(spacing=0)
        p = p.configure_view(stroke=None)
        return p

    return None
def squares(proba, actual, predicted):
    """Build a per-class histogram of the scores given to the true class.

    Args:
        proba: Per-sample class scores; column labels are compared against
            the values in ``actual``.
        actual: True label of each sample.
        predicted: Predicted label of each sample.

    Returns:
        An Altair bar chart faceted by actual class. Scores are binned in
        steps of 0.1 and bars are coloured by predicted label.
    """
    df = pd.DataFrame(proba, copy=True)
    metadata = pd.DataFrame({'predicted': predicted, 'actual': actual})

    # Keep only the score each sample received for its true class. A single
    # .loc assignment is used because the chained form df[col][mask] = ...
    # is not guaranteed to write through to df.
    for col in df.columns:
        df.loc[metadata['actual'] != col, col] = None

    df['predicted'] = predicted
    df['actual'] = actual
    df.columns = df.columns.astype(str)
    df = df.melt(id_vars=['predicted', 'actual'])

    bins = [b / 10 for b in range(11)]
    df['bins'] = pd.cut(df['value'], bins=bins, include_lowest=True)

    group_keys = ['bins', 'variable', 'actual', 'predicted']
    b = pd.DataFrame({
        'count': df.groupby(group_keys)['bins'].size()
    }).reset_index()
    # Interval labels are turned into plain strings for the chart.
    b['bins'] = b['bins'].astype(str)

    bars = alt.Chart(b, title='').mark_bar().encode(
        x=alt.X('count', axis=alt.Axis(ticks=False, labels=False, title='')),
        y=alt.Y('bins:N',
                title='Prediction Score',
                sort=alt.EncodingSortField(field='bins',
                                           op='sum',
                                           order='descending')),
        column=alt.Column('actual', title=''),
        color='predicted:N',
        tooltip=['count', 'predicted'])
    chart = bars.properties(width=100).configure_axis(
        grid=False).configure_view(strokeOpacity=0)
    return chart
def tibble(actual, predicted, ncol=10):
    """Draw one dot per sample, grouped by actual class.

    Within each class the predictions are sorted and laid out left to right,
    top to bottom, in a grid ``ncol`` dots wide.

    Args:
        actual: Array of true labels.
        predicted: Array of predicted labels; it is indexed with a boolean
            mask built from ``actual``.
        ncol: Number of dots per grid row.

    Returns:
        An Altair chart faceted by class and coloured by predicted label.

    Raises:
        ValueError: from ``pd.concat`` when ``actual`` has no classes.
    """
    frames = []
    max_rows = 0
    for class_name in np.unique(actual):
        class_predictions = np.sort(predicted[actual == class_name])
        positions = np.arange(len(class_predictions))
        frames.append(pd.DataFrame({
            'Predicted Label': class_predictions,
            # 'row' holds the position inside a grid row (x channel) and
            # 'column' the grid row number (y channel); the field names are
            # kept as they were.
            'row': positions % ncol,
            'column': positions // ncol,
            'Class Index': class_name,
        }))
        # Rows occupied by this class, counting a trailing partial row.
        n_rows = (len(class_predictions) + ncol - 1) // ncol
        max_rows = max(max_rows, n_rows)

    df = pd.concat(frames)

    hidden_axis = dict(ticks=False, labels=False, title='')
    dots = alt.Chart(df).mark_circle().encode(
        x=alt.X('row:O', axis=alt.Axis(**hidden_axis)),
        y=alt.Y('column:O', axis=alt.Axis(**hidden_axis)),
        column=alt.Column('Class Index', title=''),
        color='Predicted Label:N',
        tooltip=['Predicted Label'])
    # Size every facet for the tallest class. The height used to come from
    # whichever class happened to be processed last, with any partial row
    # left out.
    chart = dots.properties(width=ncol * 6, height=max_rows * 8)
    chart = chart.configure_axis(grid=False, domainWidth=0)
    return chart.configure_view(strokeOpacity=0)
def bar_graph_specialities_availability(self, years, cities, specialities):
    """
    Plot a bar graph of the available specialities for the selected cities.

    With more than one selected year the bars are vertical and faceted by
    year; otherwise a single horizontal bar chart is drawn. When no
    speciality is selected only a warning is shown.

    :param years: selected years
    :param cities: selected city / all cities
    :param specialities: list of selected specialities
    """
    if len(specialities) == 0:
        # The warning text says that no speciality has been selected.
        st.warning("Nicio specialitate nu a fost selectă")
        return

    st.title("Grafice comparative")
    df = self.__filter_available_specialities_by_year(
        years, cities, specialities)

    def _seats(channel):
        """Quantitative 'available seats' channel shared by both layouts."""
        return channel('Locuri disponibile:Q',
                       axis=alt.Axis(grid=False, title='Locuri disponibile'),
                       sort="-x")

    if len(years) > 1:
        layout = dict(
            x=alt.X('Oraș:N', axis=alt.Axis(title='Oraș')),
            y=_seats(alt.Y),
            column=alt.Column('An:N'),
        )
    else:
        layout = dict(x=_seats(alt.X), y=alt.Y('Oraș'))

    chart = alt.Chart(df).mark_bar().encode(
        color=alt.Color('Specialitate:N'),
        tooltip=['Specialitate:N', 'Locuri disponibile:Q', 'Oraș:N'],
        **layout)
    st.altair_chart(chart)
def altairPlot():
    """Return a jittered strip plot of IMDB ratings, one strip per genre.

    The data is referenced by the URL of the ``movies`` dataset from
    ``vega_datasets``.
    """
    import altair as alt
    from vega_datasets import data

    source = data.movies.url

    jitter_x = alt.X(
        'jitter:Q',
        title=None,
        axis=alt.Axis(values=[0], ticks=True, grid=False, labels=False),
        scale=alt.Scale(),
    )
    genre_header = alt.Header(
        labelAngle=-90,
        titleOrient='top',
        labelOrient='bottom',
        labelAlign='right',
        labelPadding=3,
    )

    points = alt.Chart(source, width=80).mark_circle(size=8).encode(
        x=jitter_x,
        y=alt.Y('IMDB_Rating:Q'),
        color=alt.Color('Major_Genre:N', legend=None),
        column=alt.Column('Major_Genre:N', header=genre_header),
    )
    # Generate Gaussian jitter with a Box-Muller transform
    jittered = points.transform_calculate(
        jitter='sqrt(-2*log(random()))*cos(2*PI*random())')

    stripplot = jittered.configure_facet(spacing=0)
    stripplot = stripplot.configure_view(stroke=None)
    return stripplot
def create_viz(emotions_dict, src_list):
    """Chart emotion values per keyword and source, and save it as SVG.

    Args:
        emotions_dict: Mapping of keyword to a mapping indexed by source.
        src_list: The first three entries are used as source keys and are
            labelled "Trusted Source", "Other Source 1" and
            "Other Source 2", in that order.

    The chart is written to ``./emotion_sample.svg``.
    """
    source_labels = ("Trusted Source", "Other Source 1", 'Other Source 2')

    new_df = pd.DataFrame(columns=['keywords', 'c2', 'values', 'Emotion'])
    for word in emotions_dict.keys():
        # Look up all three sources first, then build their rows, then
        # append them.
        per_source = emotions_dict[word]
        values = [per_source[src_list[i]] for i in range(3)]
        row_groups = [
            get_rows_src(value, label, word)
            for value, label in zip(values, source_labels)
        ]
        for rows in row_groups:
            new_df = add_to_df(rows, new_df)

    palette = ['#96ceb4', '#ffcc5c', '#ff6f69', '#ff9BD8', '#877DD8']
    chart = alt.Chart(new_df).mark_bar().encode(
        x=alt.X('c2:N', axis=alt.Axis(title='')),
        y=alt.Y('sum(values):Q', axis=alt.Axis(grid=False, title='')),
        column=alt.Column('keywords:N'),
        color=alt.Color('Emotion:N', scale=alt.Scale(range=palette)),
    )
    chart = chart.properties(width=200, height=500)
    chart.save('./emotion_sample.svg')
def bestTeamPlot(year, top):
    """Chart wins, losses and draws of the five best (or worst) teams.

    Args:
        year: Two-digit season start year; season ``<year+2000>/<year+2001>``
            is selected from ``App/Data/CumulativeSeasons.csv``.
        top: When truthy pick the five teams with the most 'won' rows,
            otherwise the five with the fewest.

    Returns:
        The chart serialized as a JSON string. The summary table is also
        written to ``best.csv``.
    """
    bigDf = pd.read_csv("App/Data/CumulativeSeasons.csv")
    season = str(year + 2000) + "/" + str((year + 1) + 2000)
    dfSeason = bigDf[bigDf['season'] == season]

    wins = dfSeason.groupby(['result', 'team_long_name']).size()['won']
    if top:
        df = wins.sort_values(ascending=False).iloc[:5]
    else:
        df = wins.sort_values().iloc[:5]
    teamList = df.index.tolist()

    # Rows are divided by this constant to turn row counts into match counts
    # (presumably one row per player per match - confirm against the CSV).
    num_players = 11

    # The grouping does not depend on the team, so compute it once instead
    # of three times per team.
    results_by_team = dfSeason.groupby(['team_long_name', 'result']).size()

    df_won = []
    df_lost = []
    df_draw = []
    for team in teamList:
        team_results = results_by_team[team]
        # Explicit positional access (.iloc) replaces integer [] on a
        # label-indexed Series. Positions 0/1/2 are reported as
        # draw/lost/won, as before; this presumably relies on the result
        # labels sorting in that order - TODO confirm.
        df_won.append(int(team_results.iloc[2] / num_players))
        df_lost.append(int(team_results.iloc[1] / num_players))
        df_draw.append(int(team_results.iloc[0] / num_players))

    best = pd.DataFrame({
        'Team': teamList,
        'Wins': df_won,
        'Losts': df_lost,
        'Draw': df_draw
    })
    best.to_csv("best.csv", index=None)

    long_form = pd.melt(best,
                        id_vars=['Team'],
                        var_name='Result',
                        value_name='Total')
    chart = alt.Chart(long_form, height=400, width=165).mark_bar().encode(
        alt.X('Result:N', axis=alt.Axis(title="", labels=False)),
        alt.Y('Total:Q', axis=alt.Axis(title='Total', grid=False)),
        alt.Tooltip(["Total:Q"]),
        color=alt.Color('Result:N'),
        column=alt.Column(
            'Team:O',
            sort=alt.EncodingSortField("Total", op='max',
                                       order='descending'),
            title="")).configure_view(stroke='transparent').interactive()
    return chart.to_json()
def outcome_bars(data, name=None, width=100):
    """
    Create a bar chart showing the percentage of hands won, lost, and pushed

    Args:
        data: A DataFrame (one game), a list of dictionaries (one game, e.g.
            a player history) or a list of such lists (several games).
        name: Game label(s) for the chart. Either a single string or a
            sequence with one label per game; defaults to "Game0",
            "Game1", ...
        width: Width of each facet.

    Returns:
        An Altair chart with one facet per result (Win / Loss / Push).

    Raises:
        TypeError: if ``data`` is neither a DataFrame nor a list.
        ValueError: if ``data`` is an empty list.
    """
    if isinstance(data, pd.DataFrame):
        data_list = [data]
    elif isinstance(data, list):
        if not data:
            raise ValueError("'data' must not be empty")
        # A list of dictionaries is a single game; anything else is treated
        # as a list of games.
        if all(isinstance(item, dict) for item in data):
            data_list = [pd.DataFrame(data)]
        else:
            data_list = [pd.DataFrame(item) for item in data]
    else:
        msg = "'data' must be a DataFrame or list"
        raise TypeError(msg)

    # assign name to data
    if not name:
        names = [f"Game{i}" for i in range(len(data_list))]
    elif isinstance(name, str):
        # A bare string labels a single game; zipping it directly would
        # label the game with its first character only.
        names = [name]
    else:
        names = name

    # calculate percentages
    plot_rows = []  # one row per (game, result)
    for _name, _data in zip(names, data_list):
        win, loss, push = results_pct(_data)
        outcomes = (("Win", win), ("Loss", loss), ("Push", push))
        for order, (result, pct) in enumerate(outcomes, start=1):
            plot_rows.append({
                "game": _name,
                "result": result,
                "pct": pct,
                "order": order
            })
    plot_data = pd.DataFrame(plot_rows)

    # create altair chart
    chart = alt.Chart(plot_data, width=width).mark_bar().encode(
        x=alt.X("game",
                axis=alt.Axis(labelAngle=-45),
                title=None,
                sort=["Win", "Loss", "Push"]),
        y=alt.Y("pct:Q"),
        color=alt.Color("game:O", legend=None),
        column=alt.Column("result:O", title="Result"),
        tooltip=[alt.Tooltip("pct", title="Pct")])
    return chart
def make_category_plot_separate_top_n(infile, n_to_separate=20):
    """Render a user-by-category traffic heatmap, split into top/bottom users.

    Flows are read from the parquet file ``infile``, total bytes (up + down)
    are summed per user and category and converted to GB, and the heatmap is
    faceted into the ``n_to_separate`` heaviest users and the rest. The
    result is saved to ``renders/users_per_category_split_outliers.png``.
    Pandas display limits are lifted as a side effect.
    """
    for option in ('display.max_columns', 'display.max_rows',
                   'display.width'):
        pd.set_option(option, None)

    flows = infra.pd.read_parquet(infile).reset_index()
    flows["bytes_total"] = flows["bytes_up"] + flows["bytes_down"]

    def _totals_descending(key):
        """Sum per ``key`` and order by total bytes, largest first.

        The set_index/reset_index round trip leaves a fresh 0..n-1 index
        that follows the sorted order.
        """
        totals = flows.groupby(key).sum().reset_index()
        ordered = totals.sort_values("bytes_total", ascending=False)
        return ordered.set_index("bytes_total").reset_index()

    # Figure out sorting order by total amount.
    cat_sort_list = _totals_descending("category")["category"].tolist()
    user_sort_order = _totals_descending("user")
    user_sort_list = user_sort_order["user"].tolist()

    # Label every user as belonging to the top N or to the remainder; the
    # positional index of the sorted frame identifies the top users.
    bottom_n = len(user_sort_order) - n_to_separate
    top_annotation_frame = user_sort_order[["user"]].assign(
        topN=f"Bottom {bottom_n}")
    is_top = top_annotation_frame.index < n_to_separate
    top_annotation_frame.loc[is_top, "topN"] = f"Top {n_to_separate}"

    flows["GB"] = flows["bytes_total"] / (1000**3)
    per_pair = flows[["category", "user", "GB"]].groupby(
        ["user", "category"]).sum()
    per_pair = per_pair.reset_index()
    per_pair["logGB"] = per_pair["GB"].transform(np.log10)
    per_pair = per_pair.merge(top_annotation_frame, on="user")

    heatmap = alt.Chart(per_pair).mark_rect().encode(
        x=alt.X(
            "user:N",
            title="User (Sorted by Total GB)",
            axis=alt.Axis(labels=False),
            sort=user_sort_list,
        ),
        y=alt.Y(
            "category:N",
            title="Category (Sorted by Total GB)",
            sort=cat_sort_list,
        ),
        color=alt.Color(
            "GB:Q",
            title="Total GB",
            scale=alt.Scale(scheme="viridis"),
        ),
    )
    split = heatmap.facet(
        column=alt.Column("topN:N", sort="descending", title=""),
    ).resolve_scale(x="independent", color="independent")
    split.save(
        "renders/users_per_category_split_outliers.png",
        scale_factor=2,
    )
def income_expenses_over_time(df_orig):
    """Streamlit view: income and expenses per time interval.

    Sidebar widgets select the aggregation interval, whether the sign of the
    "Income" rows is inverted, and the plotting backend (pyplot, altair or
    bokeh). The aggregated data comes from ``time_interval_aggregation``.
    """
    # Time interval aggregation level
    time_interval = st.sidebar.radio("Time interval:",
                                     ("Month", "Quarter", "Year"),
                                     index=1)
    dfn, n_levels = time_interval_aggregation(df_orig, time_interval)

    invert_income = st.sidebar.checkbox('Invert sign of "Income"',
                                        value=True)
    if invert_income:
        dfn.loc["Income", :] = -dfn.loc["Income", :].values

    st.subheader('Income and Expenses over Time')
    plot_type = st.sidebar.selectbox('Plot type',
                                     ["pyplot", "altair", "bokeh"],
                                     key="plot_type")

    # One row per interval, one column per top-level account.
    df_L0 = dfn.groupby(["Account_L0"]).sum().transpose().reset_index()
    df_L0.columns.name = "Account"

    if plot_type == "pyplot":
        fig = plt.figure(figsize=(14, 5))
        ax = plt.axes()
        df_L0.plot.bar(ax=ax,
                       x=time_interval,
                       y=["Income", "Expenses"],
                       xlabel=time_interval,
                       ylabel=df_L0["level_0"][0],
                       rot=90)
        ax.locator_params(axis="x", tight=True, nbins=40)
        st.pyplot(fig)
        return

    if plot_type == "altair":
        n_intervals = df_L0.shape[0]
        # First label of the top column level; used as the value column
        # name and as the axis/tooltip title.
        value_label = dfn.columns.levels[0][0]
        long_form = df_L0.drop(columns="level_0").set_index(time_interval)
        df_new = long_form.stack().reset_index().rename(
            columns={0: value_label})

        custom_spacing = 2
        interval_header = alt.Header(title="Income and Expenses",
                                     labelOrient='bottom',
                                     labelAlign='right',
                                     labelAngle=-90)
        value_field = f'{value_label}:Q'
        bars = alt.Chart(df_new).mark_bar().encode(
            column=alt.Column(time_interval,
                              spacing=custom_spacing,
                              header=interval_header),
            x=alt.X('Account:O',
                    axis=alt.Axis(title=None, labels=False, ticks=False)),
            y=alt.Y(value_field, title=value_label,
                    axis=alt.Axis(grid=False)),
            color=alt.Color('Account',
                            scale=alt.Scale(range=['#EA98D2', '#659CCA'])),
            tooltip=[
                alt.Tooltip('Account:O', title='Account'),
                alt.Tooltip(value_field, title=value_label),
                alt.Tooltip(f'{time_interval}:N', title=time_interval),
            ],
        )
        # Split 700px between the facets, minus the spacing between them.
        facet_width = (700 - n_intervals * custom_spacing) / n_intervals
        chart = bars.properties(width=facet_width)
        st.altair_chart(chart, use_container_width=False)
        return

    if plot_type == "bokeh":
        accounts = ["Income", "Expenses"]
        x = [(ti, acnt) for ti in df_L0[time_interval] for acnt in accounts]
        # Interleave income and expenses to match the factor order in x.
        paired = zip(df_L0['Income'], df_L0['Expenses'])
        counts = tuple(value for pair in paired for value in pair)
        source = ColumnDataSource(data=dict(x=x, counts=counts))

        p = figure(x_range=FactorRange(*x),
                   plot_height=450,
                   plot_width=900,
                   title="Income and Expenses",
                   toolbar_location="above",
                   tooltips=[("Period, Account", "@x"),
                             ("Value", "@counts")])
        p.vbar(x='x', top='counts', width=0.9, source=source)
        p.y_range.start = 0
        p.x_range.range_padding = 0.5
        p.xaxis.major_label_orientation = 1
        p.xgrid.grid_line_color = None
        st.bokeh_chart(p)
def _build_vaf_histograms(histogram_json):
    """Create VAF histograms split by genotype.

    Args:
        histogram_json: Mapping of genotype key to histogram bins. Each entry
            is loaded into a DataFrame; the charts read its ``s`` (bin
            start), ``e`` (bin end) and ``c`` (count) columns.

    Returns:
        A ``(vaf_histograms, other_vaf_histograms)`` tuple: the main
        genotypes (ref, het, hom-alt) faceted with a vertical guide line, and
        the remaining genotypes faceted without one.
    """
    guide_positions = {REF: 0, HET: 0.5, HOM: 1}

    # Collect the per-genotype frames and concatenate once;
    # DataFrame.append is no longer available in current pandas.
    frames = []
    for key in histogram_json:
        g = pd.DataFrame(histogram_json[key])
        pretty, group = _prettify_genotype(key)
        g['GT'] = pretty  # pretty genotype name
        g['g'] = group  # main/other genotypes
        g['l'] = guide_positions.get(pretty, None)  # vertical line as guide
        frames.append(g)
    hist_data = pd.concat(frames) if frames else pd.DataFrame()

    main_hist_data = hist_data[hist_data['g'] == 'main']
    other_hist_data = hist_data[hist_data['g'] == 'others']

    # Main genotypes (ref, het, hom-alt)
    # Histogram bars themselves
    # s = bin_start, e = bin_end, c = count
    bars = alt.Chart(main_hist_data).mark_bar().encode(
        x=alt.X('s', title='VAF'),
        x2='e',
        y=alt.Y('c', title='Count', stack=True, axis=alt.Axis(format='s')))
    # Vertical lines
    guide_rules = alt.Chart(main_hist_data).mark_rule().encode(x='l')
    # Facet into 3 plots by genotype
    vaf_histograms = (bars + guide_rules) \
        .properties(width=200, height=200) \
        .facet(column=alt.Column('GT', title='Main genotypes',
                                 sort=[REF, HET, HOM])) \
        .resolve_scale(y='independent')

    # Other genotypes (uncalled, het with two alt alleles)
    # s = bin_start, e = bin_end, c = count
    other_vaf_histograms = alt.Chart(other_hist_data) \
        .mark_bar().encode(
            x=alt.X('s', title='VAF'),
            x2='e',
            y=alt.Y('c', title='Count', stack=True,
                    axis=alt.Axis(format='s')),
            column=alt.Column('GT', title='Other genotypes')) \
        .properties(width=150, height=150) \
        .resolve_scale(y='independent')
    return vaf_histograms, other_vaf_histograms
def plot(data):
    """
    Takes in a Dataframe data containing information about the population,
    educational attainment, and internet access for U.S. counties.
    Draws a grouped bar chart comparing 5 urban and 5 rural counties, and
    the relationship between attaining a Bachelor's Degree and lacking
    internet access for these counties. The chart is saved to
    q4_chart.html.
    """
    urban = [
        'New York County', 'Los Angeles County', 'Cook County',
        'Harris County', 'Maricopa County'
    ]
    rural = [
        'Chaves County', 'Aroostook County', 'Clallam County',
        'McCracken County', 'St. Landry Parish'
    ]
    counties = urban + rural

    data = clean(data, counties)
    data = calculate_percentage(data)

    county_header = alt.Header(titleOrient='bottom',
                               labelOrient='bottom',
                               labelAngle=-90,
                               labelPadding=90,
                               labelBaseline='middle')
    chart_title = {
        'text': [
            'Internet Access and Education Attainment in Urban vs. '
            'Rural Counties (2016)'
        ],
        # The second subtitle line labels the facets as urban or rural.
        'subtitle': [
            '',
            '. Urban Urban Urban '
            ' Urban Urban Rural Rural'
            ' Rural Rural Rural'
        ],
        'subtitlePadding': 10
    }

    # Plot
    bars = alt.Chart(data).mark_bar().encode(
        x=alt.X('Statistic',
                type='nominal',
                sort=counties,
                title=None,
                axis=alt.Axis(labels=False)),
        y='Percentage:Q',
        color=alt.Color('Statistic:N',
                        scale=alt.Scale(range=['#96ceb4', '#ffcc5c']),
                        title=None),
        column=alt.Column('County:N', sort=counties, header=county_header),
    )
    q4_chart = bars.properties(title=chart_title)
    q4_chart = q4_chart.configure_title(fontSize=18,
                                        orient='top',
                                        offset=12,
                                        anchor='start')
    q4_chart = q4_chart.configure_axisX(labelPadding=100)
    q4_chart.save('q4_chart.html')
def faceted_bar_chart(
        df: pd.DataFrame,
        xcol: str,
        xtitle: str,
        ycol: str,
        ytitle: str,
        colorcol: str,
        textcol: str,
        title: str,
        columncol: str,
        legend_title: str = "Hardware") -> alt.FacetChart:
    """
    Method that outputs a raw faceted bar chart.
    This does not process the input df, so it has to come already processed.

    Parameters
    ----------
    df: pd.DataFrame
        dataframe from which the bar chart will be created.
    xcol: str
        dataframe column name that will be used for the x axis of the plot.
    xtitle: str
        title of the x-axis.
    ycol: str
        dataframe column name that will be used for the y axis of the plot.
        Its values are also printed next to the bars, with one decimal.
    ytitle: str
        title of the y-axis.
    colorcol: str
        dataframe column name which holds the separation between colors.
    textcol: str
        accepted for interface compatibility; currently not read, the bar
        labels are taken from ``ycol``.
    title: str
        Chart title (shown as the facet header title).
    columncol: str
        dataframe column name which holds the separation between all the
        faceted charts, x axis above plot.
    legend_title: str
        title of the color legend.

    Returns
    -------
    alt.FacetChart
        Interactive faceted bar chart created from the input dataframe.
    """
    # The layers carry no data of their own; it is attached once, on the
    # layered chart, so that it can be faceted.
    bars = alt.Chart().mark_bar().encode(
        x=alt.X(xcol + ':N', title=xtitle),
        y=alt.Y(ycol + ':Q', title=ytitle),
        color=alt.Color(colorcol + ':N', title=legend_title),
    )
    text = bars.mark_text(
        angle=270,
        align='left',
        baseline='middle',
        dx=10,  # offsets the label so it doesn't sit on top of the bar
    ).encode(text=alt.Text(ycol + ':Q', format='.1f'))

    facet_column = alt.Column(
        columncol + ':N',
        header=alt.Header(labelAngle=-85, labelAlign='right'),
        title=title)
    layered = alt.layer(bars, text, data=df)
    return layered.facet(column=facet_column).interactive()
def _generate_chart(results: pd.DataFrame, sorted: bool = False):
    """Scatter ``read_duration`` against ``yx_planes``, faceted by config.

    Args:
        results: Frame with "yx_planes", "read_duration", "reader" and
            "config" fields.
        sorted: When true, facets follow the order given by
            ``SELECTED_CLUSTERS_TO_VISUALIZE``; otherwise the default facet
            order is used.
    """
    column = "config:O"
    if sorted:
        column = alt.Column("config:O", sort=SELECTED_CLUSTERS_TO_VISUALIZE)

    circles = alt.Chart(results).mark_circle()
    return circles.encode(
        x="yx_planes:Q",
        y="read_duration:Q",
        color="reader:N",
        column=column,
    )
def gen_chart(data: pd.DataFrame) -> alt.Chart:
    """Plot ``coef`` against ``dimension`` for the rows of the "test" split.

    One semi-transparent line is drawn per value of ``n``, in one facet per
    value of ``regions``.
    """
    test_rows = data[data.split == "test"]
    lines = alt.Chart(test_rows).mark_line(opacity=0.5)
    encoded = lines.encode(
        x=alt.X("dimension:Q"),
        y=alt.Y("coef:Q"),
        color="n:N",
        column=alt.Column("regions:N"),
    )
    return encoded.properties(width=200, height=300)