def plot_hypothesis(hypothesis, file_name):
    """Plot hypothesis-test results per bin and save all plots to one PDF.

    For every (bin type, score) combination two bar charts are produced:
    the mean score of both compared datasets per bin, and the p-value per
    bin coloured by whether it passes the 0.05 threshold.

    Parameters
    ----------
    hypothesis : mapping
        ``hypothesis[bin_type][score]`` is an iterable of single-entry
        mappings ``{bin_name: result}``. Each ``result`` must expose the
        attributes ``p1``, ``p2``, ``mean1``, ``mean2``, ``t`` and ``p``.
        The score names are taken from the first bin type.
    file_name : str
        Target passed to ``save_as_pdf_pages``.
    """
    bin_types = list(hypothesis)
    scores = list(hypothesis[bin_types[0]])
    plots = []

    for bin_type, score in product(bin_types, scores):
        mean_name = "Mean: " + score
        mean_rows = []
        test_rows = []

        for bin_ in hypothesis[bin_type][score]:
            # Each entry is a one-item mapping: bin name -> test result.
            bin_name = list(bin_)[0]
            h = list(bin_.values())[0]

            for dataset, mean in ((h.p1, h.mean1), (h.p2, h.mean2)):
                mean_rows.append({
                    "Bin": bin_name,
                    'Dataset': dataset,
                    mean_name: str(round(float(mean), 3)),
                })

            p_value = h.p
            test_rows.append({
                "Bin": bin_name,
                't-statistic': str(round(h.t, 3)),
                'p-value': str(p_value),
                '95% Confidence':
                    "Significant" if p_value <= 0.05 else "Not Significant",
            })

        # Build each frame once from the collected rows. DataFrame.append
        # was removed in pandas 2.0 and was quadratic when used in a loop.
        # Explicit columns keep the frames well-formed when there are no rows.
        df = pd.DataFrame(mean_rows, columns=["Bin", "Dataset", mean_name])
        df2 = pd.DataFrame(
            test_rows,
            columns=["Bin", "t-statistic", 'p-value', '95% Confidence'])

        plots.append(
            (ggplot(df, aes(x='Bin', y=mean_name, fill='Dataset')) +
             geom_col(stat='identity', position='dodge') +
             ggtitle("{0} bin distribution| {1}\nBin's Average Scores".format(
                 bin_type, score))))
        plots.append(
            (ggplot(df2, aes(x='Bin', y='p-value', fill='95% Confidence')) +
             geom_col(stat='identity', width=0.2) +
             ggtitle(
                 "{0} bin distribution| {1}\nBin's 95% Confidence Level Test".
                 format(bin_type, score)) +
             scale_fill_manual(values={
                 'Significant': "#214517",
                 'Not Significant': '#c62f2d'
             })))

    save_as_pdf_pages(plots, file_name)
def plot_range_comparison(self,
                          xlabel: str = '',
                          figsize: Tuple[int] = (7, 3),
                          add_text_label: bool = True,
                          **kwargs):
    """Bar chart of comparison counts per category, filled by direction.

    ``kwargs`` are forwarded to ``self.get_ranges_df``. When
    ``add_text_label`` is true, text labels are added for the 'Positive'
    and 'Negative' rows that have a count above zero.
    """
    ranges = self.get_ranges_df(**kwargs)

    chart = (p9.ggplot(ranges)
             + p9.aes('cat_value', 'counts', fill='direction')
             + p9.geom_col(alpha=.8)
             + p9.theme(figure_size=figsize,
                        axis_text_x=p9.element_text(rotation=45))
             + p9.scale_fill_manual(['#3f7f93', '#da3b46', '#4d4a4a'])
             + p9.labs(x=xlabel, y='Number of Comparisons', fill='R'))

    if not add_text_label:
        return chart

    # (direction, y expression for the label, text colour)
    label_layers = (
        ('Positive', 'n + max(n) * .15', '#3f7f93'),
        ('Negative', 'n + max(n) * .05', '#da3b46'),
    )
    for direction, y_expr, colour in label_layers:
        subset = ranges.loc[ranges.direction == direction].loc[
            ranges.counts > 0]
        if subset.size > 0:
            chart += p9.geom_text(
                p9.aes(label='label', x='cat_value', y=y_expr),
                inherit_aes=False,
                size=9,
                data=subset,
                color=colour)
    return chart
def plot_bargraph(count_plot_df, plot_df):
    """
    Build a flipped bar chart of lemma counts, one panel per repository.

    Arguments:
        count_plot_df - dataframe with the lemma counts; the "lemma",
                        "count" and "repository" columns are used
        plot_df - dataframe with "lemma" and "odds_ratio" columns; lemmas
                  are placed on the axis in ascending odds-ratio order
    """
    counts = count_plot_df.astype({"count": int})
    lemma_order = plot_df.sort_values(
        "odds_ratio", ascending=True).lemma.tolist()

    bars = p9.geom_col(position=p9.position_dodge(width=0.5), fill="#253494")
    base_theme = p9.theme_seaborn(
        context='paper', style="ticks", font="Arial", font_scale=0.95)
    # 640 x 480
    sizing = p9.theme(
        figure_size=(6.66, 5),
        strip_background=p9.element_rect(fill="white"),
        strip_text=p9.element_text(size=12),
        axis_title=p9.element_text(size=12),
        axis_text_x=p9.element_text(size=10),
    )

    return (p9.ggplot(counts, p9.aes(x="lemma", y="count"))
            + bars
            + p9.coord_flip()
            + p9.facet_wrap("repository", scales='free_x')
            + p9.scale_x_discrete(limits=lemma_order)
            + p9.scale_y_continuous(labels=custom_format('{:,.0g}'))
            + p9.labs(x=None)
            + base_theme
            + sizing)
def plot_zmw_stats(self, **kwargs):
    """Plot of ZMW stats for all runs.

    Note
    ----
    Raises an error if :meth:`Summaries.has_zmw_stats` is not `True`.

    Parameters
    ----------
    ``**kwargs`` : dict
        Keyword arguments passed to :meth:`Summaries.zmw_stats`.

    Returns
    -------
    plotnine.ggplot.ggplot
        Stacked bar graph of ZMW stats for each run.

    """
    stats = self.zmw_stats(**kwargs)

    # Figure width grows with the number of distinct run names.
    n_names = len(stats['name'].unique())
    layout = p9.theme(
        axis_text_x=p9.element_text(angle=90, vjust=1, hjust=0.5),
        figure_size=(0.4 * n_names, 2.5),
    )

    plot = (p9.ggplot(stats, p9.aes(x='name', y='number', fill='status'))
            + p9.geom_col(position=p9.position_stack(reverse=True),
                          width=0.8)
            + layout
            + p9.ylab('number of ZMWs')
            + p9.xlab(''))

    # Only use the manual palette when it has enough colours.
    n_status = len(stats['status'].unique())
    if n_status < len(CBPALETTE):
        plot = plot + p9.scale_fill_manual(CBPALETTE[1:])

    return plot
def test_col():
    """geom_col with a fill mapping and black outlines matches 'col'."""
    # The black outline reveals the bar edges and the stacking
    # that is going on.
    bars = geom_col(aes('x', 'z', fill='factor(z)'), color='black')
    plot = ggplot(df) + bars
    themed = plot + _theme

    assert themed == 'col'
def frequency_TL(Data):
    """Plot the number of rows per calendar day and save it as TL_4.jpeg.

    Adds a ``date_4`` column (the date part of ``Data['date']``) to
    ``Data``, counts rows per day, fills in the days without rows, and
    saves a bar chart to ``pdf/iteration/``. Returns ``None``.
    """
    from datetime import timedelta

    print('======= Creating frequency_TL =======')

    # Filtering: per-day row counts, keeping only the date and one count column
    Data['date_4'] = Data['date'].dt.date
    per_day = Data.groupby("date_4", sort=False, as_index=False).count()
    per_day = per_day.iloc[:, 0:2].rename(columns={"Unnamed: 0": "n"})

    first_day = min(per_day["date_4"])
    last_day = max(per_day["date_4"])
    span = last_day - first_day  # as timedelta

    # Every calendar day in the span, so days without rows still appear
    all_days = [first_day + timedelta(days=offset)
                for offset in range(span.days + 1)]
    calendar = pd.DataFrame(all_days, columns=['date_4'])
    data_with_missing_times = pd.merge(calendar, per_day,
                                       on='date_4', how='outer')

    # Wider tick spacing for longer time spans
    if span.days > 1825:
        datebreaks = '18 months'
    elif span.days > 1095:
        datebreaks = '12 months'
    else:
        datebreaks = '6 months'

    # Creating and saving TL_4
    text_style = p9.theme(
        axis_text=p9.element_text(size=40),
        axis_title=p9.element_text(size=40, face='bold'))
    plot = (p9.ggplot(data=data_with_missing_times,
                      mapping=p9.aes(x='date_4', y='n'))
            + p9.geom_col(fill='red')
            + p9.theme_classic()
            + text_style
            + p9.scale_x_datetime(date_labels='%Y-%m',
                                  date_breaks=datebreaks)
            + p9.labs(x='', y=''))

    if len(data_with_missing_times) > 0:
        plot.save(filename='TL_4.jpeg',
                  plot=plot,
                  path="pdf/iteration/",
                  width=25,
                  height=5,
                  dpi=320)
    else:
        print('Plot not created; no data found.')

    print('=================================frequency_TL DONE =============================')
    return None
def image_histogram():
    """Show an image, its grayscale and equalized versions, and their histograms.

    Reads ``image.jpg`` from the working directory, displays the three
    images in OpenCV windows, draws both grayscale histograms in one
    plotnine figure, and then waits until 'q' is pressed.

    Raises
    ------
    FileNotFoundError
        If ``image.jpg`` cannot be read.
    """
    # read and work with image
    image = cv2.imread(r"image.jpg")
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("image.jpg could not be read")
    image_bw = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image_bw_eq = cv2.equalizeHist(image_bw)

    try:
        # create windows
        cv2.namedWindow('image', cv2.WINDOW_NORMAL)
        cv2.namedWindow('image_bw', cv2.WINDOW_NORMAL)
        cv2.namedWindow('image_bw_eq', cv2.WINDOW_NORMAL)

        # display images
        cv2.imshow('image', image)
        cv2.imshow('image_bw', image_bw)
        cv2.imshow('image_bw_eq', image_bw_eq)

        # calculate histogram
        # The upper bound of the range is exclusive, so it must be 256 for
        # pixel value 255 to be counted and for each of the 256 bins to
        # cover exactly one intensity.
        hist_bw = cv2.calcHist([image_bw], [0], None, [256], [0, 256])
        hist_bw_eq = cv2.calcHist([image_bw_eq], [0], None, [256], [0, 256])

        np_hist_x = np.arange(len(hist_bw))
        df = pd.DataFrame(data={
            'np_hist_x': np_hist_x,
            'hist_bw': hist_bw.flatten(),
            'hist_bw_eq': hist_bw_eq.flatten(),
        })

        # plot histogram
        pn_handle = (
            pn.ggplot(df)
            + pn.geom_col(pn.aes(x='np_hist_x', y='hist_bw'),
                          color=None, fill='red', alpha=0.5)
            + pn.ylab('occurences')
            + pn.geom_col(pn.aes(x='np_hist_x', y='hist_bw_eq'),
                          color=None, fill='green', alpha=0.5)
            + pn.ggtitle('Histograms of bw images')
        )
        pn_handle.draw()
        plt.show()

        # Mask to the low byte so the comparison is not affected by extra
        # high bits some platforms set in the key code.
        while True:
            if cv2.waitKey(16) & 0xFF == ord('q'):
                break
    finally:
        # cleanup opencv, also when something above failed
        cv2.destroyAllWindows()
def test_ordinal_scale():
    """Bars over an ordered categorical x, also mapped to fill."""
    ordered_x = pd.Categorical(list('abcd'), ordered=True)
    data = pd.DataFrame({'x': ordered_x, 'y': [1, 2, 3, 4]})

    mapping = aes('x', 'y', color='-y', fill='x')
    plot = ggplot(data) + mapping + geom_col(size=4) + _theme

    themed = plot + _theme
    assert themed == 'ordinal_scale'
def test_stack_negative():
    """Stacked bars and centred labels when some y values are negative."""
    data = df1.copy()

    # Flip the sign of y in the first and the last row.
    y_pos = data.columns.get_loc('y')
    for row_pos in (0, len(data) - 1):
        data.iloc[row_pos, y_pos] *= -1

    bars = geom_col(aes('factor(x)', 'y', fill='factor(y)'),
                    position='stack')
    labels = geom_text(aes('factor(x)', 'y', label='y'),
                       position=position_stack(vjust=0.5))
    plot = ggplot(data) + bars + labels

    themed = plot + _theme
    assert themed == 'stack-negative'
def test_stack_negative():
    """Check stacking of bars and text with negative values at both ends."""
    frame = df1.copy()
    y_col = frame.columns.get_loc('y')

    # Negate y for the first row, then for the last row.
    first_row, last_row = 0, len(frame) - 1
    frame.iloc[first_row, y_col] *= -1
    frame.iloc[last_row, y_col] *= -1

    plot = ggplot(frame)
    plot = plot + geom_col(
        aes('factor(x)', 'y', fill='factor(y)'), position='stack')
    plot = plot + geom_text(
        aes('factor(x)', 'y', label='y'),
        position=position_stack(vjust=0.5))

    assert (plot + _theme) == 'stack-negative'
def plot_consecutive(self, alpha: float = .05, **kwargs):
    """Bar chart of the longest run of consecutive significant steps per lag.

    Rows of ``self.sdc_df`` with ``p_value`` below ``alpha`` are kept.
    ``kwargs`` are forwarded to ``p9.theme``.
    """
    column_name = 'Max Consecutive steps'

    def _longest_run(lag_df):
        # A new group starts whenever start_1 is not the previous value + 1;
        # the largest group size is the longest consecutive run.
        ordered = lag_df.sort_values('start_1')
        labelled = ordered.assign(
            group=lambda dd: (dd.start_1 != dd.start_1.shift(1) + 1).cumsum())
        return labelled.groupby(['group']).size().max()

    significant = self.sdc_df.loc[lambda dd: dd.p_value < alpha]
    per_lag = significant.groupby('lag', as_index=True).apply(_longest_run)
    plot_data = per_lag.rename(column_name).reset_index()

    f = (p9.ggplot(plot_data)
         + p9.aes('lag', column_name)
         + p9.geom_col()
         + p9.theme(**kwargs)
         + p9.labs(x='Lag [days]'))
    return f
def plot_result_stats(results, title):
    """Dodged bar chart of ``results.describe()`` statistics per metric.

    The 'count', 'min' and 'max' rows of the description are left out and
    each bar is labelled with its value rounded to two decimals.
    """
    column_names = {"level_0": "metric", "level_1": "group", 0: "value"}
    summary = results.describe().unstack().reset_index()
    summary = summary.rename(columns=column_names)

    excluded = summary["group"].isin(["count", "min", "max"])
    summary = summary[~excluded]
    summary["value_presentation"] = summary["value"].round(2)

    value_labels = p9.geom_text(
        p9.aes(label="value_presentation"),
        position=p9.position_dodge(width=0.9),
        va="bottom")

    return (p9.ggplot(summary)
            + p9.aes("metric", "value", fill="group")
            + p9.geom_col(position="dodge")
            + p9.theme_bw()
            + p9.coord_cartesian(ylim=[0, 1.0])
            + p9.ggtitle(title)
            + value_labels)
def plot_num(df):
    """Plot the pre-computed histograms of several columns as faceted bars.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per column to plot. ``df['hist']`` holds a DataFrame with
        ``value`` and ``prop`` columns and ``df['col_name']`` holds the
        label used for that row's facet.

    Returns
    -------
    The plotnine plot object, one free-scaled facet per ``col_name``.
    """
    # Pair each histogram with its label by position, so a non-default
    # index on ``df`` cannot mismatch or fail the lookup. ``assign``
    # returns a new frame, leaving the caller's nested histogram frames
    # without an added 'groups' column.
    labelled = [
        hist.assign(groups=col_name)
        for hist, col_name in zip(df['hist'], df['col_name'])
    ]
    z = pd.concat(labelled)

    # generate the plot
    ggplt = (p9.ggplot(z, p9.aes(x='value', y='prop', group='groups'))
             + p9.geom_col()
             + p9.guides(fill=False)
             + p9.ylab('Proportion')
             + p9.xlab('')
             + p9.theme(axis_text_x=p9.element_text(rotation=45, hjust=1))
             + p9.facet_wrap(facets=['groups'], ncol=3, scales='free'))

    # return the plot object
    return ggplt
def protobowl(fold=BUZZER_DEV_FOLD):
    """Plot outcome counts per possibility for the three buzzer models.

    Loads ``output/buzzer/<Buzzer>/<fold>_protobowl.pkl`` for the RNN, MLP
    and threshold buzzers, counts rows per (Possibility, Outcome) pair and
    saves a faceted bar chart to ``output/buzzer/<fold>_protobowl.pdf``.

    Parameters
    ----------
    fold :
        Fold name used in the input and output file names.
    """
    # (model label, directory name) in the order the frames are stacked
    buzzers = (
        ("RNN", "RNNBuzzer"),
        ("MLP", "MLPBuzzer"),
        ("Threshold", "ThresholdBuzzer"),
    )

    frames = []
    for model_name, buzzer_dir in buzzers:
        path = "output/buzzer/{}/{}_protobowl.pkl".format(buzzer_dir, fold)
        # Context manager so the file handle is closed after loading.
        # NOTE: pickle must only be used on trusted files.
        with open(path, "rb") as f:
            raw = pickle.load(f)
        counts = raw.groupby(["Possibility", "Outcome"]).size()
        counts = counts.reset_index().rename(columns={0: "Count"})
        counts["Model"] = model_name
        frames.append(counts)

    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    df = pd.concat(frames, ignore_index=True)

    # Fix the category order used for the fill legend and the facets.
    outcome_type = CategoricalDtype(categories=[15, 10, 5, 0, -5, -10, -15])
    df["Outcome"] = df["Outcome"].astype(outcome_type)
    model_type = CategoricalDtype(categories=["Threshold", "MLP", "RNN"])
    df["Model"] = df["Model"].astype(model_type)

    p = (
        ggplot(df)
        + geom_col(aes(x="Possibility", y="Count", fill="Outcome"), width=0.7)
        + facet_grid("Model ~")
        + coord_flip()
        + theme_fs()
        + theme(aspect_ratio=0.17)
        + scale_fill_brewer(type="div", palette=7)
    )

    figure_dir = os.path.join("output/buzzer/{}_protobowl.pdf".format(fold))
    p.save(figure_dir)
def cell_division(adata):
    """
    Plots the reference variable as a function of the principal circle
    nodes to visualize the moment of cell division.

    Parameters
    ----------------
    adata: AnnData
        The AnnData object being used for the analysis. Must be previously
        evaluated by `tl.celldiv_moment`.

    Returns
    ------------
    A plotnine line-plot to help visualize the moment of cell division and
    direction of the cell cycle.

    If method = 'counts' when tl.celldiv_moment was run, cell division is
    defined by the largest drop in total_counts. The changes in counts are
    represented by the bars at the bottom, and the suggested moment of cell
    division is marked in red. The cell cycle should follow an incremental
    increase in total counts until around the moment of cell division.

    Alternatively, if method='g2m' in tl.celldiv_moment, the G2-M signature
    dynamics are used to define the moment of cell division.
    """
    div_info = adata.uns['scycle']['cell_div_moment']
    ref_var = div_info['ref_var']
    edge_to_0 = div_info['cell_div_edge'][0]

    # The flag column is written into the edges table stored on adata.
    edges = adata.uns['princirc_gr']['edges']
    edges['cell_div'] = edges['e1'] == edge_to_0
    cell_div_count = edges[edges['e1'] == edge_to_0]['mean_var']

    division_marker = annotate("point", x=edge_to_0, y=cell_div_count,
                               color='red', size=2)
    change_bars = geom_col(aes(y='diff_var', fill='cell_div'))

    cell_div_plot = (
        ggplot(edges, aes('e1', 'mean_var'))
        + geom_point(aes(y='mean_var'), size=2)
        + geom_path(aes(y='mean_var'))
        + geom_smooth(aes(y='mean_var'), method='lm', linetype='dashed')
        + division_marker
        + labs(x='Edge position', y=ref_var)
        + change_bars
        + scale_fill_manual(values=['darkgrey', 'red'], guide=False)
        + theme_std
    )
    return cell_div_plot
def protobowl(fold=BUZZER_DEV_FOLD):
    """Plot outcome counts per possibility for the three buzzer models.

    Loads ``output/buzzer/<Buzzer>/<fold>_protobowl.pkl`` for the RNN, MLP
    and threshold buzzers, counts rows per (Possibility, Outcome) pair and
    saves a faceted bar chart to ``output/buzzer/<fold>_protobowl.pdf``.

    Parameters
    ----------
    fold :
        Fold name used in the input and output file names.
    """
    # (model label, directory name) in the order the frames are stacked
    buzzers = (
        ('RNN', 'RNNBuzzer'),
        ('MLP', 'MLPBuzzer'),
        ('Threshold', 'ThresholdBuzzer'),
    )

    frames = []
    for model_name, buzzer_dir in buzzers:
        path = 'output/buzzer/{}/{}_protobowl.pkl'.format(buzzer_dir, fold)
        # Context manager so the file handle is closed after loading.
        # NOTE: pickle must only be used on trusted files.
        with open(path, 'rb') as f:
            raw = pickle.load(f)
        counts = raw.groupby(['Possibility', 'Outcome']).size()
        counts = counts.reset_index().rename(columns={0: 'Count'})
        counts['Model'] = model_name
        frames.append(counts)

    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    df = pd.concat(frames, ignore_index=True)

    # Fix the category order used for the fill legend and the facets.
    outcome_type = CategoricalDtype(categories=[15, 10, 5, 0, -5, -10, -15])
    df['Outcome'] = df['Outcome'].astype(outcome_type)
    model_type = CategoricalDtype(categories=['Threshold', 'MLP', 'RNN'])
    df['Model'] = df['Model'].astype(model_type)

    p = (
        ggplot(df)
        + geom_col(aes(x='Possibility', y='Count', fill='Outcome'),
                   width=0.7)
        + facet_grid('Model ~')
        + coord_flip()
        + theme_fs()
        + theme(aspect_ratio=0.17)
        + scale_fill_brewer(type='div', palette=7)
    )

    figure_dir = os.path.join('output/buzzer/{}_protobowl.pdf'.format(fold))
    p.save(figure_dir)
def protobowl(fold=BUZZER_DEV_FOLD):
    """Plot outcome counts per possibility for the three buzzer models.

    Loads ``output/buzzer/<Buzzer>/<fold>_protobowl.pkl`` for the RNN, MLP
    and threshold buzzers, counts rows per (Possibility, Outcome) pair and
    saves a faceted bar chart to ``output/buzzer/<fold>_protobowl.pdf``.

    Parameters
    ----------
    fold :
        Fold name used in the input and output file names.
    """
    # model label -> directory name; insertion order is the stacking order
    buzzer_dirs = {
        'RNN': 'RNNBuzzer',
        'MLP': 'MLPBuzzer',
        'Threshold': 'ThresholdBuzzer',
    }

    frames = []
    for model_name, buzzer_dir in buzzer_dirs.items():
        path = 'output/buzzer/{}/{}_protobowl.pkl'.format(buzzer_dir, fold)
        # Context manager so the file handle is closed after loading.
        # NOTE: pickle must only be used on trusted files.
        with open(path, 'rb') as f:
            raw = pickle.load(f)
        counts = raw.groupby(['Possibility', 'Outcome']).size()
        counts = counts.reset_index().rename(columns={0: 'Count'})
        counts['Model'] = model_name
        frames.append(counts)

    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    df = pd.concat(frames, ignore_index=True)

    # Fix the category order used for the fill legend and the facets.
    outcome_type = CategoricalDtype(categories=[15, 10, 5, 0, -5, -10, -15])
    df['Outcome'] = df['Outcome'].astype(outcome_type)
    model_type = CategoricalDtype(categories=['Threshold', 'MLP', 'RNN'])
    df['Model'] = df['Model'].astype(model_type)

    p = (ggplot(df)
         + geom_col(aes(x='Possibility', y='Count', fill='Outcome'),
                    width=0.7)
         + facet_grid('Model ~')
         + coord_flip()
         + theme_fs()
         + theme(aspect_ratio=0.17)
         + scale_fill_brewer(type='div', palette=7))

    figure_dir = os.path.join('output/buzzer/{}_protobowl.pdf'.format(fold))
    p.save(figure_dir)
def plot_acf(data_in, figure_size=(15, 5)):
    """
    Plots the autocorrelation function

    Parameters
    ----------
    data_in : pd.DataFrame
        Dataframe containing the autocorrelation of our mcmc samples;
        the 'lag' and 'autocorrelation' columns are plotted
    figure_size : tuple, default = (15,5)
        Optional input for figure size. Note that this is written to the
        global ``pn.options.figure_size`` setting.

    Returns
    -------
    pn.ggplot:
        Plotnine ggplot object containing autocorrelation plot
    """
    pn.options.figure_size = figure_size

    # Solid zero line plus dashed red reference lines at +/- 0.05.
    zero_line = pn.geom_hline(pn.aes(yintercept=0))
    upper_line = pn.geom_hline(
        pn.aes(yintercept=0.05), color='red', linetype='dashed')
    lower_line = pn.geom_hline(
        pn.aes(yintercept=-0.05), color='red', linetype='dashed')

    base = pn.ggplot(pn.aes(x='lag', y='autocorrelation'), data=data_in)
    return base + zero_line + upper_line + lower_line + pn.geom_col()
(temp_df["time_to_published"].dt.total_seconds() / 60 / 60 / 24).max()) category_half_life # In[14]: g = (p9.ggplot( category_half_life.query("category!='none'").assign( half_life_time=lambda x: pd.to_timedelta(x.half_life_time, "D"), half_life_ci_l=lambda x: pd.to_timedelta(x.half_life_ci_l, "D"), half_life_ci_u=lambda x: pd.to_timedelta(x.half_life_ci_u, "D"), ), p9.aes(x="category", y="half_life_time", ymin="half_life_ci_l", ymax="half_life_ci_u"), ) + p9.geom_col(fill="#1f78b4") + p9.geom_errorbar() + p9.scale_x_discrete( limits=(category_half_life.query("category!='none'").sort_values( "half_life_time").category.tolist()[::-1]), ) + p9.scale_y_timedelta(labels=timedelta_format("d")) + p9.coord_flip() + p9.labs( x="Preprint Categories", y="Time Until 50% of Preprints are Published", title="Preprint Category Half-Life", ) + p9.theme_seaborn(context="paper", style="white", font_scale=1.2) + p9.theme(axis_ticks_minor_x=p9.element_blank(), )) g.save("output/preprint_category_halflife.svg", dpi=250) g.save("output/preprint_category_halflife.png", dpi=250) print(g) # Take home Results: # 1. The average amount of time for half of all preprints to be published is 348 days (~1 year)
datarows.append({ "edges": df.query("pred > @optimal_threshold").hetionet.value_counts()[0], "in_hetionet": "Novel", "relation": rel }) edges_df = pd.DataFrame.from_records(datarows) edges_df # In[11]: import math g = (p9.ggplot(edges_df, p9.aes(x="relation", y="edges", fill="in_hetionet")) + p9.geom_col(position="dodge") + p9.scale_fill_manual(values={ "Existing": color_map["Existing"], "Novel": color_map["Novel"] }) + p9.geom_text(p9.aes(label=( edges_df.apply(lambda x: f"{x['edges']}\n({x['recall']*100:.0f}%)" if not math.isnan(x['recall']) else f"{x['edges']}", axis=1))), position=p9.position_dodge(width=0.9), size=9, va="bottom") + p9.scale_y_log10() + p9.labs(y="# of Edges", x="Relation Type", title="Reconstructing Edges in Hetionet") + p9.guides(fill=p9.guide_legend(title="In Hetionet?")) + p9.theme( axis_text_y=p9.element_blank(),
int(grouped_candidates_pred_df.hetionet.value_counts()[1]), "relation": "DaG" }) datarows.append({ "edges": (grouped_candidates_pred_df.query( "pred_max > 0.5").hetionet.value_counts()[0]), "in_hetionet": "Novel", "relation": "DaG" }) edges_df = pd.DataFrame.from_records(datarows) edges_df # In[20]: g = (p9.ggplot(edges_df, p9.aes(x="relation", y="edges", fill="in_hetionet")) + p9.geom_col(position="dodge") + p9.geom_text(p9.aes(label=( edges_df.apply(lambda x: f"{x['edges']} ({x['recall']*100:.0f}%)" if not math.isnan(x['recall']) else f"{x['edges']}", axis=1))), position=p9.position_dodge( width=1), size=9, va="bottom") + p9.scale_y_log10() + p9.theme(axis_text_y=p9.element_blank(), axis_ticks_major=p9.element_blank(), rect=p9.element_blank())) print(g)
def plot_alt_benefit(plot_df,
                     title='Benefit by Alternative',
                     which='both',
                     sensitivity=False,
                     legend=True):
    '''Builds a stacked bar chart of the alternative benefits

    @ param plot_df: The df containing benefits for each alt by the
        criteria and total benefit
    @ param title: The title for the graph
    @ param which: which parts to plot. Acceptable values are 'total' for
        just total value, 'criteria' for just criteria level stacked bars,
        'both' for total and criteria. The graphs will be faceted in this
        case
    @ param sensitivity: when True the plot is additionally faceted by the
        'Criterion Weight' column
    @ param legend: when False the fill legend is hidden

    Returns the ggplot graph to be displayed elsewhere, or None (after
    printing a message) when `which` is not an accepted value'''
    type_by_which = {
        'total': 'Total Value',
        'criteria': 'Weighted Criterion Value',
    }
    if which in type_by_which:
        plot_df = plot_df.loc[plot_df['type'] == type_by_which[which]]
    elif which != 'both':
        print(
            which,
            'is not an approved value for which.\n Enter "total", "criteria", or "both"'
        )
        return None
    _facet = which == 'both'

    # Builds the base plot
    g = p9.ggplot(plot_df,
                  p9.aes(x='Alternative', y='Benefit', fill='Criterion'))
    if legend:
        g = (
            g
            # makes stacked bar plot
            + p9.geom_col(stat='identity',
                          position=p9.position_stack(vjust=.5))
            # changes the color palette to one for qualitative scales
            + p9.scale_fill_brewer(type='qual', palette='Paired'))
    else:
        g = (
            g
            # show_legend is a layer parameter, not an aesthetic, so it
            # must be passed to the geom rather than wrapped in aes().
            + p9.geom_col(stat='identity',
                          position=p9.position_stack(vjust=.5),
                          show_legend=False)
            + p9.scale_fill_brewer(type='qual', palette='Paired',
                                   guide=False)
            # 'none' is the value that removes the legend.
            + p9.theme(legend_position='none'))

    g = (
        g
        # adds weighted value to bars
        + p9.geom_text(p9.aes(label='print_value'),
                       position=p9.position_stack(vjust=.5),
                       size=6,
                       hjust='center')
        # makes the title
        + p9.ggtitle(title)
        # rotates x axis labels
        + p9.theme(axis_text_x=p9.element_text(rotation=45, hjust=1)))

    # Adds the facet if required
    if sensitivity:
        if _facet:
            return g + p9.facet_grid('type~Criterion Weight')
        return g + p9.facet_grid('Criterion Weight~')
    if _facet:
        return g + p9.facet_grid('~type')
    return g
def test_labels_series():
    """Aesthetics given as the df.x / df.y Series get labels 'x' and 'y'."""
    mapping = aes(x=df.x, y=df.y)
    plot = ggplot(df, mapping) + geom_col()

    expected_labels = {'x': 'x', 'y': 'y'}
    assert plot.labels == expected_labels
def plot(self):
    """Build the figure for ``self.figtype`` with plotnine and save it.

    A DataFrame is created from ``self.data`` with ``self.datacols`` as
    columns. Column 0 is mapped to y and column 1 to x by default; when a
    third column exists it is used for fill/color in several figure types.
    If ``self.savedata`` is set, the data is also written to
    ``self.outprefix + ".csv"``. Nothing is plotted when the frame has no
    rows.

    Raises
    ------
    ValueError
        If ``self.figtype`` is not one of the handled figure types.
    """
    # NOTE(review): the original indentation of this block was lost; the
    # extent of the `with` body and the placement of the `theme_elems`
    # assignments in the "pie" and "freqpoly" branches are the most
    # plausible reading and should be confirmed.
    df = pandas.DataFrame(
        self.data,
        columns=self.datacols,
    )
    with capture_c_msg("datar", prefix=f"[r]{self.title}[/r]: "):
        df.columns = make_unique(df.columns.tolist())

    if self.savedata:
        datafile = self.outprefix + ".csv"
        logger.info(
            "[r]%s[/r]: Saving data to: %r",
            self.title,
            datafile,
            extra={"markup": True},
        )
        df.to_csv(datafile, index=False)

    if df.shape[0] == 0:
        logger.warning("No data points to plot")
        return

    aes_for_geom_fill = None
    aes_for_geom_color = None
    # Default theme: rotated x tick labels. Some branches replace it.
    theme_elems = p9.theme(axis_text_x=p9.element_text(angle=60, hjust=2))
    if df.shape[1] > 2:
        # A third column, when present, drives fill/color.
        aes_for_geom_fill = p9.aes(fill=df.columns[2])
        aes_for_geom_color = p9.aes(color=df.columns[2])
    plt = p9.ggplot(df, p9.aes(y=df.columns[0], x=df.columns[1]))
    if self.figtype == "scatter":
        plt = plt + p9.geom_point(aes_for_geom_color)
        theme_elems = None
    elif self.figtype == "line":
        # No geom is added for "line"; only the base plot and title are
        # passed on.
        pass
    elif self.figtype == "bar":
        plt = plt + p9.geom_bar(p9.aes(fill=df.columns[0]))
    elif self.figtype == "col":
        plt = plt + p9.geom_col(aes_for_geom_fill)
    elif self.figtype == "pie":
        logger.warning("Pie chart is not support by plotnine yet, "
                       "plotting bar chart instead.")
        col0 = df.iloc[:, 0]
        if df.shape[1] > 2:
            plt = plt + p9.geom_bar(
                p9.aes(x=df.columns[2], y=col0.name, fill=df.columns[2]),
                stat="identity"
                # aes_for_geom_fill,
                # x=df.Group,
                # y=col0,
                # label=paste0(round_(100 * col0 / sum_(col0), 1), "%"),
                # show_legend=False,
                # position=p9.position_adjust_text(),
            )
        else:
            # NOTE(review): factor/rev/levels/unique/as_character/cumsum/
            # paste0 (and possibly map/sum/round) presumably come from
            # datar - confirm, since the arithmetic on `sums` below would
            # not work with the builtin `map`.
            col0 = factor(col0, levels=rev(unique(as_character(col0))))
            fills = rev(levels(col0))
            sums = map(lambda x: sum(col0 == x), fills)
            print(col0)
            print(fills)
            plt = (p9.ggplot(df, p9.aes(x=df.columns[1])) +
                   p9.geom_bar(p9.aes(fill=df.columns[0])) + p9.geom_label(
                       x=1,
                       y=cumsum(sums) - sums / 2,
                       label=paste0(round(sums / sum(sums) * 100, 1), "%"),
                       show_legend=False,
                   ))
        theme_elems = p9.theme(
            axis_title_x=p9.element_blank(),
            axis_title_y=p9.element_blank(),
            axis_text_y=p9.element_blank(),
        )
    elif self.figtype == "violin":
        plt = plt + p9.geom_violin(aes_for_geom_fill)
    elif self.figtype == "boxplot":
        plt = plt + p9.geom_boxplot(aes_for_geom_fill)
    elif self.figtype in ("histogram", "density"):
        # These figure types use column 0 as x only.
        plt = p9.ggplot(df, p9.aes(x=df.columns[0]))
        geom = getattr(p9, f"geom_{self.figtype}")
        # A second column named "ONE" means no fill grouping is applied.
        if df.columns[1] != "ONE":
            plt = plt + geom(p9.aes(fill=df.columns[1]), alpha=0.6)
            theme_elems = None
        else:
            plt = plt + geom(alpha=0.6)
            theme_elems = p9.theme(legend_position="none")
    elif self.figtype == "freqpoly":
        plt = p9.ggplot(df, p9.aes(x=df.columns[0]))
        if df.columns[1] != "ONE":
            plt = plt + p9.geom_freqpoly(p9.aes(fill=df.columns[1]))
        else:
            plt = plt + p9.geom_freqpoly()
        theme_elems = None
    else:
        raise ValueError(f"Unknown figure type: {self.figtype}")

    plt = plt + p9.ggtitle(self.title)
    self.save_plot(plt, theme_elems)
def barchart_make(roi, df, list_rois, config, ylimit, save_function,
                  find_ylim_function):
    """Create and save the bar chart (with error bars) for a single ROI.

    ``list_rois[roi]`` selects the rows of ``df`` whose 'index' column
    matches. When ``ylimit`` is truthy it is applied as the upper y limit
    and '_same_ylim' is appended to the figure name.

    Returns the value from ``find_ylim_function`` when
    ``config.use_same_axis_limits`` asks for shared limits and ``ylimit``
    is 0, otherwise 0. In 'Same limits' mode with ``ylimit`` 0 the figure
    is not saved.
    """
    roi_name = list_rois[roi]
    x_col = config.single_roi_fig_x_axis

    roi_df = df.loc[df['index'] == roi_name]
    roi_df = roi_df.sort_values([x_col])
    roi_df = roi_df.reset_index(drop=True)  # Reset index to remove grouping
    # Categorical with categories in sorted order of appearance.
    roi_df[x_col] = pd.Categorical(roi_df[x_col],
                                   categories=roi_df[x_col].unique())

    mapping = pltn.aes(
        x=x_col,
        y='Mean',
        ymin="Mean-Conf_Int_95",
        ymax="Mean+Conf_Int_95",
        fill='factor({colour})'.format(colour=config.single_roi_fig_colour))

    bars = pltn.geom_col(
        position=pltn.position_dodge(preserve='single', width=0.8),
        width=0.8,
        na_rm=True)
    error_bars = pltn.geom_errorbar(
        size=1,
        position=pltn.position_dodge(preserve='single', width=0.8))
    axis_labels = pltn.labs(x=config.single_roi_fig_label_x,
                            y=config.single_roi_fig_label_y,
                            fill=config.single_roi_fig_label_fill)
    look = pltn.theme(
        panel_grid_major_x=pltn.element_line(alpha=0),
        axis_title_x=pltn.element_text(weight='bold', color='black',
                                       size=20),
        axis_title_y=pltn.element_text(weight='bold', color='black',
                                       size=20),
        axis_text_y=pltn.element_text(size=20, color='black'),
        legend_title=pltn.element_text(size=20, color='black'),
        legend_text=pltn.element_text(size=18, color='black'),
        subplots_adjust={'right': 0.85},
        legend_position=(0.9, 0.8),
        dpi=config.plot_dpi)
    # x tick labels are blanked by the scale; the x values are drawn as
    # text at y=-.7 instead.
    x_text = pltn.geom_text(pltn.aes(y=-.7, label=x_col),
                            color='black', size=20, va='top')

    figure = (pltn.ggplot(roi_df, mapping)
              + pltn.theme_538()
              + bars
              + error_bars
              + axis_labels
              + pltn.scale_x_discrete(labels=[])
              + look
              + x_text
              + pltn.scale_fill_manual(
                  values=config.colorblind_friendly_plot_colours))

    if ylimit:
        # Set y limit of figure (used to make it the same for every barchart)
        figure += pltn.ylim(None, ylimit)
        roi_name += '_same_ylim'

    returned_ylim = 0
    shared_modes = ('Same limits', 'Create both')
    if config.use_same_axis_limits in shared_modes and ylimit == 0:
        returned_ylim = find_ylim_function(roi_name, figure, 'yaxis')

    if config.use_same_axis_limits == 'Same limits' and ylimit == 0:
        return returned_ylim

    folder = 'Same_yaxis' if ylimit != 0 else 'Different_yaxis'
    save_function(figure, roi_name, config, folder, 'barchart')
    return returned_ylim
def intensity_graph(Data, Data_m):
    """Plot attack counts per intensity for all data and for ``Data_m``.

    Only rows with ``Group == "sy"`` and a non-missing ``Intensity`` are
    counted. The plot for ``Data_m`` is saved as ``Graph_1.jpeg`` and the
    plot for ``Data`` as ``Graph_ALL_1.jpeg`` under ``pdf/iteration/``.
    Nothing is plotted when every ``Intensity`` in ``Data`` is missing.
    Returns ``None``.
    """
    print('======= Creating intensity_graph =======')

    def _symptoms_with_intensity(frame):
        return frame[(frame.Group == "sy") & (pd.isna(frame.Intensity) == 0)]

    def _count_per_intensity(frame):
        counts = frame.groupby("Intensity", sort=True,
                               as_index=False).count()
        counts = counts.iloc[:, 0:2]
        return counts.rename(columns={"Unnamed: 0": "n"})

    def _intensity_bars(counts):
        ticks = list(range(1, 11))
        return (p9.ggplot(data=counts, mapping=p9.aes(x='Intensity', y='n'))
                + p9.geom_col(fill='red')
                + p9.theme_classic()
                + p9.theme(axis_text=p9.element_text(size=40),
                           axis_title=p9.element_text(size=40, face='bold'))
                + p9.coord_cartesian(xlim=(1, 10))
                + p9.scale_x_continuous(labels=ticks, breaks=list(ticks))
                + p9.labs(x='', y='No. of attacks'))

    def _save(figure, filename):
        figure.save(filename=filename,
                    plot=figure,
                    path="pdf/iteration/",
                    width=25,
                    height=5,
                    dpi=320)

    missing = Data.Intensity[pd.isna(Data.Intensity) == True]
    if len(missing) == len(Data):
        print("WARNING: All values for Intensity are NA's")
    else:
        # Filter the Data_m and full-data symptoms with a valid Intensity
        Data_m_int = _symptoms_with_intensity(Data_m)
        Data_all_int = _symptoms_with_intensity(Data)

        Test_3_m = _count_per_intensity(Data_m_int)
        Test_3 = _count_per_intensity(Data_all_int)

        plot = _intensity_bars(Test_3)
        plot_month = _intensity_bars(Test_3_m)

        # Creating and saving the Data_m graph (Graph_1)
        if len(Data_m_int) > 0:
            _save(plot_month, 'Graph_1.jpeg')
        else:
            print('Plot not created; no data found.')

        # Creating and saving the full-data graph (Graph_ALL_1)
        if len(Data_all_int) > 0:
            _save(plot, 'Graph_ALL_1.jpeg')
        else:
            print('Plot not created; no data found.')

    print('=================================intensity_graph DONE =============================')
    return None
def test_reorder():
    """Bars with x and fill both given by 'reorder(x, y)'."""
    mapping = aes('reorder(x, y)', 'y', fill='reorder(x, y)')
    plot = ggplot(df, mapping) + geom_col()

    themed = plot + _theme
    assert themed == 'reorder'
def test_labels_lists():
    """Aesthetics given as plain lists produce None labels."""
    values = [1, 2, 3]
    mapping = aes(x=list(values), y=list(values))
    plot = ggplot(df, mapping) + geom_col()

    expected_labels = {'x': None, 'y': None}
    assert plot.labels == expected_labels
["is_same_paper_1", "is_same_paper_2", "is_same_paper_3"]].mode(axis=1)))) final_annotated_df.head() # In[6]: binned_stats_df = (final_annotated_df.groupby( "distance_bin").final_same_paper.mean().to_frame().rename( index=str, columns={ "final_same_paper": "frac_correct" }).reset_index()) binned_stats_df # In[7]: g = (p9.ggplot(binned_stats_df, p9.aes(x="distance_bin", y="frac_correct")) + p9.geom_col(fill="#a6cee3") + p9.coord_flip() + p9.labs(x="Fraction Correct", y="Euclidean Distance Bins") + p9.theme_seaborn( context="paper", style="ticks", font="Arial", font_scale=1.5)) g.save("output/figures/distance_bin_accuracy.svg") g.save("output/figures/distance_bin_accuracy.png", dpi=250) print(g) # # Logsitic Regression Performance # In[8]: biorxiv_embed_df = (pd.read_csv(Path("../word_vector_experiment/output/") / "word2vec_output/" / "biorxiv_all_articles_300.tsv.xz", sep="\t").set_index("document"))
def test_reorder_index():
    """Bars with x given by 'reorder(x, x.index)'."""
    # The dataframe is created with ordering according to the y
    # variable. So the x index should be ordered acc. to y too
    mapping = aes('reorder(x, x.index)', 'y')
    plot = ggplot(df, mapping) + geom_col()

    themed = plot + _theme
    assert themed == 'reorder_index'
best_result = list(filter(lambda x: x[1] == model.C_, enumerate(model.Cs_)))[0] print(best_result) print("Best CV Fold") print(model.scores_["polka"][:, best_result[0]]) model.scores_["polka"][:, best_result[0]].mean() model_weights_df = pd.DataFrame.from_dict({ "weight": model.coef_[0], "pc": list(range(1, 51)), }) model_weights_df["pc"] = pd.Categorical(model_weights_df["pc"]) model_weights_df.head() g = (p9.ggplot(model_weights_df, p9.aes(x="pc", y="weight")) + p9.geom_col(position=p9.position_dodge(width=5), fill="#253494") + p9.coord_flip() + p9.scale_x_discrete(limits=list(sorted(range(1, 51), reverse=True))) + p9.theme_seaborn( context="paper", style="ticks", font_scale=1.1, font="Arial") + p9.theme(figure_size=(10, 8)) + p9.labs(title="Regression Model Weights", x="Princpial Component", y="Model Weight")) # g.save("output/figures/pca_log_regression_weights.svg") # g.save("output/figures/pca_log_regression_weights.png", dpi=250) print(g) fold_features = model.coefs_paths_["polka"].transpose(1, 0, 2) model_performance_df = pd.DataFrame.from_dict({ "feat_num": ((fold_features.astype(bool).sum(axis=1)) > 0).sum(axis=1), "C":
items = {}
data = {}
all_models = set()
all_tasks = set()
data = {}

# Read the tab-separated input into one list of raw values per column.
with gzip.open(options.input) as ifd:
    for row in csv.DictReader(ifd, delimiter="\t"):
        for k, v in row.items():
            data.setdefault(k, []).append(v)

# Convert a column to ints when every value is all digits, otherwise to
# floats when every maybe_float result is truthy; else leave it as read.
for k in data.keys():
    raw_values = data[k]
    floats = [maybe_float(x) for x in raw_values]
    if all([re.match(r"^\d+$", x) for x in raw_values]):
        data[k] = [int(x) for x in raw_values]
    elif all(floats):
        data[k] = floats

df = pandas.DataFrame(data)

x_factor = "factor(%s)" % (options.x)
color_factor = "factor(%s)" % (options.color)
x = (
    (ggplot(df, aes(x_factor, options.y, color=color_factor)))
    + ggtitle(options.title.strip("'"))
    + ylab(options.ylabel.strip("'"))
    + xlab(options.xlabel.strip("'"))
    + labs(color=options.color_label.strip("'"))
    + geom_col(show_legend=False)
    + lims(y=(0.0, 1.0))
)
x.save(options.output)
# theme(legend_title=element_text("")) + \