def graph(df):
    """Build the NDVI-over-time chart for ``df``.

    The plot maps the ``'Time'`` column to x and the ``'NDVI'`` column to y,
    draws a thick green line with points on top, shades the area under the
    curve in translucent green, and fixes the y axis to the 0..1 range.

    Returns the assembled ggplot object; nothing is drawn or saved here.
    """
    plot = ggplot(data=df, mapping=aes(x='Time', y='NDVI'))
    plot = plot + geom_line(size=2, color='green')
    plot = plot + geom_point()
    plot = plot + theme_linedraw()
    # Rotate the x tick labels by 45 degrees and right-align them.
    plot = plot + theme(axis_text_x=element_text(rotation=45, hjust=1))
    plot = plot + scales.ylim(0, 1)
    plot = plot + geom_area(fill="green", alpha=.4)
    return plot
def all_stack(fold=BUZZER_DEV_FOLD):
    """Save a faceted stacked-area chart comparing the three buzzers.

    Calls ``stack`` once per buzzer output directory (RNN, MLP, Threshold),
    combines the three results into one frame and writes the plot to
    ``output/buzzer/<fold>_stack.pdf``.

    Parameters
    ----------
    fold : value forwarded to ``stack`` and interpolated into the output
        file name.
    """
    # Function-scope import so this block does not depend on a module-level
    # ``pd`` alias being present.
    import pandas as pd

    # NOTE(review): assumes ``stack`` returns DataFrames that share the
    # 'Position', 'Frequency', 'Buzzing' and 'Model' columns -- confirm.
    frames = [
        stack('output/buzzer/RNNBuzzer', 'RNN', fold),
        stack('output/buzzer/MLPBuzzer', 'MLP', fold),
        stack('output/buzzer/ThresholdBuzzer', 'Threshold', fold),
    ]
    # DataFrame.append was removed in pandas 2.0; a single concat produces the
    # same rows in the same order with a fresh index.
    df = pd.concat(frames, ignore_index=True)

    # The explicit category order fixes the facet order: Threshold, MLP, RNN.
    model_type = CategoricalDtype(categories=['Threshold', 'MLP', 'RNN'])
    df['Model'] = df['Model'].astype(model_type)

    p = (ggplot(df)
         + geom_area(aes(x='Position', y='Frequency', fill='Buzzing'))
         + facet_grid('~ Model')
         + theme_fs()
         + theme(aspect_ratio=1)
         + scale_fill_brewer(type='div', palette=7))
    p.save('output/buzzer/{}_stack.pdf'.format(fold))
def all_stack(fold=BUZZER_DEV_FOLD):
    """Save a faceted stacked-area chart comparing the three buzzers.

    ``stack`` is called for the RNN, MLP and Threshold output directories;
    the results are concatenated and plotted with one facet per model, then
    written to ``output/buzzer/<fold>_stack.pdf``.

    Parameters
    ----------
    fold : value forwarded to ``stack`` and interpolated into the output
        file name.
    """
    # Function-scope import so this block does not depend on a module-level
    # ``pd`` alias being present.
    import pandas as pd

    df_rnn = stack("output/buzzer/RNNBuzzer", "RNN", fold)
    df_mlp = stack("output/buzzer/MLPBuzzer", "MLP", fold)
    df_thr = stack("output/buzzer/ThresholdBuzzer", "Threshold", fold)

    # DataFrame.append was removed in pandas 2.0; concat keeps the same row
    # order (RNN, MLP, Threshold) and renumbers the index.
    df = pd.concat([df_rnn, df_mlp, df_thr], ignore_index=True)

    # The explicit category order fixes the facet order: Threshold, MLP, RNN.
    model_type = CategoricalDtype(categories=["Threshold", "MLP", "RNN"])
    df["Model"] = df["Model"].astype(model_type)

    p = (
        ggplot(df)
        + geom_area(aes(x="Position", y="Frequency", fill="Buzzing"))
        + facet_grid("~ Model")
        + theme_fs()
        + theme(aspect_ratio=1)
        + scale_fill_brewer(type="div", palette=7)
    )
    p.save("output/buzzer/{}_stack.pdf".format(fold))
def all_stack(fold=BUZZER_DEV_FOLD):
    """Save a faceted stacked-area chart comparing the three buzzers.

    Gathers the per-model frames produced by ``stack`` (RNN, MLP, Threshold),
    merges them, and writes a one-facet-per-model area chart to
    ``output/buzzer/<fold>_stack.pdf``.

    Parameters
    ----------
    fold : value forwarded to ``stack`` and interpolated into the output
        file name.
    """
    # Function-scope import so this block does not depend on a module-level
    # ``pd`` alias being present.
    import pandas as pd

    sources = (
        ('output/buzzer/RNNBuzzer', 'RNN'),
        ('output/buzzer/MLPBuzzer', 'MLP'),
        ('output/buzzer/ThresholdBuzzer', 'Threshold'),
    )
    # DataFrame.append was removed in pandas 2.0; build all frames first and
    # concatenate once, preserving the RNN, MLP, Threshold row order.
    df = pd.concat(
        [stack(path, model_name, fold) for path, model_name in sources],
        ignore_index=True)

    # The explicit category order fixes the facet order: Threshold, MLP, RNN.
    model_type = CategoricalDtype(
        categories=['Threshold', 'MLP', 'RNN'])
    df['Model'] = df['Model'].astype(model_type)

    p = (
        ggplot(df)
        + geom_area(aes(x='Position', y='Frequency', fill='Buzzing'))
        + facet_grid('~ Model')
        + theme_fs()
        + theme(
            aspect_ratio=1,
        )
        + scale_fill_brewer(type='div', palette=7)
    )
    p.save('output/buzzer/{}_stack.pdf'.format(fold))
def test_area_aesthetics():
    """Exercise geom_area with each aesthetic on horizontally shifted copies.

    Every layer offsets x by a further multiple of ``width`` so the variants
    sit side by side. The finished plot plus ``_theme`` is compared against
    the ``'area_aesthetics'`` name (presumably resolved by the test suite's
    plot-comparison hook -- defined outside this block).
    """
    # X ticks at multiples of 2*pi, labelled 0, 2pi, 4pi, ... 12pi.
    tick_breaks = [i * 2 * np.pi for i in range(7)]
    tick_labels = ['0'] + [r'${}\pi$'.format(2 * i) for i in range(1, 7)]

    p = ggplot(df, aes('x', 'ymax+2', group='factor(z)'))
    p = p + geom_area()
    p = p + geom_area(aes('x+width', alpha='z'))
    p = p + geom_area(aes('x+2*width', linetype='factor(z)'),
                      color='black', fill=None, size=2)
    p = p + geom_area(aes('x+3*width', color='z'), fill=None, size=2)
    p = p + geom_area(aes('x+4*width', fill='factor(z)'))
    p = p + geom_area(aes('x+5*width', size='z'), color='black', fill=None)
    p = p + scale_x_continuous(breaks=tick_breaks, labels=tick_labels)

    assert p + _theme == 'area_aesthetics'
def test_area_aesthetics():
    """Check geom_area rendering across alpha, linetype, color, fill and size.

    Each variant is drawn at a different x offset (a multiple of ``width``)
    and the combined plot plus ``_theme`` is compared against the
    ``'area_aesthetics'`` name (the comparison itself is defined outside
    this block).
    """
    p = ggplot(df, aes('x', 'ymax+2', group='factor(z)'))

    # Layers are listed in drawing order and folded onto the base plot below.
    layers = [
        geom_area(),
        geom_area(aes('x+width', alpha='z')),
        geom_area(aes('x+2*width', linetype='factor(z)'),
                  color='black', fill=None, size=2),
        geom_area(aes('x+3*width', color='z'), fill=None, size=2),
        geom_area(aes('x+4*width', fill='factor(z)')),
        geom_area(aes('x+5*width', size='z'), color='black', fill=None),
        # X ticks at multiples of 2*pi, labelled 0, 2pi, 4pi, ... 12pi.
        scale_x_continuous(
            breaks=[i*2*np.pi for i in range(7)],
            labels=['0'] + [r'${}\pi$'.format(2*i) for i in range(1, 7)]),
    ]
    for layer in layers:
        p = p + layer

    assert p + _theme == 'area_aesthetics'
def eval(fold=BUZZER_DEV_FOLD):
    """Run the saved RNN buzzer on one fold, dump its buzzes and plot reports.

    Steps, in order:

    1. Load the fold with ``read_data`` and restore the model weights from
       ``args.model_path``.
    2. Predict every batch and pickle a ``{qid: [positions, predictions]}``
       dict to ``buzzes_dir.format(fold)``.
    3. Bucket every (label, score) pair by relative position in the question
       (integer percent of the last position) and save two PDFs into
       ``report_dir``: ``<fold>_acc_buzz.pdf`` (mean label and fraction of
       scores above 0.5 / 0.3 / 0.7) and ``<fold>_stack_area.pdf`` (share of
       only_oracle / only_buzzer / both / neither at threshold 0.5).

    Parameters
    ----------
    fold : fold identifier passed to ``read_data`` and used in file names.

    Side effects: sets ``args.n_input``, prints the fold size, writes the
    pickle and the two PDFs.
    """
    # makedirs(exist_ok=True) replaces the isdir/mkdir pair: no race between
    # check and create, and missing parent directories are created too.
    os.makedirs(report_dir, exist_ok=True)

    valid = read_data(fold)
    print('# {} data: {}'.format(fold, len(valid)))
    valid_iter = chainer.iterators.SerialIterator(
        valid, args.batch_size, repeat=False, shuffle=False)

    # The input width is read off the first vector of the first example.
    args.n_input = valid[0][1][0].shape[0]
    model = RNNBuzzer(args.n_input, args.n_layers, args.n_hidden,
                      args.n_output, args.dropout)
    chainer.serializers.load_npz(args.model_path, model)
    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    predictions = []
    buzzes = {}
    for batch in tqdm(valid_iter):
        qids, _vectors, _labels, positions = list(map(list, zip(*batch)))
        batch = convert_seq(batch, device=args.gpu)
        preds = model.predict(batch['xs'], softmax=True)
        preds = [p.tolist() for p in preds]
        predictions.extend(preds)
        for qid, q_positions, q_preds in zip(qids, positions, preds):
            # Pair positions with predictions (truncating to the shorter of
            # the two), then transpose back to [positions, predictions].
            buzzes[qid] = list(map(list, zip(*zip(q_positions, q_preds))))

    buzz_path = buzzes_dir.format(fold)
    with open(buzz_path, 'wb') as f:
        pickle.dump(buzzes, f)

    # results: relative position (0..100) -> list of (label, score) pairs,
    # where score is the element at index 1 of each prediction.
    results = {}
    for example_idx in range(len(valid)):
        _qid, _vectors, labels, positions = valid[example_idx]
        preds = predictions[example_idx]
        q_len = positions[-1]
        for i, pos in enumerate(positions):
            rel_pos = int(100 * pos / q_len)
            results.setdefault(rel_pos, []).append((labels[i], preds[i][1]))

    freq = {'x': [], 'y': [], 'type': []}
    for k, rs in results.items():
        correct, scores = list(map(list, zip(*rs)))
        x = k / 100
        freq['x'].append(x)
        freq['y'].append(sum(correct) / len(correct))
        freq['type'].append('acc')
        # Fraction of scores above each cutoff; row order 0.5, 0.3, 0.7 is
        # kept as in the original output frame.
        for type_name, cutoff in (('0.5', 0.5), ('0.3', 0.3), ('0.7', 0.7)):
            freq['x'].append(x)
            freq['y'].append(sum(s > cutoff for s in scores) / len(scores))
            freq['type'].append(type_name)
    freq_df = pd.DataFrame(freq)

    p0 = ggplot(freq_df) + geom_smooth(aes(x='x', y='y', color='type'))
    p0.save(os.path.join(report_dir, '{}_acc_buzz.pdf'.format(fold)))

    stack_freq = {'x': [], 'y': [], 'type': []}
    threshold = 0.5
    for k, rs in results.items():
        num = len(rs)
        # c is the label, b the buzzer score for one (example, position).
        counts = (
            ('only_oracle', sum((c == 1 and b <= threshold) for c, b in rs)),
            ('only_buzzer', sum((c == 0 and b > threshold) for c, b in rs)),
            ('both', sum((c == 1 and b > threshold) for c, b in rs)),
            ('neither', sum((c == 0 and b <= threshold) for c, b in rs)),
        )
        for type_name, count in counts:
            stack_freq['x'].append(k / 100)
            stack_freq['y'].append(count / num)
            stack_freq['type'].append(type_name)
    stack_freq_df = pd.DataFrame(stack_freq)

    p1 = ggplot(stack_freq_df) + geom_area(aes(x='x', y='y', fill='type'))
    p1.save(os.path.join(report_dir, '{}_stack_area.pdf'.format(fold)))
sensitivities.append(0)
especifities_1.append(0)  # so that the plotted curve ends on the diagonal

# now draw the curve
import matplotlib.pyplot as plt

# Earlier matplotlib version of the ROC plot, disabled by wrapping it in a
# bare string literal; the literal is kept verbatim and has no effect.
"""%matplotlib inline plt.plot(especifities_1,sensitivities, marker="o", linestyle="--", color="r") x=[i*0.01 for i in range(100)] y=[i*0.01 for i in range(100)] plt.plot(x,y) #pinto la diagonal (el peor modelo que existe) plt.xlabel("1-Especificidad") plt.ylabel("Sensibilidad") plt.title("Curva ROC") #recordemos que mi seleccion de variables era una mierda absoluta """

# the larger the area between the curve and the diagonal, the better the
# predictive model
from sklearn import metrics
# to import everything, use * instead of the explicit names
from plotnine import ggplot, aes, geom_line, geom_area, ggtitle, xlim, ylim

espec_1, sensit, _ = metrics.roc_curve(Y_test, prob)
df = pd.DataFrame({"x": espec_1, "y": sensit})
auc = metrics.auc(espec_1, sensit)  # area under the curve
print(df.head())

# Both plots share the same data and x/y mapping.
roc_base = ggplot(df, aes(x="x", y="y"))

# Solid ROC line with the same line drawn again dashed, on padded 0..1 axes.
print(
    roc_base
    + geom_line()
    + geom_line(linetype="dashed")
    + xlim(-0.01, 1.01)
    + ylim(-0.01, 1.01)
)

# ROC line with the area under it shaded; the AUC value goes in the title.
print(
    roc_base
    + geom_area(alpha=0.25)
    + geom_line(aes(y="y"))
    + ggtitle("Curva ROC y AUC=%s " % str(auc))
)
def area_plot(df,
              x,
              y,
              group=None,
              facet_x=None,
              facet_y=None,
              aggfun='sum',
              fill=False,
              sort_groups=True,
              base_size=10,
              figure_size=(6, 3)):
    '''
    Aggregates data in df and plots as a stacked area chart.

    Parameters
    ----------
    df : pd.DataFrame
      input dataframe (copied, never modified)
    x : str
      quoted expression to be plotted on the x axis; the special value
      '.index' uses the dataframe index
    y : str
      quoted expression to be plotted on the y axis
    group : str
      quoted expression to be used as group (ie color)
    facet_x : str
      quoted expression to be used as facet
    facet_y : str
      quoted expression to be used as facet (only applied together with
      facet_x)
    aggfun : str or fun
      function to be used for aggregating (eg sum, mean, median ...)
    fill : bool
      plot shares for each group instead of absolute values
    sort_groups : bool
      sort groups by the sum of their value (otherwise alphabetical order is
      used)
    base_size : int
      base size for theme_ez
    figure_size : tuple of int
      figure size

    Returns
    -------
    g : EZPlot
      EZplot object
    '''

    # create a copy of the data
    dataframe = df.copy()

    # define groups and variables; remove and store (eventual) names
    names = {}
    groups = {}
    variables = {}

    for label, var in zip(['x', 'group', 'facet_x', 'facet_y'],
                          [x, group, facet_x, facet_y]):
        names[label], groups[label] = unname(var)
    names['y'], variables['y'] = unname(y)

    # fix special cases
    if x == '.index':
        groups['x'] = '.index'
        names['x'] = (dataframe.index.name
                      if dataframe.index.name is not None else '')

    # aggregate data and reorder columns
    gdata = agg_data(dataframe, variables, groups, aggfun, fill_groups=True)
    # Assign the filled column back instead of calling fillna(inplace=True)
    # on a column selection: the chained in-place form does not update gdata
    # under pandas copy-on-write and emits a warning on pandas 2.x.
    gdata['y'] = gdata['y'].fillna(0)
    gdata = gdata[[
        c for c in ['x', 'y', 'group', 'facet_x', 'facet_y']
        if c in gdata.columns
    ]]

    if fill:
        # normalize y by the total of its (x, facet) cell so groups sum to ~1;
        # EPSILON keeps the division defined when a cell total is zero
        groups_to_normalize = [
            c for c in ['x', 'facet_x', 'facet_y'] if c in gdata.columns
        ]
        total_values = (gdata
                        .groupby(groups_to_normalize)['y']
                        .sum()
                        .reset_index()
                        .rename(columns={'y': 'tot_y'}))
        gdata = pd.merge(gdata, total_values, on=groups_to_normalize)
        gdata['y'] = gdata['y'] / (gdata['tot_y'] + EPSILON)
        gdata = gdata.drop('tot_y', axis=1)
        ylabeller = percent_labels
    else:
        ylabeller = ez_labels

    # get plot object
    g = EZPlot(gdata)

    # determine order and create a categorical type
    if sort_groups:
        sort_data_groups(g)

    # get colors (reversed palette)
    colors = np.flip(ez_colors(g.n_groups('group')))

    # set groups
    if group is None:
        g += p9.geom_area(p9.aes(x="x", y="y"),
                          colour=None,
                          fill=ez_colors(1)[0],
                          na_rm=True)
    else:
        g += p9.geom_area(p9.aes(x="x", y="y",
                                 group="factor(group)",
                                 fill="factor(group)"),
                          colour=None,
                          na_rm=True)
        g += p9.scale_fill_manual(values=colors)

    # set facets
    # NOTE(review): facet_y without facet_x adds no facet at all -- confirm
    # this is intended.
    if facet_x is not None and facet_y is None:
        g += p9.facet_wrap('~facet_x')
    if facet_x is not None and facet_y is not None:
        g += p9.facet_grid('facet_y~facet_x')

    # set x scale
    if g.column_is_timestamp('x'):
        g += p9.scale_x_datetime()
    elif g.column_is_categorical('x'):
        g += p9.scale_x_discrete()
    else:
        g += p9.scale_x_continuous(labels=ez_labels)

    # set y scale; the third expand entry is 0.13 for absolute values and
    # 0.03 when plotting shares
    g += p9.scale_y_continuous(labels=ylabeller,
                               expand=[0, 0, 0.1 * (not fill) + 0.03, 0])

    # set axis labels
    g += p9.xlab(names['x']) + p9.ylab(names['y'])

    # set theme
    g += theme_ez(figure_size=figure_size,
                  base_size=base_size,
                  legend_title=p9.element_text(text=names['group'],
                                               size=base_size))

    if sort_groups:
        g += p9.guides(fill=p9.guide_legend(reverse=True),
                       color=p9.guide_legend(reverse=True))

    return g
+ plt9.geom_line() + plt9.geom_hline(yintercept=len(pokemon)) + plt9.ylim(0, len(pokemon)) ) num_unique_pokemon_plot.save(args.num_unique_pokemon_plot, dpi=300) print("Output:", args.num_unique_pokemon_plot) data_2 = simulation_data.to_num_missing_data_frame() num_missing_pokemon_plot = ( plt9.ggplot( data_2[data_2["case_id"] == 0], plt9.aes("roll_num", "num_missing", fill="rarity"), ) + plt9.geom_area() + plt9.geom_hline(yintercept=len(pokemon)) + plt9.ylim(0, len(pokemon)) + plt9.scale_fill_hue( name="Rarity", labels=[ "Common", "Uncommon", "Rare", "Very rare", "Legendary", "Ultra beast", ], ) + plt9.xlab("Num rolls (exculding extra rolls)") + plt9.ylab("Num Pokémon missing")
def main():
    """Build every figure of the analysis and hand each one to ``save_both``.

    Configures matplotlib's mathtext font, silences three families of
    warnings by message pattern, then builds the contour-style line plots
    (P_f, P_q, O', σ', liabilities) and finally the "more profitable space"
    area chart. Each plot is passed to ``save_both`` with a file stem.
    """
    mpl.rc('mathtext', fontset='cm')
    warnings.filterwarnings('ignore', r'(geom|position)_\w+ ?: Removed \d+ rows')
    warnings.filterwarnings('ignore', r'Saving .+ x .+ in image')
    warnings.filterwarnings('ignore', r'Filename: .+\.png')

    df = concat_map(Pf_Ob_Ol, 'P_f', np.linspace(0.1, 1, 10))
    save_both(my_plot(df, 'O_b', 'O_l', 'P_f')
              + titles('P_f(O_b, O_l)')
              + limits((1, 10))
              + gg.geom_abline(slope=1, intercept=0, linetype='dashed', color='grey')
              + gg.geom_line()
              , 'Pf_Ob_Ol')

    df = concat_map(Pf_Ob_σ, 'P_f', np.linspace(0.1, 1, 10))
    save_both(my_plot(df, 'O_b', 'σ', 'P_f')
              + titles('P_f(O_b, σ)')
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'Pf_Ob_σ')

    df = concat_map(Pq_Ob_Ol, 'P_q', np.linspace(-0.9, 0, 10))
    save_both(my_plot(df, 'O_b', 'O_l', 'P_q')
              + titles('P_q(O_b, O_l)')
              + limits((1, 10))
              + gg.geom_abline(slope=1, intercept=0, linetype='dashed', color='grey')
              + gg.geom_line()
              , 'Pq_Ob_Ol')

    df = concat_map(Pq_Ob_σ, 'P_q', np.linspace(-0.9, 0, 10))
    save_both(my_plot(df, 'O_b', 'σ', 'P_q')
              + titles('P_q(O_b, σ)')
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'Pq_Ob_σ')

    df = concat_map(Opr_Ob_Ol, 'Opr', np.linspace(1, 5, 9))
    save_both(my_plot(df, 'O_b', 'O_l', 'Opr')
              + titles("O'(O_b, O_l)")
              + limits((1, 10), (1, 10))
              + gg.geom_line()
              + gg.geom_abline(slope=1, intercept=0, linetype='dashed', color='grey')
              , 'Opr_Ob_Ol')

    df = concat_map(Opr_Ob_σ, 'Opr', np.linspace(1, 5, 9))
    save_both(my_plot(df, 'O_b', 'σ', 'Opr')
              + titles("O'(O_b, σ)")
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'Opr_Ob_σ')

    df = (pd.DataFrame({'Opr': np.linspace(1, 21, 101)})
          .assign(Pf=lambda x: Opr_Pf(x.Opr)))
    save_both(my_plot(df, 'Opr', 'Pf')
              + titles("P_f(O')")
              + labs("O'", 'P_f')
              + limits((1, 20), (0, 1),
                       xbreaks=np.linspace(2, 20, 10),
                       ybreaks=np.linspace(0, 1, 11))
              + gg.geom_line()
              + gg.geom_hline(yintercept=C, linetype='dashed', color='grey')
              , 'Pf_Opr')

    df = concat_map(σpr_Ob_σ, 'σpr', np.linspace(0, 5, 11))
    save_both(my_plot(df, 'O_b', 'σ', 'σpr')
              + titles("σ'(O_b, σ)")
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'σpr_Ob_σ')

    df = (pd.DataFrame({'σpr': np.linspace(0, 21, 106)})
          .assign(Pq=lambda x: σpr_Pq(x.σpr)))
    save_both(my_plot(df, 'σpr', 'Pq')
              + titles("P_q(σ')")
              + labs("σ'", 'P_q')
              + limits((0, 20), (-1, 0),
                       xbreaks=np.linspace(0, 20, 11),
                       ybreaks=np.linspace(-1, 0, 11))
              + gg.geom_line()
              , 'Pq_σpr')

    df = concat_map(liab_Ob_Ol_free, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'O_l', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, O_l)",
                       "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Free bet', dollars=False))
              + limits((1, 20), (1, 10))
              + gg.geom_line()
              + gg.geom_abline(slope=1, intercept=0, linetype='dashed', color='grey')
              , 'liab_Ob_Ol_free')

    # NOTE(review): this σ plot is fed by liab_Ob_Ol_free (the O_l variant)
    # and reuses the O_l axis limits -- looks like a copy-paste; confirm
    # whether a σ-specific data function was intended.
    df = concat_map(liab_Ob_Ol_free, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'σ', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, σ)",
                       "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Free bet', dollars=False))
              + limits((1, 20), (1, 10))
              + gg.geom_line()
              , 'liab_Ob_σ_free')

    df = concat_map(liab_Ob_Ol_qual, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'O_l', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, O_l)",
                       "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Qualifying bet', dollars=False))
              + limits((1, 20), (1, 10))
              + gg.geom_line()
              + gg.geom_abline(slope=1, intercept=0, linetype='dashed', color='grey')
              , 'liab_Ob_Ol_qual')

    # NOTE(review): same concern as the free-bet σ plot -- the data comes
    # from liab_Ob_Ol_qual; confirm.
    df = concat_map(liab_Ob_Ol_qual, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'σ', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, σ)",
                       "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Qualifying bet', dollars=False))
              + limits((1, 20), (1, 10))
              + gg.geom_line()
              , 'liab_Ob_σ_qual')

    df_Pf = Pf_Ob_σ(0.6).assign(profit=dollars('P_f'))
    df_Pq = Pq_Ob_σ(-0.3).assign(profit=dollars('P_q'))
    df = pd.concat((df_Pf, df_Pq), ignore_index=True)
    df.drop_duplicates('O_b', inplace=True)

    # Reference lines: O_b where the P_f curve has σ == 0, and σ where the
    # P_q curve has O_b == 1. Both lookups take the row labelled 0.
    Opr = df_Pf.query('σ==0').O_b[0]
    σpr = df_Pq.query('O_b==1').σ[0]

    labels = pd.DataFrame({
        'x': [Opr+0.1, 1, 9.8],
        'y': [4.8, σpr, σpr + 0.3],
        'label': ["$O'$", "$σ'$", mathrm('More profit')]
    })
    lab_aes = gg.aes('x', 'y', label='label')

    # ``.ix`` was removed in pandas 1.0. ``labels`` has the default 0..2
    # index, so the label-based ``.loc`` slices select the same rows:
    # [:0] -> row 0, [1:1] -> row 1, [2:] -> row 2.
    save_both(
        gg.ggplot(df, gg.aes(x='O_b', y='σ'))
        + gg.geom_area(gg.aes(fill='profit'), alpha=0.3)
        + gg.geom_vline(xintercept=Opr, linetype='dashed')
        + gg.geom_hline(yintercept=σpr, linetype='dashed')
        # text alignment can't be specified in an aes
        + gg.geom_text(lab_aes, data=labels.loc[:0], ha='left', va='top')
        + gg.geom_text(lab_aes, data=labels.loc[1:1], ha='left', va='bottom')
        + gg.geom_text(lab_aes, data=labels.loc[2:], ha='right', va='bottom')
        + gg.scale_fill_discrete(name=mathrm('Bet type'),
                                 labels=[mathrm('Free'), mathrm('Qualifying')])
        + limits((1, 10), (0, 5))
        + gg.ggtitle('%s "%s" %s' % (mathrm('Shape of the'),
                                     mathrm('more profitable'),
                                     mathrm('space')))
        + labs('O_b', 'σ')
        , 'Px_shapes')