def production_envelope(self, dataframe, grid=None, width=None, height=None,
                        title=None, points=None, points_colors=None,
                        palette=None, x_axis_label=None, y_axis_label=None):
    """Plot a production envelope (lb/ub band against value) per strain.

    Signature is unchanged; ``grid``, ``height``, ``points`` and
    ``points_colors`` are accepted for interface compatibility but are not
    used by this backend.  Returns the composed ggplot object.
    """
    # Fall back to engine-wide defaults when the caller gives no override.
    palette = self.get_option('palette') if palette is None else palette
    width = self.get_option('width') if width is None else width
    # One colour per strain present in the data.
    colors = self._palette(palette, len(dataframe.strain.unique()))
    # BUG FIX: a bare aes() cannot accumulate geoms/scales -- the plot must
    # be rooted in ggplot().  A colour scale is a layer, not an aesthetic
    # mapping, so it is added with `+` rather than passed inside aes().
    # NOTE(review): assumes ggplot/ggtitle are in scope alongside aes/geom_area
    # (e.g. via a star import of the ggplot package) -- confirm at file level.
    plot = (ggplot(aes(ymin="lb", ymax="ub", x="value"), data=dataframe)
            + geom_area()
            + scale_colour_manual(values=colors))
    if title:
        # BUG FIX: geom_tile draws heat-map rectangles; ggtitle sets the title.
        plot += ggtitle(title)
    if x_axis_label:
        plot += scale_x_continuous(name=x_axis_label)
    if y_axis_label:
        plot += scale_y_continuous(name=y_axis_label)
    return plot
def signature_data_plot(sd):
    """Scatter plot of set_exp vs. not_exp, coloured by pearson_r.

    Both axes are log-scaled with fixed limits; returns the ggplot object.
    """
    import ggplot as gg

    mapping = gg.aes(x='set_exp', y='not_exp', color='pearson_r')
    plot = gg.ggplot(mapping, data=sd)
    # Layers are applied in the same order the original chained them.
    layers = (
        gg.geom_point(size=15),
        gg.scale_color_gradient(low='yellow', high='red'),
        gg.scale_x_log(),
        gg.scale_x_continuous(limits=(0.5, 10000)),
        gg.scale_y_log(),
        gg.scale_y_continuous(limits=(0.05, 10000)),
    )
    for layer in layers:
        plot = plot + layer
    return plot
def plot_timeline(scenes):
    """Print a character-vs-scene timeline plot.

    NB: due to limitations in Python ggplot we plot with scene on the y-axis
    so the x-ticks can be labelled by character --
    scale_x_continuous and scale_y_continuous behave slightly differently.
    """
    characters = scenes['character'].cat.categories
    tick_labels = characters.values.tolist()
    tick_breaks = range(len(characters))
    timeline = (gg.ggplot(gg.aes(y='scene', x='character_code'), data=scenes)
                + gg.geom_point()
                + gg.labs(x='Character', y='Scene')
                + gg.scale_x_continuous(labels=tick_labels, breaks=tick_breaks)
                + gg.theme(axis_text_x=gg.element_text(angle=30, hjust=1,
                                                       size=10)))
    print(timeline)
def scatter(self, dataframe, x=None, y=None, width=None, height=None,
            color=None, title='Scatter', xaxis_label=None, yaxis_label=None):
    """Draw a scatter plot of column *y* against column *x* of *dataframe*.

    ``color`` and ``width`` fall back to the engine's default options when
    not given; ``height`` is accepted for interface symmetry but unused by
    this backend.  Returns the ggplot object.
    """
    color = self.__default_options__.get('palette', None) if color is None else color
    width = self.__default_options__.get('width', None) if width is None else width
    gg = ggplot(dataframe, aes(x, y)) + geom_point(color=color, alpha=0.6) + ggtitle(title)
    if xaxis_label:
        gg += scale_x_continuous(name=xaxis_label)
    if yaxis_label:
        # BUG FIX: this previously reused xaxis_label for the y axis.
        gg += scale_y_continuous(name=yaxis_label)
    return gg
def plot_age_speed(df): num_rows = df.shape[0] title = 'age v speed' print ggplot(df, aes(s.AGE_COL_NAME, s.SPEED_COL_NAME)) + \ ggtitle(_make_title(title, num_rows))+ \ geom_point(colour='steelblue') + \ scale_x_continuous( # breaks=[10,20,30], # labels=["horrible", "ok", "awesome"] ) return df
def plot_distance_trip_time(df): num_rows = df.shape[0] title = 'trip duration v distance travelled' print ggplot(df, aes(s.TRIP_DURATION_COL, s.DISTANCE_TRAVELED_COL_NAME)) + \ ggtitle(_make_title(title, num_rows))+ \ stat_smooth(colour="red") + \ geom_point(colour='steelblue') + \ scale_x_continuous( # breaks=[10,20,30], #labels=["horrible", "ok", "awesome"] ) return df
def graph1(score_data):
    """
    Average score as time goes on;
    Creates and returns graph 1, a line graph.
    """
    # score_data[0] is the header row; find_time_stamp locates the date column.
    date_column = score_data[0][find_time_stamp(score_data)]
    data = DataFrame(score_data[1:], columns=score_data[0])
    # Get all columns that are numerical questions so we know what to graph.
    # NOTE(review): the original comment had a stray
    # `labels = date_format("%Y-%m-%d")` fragment spliced into it.
    num_questions = data.select_dtypes(include=['int64']).columns.values
    # Melt data so that each question is in a separate row
    new_data = pd.melt(data, id_vars=date_column, value_vars=num_questions,
                       var_name="Question", value_name="Score")
    # Convert date string into an actual date type
    new_data[date_column] = pd.to_datetime(new_data[date_column],
                                           format="%m/%d/%Y")
    # Group all rows with same date and question, and then take the average.
    new_data = new_data.groupby([date_column, 'Question']).mean().reset_index()
    # Facet label for the per-question panel.  (The "Indiviual" typo is a
    # user-visible string and is kept byte-identical here.)
    new_data['All'] = "Indiviual Questions"
    # Second frame: the across-question average per date, shown in its own facet.
    new_data2 = new_data.groupby(date_column).mean().reset_index()
    new_data2['Question'] = "All Questions"
    new_data2['All'] = "Average of All Questions"
    new_data = pd.concat([new_data, new_data2])
    # ggplot needs a numeric x; datetimes are cast to int64 (ns since epoch),
    # and the tick labels are blanked out below.
    new_data[date_column] = new_data[date_column].astype('int64')
    # Create time graph with separate lines for each question
    ret = ggplot.ggplot(ggplot.aes(x=date_column, y="Score",
                                   colour="Question"), new_data) +\
        ggplot.geom_point() +\
        ggplot.geom_line() +\
        ggplot.facet_grid("All") +\
        ggplot.scale_x_continuous(labels=[""], breaks=0) +\
        ggplot.labs(x="Time", y="Average Question Score") +\
        ggplot.ggtitle("Question Scores Over Time")
    return ret
def scatter(self, dataframe, x=None, y=None, width=None, height=None,
            color=None, title='Scatter', xaxis_label=None, yaxis_label=None,
            label=None):
    """Draw a scatter plot of column *y* against column *x* of *dataframe*.

    ``color`` and ``width`` fall back to the engine's default options when
    not given; ``height`` and ``label`` are accepted for interface symmetry
    but unused by this backend.  Returns the ggplot object.
    """
    color = self.__default_options__.get('palette', None) if color is None else color
    width = self.__default_options__.get('width', None) if width is None else width
    gg = ggplot(dataframe, aes(x, y)) + geom_point(
        color=color, alpha=0.6) + ggtitle(title)
    if xaxis_label:
        gg += scale_x_continuous(name=xaxis_label)
    if yaxis_label:
        # BUG FIX: this previously reused xaxis_label for the y axis.
        gg += scale_y_continuous(name=yaxis_label)
    return gg
values_dict = { "significant": coefficients[feature]["significant"], "insignificant": coefficients[feature]["unsignificant"] } df = pd.DataFrame.from_dict(values_dict, orient='index') df = df.transpose() df = pd.melt(df) df['feature'] = feature dfs_to_concat.append(df) master_df = pd.concat(dfs_to_concat) # histogram p = ggplot(aes(x='value', fill='variable', color='variable'), data=master_df) p += geom_histogram(bins=25, alpha=0.5) p += scale_x_continuous(limits=(-25, 25)) p += ggtitle("sarimax coefficient magnitude distribution") p += facet_wrap("feature", ncol=3, scales="free") p += labs(x=" ", y=" ") # visuals t = theme_gray() t._rcParams['font.size'] = 10 t._rcParams['font.family'] = 'monospace' p += t p.save("arima_1/" + "histogram.png") # boxplot p = ggplot(aes(x='variable', y='value'), data=master_df) p += geom_boxplot()
import pandas as pd
import numpy as np
# from source import view_and_print_output
import ggplot as gg

# Collect the coarse lambda/dropout search results for every network
# configuration (num_layers, num_nodes).
frames = []
for num_layers, num_nodes in [(2, 50), (2, 100), (2, 150), (2, 200),
                              (4, 50), (4, 100), (4, 150), (4, 200)]:
    file_coarse = ('../../data/coarse_lambda_dropout_' + str(num_layers) +
                   '_' + str(num_nodes) + '.txt')
    newdata = pd.read_csv(file_coarse)
    newdata = newdata.sort_values(by='validation error', ascending=True)
    newdata['lambda'] = np.log10(newdata['lambda'])
    # Rank-based colour index in [0, 1), cubed to emphasise the best runs.
    newdata['index'] = (np.arange(len(newdata), dtype='float') / len(newdata)) ** 3
    newdata['config'] = (str(num_layers * 100 + num_nodes) + ' ' +
                         str(num_layers) + ' ' + str(num_nodes))
    frames.append(newdata)

# BUG FIX: DataFrame.append is deprecated (removed in pandas 2.0) and grows
# the frame quadratically; build it once with pd.concat instead.
df = pd.concat(frames)

print(df.sort_values(by='validation error', ascending=False).head(20))

p = gg.ggplot(gg.aes(x='lambda', y='dropout prob', color='index'), data=df) + \
    gg.geom_point() + \
    gg.xlab('lambda') + \
    gg.ylab('dropout prob') + \
    gg.scale_x_continuous(limits=(-5, 2)) + \
    gg.facet_wrap('config')
print(p)
# Conclusion: ignore dropout
t * len(count_tops) + len(count_tops))) probs_list.append(probs_t) # Calculate KL divergences kl_mle_list.append(stats.entropy(true_bins_t, mle_probs_vals)) kl_nn_list.append(stats.entropy(true_bins_t, nn_probs_t)) probs = pd.concat(probs_list) # In[44]: probs_tail = probs[probs.Tenor > 360] gg.ggplot(probs_tail, gg.aes(x='Count Top', weight='Probs True') ) + gg.facet_grid('Tenor') + gg.geom_bar() + gg.geom_step( gg.aes(y='Probs MLE', color='red')) + gg.geom_step( gg.aes(y='Probs NN', color='blue')) + gg.scale_x_continuous( limits=(0, len(count_tops))) # In[57]: # KL divergences kl_df = pd.DataFrame({ 'Tenor': range(0, t_end + 1), 'KL MLE': kl_mle_list, 'KL NN': kl_nn_list }) print kl_df.head() print kl_df.tail() #% # Plot KL divergences
def _read_track_file(filename):
    """Parse *filename*, trying each known tracker format in turn.

    Order: MTrackJ .mdf, then MTrack2, then manual tracking.  The first
    parser that succeeds wins.
    """
    try:
        return read_mtrackj_mdf(filename)
    except ValueError:
        try:
            return read_mtrack2(filename)
        except Exception:
            return read_manual_track(filename)


def main():
    """CLI entry point: draw displacement plots and emit per-object stats.

    For each input file: parse tracks, center them, write a ``.centered``
    CSV and (unless --no-plots) save a displacement plot.  Afterwards,
    compute per-filename statistics, optionally save a summary CSV, and
    stream the object stats to stdout.
    """
    parser = argparse.ArgumentParser(
        description="Draws displacement plots.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--limits', type=int,
                        help="Maximum extent of the axes")
    parser.add_argument('--no-plots', action='store_true',
                        help="Don't save plots")
    parser.add_argument('--summary', help='Save summary stats by file')
    parser.add_argument('--imagetype', '-i', default='png',
                        help="Extension to use for plots")
    parser.add_argument('--pixels-per-micron', '--pixels', '-p',
                        default=1.51, type=float,
                        help="Pixels per µm (length scale of tracked images)")
    parser.add_argument('--minutes-per-frame', '--minutes', '-m',
                        default=10, type=float,
                        help="Minutes between each frame of the tracked images")
    parser.add_argument('--plot-titles', type=argparse.FileType('r'),
                        help="CSV file with filename and title columns")
    parser.add_argument('--style', action='append', default=[],
                        choices=['theme-bw', 'no-terminal-dot'],
                        help='Change style options for the plot.')
    parser.add_argument('--tick-breaks', '--ticks', '-t', nargs=3, type=int,
                        metavar=('start', 'end', 'step'),
                        help="Beginning and end tick breaks on displacement plots")
    parser.add_argument('--plot-text', type=int, default=8,
                        help='Plot text size (pt)')
    parser.add_argument('--plot-height', type=float, default=1.81,
                        help='Plot height (in)')
    parser.add_argument('--plot-width', type=float, default=2.5,
                        help='Plot width (in)')
    parser.add_argument('infile', nargs='+', help="File(s) to process.")
    args = parser.parse_args()

    style = {argument: True for argument in args.style}
    plot_titles = (pd.read_csv(args.plot_titles, index_col="filename")
                   if args.plot_titles else None)

    all_dfs = []
    for filename in args.infile:
        df = _read_track_file(filename)
        centered = center(df)
        centered.to_csv(filename + '.centered')
        if not args.no_plots:
            g = displacement_plot(centered, limits=args.limits, style=style)
            g += gg.theme(axis_text=gg.element_text(size=args.plot_text))
            g += gg.labs(x='px', y='px')
            if args.tick_breaks:
                g += gg.scale_x_continuous(breaks=range(*args.tick_breaks))
                g += gg.scale_y_continuous(breaks=range(*args.tick_breaks))
            if plot_titles is not None and filename in plot_titles.index:
                # BUG FIX: DataFrame.ix was removed from pandas; .loc is the
                # label-based equivalent for (row-label, column-label) access.
                g += gg.labs(title=plot_titles.loc[filename, 'title'])
            g.save('{}.{}'.format(filename, args.imagetype),
                   width=args.plot_width, height=args.plot_height)
        centered['filename'] = filename
        all_dfs.append(centered)

    mega_df = pd.concat(all_dfs, ignore_index=True)

    def stats_for(x):
        # Named def rather than an assigned lambda (PEP 8 / E731); closes
        # over the CLI length/time scales.
        return stats(x, length_scale=args.pixels_per_micron,
                     time_scale=args.minutes_per_frame)

    obj_stats = (mega_df.groupby('filename', sort=False)
                 .apply(stats_for)
                 .reset_index())
    summary_by_file = obj_stats.groupby('filename').apply(summary)
    if args.summary:
        summary_by_file.to_csv(args.summary, index=False)
    print("# Produced by {} at {}".format(' '.join(sys.argv), time.ctime()))
    print("# {} pixels per micron, {} minutes per frame".
          format(args.pixels_per_micron, args.minutes_per_frame))
    print("# distance units are microns; velocity units are microns/hour")
    obj_stats.to_csv(sys.stdout, index=False)