Пример #1
0
    def production_envelope(self,
                            dataframe,
                            grid=None,
                            width=None,
                            height=None,
                            title=None,
                            points=None,
                            points_colors=None,
                            palette=None,
                            x_axis_label=None,
                            y_axis_label=None):
        """Build an area plot of the ``lb``/``ub`` bounds against ``value``.

        Args:
            dataframe: Data to plot. The code reads ``dataframe.strain`` (its
                number of unique values sizes the colour palette) and maps the
                column names ``"lb"``, ``"ub"`` and ``"value"``.
            grid, height, points, points_colors: Accepted for interface
                compatibility; this implementation does not use them.
            width: Falls back to the ``'width'`` option when ``None``; the
                resolved value is not used further in this method.
            title: Optional plot title; added only when truthy.
            palette: Palette passed to ``self._palette``; falls back to the
                ``'palette'`` option when ``None``.
            x_axis_label: Optional name for the x axis.
            y_axis_label: Optional name for the y axis.

        Returns:
            The composed plot object.
        """
        palette = self.get_option('palette') if palette is None else palette
        width = self.get_option('width') if width is None else width
        colors = self._palette(palette, len(dataframe.strain.unique()))

        # NOTE(review): the base object is built from ``aes(...)`` rather than
        # ``ggplot(dataframe, aes(...))`` as the other plotting methods do --
        # confirm this composes correctly with the installed ggplot version.
        plot = aes(data=dataframe,
                   ymin="lb",
                   ymax="ub",
                   x="value",
                   color=scale_colour_manual(colors)) + geom_area()
        if title:
            # A title is set with ggtitle(); geom_tile() would add a tile
            # layer and treat the title string as its first argument.
            plot += ggtitle(title)
        if x_axis_label:
            plot += scale_x_continuous(name=x_axis_label)
        if y_axis_label:
            plot += scale_y_continuous(name=y_axis_label)

        return plot
Пример #2
0
def signature_data_plot(sd):
    """Return a scatter plot of ``set_exp`` against ``not_exp`` from ``sd``.

    Points are coloured by the ``pearson_r`` column on a yellow-to-red
    gradient. Each axis gets a log scale followed by explicit limits.
    """
    import ggplot as gg

    mapping = gg.aes(x='set_exp', y='not_exp', color='pearson_r')

    # Layers are added one at a time, in the same order as a single chained
    # expression would add them.
    plot = gg.ggplot(mapping, data=sd)
    plot = plot + gg.geom_point(size=15)
    plot = plot + gg.scale_color_gradient(low='yellow', high='red')

    # x axis: log scale, then limits.
    plot = plot + gg.scale_x_log()
    plot = plot + gg.scale_x_continuous(limits=(0.5, 10000))

    # y axis: log scale, then limits.
    plot = plot + gg.scale_y_log()
    plot = plot + gg.scale_y_continuous(limits=(0.05, 10000))

    return plot
Пример #3
0
def signature_data_plot(sd):
    """Plot ``not_exp`` versus ``set_exp`` for the rows of ``sd``.

    The ``pearson_r`` column drives a yellow-to-red colour gradient, and both
    axes receive a log scale plus fixed continuous limits. The composed
    ggplot object is returned, not rendered.
    """
    import ggplot as gg

    point_mapping = gg.aes(x='set_exp', y='not_exp', color='pearson_r')
    figure = gg.ggplot(point_mapping, data=sd)

    # Point layer and colour scale.
    figure = figure + gg.geom_point(size=15)
    figure = figure + gg.scale_color_gradient(low='yellow', high='red')

    # Axis scales, x first and then y, each as log followed by limits.
    figure = figure + gg.scale_x_log()
    figure = figure + gg.scale_x_continuous(limits=(0.5, 10000))
    figure = figure + gg.scale_y_log()
    figure = figure + gg.scale_y_continuous(limits=(0.05, 10000))

    return figure
Пример #4
0
def plot_timeline(scenes):
    """Print a character-versus-scene timeline scatter plot.

    NB: due to limitations in Python ggplot the scene goes on the y-axis so
    that the x-ticks can be labelled by character; scale_x_continuous and
    scale_y_continuous behave slightly differently.

    ``scenes`` must provide the ``scene`` and ``character_code`` columns and a
    categorical ``character`` column, whose categories become the x tick
    labels.
    """
    mapping = gg.aes(y='scene', x='character_code')
    chart = gg.ggplot(mapping, data=scenes)
    chart = chart + gg.geom_point()
    chart = chart + gg.labs(x='Character', y='Scene')

    # One tick per category, labelled with the category's name.
    categories = scenes['character'].cat.categories
    tick_labels = categories.values.tolist()
    tick_positions = range(len(scenes['character'].cat.categories))
    chart = chart + gg.scale_x_continuous(labels=tick_labels,
                                          breaks=tick_positions)

    tick_text = gg.element_text(angle=30, hjust=1, size=10)
    chart = chart + gg.theme(axis_text_x=tick_text)

    print(chart)
Пример #5
0
    def scatter(self, dataframe, x=None, y=None, width=None, height=None, color=None, title='Scatter', xaxis_label=None,
                yaxis_label=None):
        """Build a scatter plot of column ``y`` against column ``x``.

        Args:
            dataframe: Data passed to ``ggplot``.
            x, y: Positional aesthetics handed to ``aes``.
            width: Falls back to the ``'width'`` default option when ``None``;
                the resolved value is not used further in this method.
            height: Accepted but not used by this implementation.
            color: Point colour; falls back to the ``'palette'`` default
                option when ``None``.
            title: Plot title.
            xaxis_label: Optional name for the x axis.
            yaxis_label: Optional name for the y axis.

        Returns:
            The composed ggplot object.
        """
        color = self.__default_options__.get('palette', None) if color is None else color
        width = self.__default_options__.get('width', None) if width is None else width

        gg = ggplot(dataframe, aes(x, y)) + geom_point(color=color, alpha=0.6) + ggtitle(title)
        if xaxis_label:
            gg += scale_x_continuous(name=xaxis_label)
        if yaxis_label:
            # The y axis must be named with the y label, not the x label.
            gg += scale_y_continuous(name=yaxis_label)

        return gg
Пример #6
0
def plot_age_speed(df):
    """Print an age-versus-speed scatter plot and return ``df`` unchanged.

    The plotted columns are named by ``s.AGE_COL_NAME`` and
    ``s.SPEED_COL_NAME``; the title text comes from
    ``_make_title(title, num_rows)``.
    """
    num_rows = df.shape[0]
    title = 'age v speed'

    plot = (ggplot(df, aes(s.AGE_COL_NAME, s.SPEED_COL_NAME)) +
            ggtitle(_make_title(title, num_rows)) +
            geom_point(colour='steelblue') +
            # No custom breaks or labels are configured for the x axis.
            scale_x_continuous())

    # print() as a call is valid on both Python 2 and Python 3; the bare
    # ``print expr`` statement is a SyntaxError on Python 3.
    print(plot)

    return df
Пример #7
0
def plot_distance_trip_time(df):
    """Print a trip-duration-versus-distance plot and return ``df`` unchanged.

    The plot has a red ``stat_smooth`` layer under steel-blue points. The
    plotted columns are named by ``s.TRIP_DURATION_COL`` and
    ``s.DISTANCE_TRAVELED_COL_NAME``; the title text comes from
    ``_make_title(title, num_rows)``.
    """
    num_rows = df.shape[0]
    title = 'trip duration v distance travelled'

    plot = (ggplot(df, aes(s.TRIP_DURATION_COL, s.DISTANCE_TRAVELED_COL_NAME)) +
            ggtitle(_make_title(title, num_rows)) +
            stat_smooth(colour="red") +
            geom_point(colour='steelblue') +
            # No custom breaks or labels are configured for the x axis.
            scale_x_continuous())

    # print() as a call is valid on both Python 2 and Python 3; the bare
    # ``print expr`` statement is a SyntaxError on Python 3.
    print(plot)

    return df
Пример #8
0
def graph1(score_data):
    """Create and return graph 1: average question score over time.

    Args:
        score_data: Sequence of rows. The first row holds the column names
            and the remaining rows hold the data. The date column is the
            header entry at the index returned by
            ``find_time_stamp(score_data)``; its values are parsed with the
            ``%m/%d/%Y`` format.

    Returns:
        A ggplot line-and-point graph of the mean score per date, with one
        line per question plus one for the mean over all questions, faceted
        by the ``All`` column.
    """
    date_column = score_data[0][find_time_stamp(score_data)]

    data = DataFrame(score_data[1:], columns=score_data[0])

    # Columns with an int64 dtype are treated as the numerical questions,
    # so we know what to graph.
    num_questions = data.select_dtypes(include=['int64']).columns.values

    # Melt data so that each question is in a separate row.
    new_data = pd.melt(data,
                       id_vars=date_column,
                       value_vars=num_questions,
                       var_name="Question",
                       value_name="Score")

    # Convert the date string into an actual date type.
    new_data[date_column] = pd.to_datetime(new_data[date_column],
                                           format="%m/%d/%Y")

    # Group all rows with the same date and question, then take the average.
    new_data = new_data.groupby([date_column, 'Question']).mean().reset_index()
    # This literal is used as the facet value, so its spelling is kept as-is.
    new_data['All'] = "Indiviual Questions"

    # Average over every question per date. At this point the frame also
    # holds the string columns 'Question' and 'All'; numeric_only=True keeps
    # them out of the mean, which recent pandas no longer does implicitly.
    new_data2 = (new_data.groupby(date_column)
                 .mean(numeric_only=True)
                 .reset_index())
    new_data2['Question'] = "All Questions"
    new_data2['All'] = "Average of All Questions"

    new_data = pd.concat([new_data, new_data2])

    # Dates are turned into integers before plotting on a continuous x scale.
    new_data[date_column] = new_data[date_column].astype('int64')

    # Create the time graph with a separate line for each question.
    # NOTE(review): labels=[""] with breaks=0 presumably hides the x tick
    # labels -- confirm against the installed ggplot version.
    ret = ggplot.ggplot(ggplot.aes(x=date_column, y="Score", colour="Question"), new_data) +\
        ggplot.geom_point() +\
        ggplot.geom_line() +\
        ggplot.facet_grid("All") +\
        ggplot.scale_x_continuous(labels=[""], breaks=0) +\
        ggplot.labs(x="Time", y="Average Question Score") +\
        ggplot.ggtitle("Question Scores Over Time")
    return ret
Пример #9
0
    def scatter(self,
                dataframe,
                x=None,
                y=None,
                width=None,
                height=None,
                color=None,
                title='Scatter',
                xaxis_label=None,
                yaxis_label=None,
                label=None):
        """Build a scatter plot of column ``y`` against column ``x``.

        Args:
            dataframe: Data passed to ``ggplot``.
            x, y: Positional aesthetics handed to ``aes``.
            width: Falls back to the ``'width'`` default option when ``None``;
                the resolved value is not used further in this method.
            height, label: Accepted but not used by this implementation.
            color: Point colour; falls back to the ``'palette'`` default
                option when ``None``.
            title: Plot title.
            xaxis_label: Optional name for the x axis.
            yaxis_label: Optional name for the y axis.

        Returns:
            The composed ggplot object.
        """
        color = self.__default_options__.get('palette',
                                             None) if color is None else color
        width = self.__default_options__.get('width',
                                             None) if width is None else width

        gg = ggplot(dataframe, aes(x, y)) + geom_point(
            color=color, alpha=0.6) + ggtitle(title)
        if xaxis_label:
            gg += scale_x_continuous(name=xaxis_label)
        if yaxis_label:
            # The y axis must be named with the y label, not the x label.
            gg += scale_y_continuous(name=yaxis_label)

        return gg
Пример #10
0
    values_dict = {
        "significant": coefficients[feature]["significant"],
        "insignificant": coefficients[feature]["unsignificant"]
    }
    df = pd.DataFrame.from_dict(values_dict, orient='index')
    df = df.transpose()
    df = pd.melt(df)
    df['feature'] = feature
    dfs_to_concat.append(df)

# Stack the per-feature frames collected in dfs_to_concat into one long frame.
master_df = pd.concat(dfs_to_concat)

# Histogram of 'value', filled and outlined by 'variable', with one facet per
# 'feature' (three facets per row, free scales) and the x axis limited to
# [-25, 25]. Axis labels are set to a single space.
p = ggplot(aes(x='value', fill='variable', color='variable'), data=master_df)
p += geom_histogram(bins=25, alpha=0.5)
p += scale_x_continuous(limits=(-25, 25))
p += ggtitle("sarimax coefficient magnitude distribution")
p += facet_wrap("feature", ncol=3, scales="free")
p += labs(x=" ", y=" ")

# Visuals: start from the gray theme and override the font settings through
# the theme's underscore-prefixed _rcParams mapping.
t = theme_gray()
t._rcParams['font.size'] = 10
t._rcParams['font.family'] = 'monospace'

# Apply the theme, then write the figure to arima_1/histogram.png.
p += t
p.save("arima_1/" + "histogram.png")

# Boxplot of 'value' per 'variable', built from the same frame; `p` is
# rebound to the new plot here.
p = ggplot(aes(x='variable', y='value'), data=master_df)
p += geom_boxplot()
Пример #11
0
import pandas as pd
import numpy as np
# from source import view_and_print_output
import ggplot as gg


# Load the coarse lambda/dropout search results for every network
# configuration (number of layers, nodes per layer), then plot them.
# The per-configuration frames are collected in a list and concatenated once:
# DataFrame.append was removed in pandas 2.0 and re-copies the accumulated
# frame on every iteration.
frames = []
for num_layers, num_nodes in [(2, 50), (2, 100), (2, 150), (2, 200),
                              (4, 50), (4, 100), (4, 150), (4, 200)]:
    file_coarse = '../../data/coarse_lambda_dropout_{}_{}.txt'.format(
        num_layers, num_nodes)
    newdata = pd.read_csv(file_coarse)
    # Best (lowest) validation error first.
    newdata = newdata.sort_values(by='validation error', ascending=True)
    # Plot lambda on a log10 scale.
    newdata['lambda'] = np.log10(newdata['lambda'])
    # Cubed rank fraction in [0, 1), used as the colour value: the
    # best-ranked rows sit near 0.
    newdata['index'] = (np.arange(len(newdata), dtype='float') / len(newdata)) ** 3
    # Facet label: a numeric prefix followed by the layer and node counts.
    newdata['config'] = '{} {} {}'.format(
        num_layers * 100 + num_nodes, num_layers, num_nodes)
    frames.append(newdata)
df = pd.concat(frames)

# Show the 20 rows with the highest validation error.
print(df.sort_values(by='validation error', ascending=False).head(20))

# One facet per configuration: log10(lambda) against dropout probability,
# coloured by the rank-based 'index'.
p = gg.ggplot(gg.aes(x='lambda', y='dropout prob', color='index'), data=df) + \
        gg.geom_point() + \
        gg.xlab('lambda') + \
        gg.ylab('dropout prob') + \
        gg.scale_x_continuous(limits=(-5, 2)) + \
        gg.facet_wrap('config')
print(p)

# Conclusion: ignore dropout
Пример #12
0
                    t * len(count_tops) + len(count_tops)))
    probs_list.append(probs_t)
    # Calculate KL divergences
    kl_mle_list.append(stats.entropy(true_bins_t, mle_probs_vals))
    kl_nn_list.append(stats.entropy(true_bins_t, nn_probs_t))

# Combine the per-tenor probability frames into one frame.
probs = pd.concat(probs_list)

# In[44]:

# Keep only the rows whose Tenor is greater than 360.
probs_tail = probs[probs.Tenor > 360]

# Per-tenor facets: bars weighted by 'Probs True', with step lines for the
# 'Probs MLE' and 'Probs NN' columns; the x axis spans 0..len(count_tops).
gg.ggplot(probs_tail, gg.aes(x='Count Top', weight='Probs True')
          ) + gg.facet_grid('Tenor') + gg.geom_bar() + gg.geom_step(
              gg.aes(y='Probs MLE', color='red')) + gg.geom_step(
                  gg.aes(y='Probs NN', color='blue')) + gg.scale_x_continuous(
                      limits=(0, len(count_tops)))

# In[57]:

# KL divergences: one row per tenor from 0 to t_end inclusive.

kl_df = pd.DataFrame({
    'Tenor': range(0, t_end + 1),
    'KL MLE': kl_mle_list,
    'KL NN': kl_nn_list
})

# print() as a call is valid on both Python 2 and Python 3; the bare
# ``print expr`` statement is a SyntaxError on Python 3.
print(kl_df.head())
print(kl_df.tail())
#%
# Plot KL divergences
Пример #13
0
def main():
    """Command-line entry point: centre tracks, plot them, and print stats.

    For every input file the tracks are read (trying the MTrackJ reader, then
    the MTrack2 reader, then the manual-track reader), centred, and written
    to ``<filename>.centered``. Unless ``--no-plots`` is given, a displacement
    plot is saved as ``<filename>.<imagetype>``. Per-object statistics for all
    files are printed to stdout as CSV after three ``#`` header lines, and a
    per-file summary is written to the ``--summary`` path when one is given.
    """
    parser = argparse.ArgumentParser(description="Draws displacement plots.",
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--limits', type=int, help="Maximum extent of the axes")
    parser.add_argument('--no-plots', action='store_true', help="Don't save plots")
    parser.add_argument('--summary', help='Save summary stats by file')
    parser.add_argument('--imagetype', '-i', default='png', help="Extension to use for plots")
    parser.add_argument('--pixels-per-micron', '--pixels', '-p', default=1.51, type=float,
                        help="Pixels per µm (length scale of tracked images)")
    parser.add_argument('--minutes-per-frame', '--minutes', '-m', default=10, type=float,
                        help="Minutes between each frame of the tracked images")
    parser.add_argument('--plot-titles', type=argparse.FileType('r'),
                        help="CSV file with filename and title columns")
    parser.add_argument('--style', action='append', default=[],
                        choices=['theme-bw', 'no-terminal-dot'],
                        help='Change style options for the plot.')
    parser.add_argument('--tick-breaks', '--ticks', '-t', nargs=3, type=int,
                        metavar=('start', 'end', 'step'),
                        help="Beginning and end tick breaks on displacement plots")
    parser.add_argument('--plot-text', type=int, default=8,
                        help='Plot text size (pt)')
    parser.add_argument('--plot-height', type=float, default=1.81,
                        help='Plot height (in)')
    parser.add_argument('--plot-width', type=float, default=2.5,
                        help='Plot width (in)')
    parser.add_argument('infile', nargs='+', help="File(s) to process.")
    args = parser.parse_args()

    # Each selected style flag becomes a True entry in the style mapping.
    style = {argument: True for argument in args.style}

    plot_titles = pd.read_csv(args.plot_titles, index_col="filename") if args.plot_titles else None

    def stats_for(track_df):
        """Apply ``stats`` using the command-line length and time scales."""
        return stats(track_df, length_scale=args.pixels_per_micron,
                     time_scale=args.minutes_per_frame)

    all_dfs = []
    for filename in args.infile:
        # Try each supported reader in turn; the last one is the fallback and
        # its errors propagate.
        try:
            df = read_mtrackj_mdf(filename)
        except ValueError:
            try:
                df = read_mtrack2(filename)
            except Exception:
                df = read_manual_track(filename)
        centered = center(df)
        centered.to_csv(filename + '.centered')
        if not args.no_plots:
            g = displacement_plot(centered, limits=args.limits, style=style)
            g += gg.theme(axis_text=gg.element_text(size=args.plot_text))
            g += gg.labs(x='px', y='px')
            if args.tick_breaks:
                g += gg.scale_x_continuous(breaks=range(*args.tick_breaks))
                g += gg.scale_y_continuous(breaks=range(*args.tick_breaks))
            if plot_titles is not None and filename in plot_titles.index:
                # Label-based lookup; .loc replaces the .ix indexer, which was
                # removed from pandas.
                g += gg.labs(title=plot_titles.loc[filename, 'title'])
            g.save('{}.{}'.format(filename, args.imagetype),
                   width=args.plot_width, height=args.plot_height)
        # Tag rows with their source file so they can be grouped after concat.
        centered['filename'] = filename
        all_dfs.append(centered)
    mega_df = pd.concat(all_dfs, ignore_index=True)
    # sort=False keeps the groups in the order the files were given.
    obj_stats = (mega_df.groupby('filename', sort=False)
                        .apply(stats_for)
                        .reset_index())
    summary_by_file = obj_stats.groupby('filename').apply(summary)
    if args.summary:
        summary_by_file.to_csv(args.summary, index=False)
    print("# Produced by {} at {}".format(' '.join(sys.argv), time.ctime()))
    print("# {} pixels per micron, {} minutes per frame".
          format(args.pixels_per_micron, args.minutes_per_frame))
    print("# distance units are microns; velocity units are microns/hour")
    obj_stats.to_csv(sys.stdout, index=False)