示例#1
0
def displacement_plot(centered, limits=None, style=None):
    u"""Draws nice displacement plots using ggplot2.

    The input frame is not modified: the string conversion of ``Object`` and
    the sort by ``Frame``/``Object`` are applied to a copy.

    params:
        centered (pd.DataFrame): needs cX, cY, Object, Frame columns, probably
            produced by calling center() above
        limits (real): Sets the limits of the scales to a square window showing
            ±limits on each axis. Falsy values (None, 0) leave the scales alone.
        style (Iterable): Collection of strings. 'no-terminal-dot' suppresses
            the black dot drawn at the end of tracks which terminate early.
            'theme-bw' is accepted but currently has no effect, because
            theme_bw is always applied.

    Returns:
        g (gg.ggplot): Plot object
    """
    style = () if style is None else style
    # Work on a copy so the caller's 'Object' column keeps its original dtype.
    centered = centered.copy()
    centered['Object'] = centered['Object'].map(str)
    # DataFrame.sort() was removed from pandas; sort_values() is the replacement.
    centered = centered.sort_values(['Frame', 'Object'])
    g = (gg.ggplot(centered, gg.aes(x='cX', y='cY', color='Object')) +
         gg.geom_path(size=0.3))
    g += gg.theme_bw()  # if 'theme-bw' in style else gg.theme_seaborn()
    if limits:
        g = g + gg.ylim(-limits, limits) + gg.xlim(-limits, limits)
    if 'no-terminal-dot' not in style:
        # Tracks whose last frame is earlier than the overall last frame
        # terminated early; mark their final position.
        max_frame = centered['Frame'].max()
        endframe = centered.groupby('Object')['Frame'].max()
        endframe = endframe[endframe != max_frame].reset_index()
        endframe = endframe.merge(centered, on=['Object', 'Frame'])
        # we should check if endframe is empty before adding it:
        # https://github.com/yhat/ggplot/issues/425
        if not endframe.empty:
            g += gg.geom_point(data=endframe, color='black', size=1)
    return g
def plot_update_frequency(result):
    """Plot the weekly number of changes from aggregated query results.

    Args:
        result: iterable of mappings; each must provide
            ``res['_id']['timestamp']`` (anything ``pd.Timestamp`` accepts)
            and ``res['count']``. It is consumed in a single pass, so a
            one-shot cursor is fine.

    Returns:
        The ggplot object: weekly summed counts drawn as blue points with a
        smoother on top.
    """
    import pandas as pd
    import ggplot

    # Turn the query results into a time series of changes.
    dates = []
    counts = []
    for res in result:
        # Timestamp.to_datetime() was removed from pandas; to_pydatetime()
        # is the supported spelling.
        dates.append(pd.Timestamp(res['_id']['timestamp']).to_pydatetime())
        counts.append(res['count'])

    ts = pd.DataFrame(counts, index=dates, columns=['changes'])
    # resample(..., how='sum') was removed from pandas; aggregate on the
    # resampler instead.
    ts = ts.resample('W').sum()
    ts.index.names = ['date']

    # Plot the time series of changes.
    p = (ggplot.ggplot(ts, ggplot.aes(x=ts.index, y=ts['changes'])) +
         ggplot.geom_point(color='blue') +
         ggplot.xlab('Period') +
         ggplot.ylab('Changes') +
         ggplot.geom_smooth() +
         ggplot.ylim(low=0) +
         ggplot.scale_x_date(breaks=ggplot.date_breaks("12 months"),
                             labels=ggplot.date_format('%Y-%m')) +
         ggplot.ggtitle('OpenStreetMaps Denver-Boulder\nChanges per Week'))
    return p
示例#3
0
    def plot_outcomes(self, chart_title=None, use_ggplot=False):
        """ Draw one marker per observed patient at the dose level they received.

        :param chart_title: optional chart title; a verbose default is used when falsy
        :type chart_title: str
        :param use_ggplot: True to draw with ggplot, otherwise matplotlib is used
        :type use_ggplot: bool
        :return: the ggplot object when use_ggplot is True and at least one patient
                 has been observed; None in every other case (the matplotlib figure
                 is drawn and resized but not returned)

        """

        if not chart_title:
            chart_title = "Each point represents a patient\nA circle indicates no toxicity, a cross toxicity" + "\n"

        # Nothing is imported or drawn until at least one patient exists.
        if not self.size() > 0:
            return None

        if use_ggplot:
            from ggplot import (ggplot, ggtitle, geom_text, aes, ylim)
            import numpy as np
            import pandas as pd
            patient_ids = range(1, self.size()+1)
            # 'X' marks a toxicity, 'O' marks no toxicity.
            marks = np.where(self.toxicities(), 'X', 'O')
            frame = pd.DataFrame({'Patient number': patient_ids,
                                  'Dose level': self.doses(),
                                  'DLT': self.toxicities(),
                                  'Symbol': marks})
            base = ggplot(frame, aes(x='Patient number', y='Dose level', label='Symbol'))
            text_layer = geom_text(aes(size=20, vjust=-0.07))
            return base + ggtitle(chart_title) + text_layer + ylim(1, 5)

        import matplotlib.pyplot as plt
        import numpy as np
        patient_ids = np.arange(1, self.size()+1)
        doses = np.array(self.doses())
        had_tox = np.array(self.toxicities()).astype('bool')
        # Crosses for toxicities first, then circles for the rest; an empty
        # group is skipped entirely.
        for mask, marker in ((had_tox, 'x'), (~had_tox, 'o')):
            if mask.any():
                plt.scatter(patient_ids[mask], doses[mask], marker=marker, s=300,
                            facecolors='none', edgecolors='k')

        plt.title(chart_title)
        plt.ylabel('Dose level')
        plt.xlabel('Patient number')
        plt.yticks(self.dose_levels())
        # Size the current figure to a golden-ratio aspect, 12 inches wide.
        golden_ratio = (np.sqrt(5)+1)/2.
        figure = plt.gcf()
        figure.set_size_inches(12, 12/golden_ratio)
示例#4
0
def googletrend_command(delta_t, threshold=0.0, inverse=False):
    """Run the google trend classifier over a parameter grid and save its ROC plot.

    Every threshold produced by ``np.arange(0, threshold + 0.1, 0.1)`` is
    combined with every integer window ``1 .. int(delta_t) - 1``; the TPR/FPR
    of each run becomes one point, coloured by threshold. A two-point
    ``baseline`` series (0,0)-(1,1) is added for comparison.

    :param delta_t:   the upper bound (exclusive) for original delta_t parameter
    :param threshold: upper bound for the threshold of differentiating two classes
    :param inverse:   whether to inverse the classifier
    :return: None; the plot is written to ``./plots/googletrend.jpg`` or
        ``./plots/googletrend_inverse.jpg`` when ``inverse`` is set
    """
    # handle filepath and title based on parameter inverse
    filename = "googletrend"
    titlename = "ROC of google trend classifier"
    if inverse:
        filename += "_inverse"
        titlename += " (inverse version)"
    filepath = "./plots/%s.jpg" % filename
    # generate data first
    data = googletrend.preprocess()
    # store classifier evaluation metrics; the keys become DataFrame columns
    output = {'tpr': [], 'fpr': [], 'plot': []}
    # NOTE(review): thresholds come from repeated float steps, so labels such
    # as 'thre_0.30000000000000004' can appear in the legend.
    for thre in np.arange(0, threshold + 0.1, 0.1):
        # Single pre-formatted argument: prints identically on Python 2 and 3.
        print("==> threshold: %f, inverse: %s" % (thre, inverse))
        for i in range(1, int(delta_t)):
            # presumably algorithm() records its predictions on `data`, which
            # evaluate() then scores -- confirm against the googletrend module
            googletrend.algorithm(data, i, thre, inverse)
            tp_rate, fp_rate = googletrend.evaluate(data)
            output['tpr'].append(tp_rate)
            output['fpr'].append(fp_rate)
            output['plot'].append('thre_' + str(thre))
    # plot ROC graph
    # add a y=x baseline for comparison
    output['tpr'].extend([0.0, 1.0])
    output['fpr'].extend([0.0, 1.0])
    output['plot'].extend(['baseline', 'baseline'])
    df = pd.DataFrame(output)
    graph = (gg.ggplot(df, gg.aes('fpr', 'tpr', color='plot')) +
             gg.theme_seaborn() +
             gg.ggtitle(titlename) +
             gg.xlab("FPR") +
             gg.ylab("TPR") +
             gg.xlim(0.0, 1.0) +
             gg.ylim(0.0, 1.0) +
             gg.geom_point() +
             gg.geom_line())
    gg.ggsave(plot=graph, filename=filepath, width=6, height=6, dpi=100)
# Stack the per-method bias statistics into one long frame (df_stacked) and
# plot them. `df`, `brks`, `colors`, `title`, `species` and the initial `dftmp`
# are defined before this excerpt.
# total-based: tag the first melted frame with its method label
dftmp['method']=['(Total-Expected Total)/Expected Total']*dftmp['n_sub'].size
df_stacked = dftmp
# enhancement-based: columns brks[5:10]
dftmp = df[['n_sub']+brks[5:10]].melt(id_vars=['n_sub'],value_vars=brks[5:10], var_name = 'stat',value_name = 'value')
dftmp['method']=['(Enhanc-Expected Enhanc)/Expected Enhanc']*dftmp['n_sub'].size
df_stacked = df_stacked.append(dftmp)
# enhancements + full sample background: columns brks[10:]
dftmp = df[['n_sub']+brks[10:]].melt(id_vars=['n_sub'],value_vars=brks[10:], var_name = 'stat',value_name = 'value')
dftmp['method']=['(Enhanc+Expected Backgr-Expected Total)/Expected Total']*dftmp['n_sub'].size
df_stacked = df_stacked.append(dftmp)
# Legend label built from characters 1-2 of each stat name
# (presumably names look like 'p05...' -> '05th%'; confirm against brks).
df_stacked['percentile']=['{0}th%'.format(a[1:3]) for a in df_stacked['stat']]
# plots
# compare all 3 methods, one facet per method
plt1 = gg.ggplot(df_stacked, gg.aes(x='n_sub',y='value',color='percentile'))+gg.geom_line()+gg.xlab('N drives')+gg.ylab('Bias (%)')+gg.theme_bw()+gg.scale_color_manual(values=colors)+gg.geom_hline(y=[-25,25],linetype="dashed",color="gray")+gg.geom_vline(x=[10,15],linetype="dashed",color="gray")+gg.facet_wrap('method')+gg.ggtitle('Bias comparison {0}'.format(title))
plt1.save(filename = r'..\charts\drivebias_laqn_{0}.png'.format(species), width=None, height=None, dpi=300)

# plot the total-based method alone, for presentation
plt2 = gg.ggplot(df_stacked[df_stacked['method']=='(Total-Expected Total)/Expected Total'], gg.aes(x='n_sub',y='value',color='percentile'))+gg.geom_line()+gg.xlab('N drives')+gg.ylab('Bias (%)')+gg.ylim(-100,100)+gg.scale_color_manual(values=colors)+gg.geom_hline(y=[-25,25],linetype="dashed",color="gray")+gg.geom_vline(x=[10,15],linetype="dashed",color="gray")+gg.ggtitle('Bias comparison {0}'.format(title))
# Larger font for slides; this writes to the theme's private _rcParams dict.
t = gg.theme_bw()
t._rcParams['font.size']=16
plt2 = plt2+t
plt2.save(filename = r'..\charts\drivebias_laqn_{0}_total.png'.format(species), width=None, height=None, dpi=300)

# plot the enhancement + expected background method alone, for presentation
plt3 = gg.ggplot(df_stacked[df_stacked['method']=='(Enhanc+Expected Backgr-Expected Total)/Expected Total'], gg.aes(x='n_sub',y='value',color='percentile'))+gg.geom_line()+gg.xlab('N drives')+gg.ylab('Bias (%)')+gg.ylim(-100,100)+gg.scale_color_manual(values=colors)+gg.geom_hline(y=[-25,25],linetype="dashed",color="gray")+gg.geom_vline(x=[10,15],linetype="dashed",color="gray")+gg.ggtitle('Bias comparison {0}'.format(title))
# Same font override as for plt2.
t = gg.theme_bw()
t._rcParams['font.size']=16
plt3 = plt3+t
plt3.save(filename = r'..\charts\drivebias_laqn_{0}_enhanc.png'.format(species), width=None, height=None, dpi=300)
# NOTE(review): destinationtable_str is not defined in this excerpt -- confirm
# this line belongs to this script.
print("Query results loaded into table {0}".format(destinationtable_str))
示例#6
0
文件: crm.py 项目: brockk/clintrials
    def plot_toxicity_probabilities(self, chart_title=None, use_ggplot=False):
        """ Draw the prior and posterior dose-toxicity curves against dose level.

        :param chart_title: optional chart title; a verbose default is used when falsy
        :type chart_title: str
        :param use_ggplot: True to draw with ggplot, otherwise matplotlib is used
        :type use_ggplot: bool
        :return: the ggplot object when use_ggplot is True; None otherwise (the
                 matplotlib figure is drawn and resized but not returned)

        """

        if not chart_title:
            chart_title = "Prior (dashed) and posterior (solid) dose-toxicity curves" + "\n"

        if not use_ggplot:
            import matplotlib.pyplot as plt
            import numpy as np
            levels = self.dose_levels()
            prior_curve = self.prior
            posterior_curve = self.prob_tox()
            lower_curve = self.get_tox_prob_quantile(0.05)
            upper_curve = self.get_tox_prob_quantile(0.95)
            # Lines: prior dashed, posterior solid, 5%/95% quantiles dash-dot.
            for curve, line_style in ((prior_curve, '--'), (posterior_curve, '-'),
                                      (lower_curve, '-.'), (upper_curve, '-.')):
                plt.plot(levels, curve, line_style, c='black')
            # Markers: crosses on the prior, circles on the posterior.
            for curve, marker in ((prior_curve, 'x'), (posterior_curve, 'o')):
                plt.scatter(levels, curve, marker=marker, s=300, facecolors='none', edgecolors='k')
            # Horizontal reference line at the target toxicity level.
            plt.axhline(self.target)
            plt.ylim(0, 1)
            plt.xlim(np.min(levels), np.max(levels))
            plt.xticks(levels)
            plt.ylabel('Probability of toxicity')
            plt.xlabel('Dose level')
            plt.title(chart_title)

            # Size the current figure to a golden-ratio aspect, 12 inches wide.
            golden_ratio = (np.sqrt(5) + 1) / 2.
            figure = plt.gcf()
            figure.set_size_inches(12, 12 / golden_ratio)
            return None

        from ggplot import (ggplot, ggtitle, geom_line, geom_hline, aes, ylim)
        import numpy as np
        import pandas as pd
        var_name = 'Type'
        value_name = 'Probability of toxicity'
        wide = pd.DataFrame({'Dose level': self.dose_levels(),
                             'Prior': self.prior,
                             'Posterior': self.prob_tox(),
                             })
        # Long format: one row per (dose level, curve type).
        melted_data = pd.melt(wide, id_vars='Dose level', var_name=var_name, value_name=value_name)
        # TODO: add the 5%/95% quantile curves ('Lower'/'Upper') once per-series
        # linetype control in ggplot is worked out.
        curves = ggplot(melted_data, aes(x='Dose level', y=value_name, linetype=var_name)) + geom_line()
        target_line = geom_hline(yintercept=self.target, color='black')
        return curves + ggtitle(chart_title) + ylim(0, 1) + target_line
示例#7
0
import sys
from pandas.plotting import register_matplotlib_converters

register_matplotlib_converters()

species = 'no2'
# Load the melted background data; the text columns are read as strings and
# the timestamp is parsed explicitly afterwards.
df = pd.read_csv(
    r'.\charts\background_data_melted.csv',
    index_col='idx',
    dtype=dict(timestamp='str',
               vidperiod='str',
               type='str',
               param='str',
               value='float64'),
)
print(df[:10])
df['timestamp'] = pd.to_datetime(df['timestamp'], format="%Y-%m-%d %H:%M:%S")

# One facet per vidperiod, one coloured line per type.
plt1 = (
    gg.ggplot(df, gg.aes(x='timestamp', y='value', color='type'))
    + gg.geom_line()
    + gg.xlab('Time')
    + gg.ylab('Concentration')
    + gg.theme_bw()
    + gg.ylim(0, 100)
    + gg.facet_wrap('vidperiod', scales='free')
    + gg.ggtitle('Regional background comparison {0}'.format(species))
)
# Optional extra layer, currently disabled:
#+gg.theme(axis_text_x=gg.element_text(angle=20))

# The output name carries the species and today's date, e.g. 2020Jan31.
plt1.save(
    filename=r'.\charts\background_{0}_ggtest_{1}.png'.format(
        species, dt.datetime.today().strftime('%Y%b%d')),
    width=None,
    height=None,
    dpi=300,
)
示例#8
0
        total = len(model2scores[model][plot_key_name])
        for value in model2scores[model][plot_key_name]:
            plot_dataset.append(
                [model, value, the_mean / total, the_std, the_max, the_min])

    plot_dataset_pd = pd.DataFrame(
        plot_dataset,
        columns=['model', 'value', 'weight', 'std', 'max', 'min'])

    if 'logloss' in plot_key_name:

        p = ggplot.ggplot(ggplot.aes(x = 'model', fill = 'model', weight = 'weight'), data = plot_dataset_pd) +\
        ggplot.geom_bar(position = 'stack', width = 4) +\
        ggplot.geom_errorbar(ggplot.aes(x = 'model', y = 'value')) +\
        ggplot.ylim(0 ,5.05) +\
        ggplot.ggtitle(plot_key_name)

        #print(p)

    elif 'time' in plot_key_name:

        p = ggplot.ggplot(ggplot.aes(x = 'model', fill = 'model', weight = 'weight'), data = plot_dataset_pd) +\
        ggplot.geom_bar(position = 'stack', width = 4) +\
        ggplot.geom_errorbar(ggplot.aes(x = 'model', y = 'value')) +\
        ggplot.ggtitle(plot_key_name)

        #print(p)

    else:
示例#9
0
文件: convnet.py 项目: smoly/bubo
        tile(w_from_figure_wh_ratio, norm(data)),
        '%s-layer-acts-%s-%s-(i=%s)' % (img_desc, layer, show_tuple_tight(data.shape), batch_i),
    )

# Split the net's blobs by the rank of their activation array: rank 4 goes to
# conv_layers, everything else to fc_layers. List comprehensions replace the
# tuple-parameter lambdas, which are a SyntaxError on Python 3.
conv_layers = [(layer, acts) for layer, acts in net.blobs.items()
               if len(acts.data.shape) == 4]
fc_layers = [(layer, acts) for layer, acts in net.blobs.items()
             if len(acts.data.shape) != 4]

# Plot conv acts
for layer, acts in conv_layers:
    plot_conv_acts(layer, acts)

# Plot fc acts: one long frame with the batch_i-th activation vector of every
# non-conv layer; reset_index() exposes the unit position as an 'index' column.
df = pd.concat([
    pd.DataFrame({'act': acts.data[batch_i], 'layer': layer}).reset_index()
    for layer, acts in fc_layers
])
# Activation value against unit index, one facet per layer.
plot_gg(gg_layer(
    gg.ggplot(df, gg.aes(y='act', x='index')),
    gg.geom_point(alpha=.5),
    gg.facet_wrap(x='layer', scales='free'),
    gg.ggtitle('%s layer acts fc/prob points (i=%s)' % (img_desc, batch_i)),
))
# Histogram of activation values per layer on a log y scale.
plot_gg(gg_layer(
    gg.ggplot(df, gg.aes(x='act')),
    gg.geom_histogram(bins=25, size=0),
    gg.facet_wrap(x='layer', scales='free'),
    gg.scale_y_log(),
    gg.ylim(low=0.1),
    gg.ggtitle('%s layer acts fc/prob histo (i=%s)' % (img_desc, batch_i)),
))
示例#10
0
                         value_vars=['p05', 'p25', 'p50', 'p75', 'p95'],
                         var_name='yparam',
                         value_name='value')
    print(c['name'])
    #print(df_a)

    #plots
    #split percentiles into different charts, all sites
    #plt1 = gg.ggplot(df_along, gg.aes(x='n_passes',y='value',color='site_str'))+gg.geom_point()+gg.xlab('N drives')+gg.ylab('Bias (%)')+gg.theme_bw()+gg.xlim(0,100)+gg.facet_wrap('yparam',scales='free_y')
    #plt1.save(filename = r'..\charts\bias_{0}.png'.format(c['name']), width=None, height=None, dpi=200)
    #n_segments
    plt2 = gg.ggplot(
        df_a, gg.aes(x='n_passes', y='n_segments', color='site_str')
    ) + gg.geom_line() + gg.xlab('n, number drive periods') + gg.ylab(
        'Sample size (number of drive patterns)') + gg.theme_bw() + gg.xlim(
            0, 35) + gg.ylim(0, 2000)
    plt2.save(filename=r'..\charts\n_segments_{0}_{1}.png'.format(
        c['name'], dtstamp),
              width=None,
              height=None,
              dpi=200)
    #combine percentiles, split sites
    plt3 = gg.ggplot(
        df_along, gg.aes(x='n_passes', y='value', color='yparam')
    ) + gg.geom_line() + gg.xlab('n, number of drive periods') + gg.ylab(
        'Sample error (%)') + gg.theme_bw() + gg.xlim(0, 35) + gg.ylim(
            -100, 100) + gg.geom_hline(
                y=25, linetype="dashed", color="gray") + gg.geom_hline(
                    y=-25, linetype="dashed", color="gray") + gg.geom_vline(
                        x=[10, 15], linetype="dashed",
                        color="gray") + gg.scale_color_manual(
示例#11
0
文件: core.py 项目: TimZehta/rpgdice
def main():
    """Simulate the selected ruleset's rolls and optionally save its graphs.

    Parses the command line, looks up the chosen ruleset module in
    ``sys.modules``, runs ``rulemod.simulate_rolls`` over ``rulemod.variables``
    in a process pool, and collects the rows into a DataFrame. When
    ``args.graph`` is set, one PNG per entry in ``rulemod.graphs`` is written
    (or sent to /dev/null when ``args.dumpsave`` is set).

    Sets the module globals ``args``; exits with status 130 if the simulation
    is interrupted from the keyboard.

    Returns:
        int: always 0.
    """
    global args, ruleset
    # Arguments Parser
    argparser, subparser = parser_setup()
    register_rules(subparser)
    args = argparser.parse_args()
    rulemod = sys.modules["rpgdice.rulesets.%s" % args.ruleset]
    rulemod.prepare(args, srand)

    # Debug output uses a single pre-formatted argument so it prints
    # identically on Python 2 and Python 3.
    if args.debug:
        print("DEBUG: args %s" % (args,))
        print("")

    results = list()
    pool = multiprocessing.Pool()
    try:
        for result in pool.map(rulemod.simulate_rolls, rulemod.variables):
            results.extend(result)
        pool.close()
        pool.join()
    except KeyboardInterrupt:
        sys.exit(130)
    if args.debug:
        print("DEBUG: results:")
        pprint(results)
        print("")

    # Default axis/legend labels; a ruleset module may override any of them
    # with a module-level attribute of the same name.
    conf = {"vlab": "Variables", "xlab": "Outcome", "ylab": "Probability %"}
    for item in conf:
        conf[item] = getattr(rulemod, item, conf[item])

    columns = ("Graph", conf["vlab"], conf["xlab"], "Count", conf["ylab"])
    data = pandas.DataFrame.from_records(results, columns=columns)

    # Create and save graphs
    for gkey in rulemod.graphs:
        # Graph Defaults
        graph_conf = conf.copy()
        graph_conf["file_prefix"] = "%s%02d" % (args.ruleset, gkey)
        graph_conf["file_suffix"] = str()
        # colors: a symmetric palette around black, sized from the number of
        # variables. Floor division keeps the slice bounds integral on
        # Python 3 as well.
        colors_lower = ["#ff0000", "#cc0000", "#993300", "#666600"]
        colors_upper = ["#006666", "#003399", "#0000cc", "#0000ff"]
        colors_mid = ["#000000"]
        color_count = len(rulemod.variables) - 1
        half = color_count // 2
        lower_slice = -half
        # An odd count gives the extra colour to the upper half.
        upper_slice = color_count - half
        # NOTE: when lower_slice is 0 the slice [0:] keeps every lower colour.
        graph_conf["color_list"] = colors_lower[lower_slice:] + colors_mid + colors_upper[0:upper_slice]

        # graph_conf from graph: per-graph setting first, then a module-level
        # attribute, then whatever default is already present, else None.
        graph_items = (
            "color_list",
            "file_prefix",
            "file_suffix",
            "graph_type",
            "limits",
            "x_breaks",
            "x_labels",
            "title",
            "vlab",
            "xlab",
            "ylab",
        )
        for item in graph_items:
            try:
                graph_conf[item] = rulemod.graphs[gkey][item]
            except (LookupError, TypeError):
                graph_conf[item] = getattr(rulemod, item, graph_conf.get(item))
        if args.debug:
            print("DEBUG: graph_conf:")
            pprint(graph_conf)
            print("")

        # plot_data: rows of this graph only, with columns renamed to the
        # graph-specific labels and a 1-based index.
        plot_data = data.copy()
        plot_data = plot_data[plot_data["Graph"] == gkey]
        plot_data.rename(
            columns={
                conf["vlab"]: graph_conf["vlab"],
                conf["xlab"]: graph_conf["xlab"],
                conf["ylab"]: graph_conf["ylab"],
            },
            inplace=True,
        )
        plot_data.index = range(1, len(plot_data) + 1)
        if args.debug:
            print("DEBUG: plot_data:")
            pprint(plot_data)
            print("")

        # Create plot
        if args.graph:
            plot = (
                ggplot.ggplot(
                    ggplot.aes(x=graph_conf["xlab"], y=graph_conf["ylab"], color=graph_conf["vlab"]), data=plot_data
                )
                + ggplot.ggtitle(graph_conf["title"])
                + ggplot.theme_gray()
                + ggplot.scale_colour_manual(values=graph_conf["color_list"])
            )
            plot.rcParams["font.family"] = "monospace"
            if graph_conf["x_breaks"] and graph_conf["x_labels"]:
                plot += ggplot.scale_x_discrete(breaks=graph_conf["x_breaks"], labels=graph_conf["x_labels"])
            if graph_conf["limits"]:
                plot += ggplot.ylim(graph_conf["limits"][0], graph_conf["limits"][1])
            if graph_conf["graph_type"] == "bars":
                # "Bars" are drawn as very thick lines, with a percentage
                # label above every outcome that actually occurred.
                plot += ggplot.geom_line(size=20)
                text_data = plot_data[plot_data["Count"] > 0]
                text_data.index = range(0, len(text_data))
                outcomes = dict(text_data[graph_conf["xlab"]])
                percents = dict(text_data[graph_conf["ylab"]])
                for k in outcomes:
                    percent = "%4.1f%%" % percents[k]
                    x = outcomes[k]
                    y = percents[k] + 4
                    # NOTE(review): k is the row position in text_data, so
                    # color_list must have at least as many entries as there
                    # are labelled rows -- confirm this is intended.
                    color = graph_conf["color_list"][k]
                    plot += ggplot.geom_text(label=[percent], x=[x, x + 1], y=[y, y - 1], color=color)
            else:
                plot += ggplot.geom_line()
                plot += ggplot.geom_point(alpha=0.3, size=50)
            # Rulesets may post-process the plot.
            if hasattr(rulemod, "update_plot"):
                plot = rulemod.update_plot(gkey, graph_conf, plot, plot_data)
            if args.dumpsave:
                filename = "/dev/null"
            else:
                filename = "%s%s.png" % (graph_conf["file_prefix"], graph_conf["file_suffix"])
            ggplot.ggsave(filename, plot, format="png", dpi=300)

    return 0
示例#12
0
def plot_vol(dates, x, cp, my_domain):
    """Plot weekly message volume with its change points and segment means.

    Args:
        dates: indexable sequence of date objects (each must provide
            ``toordinal()``), one per volume sample.
        x: array of volume values aligned with ``dates`` (``x.shape[0]``
            samples).
        cp: sequence of change-point records; ``record[0]`` is the 1-based
            position of the change point in ``dates`` and ``record[2]`` is
            the mean volume of the segment that ends there.
        my_domain: label placed on the first line of the plot title.

    Returns:
        The ggplot object: the volume line, a diamond at every change point
        and a horizontal red segment per mean.
    """
    # -------------------- Prepare for Plotting -------------------------- #
    # Volume rows of the plotting frame: [Date, Volume, 'Volume'], where the
    # third column is the legend label.
    v = ['Volume'] * x.shape[0]
    ndf_vol = np.transpose(np.array([dates, x, v]))
    df_vol = pd.DataFrame(ndf_vol, columns=['Date', 'Volume', 'Data'])

    # Per change point: the ordinal date range covered by the segment mean,
    # the mean itself, and the date/value of the change-point marker.
    xmin_list = []      # left end of each mean segment
    xmax_list = []      # right end of each mean segment
    yint_list = []      # segment means
    cp_date_list = []   # change-point dates
    cp_value_list = []  # change-point marker heights

    ref_idx = 0  # first sample of the current segment
    for change_point in cp:
        cp_idx = change_point[0] - 1  # -1 b/c 1-indexed (includes cp itself)
        # Ordinals are used because that is how ggplot positions dates.
        xmin_list.append(dates[ref_idx].toordinal())
        xmax_list.append(dates[cp_idx].toordinal())
        yint_list.append(change_point[2])
        cp_date_list.append(dates[cp_idx])
        # The marker sits on the segment mean rather than on x[cp_idx].
        cp_value_list.append(change_point[2])
        ref_idx = cp_idx + 1  # next segment starts right after the cp

    # Change-point rows of the plotting frame, plus one dummy row whose only
    # purpose is to put 'Volume Mean' into the legend. The dummy row is added
    # to copies so the lists used for the layers below stay untouched.
    cp_lbl = ['Change Point'] * len(yint_list)
    ndf_cp = np.transpose(np.array([cp_date_list + [dates[0]],
                                    yint_list + [x[0]],
                                    cp_lbl + ['Volume Mean']]))
    df_cp = pd.DataFrame(ndf_cp, columns=['Date', 'Volume', 'Data'])

    df_plot = pd.concat((df_vol, df_cp), axis=0)

    # Head-room above (and a quarter of it below) the data range.
    margin = 0.10 * (np.max(x) - np.min(x))
    # NOTE(review): aes and date_format are used unqualified while everything
    # else goes through the ggplot module -- confirm both are imported.
    p = (ggplot.ggplot(aes(x='Date', y='Volume', color='Data'), data=df_plot) +
         ggplot.geom_line(color='blue', size=2) +
         ggplot.geom_point(x=xmax_list, y=cp_value_list, color='black',
                           shape='D', size=50) +
         ggplot.geom_hline(xmin=xmin_list,
                           xmax=xmax_list,
                           yintercept=yint_list, color="red", size=3) +
         ggplot.scale_x_date(labels=date_format("%Y-%m-%d"), breaks="1 week") +
         ggplot.scale_colour_manual(values=["black", "blue", "red"]) +
         ggplot.scale_y_continuous(labels='comma') +
         ggplot.ylim(low=np.min(x)-margin/4.0, high=np.max(x)+margin) +
         ggplot.xlab("Week (Marked on Mondays)") +
         ggplot.ylab("Message Vol") +
         ggplot.ggtitle("%s\nMessage Volume by Week" % my_domain) +
         ggplot.theme_seaborn())

    return p
示例#13
0
# Box plot of the 'Max' column per iteration, from the `rr` frame built
# earlier in the script.
(ggplot.ggplot(rr, ggplot.aes(x='Iteration', y='Max'))
 + ggplot.geom_boxplot())

# Per iteration: for every column of data[iteration], the number of entries
# equal to 0.
vals = []
n_val = []
iterations = map(str, range(2, 13))
for iteration in iterations:
    data_i = data[iteration]
    vals.extend((data_i == 0).sum().tolist())
    n_val.extend([iteration] * data_i.shape[1])

# Two parallel lists become two columns after the transpose.
rr = pandas.DataFrame([n_val, vals]).T
rr.columns = ['Iteration', 'Equal to 0']

(ggplot.ggplot(rr, ggplot.aes(x='Iteration', y='Equal to 0'))
 + ggplot.geom_boxplot())

# Per iteration: the 0.99 quantile of every column of data[iteration].
# NOTE: the column is labelled 'Median' although it holds the 0.99 quantile.
vals = []
n_val = []
iterations = map(str, range(2, 13))
for iteration in iterations:
    data_i = data[iteration]
    vals.extend(data_i.quantile(0.99).tolist())
    n_val.extend([iteration] * data_i.shape[1])

rr = pandas.DataFrame([n_val, vals]).T
rr.columns = ['Iteration', 'Median']

(ggplot.ggplot(rr, ggplot.aes(x='Iteration', y='Median'))
 + ggplot.geom_boxplot()
 + ggplot.ylim(0, 0.00025))