Exemplo n.º 1
0
def displacement_plot(centered, limits=None, style=None):
    u"""Draw a displacement plot of centered tracks using ggplot.

    Args:
        centered (pd.DataFrame): needs cX, cY, Object, Frame columns, probably
            produced by calling center(). The caller's frame is left
            untouched; a converted, sorted copy is what gets plotted.
        limits (real): Sets the limits of the scales to a square window showing
            ±limits on each axis. Falsy values (None, 0) leave the scales free.
        style (Iterable): Collection of strings. 'no-terminal-dot' suppresses
            the black dot that marks the end of tracks which terminate early.
            'theme-bw' is accepted but currently has no effect, because
            theme_bw is always applied.

    Returns:
        g (gg.ggplot): Plot object
    """
    style = () if style is None else style
    # assign() returns a new frame, so the caller's 'Object' column keeps its
    # dtype. sort_values() replaces DataFrame.sort(), which pandas removed.
    centered = (centered
                .assign(Object=centered['Object'].map(str))
                .sort_values(['Frame', 'Object']))
    g = (gg.ggplot(centered, gg.aes(x='cX', y='cY', color='Object')) +
         gg.geom_path(size=0.3))
    g += gg.theme_bw()  # if 'theme-bw' in style else gg.theme_seaborn()
    if limits:
        g = g + gg.ylim(-limits, limits) + gg.xlim(-limits, limits)
    if 'no-terminal-dot' not in style:
        # Last frame of every track; keep only tracks that stop before the
        # final frame of the whole data set.
        max_frame = centered['Frame'].max()
        endframe = centered.groupby('Object')['Frame'].max()
        endframe = endframe[endframe != max_frame].reset_index()
        # Recover the coordinates of those terminal points.
        endframe = endframe.merge(centered, on=['Object', 'Frame'])
        # we should check if endframe is empty before adding it:
        # https://github.com/yhat/ggplot/issues/425
        if not endframe.empty:
            g += gg.geom_point(data=endframe, color='black', size=1)
    return g
Exemplo n.º 2
0
def get_plot(X, ids, cluster_dct):
    """Embed the labelled rows of X in 2-D with t-SNE and print a scatter plot.

    Args:
        X: 2-D array indexed as ``X[i, :]``; row ``i`` belongs to ``ids[i]``.
        ids: Iterable of identifiers, in the same order as the rows of X.
        cluster_dct: Mapping from identifier to cluster label. Rows whose
            identifier is not a key of this mapping are dropped.

    Returns:
        None. The chart is printed, along with a few diagnostic values.
    """
    # Keep only the rows that have a cluster label (single pass over ids).
    kept = [(X[i, :], nid) for i, nid in enumerate(ids) if nid in cluster_dct]
    X = np.array([row for row, _ in kept])
    ids = np.array([nid for _, nid in kept])

    print(X.shape, ids.shape)
    feat_cols = ['pixel' + str(i) for i in range(X.shape[1])]

    df = pd.DataFrame(X, columns=feat_cols)

    tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=300)
    tsne_results = tsne.fit_transform(df.values)
    df_tsne = df.copy()
    df_tsne['x-tsne'] = tsne_results[:, 0]
    df_tsne['y-tsne'] = tsne_results[:, 1]
    # Extent of the embedding on both axes, for a quick sanity check.
    print(max(tsne_results[:, 0]), min(tsne_results[:, 0]),
          max(tsne_results[:, 1]), min(tsne_results[:, 1]))

    # NOTE: a KMeans(n_clusters=10) model used to be fitted on tsne_results
    # here, but its result was never read; points are coloured by the labels
    # supplied in cluster_dct.
    y = np.array([cluster_dct[i] for i in ids])
    print("y", y, y.shape)

    # Labels are mapped to strings before being used as the colour aesthetic.
    df_tsne['label'] = y
    df_tsne['label'] = df_tsne['label'].apply(str)

    chart = (ggplot(df_tsne, aes(x='x-tsne', y='y-tsne', color='label'))
             + geom_point(size=5, alpha=0.5)
             + theme_bw()
             + ggtitle("tSNE dimensions of steam graph embeddings"))

    print(chart)
Exemplo n.º 3
0
    def _setup_ggplot(self):
        try:
            from ggplot import theme_bw
            t = theme_bw()
            for k, v in t.get_rcParams().iteritems():
                mpl.rcParams[k] = v

            def plot_post_hook():
                for ax in self.figure.axes:
                    t.post_plot_callback(ax)
            self._post_plot_hook = plot_post_hook
        except ImportError:
            pass
        except AttributeError:
            pass
Exemplo n.º 4
0
def plotAlignmentStat(input, output):
    """Plot an alignment summary as a horizontal bar chart using ggplot.

    Args:
        input: Path (or buffer) of the CSV summary; ',' is parsed as the
            thousands separator. The table must have a 'category' column;
            every other column is treated as one sample.
        output: Path the chart is saved to.

    Returns:
        None. The chart is written to ``output``; intermediate tables and the
        split output path are printed for inspection.
    """
    df = pd.read_csv(input, thousands=",")
    # print() with a single argument behaves the same on Python 2 and 3;
    # the former print statements were a syntax error on Python 3.
    print(df.dtypes)
    print(df)
    # Keep only the rows at positions 2, 4 and 5.
    # NOTE(review): presumably the uniquely-mapped / multi-mapped / unmapped
    # percentage rows of the summary -- confirm against the input format.
    df = df.iloc[[2, 4, 5]]
    # Long form: one row per (category, sample) pair.
    dfm = pd.melt(df,
                  id_vars=['category'],
                  var_name='sampleName',
                  value_name='Value')
    print(dfm)

    from ggplot import ggplot, geom_bar, aes, theme_bw, ggtitle, coord_flip
    p = (ggplot(dfm, aes(x='sampleName', weight='Value', fill='category'))
         + geom_bar()
         + theme_bw()
         + ggtitle("Alignment Summary stats")
         + coord_flip())

    dirname, filename = os.path.split(output)
    print(dirname)
    print(filename)
    p.save(output)
# Stack the three bias definitions in long form, then chart them.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
import pandas as pd

# total-based
df_total = df[['n_sub'] + brks[:5]].melt(
    id_vars=['n_sub'], value_vars=brks[:5],
    var_name='stat', value_name='value')
df_total['method'] = '(Total-Expected Total)/Expected Total'
# enhancement-based
df_enhanc = df[['n_sub'] + brks[5:10]].melt(
    id_vars=['n_sub'], value_vars=brks[5:10],
    var_name='stat', value_name='value')
df_enhanc['method'] = '(Enhanc-Expected Enhanc)/Expected Enhanc'
# enhancements + full sample background
dftmp = df[['n_sub'] + brks[10:]].melt(
    id_vars=['n_sub'], value_vars=brks[10:],
    var_name='stat', value_name='value')
dftmp['method'] = '(Enhanc+Expected Backgr-Expected Total)/Expected Total'
# Row indices are kept as they are, exactly as the chained append() calls did.
df_stacked = pd.concat([df_total, df_enhanc, dftmp])
# Characters 1-2 of the stat name are used as the percentile number.
df_stacked['percentile'] = ['{0}th%'.format(a[1:3]) for a in df_stacked['stat']]

# plots
# compare all 3
plt1 = (
    gg.ggplot(df_stacked, gg.aes(x='n_sub', y='value', color='percentile'))
    + gg.geom_line()
    + gg.xlab('N drives')
    + gg.ylab('Bias (%)')
    + gg.theme_bw()
    + gg.scale_color_manual(values=colors)
    + gg.geom_hline(y=[-25, 25], linetype="dashed", color="gray")
    + gg.geom_vline(x=[10, 15], linetype="dashed", color="gray")
    + gg.facet_wrap('method')
    + gg.ggtitle('Bias comparison {0}'.format(title))
)
plt1.save(filename=r'..\charts\drivebias_laqn_{0}.png'.format(species),
          width=None, height=None, dpi=300)

# plot total alone for presentation
plt2 = (
    gg.ggplot(
        df_stacked[df_stacked['method'] == '(Total-Expected Total)/Expected Total'],
        gg.aes(x='n_sub', y='value', color='percentile'))
    + gg.geom_line()
    + gg.xlab('N drives')
    + gg.ylab('Bias (%)')
    + gg.ylim(-100, 100)
    + gg.scale_color_manual(values=colors)
    + gg.geom_hline(y=[-25, 25], linetype="dashed", color="gray")
    + gg.geom_vline(x=[10, 15], linetype="dashed", color="gray")
    + gg.ggtitle('Bias comparison {0}'.format(title))
)
# Larger font for slides: tweak the theme's rcParams before adding it.
t = gg.theme_bw()
t._rcParams['font.size'] = 16
plt2 = plt2 + t
plt2.save(filename=r'..\charts\drivebias_laqn_{0}_total.png'.format(species),
          width=None, height=None, dpi=300)

# plot the enhancement + expected background method alone for presentation
plt3 = (
    gg.ggplot(
        df_stacked[df_stacked['method'] == '(Enhanc+Expected Backgr-Expected Total)/Expected Total'],
        gg.aes(x='n_sub', y='value', color='percentile'))
    + gg.geom_line()
    + gg.xlab('N drives')
    + gg.ylab('Bias (%)')
    + gg.ylim(-100, 100)
    + gg.scale_color_manual(values=colors)
    + gg.geom_hline(y=[-25, 25], linetype="dashed", color="gray")
    + gg.geom_vline(x=[10, 15], linetype="dashed", color="gray")
    + gg.ggtitle('Bias comparison {0}'.format(title))
)
t = gg.theme_bw()
t._rcParams['font.size'] = 16
plt3 = plt3 + t
plt3.save(filename=r'..\charts\drivebias_laqn_{0}_enhanc.png'.format(species),
          width=None, height=None, dpi=300)
    )
    group by pod_id_location
""")
# Run the query in the EU location and load the result into a DataFrame.
qry_job = bqclient.query(qry_str, job_config=job_config, location='EU')
df = qry_job.to_dataframe()
# Long form: one row per pod and summary column, named by 'yparam'.
df_long = df.melt(
    id_vars=['pod_str', 'pod_idx'],
    value_vars=['p05', 'p25', 'med', 'p75', 'p95'],
    var_name='yparam',
    value_name='value',
)
#plots
#plt1 = gg.ggplot(df, gg.aes(x='date_UTC',y='no2_ppb'))+gg.geom_line()+gg.xlab('Time')+gg.ylab('NO2 (ppb)')+gg.theme_bw()+gg.facet_wrap('pod_id_location',scales='free_y')
#plt1.save(filename = r'.\charts\ulezpodts.png', width=None, height=None, dpi=200)
# One point per pod and summary column, coloured by column name.
plt2 = gg.ggplot(df_long, gg.aes(x='pod_str', y='value', color='yparam'))
plt2 = plt2 + gg.geom_point()
plt2 = plt2 + gg.xlab('pod') + gg.ylab('NO2 (as % of median)')
plt2 = plt2 + gg.theme_bw() + gg.theme(figure_size=(12, 6))
plt2 = plt2 + gg.scale_x_discrete()
plt2.save(filename=r'.\charts\ulezpodvar.png', width=10, height=6, dpi=200)

#repeat for mobile data using segid instead of podid where N = 10 and N = 40
#repeat for stationary data at mobile times
qry_str = ("""
    with cte0 as (
    --all data, ULEZ pods with 6000 hrs
    select date_UTC, a.pod_id_location, no2_ppb
    from AQMesh.NO2_scaled_hightimeres_ppb_20180901_20190630 a
    join AQMesh.NO2_site_metadata_v2_1_20180901_20190630 b
    on a.pod_id_location=b.pod_id_location
    where ULEZ = true and no2_ppb <> -999
    and a.pod_id_location in 
      --limit to pods with at least 6000 hours
                                             (vcfdf['TestBias']=='Pass') &
                                             (vcfdf['CHROM']==reference) ]['Pi']))
    return testwindows



# Generate new dataframe with analyses performed per window, then plot it
if options.graphics == True:
    # print() with one argument works on both Python 2 and 3; the former
    # print statement was a syntax error on Python 3.
    print("Analysing by " + str(windowsize) + "sliding windows and generating plots")
    # Sorted unique windows, computed once instead of once per column.
    windows = sorted(set(vcfdf['window']))
    windowed_df = pd.DataFrame({'window': windows,
                                'MaxMinor': windowMax(windows),
                                'Pi': windowPi(windows)})

    # x-axis label shared by all three plots
    x_label = "Genome Position (bp; windowsize=" + str(windowsize) + ")"

    # Minor variant frequency along the genome
    p_MaxMinor = (gg.ggplot(gg.aes('window', 'MaxMinor'), data=windowed_df)
                  + gg.geom_point()
                  + gg.theme_bw()
                  + gg.labs(x=x_label, y="Minor Variant Frequency (%)")
                  + gg.ggtitle(vcfoutput + "\n Valid Minor Variant Sites :" + str(len(minorvar))))

    # Plot Nucleotide Diversity (Pi) along genome; y axis starts at 0 and
    # ends just above the largest windowed Pi.
    # NOTE(review): p_pi is built but not saved in this section.
    p_pi = (gg.ggplot(gg.aes('window', 'Pi'), data=windowed_df)
            + gg.geom_point()
            + gg.theme_bw()
            + gg.labs(x=x_label, y="Mean nucleotide diversity (" + u"\u03c0" + ")")
            + gg.scale_y_continuous(expand=(0, 0), limits=(0, windowed_df['Pi'].max(axis=0) + 0.001))
            + gg.ggtitle(vcfoutput + "\n Genome-wide Mean Nucleotide Diversity (" + u"\u03c0" + ") :" + str(round(gw_Pi, 6))))

    # Facetted plot (still not sorted y axes labels yet)
    windowed_df_melt = pd.melt(windowed_df, id_vars=['window'])
    p_combi = gg.ggplot(gg.aes('window', 'value', colour='variable'), data=windowed_df_melt)
    p_combi = (p_combi
               + gg.geom_point(colour='variable')
               + gg.facet_grid('variable', scales='free_y')
               + gg.theme_bw()
               + gg.labs(x=x_label))

    # Print graphs to .png
    p_combi.save(vcfinput + ".MinorVar_combo.png")
    p_MaxMinor.save(vcfinput + ".MinorVar.png")
Exemplo n.º 8
0
import sys
from pandas.plotting import register_matplotlib_converters

# Register pandas' date converters with matplotlib before plotting timestamps.
register_matplotlib_converters()

species = 'no2'
# Load the melted background data; timestamps are read as text and parsed below.
df = pd.read_csv(
    r'.\charts\background_data_melted.csv',
    index_col='idx',
    dtype={
        'timestamp': 'str',
        'vidperiod': 'str',
        'type': 'str',
        'param': 'str',
        'value': 'float64',
    },
)
print(df[:10])
df['timestamp'] = pd.to_datetime(df['timestamp'], format="%Y-%m-%d %H:%M:%S")

#plots
# One line per 'type', one facet per 'vidperiod', y axis limited to 0-100.
plt1 = gg.ggplot(df, gg.aes(x='timestamp', y='value', color='type'))
plt1 = plt1 + gg.geom_line()
plt1 = plt1 + gg.xlab('Time') + gg.ylab('Concentration')
plt1 = plt1 + gg.theme_bw() + gg.ylim(0, 100)
plt1 = plt1 + gg.facet_wrap('vidperiod', scales='free')
plt1 = plt1 + gg.ggtitle('Regional background comparison {0}'.format(species))
#+gg.theme(axis_text_x=gg.element_text(angle=20))

# The output name carries the species and today's date (e.g. 2019Jul04).
date_tag = dt.datetime.today().strftime('%Y%b%d')
plt1.save(
    filename=r'.\charts\background_{0}_ggtest_{1}.png'.format(species, date_tag),
    width=None,
    height=None,
    dpi=300,
)
Exemplo n.º 9
0
                                  for x in repeatedKnnResults],
                                 columns = ['p',
                                            'k',
                                            'cvAccuracy',
                                            'testAccuracy'])


# Long-form table for plotting: the same (log10(p), k) rows appear twice,
# once with the cross-validation accuracy and once with the test accuracy.
cv_rows = DataFrame({'log10(p)': log10(knnResultsSimplified.p),
                     'k': knnResultsSimplified.k.apply(int),
                     'type': 'cv',
                     'Accuracy': knnResultsSimplified.cvAccuracy})
test_rows = DataFrame({'log10(p)': log10(knnResultsSimplified.p),
                       'k': knnResultsSimplified.k.apply(int),
                       'type': 'test',
                       'Accuracy': knnResultsSimplified.testAccuracy})
ggdata = pandas.concat([cv_rows, test_rows], axis=0)

# Accuracy against log10(p), one facet per k; colour, group and line type
# all follow 'type'.
ggobj = ggplot.ggplot(
    data=ggdata,
    aesthetics=ggplot.aes(x='log10(p)', y='Accuracy',
                          color='type', group='type', linetype='type'),
)
ggobj = ggobj + ggplot.theme_bw()
# ggobj += ggplot.scale_x_log()
ggobj = ggobj + ggplot.geom_point(alpha=0.6)
ggobj = ggobj + ggplot.stat_smooth()
ggobj = ggobj + ggplot.facet_wrap('k')
print(ggobj)
def firms_dynamics_plot(decision):
    """Plot the firm-population time series of one simulation run.

    Reads ``temp_general_firms_pop_<pop_redutor>_decision_<decision>_time_
    <final_Time>.txt`` from ``parameters.OUTPUT_PATH`` (comma separated, no
    header, ',' as decimal mark) and writes eight PNG charts to the same
    directory: six single-series line charts drawn with ggplot and two
    two-series charts drawn with pandas/matplotlib. An existing chart with
    the same name is deleted before the new one is saved.

    Args:
        decision: Value interpolated into the input and output file names.

    Returns:
        None.
    """
    data = pd.read_csv(os.path.join(
        parameters.OUTPUT_PATH,
        "temp_general_firms_pop_%s_decision_%s_time_%s.txt" %
        (parameters.pop_redutor, decision, parameters.final_Time)),
                       sep=",",
                       header=None,
                       decimal=",").astype(float)
    # renaming the column names
    data.columns = [
        'time', 'total_firms', 'average_output', 'average_age', 'average_size',
        'new_firms', 'exit_firms', 'max_size', 'total_effort', 'average_effort'
    ]

    # optionally drop the initial (burn-in) periods from every plot
    if parameters.time_to_cut_plots > 0:
        data = data.loc[
            data['time'].astype(int) >= parameters.time_to_cut_plots, :]

    # value interpolated into the output file names
    title_pop_val = float(parameters.pop_redutor) * 100

    # x-axis ticks: one every 12 time steps plus one past the last step,
    # labelled with the step divided by 12 (a year count)
    list_of_years_division = list(
        range(int(data['time'].min()), int(data['time'].max()),
              12)) + [data['time'].max() + 1]
    list_of_years = [int(i / 12) for i in list_of_years_division]

    # graph parameter variables
    dpi_var_plot = 700
    width_var_plot = 15
    height_var_plot = 10

    # (column, title, y label, output file stem) of each ggplot line chart
    line_charts = (
        ('total_firms', 'Total firms', 'Total of Firms',
         'temp_general_total_firms'),
        ('average_output', 'Average of output', 'Units',
         'temp_general_average_output'),
        ('average_age', 'Average of age of firms', 'Age of Firms',
         'temp_general_average_age'),
        ('average_size', 'Average of size of firms', 'Units',
         'temp_general_average_size'),
        ('new_firms', 'Number of new firms', 'Units',
         'temp_general_number_of_new_firms'),
        ('exit_firms', 'Number of firms out', 'Units',
         'temp_general_number_of_firms_out'),
    )
    for column, title, y_label, file_stem in line_charts:
        _save_firms_line_chart(
            data, column, title, y_label,
            _firms_plot_path(file_stem, decision, title_pop_val),
            list_of_years_division, list_of_years,
            width_var_plot, height_var_plot)

    # (columns, title, y label, output file stem) of each matplotlib chart
    # NOTE(review): the first title says "maximum effort" but the plotted
    # column is total_effort -- confirm which one is intended.
    pair_charts = (
        (['average_effort', 'total_effort'],
         'Average and maximum effort of employees', 'Values units of effort',
         'temp_average_and_maximum_effort_of_firms_out'),
        (['average_size', 'max_size'],
         'Average and maximum size firms', 'Number of employees',
         'temp_average_size_and_maximum_of_firms_out'),
    )
    for columns, title, y_label, file_stem in pair_charts:
        _save_firms_pair_chart(
            data[columns], title, y_label,
            _firms_plot_path(file_stem, decision, title_pop_val),
            list_of_years_division, list_of_years,
            width_var_plot, height_var_plot, dpi_var_plot)


def _firms_plot_path(file_stem, decision, title_pop_val):
    """Return the PNG path ``<stem>_<decision>_<pop>_<final_Time>.png``."""
    return os.path.join(
        parameters.OUTPUT_PATH,
        '%s_%s_%s_%s.png' %
        (file_stem, decision, title_pop_val, parameters.final_Time))


def _remove_stale_plot(path):
    """Delete a chart left at ``path`` by an earlier run, if there is one."""
    if os.path.isfile(path):
        os.remove(path)


def _save_firms_line_chart(data, column, title, y_label, path, breaks, labels,
                           width, height):
    """Draw ``column`` against time with ggplot and save it to ``path``."""
    plot_data = gg.ggplot(data, gg.aes('time', column)) + gg.geom_line() + \
        gg.scale_y_continuous(breaks=11) + \
        gg.scale_x_discrete(breaks=breaks, labels=labels) + \
        gg.ggtitle(title) + gg.xlab('Years') + gg.ylab(y_label) + \
        gg.theme_bw()
    _remove_stale_plot(path)
    gg.ggsave(plot_data, path, width=width, height=height, units="in")


def _save_firms_pair_chart(frame, title, y_label, path, ticks, tick_labels,
                           width, height, dpi):
    """Draw every column of ``frame`` with matplotlib and save to ``path``."""
    axes = frame.plot(title=title)
    axes.set_xlabel('Years')
    axes.set_ylabel(y_label)
    # legend outside the plotting area, to the right
    axes.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    axes.set_xticks(ticks)
    axes.set_xticklabels(tick_labels)
    # set_axis_bgcolor was removed in matplotlib 2.2; prefer set_facecolor
    # and fall back only on releases that predate it.
    set_background = getattr(axes, 'set_facecolor', None)
    if set_background is None:
        set_background = axes.set_axis_bgcolor
    set_background('w')
    fig = axes.get_figure()
    fig.set_size_inches(width, height)
    _remove_stale_plot(path)
    fig.savefig(path, dpi=dpi)
Exemplo n.º 11
0
    # Long form of the summary columns: one row per (site, n_passes, column).
    df_along = df_a.melt(
        id_vars=['site_str', 'n_passes'],
        value_vars=['p05', 'p25', 'p50', 'p75', 'p95'],
        var_name='yparam',
        value_name='value',
    )
    print(c['name'])
    #print(df_a)

    #plots
    #split percentiles into different charts, all sites
    #plt1 = gg.ggplot(df_along, gg.aes(x='n_passes',y='value',color='site_str'))+gg.geom_point()+gg.xlab('N drives')+gg.ylab('Bias (%)')+gg.theme_bw()+gg.xlim(0,100)+gg.facet_wrap('yparam',scales='free_y')
    #plt1.save(filename = r'..\charts\bias_{0}.png'.format(c['name']), width=None, height=None, dpi=200)
    #n_segments: sample size against number of drive periods, one line per site
    plt2 = gg.ggplot(
        df_a, gg.aes(x='n_passes', y='n_segments', color='site_str'))
    plt2 = plt2 + gg.geom_line()
    plt2 = plt2 + gg.xlab('n, number drive periods')
    plt2 = plt2 + gg.ylab('Sample size (number of drive patterns)')
    plt2 = plt2 + gg.theme_bw()
    plt2 = plt2 + gg.xlim(0, 35) + gg.ylim(0, 2000)
    plt2.save(
        filename=r'..\charts\n_segments_{0}_{1}.png'.format(c['name'], dtstamp),
        width=None,
        height=None,
        dpi=200,
    )
    #combine percentiles, split sites
    plt3 = gg.ggplot(
        df_along, gg.aes(x='n_passes', y='value', color='yparam')
    ) + gg.geom_line() + gg.xlab('n, number of drive periods') + gg.ylab(
        'Sample error (%)') + gg.theme_bw() + gg.xlim(0, 35) + gg.ylim(
            -100, 100) + gg.geom_hline(
                y=25, linetype="dashed", color="gray") + gg.geom_hline(
                    y=-25, linetype="dashed", color="gray") + gg.geom_vline(
                        x=[10, 15], linetype="dashed",
Exemplo n.º 12
0
                                  for x in repeatedKnnResults],
                                 columns = ['p',
                                            'k',
                                            'cvAccuracy',
                                            'testAccuracy'])


# Long-form table for plotting: the same (log10(p), k) rows appear twice,
# once with the cross-validation accuracy and once with the test accuracy.
ggdata = pandas.concat(
    [DataFrame({'log10(p)' : log10(knnResultsSimplified.p),
                'k' : knnResultsSimplified.k.apply(int),
                'type' : 'cv',
                'Accuracy' : knnResultsSimplified.cvAccuracy}),
     DataFrame({'log10(p)' : log10(knnResultsSimplified.p),
                'k' : knnResultsSimplified.k.apply(int),
                'type' : 'test',
                'Accuracy' : knnResultsSimplified.testAccuracy})],
    axis = 0
)

# Accuracy against log10(p), one facet per k; colour, group and line type
# all follow 'type'.
ggobj = ggplot.ggplot(
    data = ggdata,
    aesthetics = ggplot.aes(x='log10(p)', y='Accuracy',
                            color='type', group='type', linetype='type')
)
ggobj += ggplot.theme_bw()
# ggobj += ggplot.scale_x_log()
ggobj += ggplot.geom_point(alpha=0.6)
ggobj += ggplot.stat_smooth()
ggobj += ggplot.facet_wrap('k')
# print() with one argument works on both Python 2 and 3; the former
# `print ggobj` statement was a syntax error on Python 3.
print(ggobj)