Example #1
File: lplot.py Project: ldhalstrom/lib
def UseSeaborn(palette='deep'):
    """Call to use seaborn plotting package
    """
    import seaborn as sns
    #No Background fill, legend font scale, frame on legend
    sns.set(style='whitegrid', font_scale=1.5, rc={'legend.frameon': True})
    #Mark ticks with border on all four sides (overrides 'whitegrid')
    sns.set_style('ticks')
    #ticks point in
    sns.set_style({"xtick.direction": "in","ytick.direction": "in"})

    # sns.choose_colorbrewer_palette('q')

    #Nice Blue,green,Red
    # sns.set_palette('colorblind')
    if palette == 'xkcd':
        #Nice blue, purple, green
        #(xkcdcolors is a list of xkcd color names defined at module level in lplot.py)
        sns.set_palette(sns.xkcd_palette(xkcdcolors))
    else:
        sns.set_palette(palette)
    #Nice blue, green red
    # sns.set_palette('deep')

    # sns.set_palette('Accent_r')
    # sns.set_palette('Set2')
    # sns.set_palette('Spectral_r')
    # sns.set_palette('spectral')

    #FIX INVISIBLE MARKER BUG
    sns.set_context(rc={'lines.markeredgewidth': 0.1})
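
# A minimal usage sketch (my addition; assumes lplot.py is importable and
# matplotlib is installed):
import matplotlib.pyplot as plt
from lplot import UseSeaborn

UseSeaborn(palette='deep')
plt.plot([0, 1], [0, 1])
plt.show()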
Example #2
def plot_morph(good_spikes, cluster, morph_dim, spacing=.02, ymax=.04):
    plt.figure(figsize=(20,20))
    with sns.color_palette(sns.xkcd_palette(["twilight blue", "kermit green"]), 2):
        plt.subplot(222)
        stim_name = morph_dim[1]+"_rec"
        spks2plot = good_spikes[(good_spikes['cluster']==cluster) & (good_spikes['stim_name'].str.contains(stim_name))]
        plot_fr_se(spks2plot["stim_aligned_time"].values, spks2plot["stim_presentation"].values, label=stim_name)
        stim_name = morph_dim+'128'
        spks2plot = good_spikes[(good_spikes['cluster']==cluster) & (good_spikes['stim_name'].str.contains(stim_name))]
        plot_fr_se(spks2plot["stim_aligned_time"].values, spks2plot["stim_presentation"].values, label=stim_name)
        plt.legend(loc=1)
        ax = plt.gca()
        ax.plot((0, 0), (0, ymax), c=".2", alpha=.5)
        ax.plot((stim_length, stim_length), (0, ymax), c=".2", alpha=.5)
        plt.xlim(-.5, 1)
        plt.ylim(0, ymax)
        plt.xticks([0, .5])
        plt.yticks([0, .5*ymax, ymax])
        plt.title('cell: %d   morph dim: %s' % (cluster, morph_dim))

        plt.subplot(224)
        stim_name = morph_dim[0]+"_rec"
        spks2plot = good_spikes[(good_spikes['cluster']==cluster) & (good_spikes['stim_name'].str.contains(stim_name))]
        plot_fr_se(spks2plot["stim_aligned_time"].values, spks2plot["stim_presentation"].values, label=stim_name)
        stim_name = morph_dim+'001'
        spks2plot = good_spikes[(good_spikes['cluster']==cluster) & (good_spikes['stim_name'].str.contains(stim_name))]
        plot_fr_se(spks2plot["stim_aligned_time"].values, spks2plot["stim_presentation"].values, label=stim_name)
        plt.legend(loc=1)
        ax = plt.gca()
        ax.plot((0, 0), (0, ymax), c=".2", alpha=.5)
        ax.plot((stim_length, stim_length), (0, ymax), c=".2", alpha=.5)
        plt.xlim(-.5, 1)
        plt.ylim(0, ymax)
        plt.xticks([0, .5])
        plt.yticks([0, .5*ymax, ymax])

    with sns.color_palette(sns.diverging_palette(262, 359, s=99, l=43, sep=1, n=128, center="dark"), 128):
        plt.subplot(121)
        spks_morph = good_spikes[(good_spikes['cluster']==cluster) & (good_spikes['morph_dim']==morph_dim)]
        morph_ymax = 128*spacing+ymax
        for morph_pos in np.unique(spks_morph['morph_pos'].values):
            stim_name = morph_dim + str(int(morph_pos))
            spks2plot = spks_morph[spks_morph['morph_pos'] == morph_pos]
            plot_fr_se(spks2plot["stim_aligned_time"].values, spks2plot["stim_presentation"].values, offset=morph_pos*spacing, label=stim_name)
        ax = plt.gca()
        ax.plot((0, 0), (0, morph_ymax), c=".2", alpha=.5)
        ax.plot((stim_length, stim_length), (0, morph_ymax), c=".2", alpha=.5)
        plt.xlim(-.5, 1)
        plt.ylim(0, morph_ymax)
        plt.xticks([0, .5])
        plt.yticks([])
        #hide y ticks (the original passed x-axis keywords to axis='y', which do nothing)
        plt.tick_params(axis='y', which='both', left=False, right=False, labelleft=False)
    sns.despine()
def create_impDum_barplot(ginfo):    
    #runs to loop through
    suffix_list = ["_impDums", "", "_dumsOnly"]
    #labels to appear in graph legend
    list_desc = ["Clinical values + imputation indicators",
                "Clinical values only",
                "Imputation indicators only"]
    predictor_desc = "covarlist_all" 
    figName   = ginfo.FileNamePrefix + '_' + predictor_desc + ginfo.patient_sample + '_ImpAnalysis' 
    tableName = ginfo.FileNamePrefix + '_' + predictor_desc + '_' + ginfo.patient_sample + '.txt'
    resultsDF = pd.read_csv(outDir + 'R_' + tableName, sep=",")
    alg_names = resultsDF['Unnamed: 0'] #algorithm names
    print "alg_names: " , alg_names
    initial_pos = np.arange(len(alg_names))*(
        len(suffix_list)+1)+len(suffix_list)+1
    bar_width = 1
    colors = ["amber","windows blue","greyish"]
    mycolors = sns.xkcd_palette(colors)
    plt.figure(figsize=(6.7,8))
    #cycle through each patient list
    plots = []
    for counter, suffix in enumerate(suffix_list):
        tableName = ginfo.FileNamePrefix + '_' + predictor_desc + '_' + \
            ginfo.patient_sample + suffix + '.txt'
        resultsDF = pd.read_csv(outDir + 'R_' + tableName, sep=",")
        measurements = np.array(resultsDF['cvAUC'])
        z = stats.norm.ppf(.95)
        SEs = [( np.array(resultsDF['cvAUC']) - np.array(resultsDF['ci_low']) )/z, 
               ( np.array(resultsDF['ci_up']) - np.array(resultsDF['cvAUC']) )/z ]
        alg_pos = initial_pos - counter 
        print "measurements: " , measurements
        print "alg_pos: " , alg_pos
        #positional y/width works across matplotlib versions ('bottom=' was renamed 'y=')
        plot = plt.barh(alg_pos, measurements, height=bar_width,
                        xerr=SEs, error_kw=dict(ecolor='.1', lw=1, capsize=1, capthick=1),
                        align='center', alpha=1, 
                        color=mycolors[counter], label=list_desc[counter])
        plots.append(plot)
    plt.xlabel = "cvAUC"
    plt.xlim(.5, 1)
    plt.ylim(0,max(initial_pos)+2)
    print "counter: " , counter
    plt.yticks(initial_pos - counter/2, alg_names)
    plt.legend(prop={'size':8})
    plt.tight_layout()
    plt.savefig(outDir + figName + '.eps', dpi=1200)
    plt.close()    
    def iterateAlignment( self ):

        # Setup drawing

        #colors = ["purple", "light purple", 
        #        "blue", "cyan", "neon blue"]
                #"red", "rose",
                #"green", "bright green", "mint"]
#        roygbv
        co = ['lightish red', 'yellowish orange', 'canary yellow', 'lime', 'cyan']  # ,'lavender'

        co = random.sample( list(sns.xkcd_rgb), self.n )  #list() needed in Python 3
        pal = sns.xkcd_palette( co )
        

        for i in range( self.nIters ):
            print(i)
            f, (ax1,ax2) = plt.subplots(1,2, sharex= True, sharey=True)
            ## Calculate mean shape
            self.calcMeanShape( )
            ax1.plot( self.meanShape.xs, self.meanShape.ys, c = 'k', lw = 1 )            
            
            ## Normalize mean shape
            self.normShape( self.meanShape )

            #map() is lazy in Python 3, so draw each shape explicitly
            for t in self.allShapes:
                t.draw( pal, ax1 )



            ## Realign
            self.alignAllShapes( )
            
            for t in self.allShapes:
                t.draw( pal, ax2 )
            ax2.plot( self.meanShape.xs, self.meanShape.ys, c = 'k', lw = 1 )

            # Draw change
            self.calcMeanShape()


            f.savefig( "C:/Users/Valerie/Desktop/stars/plots%d/%d.png" % (self.n, i ) )
            f.clear()
            plt.close()
    def iterateAlignment( self ):

        # Setup drawing
        
        f, (ax1,ax2) = plt.subplots(1,2)#, sharex= True, sharey=True)
        colors = ["purple", "light purple", 
                "blue", "cyan", "neon blue",
                "red", "rose",
                "green", "bright green", "mint"]
        pal = sns.xkcd_palette( colors )
        
        iter = 0

        # Draw no change
        self.calcMeanShape()
        self.drawAll( ax1, pal )

        # 1. Align to first shape (instantiation)
        self.alignAllShapes( self.allShapes[0] )

        while( iter < 500 ):
            print(iter)
            if iter > 0:
                f, (ax1,ax2) = plt.subplots(1,2, sharex= True, sharey=True)
                self.drawAll( ax1, pal ) #previous iter
            ## Calculate mean shape
            self.calcMeanShape( )

            ## Normalize mean shape to first shape
            self.normTrans( self.allShapes[0] )

            ## Realign
            self.alignAllShapes( self.meanShape )
        
            # Draw change
            self.calcMeanShape()
            self.drawAll( ax2, pal )
            plt.legend()
            f.savefig( "C:/Users/Valerie/Desktop/stars/plots5/%d.png" % iter )
            f.clear()
            plt.close()
            iter += 1
示例#6
0
    def __init__(self, df, var1, var2, classvar,
                 nn_range=range(1,101),
                 granularity=50.,
                 buffer_denom=15.,
                 figsize=(9,7),
                 dotsize=70,
                 point_colors=sns.xkcd_palette(['windows blue', 'amber']),
                 mesh_colors=['#8FCCFF', '#FFED79']):

        self.df = df
        self.var1 = var1
        self.var2 = var2
        self.classvar = classvar
        self.nn_range = nn_range
        self.granularity = granularity
        self.buffer_denom = buffer_denom
        self.figsize = figsize
        self.dotsize = dotsize
        self.point_colors = point_colors
        self.mesh_colors = mesh_colors
    def iterateAlignment( self ):

        # Setup drawing

        #colors = ["purple", "light purple", 
        #        "blue", "cyan", "neon blue"]
                #"red", "rose",
                #"green", "bright green", "mint"]
#        roygbv
        co = ['lightish red', 'yellowish orange', 'canary yellow', 'lime', 'cyan']  # ,'lavender'
        pal = sns.xkcd_palette( co )
        

        for i in range( self.nIters ):
            f, (ax1,ax2) = plt.subplots(1,2)#, sharex= True, sharey=True)
            ## Calculate mean shape
            self.calcMeanShape( )
            ax1.plot( self.meanShape.xs, self.meanShape.ys, 'k' )            
            
            ## Normalize mean shape
            self.normMeanShape( )

            for sh in self.allShapes:
                sh.draw( pal, ax1)
                


            ## Realign
            self.alignAllShapes( )
            for sh in self.allShapes:
                sh.draw( pal, ax2 )
                ax2.plot( self.meanShape.xs, self.meanShape.ys, 'k' )

            # Draw change
            self.calcMeanShape()


            f.savefig( "C:/Users/Valerie/Desktop/stars/plots5/%d.png" % i )
            f.clear()
            plt.close()
    def alignTrainingSet( self ):

        ## Setup drawing
        co = random.sample( list(sns.xkcd_rgb), self.n )  #list() needed in Python 3
        pal = sns.xkcd_palette( co )
        

        for i in range( self.nIters ):
            start = time.time()

            # Calculate mean shape
            self.asm.meanShape = self.asm.calcMeanShape()
            
            if i == 0:
                for t in self.asm.allShapes:
                    t.draw( pal, plt )
                plt.plot( self.asm.meanShape.xs, self.asm.meanShape.ys, c = 'k', lw = 1 )
                plt.gca().invert_yaxis()
                plt.savefig( os.path.join( self.out, "no-alignment-%d.png" % i ) )

                plt.close()

            # Normalize mean shape
            self.asm.normMeanShape = self.asm.normShape( self.asm.meanShape )

            # Align all shapes to normalized mean shape
            self.asm.allShapes = self.alignAllShapes()
            for t in self.asm.allShapes:
                t.draw( pal, plt )
            plt.plot( self.asm.normMeanShape.xs, self.asm.normMeanShape.ys, c = 'k', lw = 1 )
            plt.gca().invert_yaxis()
            plt.savefig( os.path.join( self.out, "alignment-%d.png" % ( i ) ) )
            plt.close()

            with open( os.path.join( self.out, 'log.txt' ), 'a' )  as of:
                of.write( "AlignIter: %f\n" % ( time.time() - start ) )
                of.write( '%d\n\n' % i )
            print(i)


        return self.asm
Example #9
def prepare_and_plot_1_2(dataset_name):
    deltas = [
        '1.0E-5', '1.0E-6', '1.0E-7', '1.0E-8', '1.0E-9', '1.0E-10', '1.0E-11',
        '1.0E-12', '1.0E-13', '1.0E-14', '1.0E-15', '1.0E-16', '1.0E-17',
        '1.0E-18', '1.0E-19', '1.0E-20'
    ]
    deltas.reverse()
    if dataset_name == 'adult' or dataset_name == 'housing':
        ks = list(range(200, 4701, 300))
    else:
        raise RuntimeError("Does not recognise dataset", dataset_name)

    path_in = os.getcwd()
    pattern = '^.*/thesis-data-anonymisation/'
    path = re.search(pattern, path_in).group(0)

    file_path_safepub = path + 'data/result/safepub_test/1_2/' + dataset_name
    file_path_sc = path + 'data/result/sc_test/1_2/' + dataset_name
    pattern = "^norm_result_"

    plot_path = path+'data/result/plots/needs_editing/' \
                    '1_2_'+dataset_name+'.jpg'

    files_safepub = [
        file for file in os.listdir(file_path_safepub)
        if re.match(pattern, file)
    ]
    files_sc = [
        file for file in os.listdir(file_path_sc) if re.match(pattern, file)
    ]

    information_loss = []
    models = []
    parameters = []

    for file in files_sc:
        k = int(re.split(pattern + r"k_|\.csv", file)[1])
        res_data = pd.read_csv(file_path_sc + '/' + file)
        data = res_data['sse']
        information_loss += list(data)
        models += ['MicroDP'] * len(data)
        parameters += [k] * len(data)

    for file in files_safepub:
        delta = re.split(pattern + r"delta_|\.csv", file)[1]
        res_data = pd.read_csv(file_path_safepub + '/' + file)
        data = res_data['sse']
        information_loss += list(data)
        models += ['SafePub'] * len(data)
        parameters += [ks[deltas.index(delta)]] * len(data)

    array = np.array([information_loss, parameters]).T

    df = pd.DataFrame(data=array, columns=['Information loss', 'k/𝛿'])
    df['Model'] = models

    ax = sns.lineplot(x='k/𝛿',
                      y='Information loss',
                      hue='Model',
                      style='Model',
                      data=df,
                      palette=sns.xkcd_palette(['windows blue', 'amber']),
                      markers=['o', 'o'],
                      dashes=False)
    ax.set(ylim=(0.0, 0.75))
    #plt.show()
    plt.savefig(plot_path)
    plt.clf()
Example #10
def prepare_and_plot_2_1(dataset_name):
    match = "^norm_result_"

    path_in = os.getcwd()
    pattern = '^.*/thesis-data-anonymisation/'
    path = re.search(pattern, path_in).group(0)

    safepub_path = path + "data/result/safepub_test/2_1/" + dataset_name

    sc_path = path + "data/result/sc_test/2_1/" + dataset_name

    plot_path = path + "data/result/plots/2_1_" + dataset_name + ".jpg"

    if dataset_name == 'adult':
        attribute_range = list(range(2, 9))
    elif dataset_name == 'housing':
        attribute_range = list(range(2, 10))
    elif dataset_name == 'musk':
        attribute_range = list(range(2, 21))
    else:
        raise RuntimeError("Does not recognise dataset", dataset_name)

    num_attributes = []
    information_loss = []
    models = []

    for a in attribute_range:
        sc_files = [
            file for file in os.listdir(sc_path + '/' + str(a))
            if re.match(match, file)
        ]
        safepub_files = [
            file for file in os.listdir(safepub_path + '/' + str(a))
            if re.match(match, file)
        ]

        for file in sc_files:
            df = pd.read_csv(sc_path + '/' + str(a) + '/' + file)
            data = list(df['sse'])
            information_loss += data
            num_attributes += [a] * len(data)
            models += ['MicroDP'] * len(data)

        for file in safepub_files:
            df = pd.read_csv(safepub_path + '/' + str(a) + '/' + file)
            data = list(df['sse'])
            information_loss += data
            num_attributes += [a] * len(data)
            models += ['SafePub'] * len(data)

    array = np.array([num_attributes, information_loss]).T

    df = pd.DataFrame(array,
                      columns=['Number of attributes', 'Information loss'])
    df['Model'] = models

    ax = sns.lineplot(x='Number of attributes',
                      y='Information loss',
                      hue='Model',
                      data=df,
                      palette=sns.xkcd_palette(['windows blue', 'amber']))
    #ax.set(ylim=(0.0, 1.0))
    ax.set(yscale='log')
    plt.show()
    #plt.savefig(plot_path)
    plt.clf()

    return
def create_LCMS_barplot(ginfo, LCMScompare, outcome, FileNameSuffix2):
    """Bar plot with bars grouped by predictor set and colors indicating LCMS run  

        LCMScompare = "NPbins_v_RPbins" to compare NP vs. RP using binned data
        LCMScompare = "NPbins_v_MassHuntNP" to comapre NP binned vs. NP mass hunter
    """

    if LCMScompare == "NPbins_v_RPbins":
        inLCMSData_list = ['NPbins50x50', 'RPbins50x50'] #datafile names
        inLCMSData_desc = ['Normal phase, 50x50 intensity grid',
                       'Reverse phase, 50x50 intensity grid'] #graph labels
        color_list = ["taupe", "plum"] #xkcd colors

    elif LCMScompare == "NPbins_v_MassHuntNP":
        inLCMSData_list = ['NPbins50x50', 'MassHuntNP']
        inLCMSData_desc = ['Normal phase, 50x50 intensity grid',
                       'Normal phase, Mass Hunter'] 
        color_list = ["taupe", "dark teal"] 

    elif LCMScompare == "NonInvasives":
        inLCMSData_list = ['SalivaMH','UrineMH']
        inLCMSData_desc = ['Saliva','Urine'] 
        color_list = ["sky blue","marine blue"]

    elif LCMScompare == "MassHunt _RPvNP":
        inLCMSData_list = ['MassHuntRP_noFill','MassHuntNP']
        inLCMSData_desc = ['Reverse phase, Mass Hunter','Normal phase, Mass Hunter'] #order matches inLCMSData_list
        color_list = ["plum","teal"]

    elif LCMScompare == "RP_noFillvFill":
        #eventually may instead want MassHuntRP_fill vs. MassHuntRP_isotope
        inLCMSData_list = ['MassHuntRP_noFill','MassHuntRP_fill']
        inLCMSData_desc = ['RP Mass Hunter - no Fill','RP Mass Hunter - with Fill'] 
        color_list = ["plum","mauve"] #'sea blue'

    #first name listed will appear closest to bottom of y-axis
    predcat_names = ['Clinical+LCMS','LCMS only','Clinical only'] 
    alg_list = ['Super Learner','Gradient Boost','AdaBoost','Random Forests']

    figName = ginfo.FileNamePrefix + '_' + LCMScompare 
    plt.figure(figsize=(6.7,8)) 

    ## Prepare data to be graphed   
    df_list = []
    for inLCMSData in inLCMSData_list:
        for predcat in predcat_names:
            if predcat=='Clinical+LCMS':
                resultsDF = pd.read_csv(outDir + 'R_' + ginfo.FileNamePrefix + \
                    "_covarlist_all_" + inLCMSData + FileNameSuffix2 + '.txt', sep=",")
            elif predcat=='Clinical only':
                resultsDF = pd.read_csv(outDir + 'R_' + ginfo.FileNamePrefix + \
                     "_covarlist_all_" + inLCMSData + 'patients' + FileNameSuffix2 + '.txt', sep=",")
            elif predcat=='LCMS only':
                resultsDF = pd.read_csv(outDir + 'R_' + ginfo.FileNamePrefix + \
                     "_covarlist_" + inLCMSData + FileNameSuffix2 + '.txt', sep=",")
            df_list.append(resultsDF)

    ## To fill in during loop
    positions = []
    measurements = []
    colors = []
    method_labels = []
    colors_legend = []
    ytick_labels = []
    ytick_positions = []
    SEs = []
    ymax = 0
    bar_width = 1
    mycolor_list = sns.xkcd_palette(color_list)

    #loop thru predcat_names ("clinical only", "lcms only" etc.)
    for p, predcat in enumerate(predcat_names):

        #cycle through algorithm list ('adaboost', 'RF', etc.)
        for a, alg in enumerate(alg_list):

            #cycle LCMS methods ('urine','RP','NP','masshunt' etc.)
            for d, dataType in enumerate(inLCMSData_list):
                
                df = df_list[d*len(predcat_names) + p]

                #text section headings
                if a==len(alg_list)-1 and d==len(inLCMSData_list)-1:
                    plt.text(.52, ymax+1, predcat_names[p], weight='bold') 
                
                #append to running list of values
                myrow = df.loc[df['Unnamed: 0']==alg]
                measurement = float(myrow['cvAUC'])
                measurements.append(measurement)
                z = stats.norm.ppf(.95)
                SE = float(myrow['se'])
                #SE = [( float(myrow['cvAUC']) - float(myrow['ci_low']) )/z, 
                #        ( float(myrow['ci_up']) - float(myrow['cvAUC']) )/z ]
                SEs.append(SE)
                positions.append(ymax)
                colors.append(mycolor_list[d])
                #add numeric values to plot
                xpos = float(myrow['ci_low']) -.05
                ypos = ymax - .3
                mytext = "%.2f" % measurement
                plt.text(xpos, ypos, mytext, color="white", fontsize=10)
                if d==0:
                    ytick_labels.append(alg)
                    ytick_positions.append(ymax+.5)
                ymax += bar_width

        #add space between groups of bars segmented by predcat values
        ymax += bar_width*3

    print(np.array(SEs))
    plt.barh(positions, measurements, height=bar_width,
                    xerr=np.array(SEs), error_kw=dict(ecolor='.1', lw=1, capsize=1, capthick=1),
                    align='center', alpha=1, color=colors)
    plt.yticks(ytick_positions, ytick_labels) #size=16
    plt.xlim(.5, 1)
    plt.ylim(-2, ymax) 

    #make left spacing large enough for labels.  Default is  .1, .9, .9, .1
    plt.subplots_adjust(left=.22, right=.9, top=.9, bottom=.1)
    lhandles = []
    for mycolor in mycolor_list[::-1]:
        hand = mpatches.Patch(color=mycolor)
        lhandles.append(hand)
    leg = plt.legend(lhandles, inLCMSData_desc[::-1])
    plt.tight_layout()
    plt.savefig(outDir + figName + '.eps', dpi=1200)
    plt.close() 
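
# A hypothetical invocation sketch: ginfo, outcome and FileNameSuffix2 are
# supplied by the surrounding pipeline and are not shown in this snippet.
create_LCMS_barplot(ginfo, "NPbins_v_RPbins", outcome, FileNameSuffix2)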
Example #12
# plt.pcolor(dist, cmap=current_cmap)
masked_array = np.ma.array(dist, mask=np.isnan(dist))

np.savetxt('distance_matrix.txt', dist, delimiter=',',  fmt='%1.4e')

#cmap = matplotlib.colors.ListedColormap(['black', 'grey', 'green', 'red',
#                                         'blue', 'black', 'black'])
#cmap.set_bad('black', 0.8)
#boundaries = [0, 0.001, 0.4, 0.5, 0.55, 0.65, 0.8, 1]
#norm = matplotlib.colors.BoundaryNorm(boundaries, cmap.N, clip=True)


# plt.pcolor(masked_array, cmap='gist_rainbow',
  #          vmin=0.3, vmax=0.6)
plt.axvline(x=78, label='-OSA-', c='w', linewidth=4)
plt.axhline(y=78, label='-OSA-', c='w', linewidth=4)
# plt.colorbar()
# plt.show()

l2 = masked_array
l2 = l2 + 0.001
l2 = l2/l2.max()

# uneven bounds changes the colormapping
sns.set()
colors = ["black", "blue", "brown", "red", "yellow", "white"]
from matplotlib.colors import PowerNorm  #import needed for the norm below
sns.heatmap(l2, cmap=sns.xkcd_palette(colors), norm=PowerNorm(gamma=1), vmin=0.6, vmax=l2.max())
# sns.heatmap(l2)
# plt.show()
plt.savefig('/home/milad/geodesic_l2.png', dpi=1000)
Example #13
#
SAMPLE_ANNOTATION_FILE = 'sample_annotation_file'
COUNT_FILE = 'count_file'
DISPLAY_COUNT = 'display_count'
DGE_FILE = 'dge_file'
SEQ_DEPTH_FILE = 'seq_depth_file'
FDR_THRESHOLD = 'fdr_threshold'
GROUP_1 = 'g1'
GROUP_2 = 'g2'

DEFAULT_FDR = 0.05

DEFAULT_COLORS = sns.xkcd_palette(["windows blue",
                                   "amber",
                                   "greyish",
                                   "faded green",
                                   "dusty purple",
                                   "pale blue",
                                   "green yellow",
                                   "pumpkin"])

cc = mpl.colors.ColorConverter()
DEFAULT_COLORS = cc.to_rgba_array(DEFAULT_COLORS, alpha=0.5)

class MakeAbsolutePathAction(argparse.Action):
    def __call__(self, parser, namespace, values, option_string=None):
        setattr(namespace, self.dest, os.path.realpath(os.path.abspath(values)))


def parse_cl_args():
    '''
    Parses the command line args
    '''


def plot_hexbin(type, path, cmap):
    #signature inferred from the plot_hexbin(...) calls below; the original
    #def line is missing from this snippet
    plt.figure(figsize=0.75 * np.array(snakemake.config["plots"]["figsize"]))
    plt.subplot(111, aspect="equal")
    #plt.scatter(counts["known"], counts[type], s=1, c="k", alpha=0.3, rasterized=True, edgecolors="face", marker="o")
    plt.hexbin(counts["known"], counts[type], cmap=cmap, gridsize=25, clip_on=True)

    maxv = max(plt.xlim()[1], plt.ylim()[1])

    plt.plot([0, maxv], [0, maxv], "--k")
    plt.xlim((0, maxv))
    plt.ylim((0,maxv))
    plt.ylabel("predicted")
    plt.xlabel("truth")
    sns.despine()
    plt.savefig(path, bbox_inches="tight")

colors = sns.xkcd_palette(["grey", "light red"])

plot_hexbin("raw", snakemake.output.scatter_raw, colors[0])
plot_hexbin("posterior", snakemake.output.scatter_posterior, colors[1])

errors = pd.concat(errors)

x, y = snakemake.config["plots"]["figsize"]
plt.figure(figsize=(x * 1.5, y))

pred_errors = errors[(errors["type"] == "raw") | (errors["type"] == "posterior")]
#bins = pd.cut(pred_errors["known"], 
#              [0, 6, 11, 16, 21, 26, 30, 100000], 
#              right=False, 
#              labels=["0-5", "6-10", "11-15", "16-20", "21-25", "26-30", "≥30"])
#pred_errors["bin"] = bins
from ActiveShapeModels import ASM, Point, Shape
import matplotlib.pyplot as plt
import seaborn as sns
import math
import numpy as np

s1 = Shape( [ Point(200,300), Point(100, 200), Point(300, 50 ) ] )
s2 = Shape( [ Point(150,250), Point(50, 100 ), Point(250, 0) ] )



f, ((ax1,ax2),(ax3,ax4)) = plt.subplots(2,2, sharex =True, sharey = True)
s1.draw( sns.xkcd_palette( ["light blue" ]), ax1)
s2.draw( sns.xkcd_palette( ["light blue"] ), ax2)


cmShape  = ASM.centroid( s1)

cmMeanShape = ASM.centroid( s2  )


ax1.scatter( cmShape.x, cmShape.y, c='r')
ax2.scatter( cmMeanShape.x, cmMeanShape.y, c='r')
ax1.plot( [s1.shapePoints[0].x, s1.shapePoints[1].x],
         [s1.shapePoints[0].y, s1.shapePoints[1].y],
         color= 'r', ls = '-')

ax2.plot( [s2.shapePoints[0].x, s2.shapePoints[1].x],
         [s2.shapePoints[0].y, s2.shapePoints[1].y],
         color= 'r', lw = 1, ls = '-')
Example #16
load_growth_grouped_total = load_growth.groupby(temp_bins).agg(['sum'])
load_growth_grouped_total['temp_bin'] = temp_labels

# determine max usage by temperature bin
load_growth_grouped_max = load_growth.groupby(temp_bins).agg(['max'])
load_growth_grouped_max['temp_bin'] = temp_labels

# PLOT 1
fig, axes = plt.subplots(nrows=3, ncols=1, sharex=True, sharey=False)
colors = ["windows blue", "amber", "greyish", "faded green"]

# subplot 1: mean
load_growth_grouped_mean.plot.bar(x='temp_bin',
                                  y=['AEV', 'CCHP', 'PHEV', 'eBike'],
                                  label=['AEV', 'CCHP', 'PHEV', 'eBike'],
                                  color=sns.xkcd_palette(colors),
                                  ax=axes[0],
                                  legend=False)
axes[0].tick_params(rotation=0)
axes[0].set_ylabel('mean kWh growth')
fig.legend(loc="center right")

# subplot 2: total
load_growth_grouped_total.plot.bar(x='temp_bin',
                                   y=['AEV', 'CCHP', 'PHEV', 'eBike'],
                                   label=['AEV', 'CCHP', 'PHEV', 'eBike'],
                                   color=sns.xkcd_palette(colors),
                                   ax=axes[1],
                                   legend=False)
axes[1].tick_params(rotation=0)
axes[1].set_ylabel('total kWh growth')
Example #17

sns.palplot(sns.color_palette("husl", 8))


# Let me explain these qualitative (or categorical) palettes. They are best when you want to distinguish discrete chunks of data that have no inherent ordering. When Seaborn is imported, the default color cycle is changed to a set of six colors that evoke the standard matplotlib color cycle. But when there are more than six categories to distinguish, say eight, the most common approach is to use the `hls` color space, which is a simple transformation of *RGB* values.
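
# For instance, a quick sketch (my own illustration, not an original cell):

sns.palplot(sns.color_palette("hls", 8))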

# Then there is also the `hls_palette()` function, which lets you control the *lightness* and *saturation* of the colors.
# 
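
# As a hedged sketch of `hls_palette()`, the same eight colors made darker
# and more saturated (the parameter values are purely illustrative):

sns.palplot(sns.hls_palette(8, l=.3, s=.8))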
# Everything displayed above is just basic Seaborn aesthetics. Let us now look at the *xkcd_rgb* dictionary, which has 954 colors in it. Let us try to pull a few out of it:

# In[9]:


sample_colors = ["windows blue", "amber", "greyish", "faded green", "dusty purple", "pale red", "medium green", "denim blue"]
sns.palplot(sns.xkcd_palette(sample_colors))


# Another style is the `cubehelix` color palette, which makes sequential palettes with a linear increase or decrease in brightness and some variation in [hue](https://en.wikipedia.org/wiki/Hue). Let us plot this color palette in a density contour plot:

# In[15]:


# Default Matplotlib Cubehelix version:
sns.palplot(sns.color_palette("cubehelix", 8))


# In[16]:


# Default Seaborn Cubehelix version:
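

sns.palplot(sns.cubehelix_palette(8))


# And a minimal sketch of the promised density contour plot (the data below
# is synthetic, purely for illustration):

import numpy as np
x, y = np.random.multivariate_normal([0, 0], [[1, .5], [.5, 1]], 500).T
cmap = sns.cubehelix_palette(light=1, as_cmap=True)
sns.kdeplot(x, y, cmap=cmap, shade=True)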
Example #18
import matplotlib.pyplot as plt
import os
import pandas as pd
from scipy import stats
import seaborn as sns
import statsmodels
import statsmodels.api as sm
import subprocess

colors = [ "amber", "faded green"]
palette = sns.xkcd_palette(colors)
sns.palplot(palette)

colors = [ "dusty blue", "greyish"]
es = sns.xkcd_palette(colors)
sns.palplot(es)

colors = [ "dusty purple", "grey"]
pur = sns.xkcd_palette(colors)
sns.palplot(pur)

colors = [ "amber", "greyish", "faded green", "grey"]
enhpal = sns.xkcd_palette(colors)
sns.palplot(enhpal)

colors = [ "amber", "greyish",  "dusty purple", "brown grey",  "windows blue", "bluey grey"]
archpal = sns.xkcd_palette(colors)
sns.palplot(archpal)

FANTOMPATH = "/dors/capra_lab/projects/enhancer_ages/fantom/data/all_fantom_enh/ages/"
FANTOMFILE = "syn_breaks_all_fantom_enh_ages.bed"
                losses.append(float(w[0]))

    return losses, nfes


mnist_singlescale_loss, mnist_singlescale_nfes = get_values(MNIST_SINGLESCALE)
mnist_multiscale_loss, mnist_multiscale_nfes = get_values(MNIST_MULTISCALE)

import brewer2mpl
line_colors = brewer2mpl.get_map('Set2', 'qualitative', 4).mpl_colors
dark_colors = brewer2mpl.get_map('Dark2', 'qualitative', 4).mpl_colors

import seaborn as sns
sns.set_style("whitegrid")
colors = ["windows blue", "amber", "greyish", "faded green", "dusty purple"]
sns.palplot(sns.xkcd_palette(colors))

plt.figure(figsize=(4, 2.6))
plt.scatter(mnist_singlescale_nfes[::10],
            mnist_singlescale_loss[::10],
            color=line_colors[1],
            label="Single FFJORD")
plt.scatter(mnist_multiscale_nfes[::10],
            mnist_multiscale_loss[::10],
            color=line_colors[2],
            label="Multiscale FFJORD")

plt.ylim([0.9, 1.25])
plt.legend(frameon=True, fontsize=10.5)
plt.xlabel("NFE", fontsize=18)
plt.ylabel("Bits/dim", fontsize=18)
"""
import random
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from mpl_toolkits.axes_grid.inset_locator import inset_axes
sns.set_style('white')
sns.set_context('paper', font_scale=1.5)
from glob import glob
from figtools import *
from fusco import SFS

RED, BLUE, GREEN = sns.xkcd_palette(["amber", "dusty purple", "faded green"])
sns.set_context('paper', font_scale=1.5)

pi = np.pi
mu = 0.02
alpha = 30
fusco_alpha = 0.55
fusco_beta = 2.3
mu_drivers = 2 * 2e-5
"""
Allele Frequency Spectra. 
Figure 1
"""

root_folder = '../model/experiments/u0.01/'
death_rate = '005'
Example #21
File: plot_util.py Project: t-rutten/efn
def plotCategoricalPerformance(x,
                               y,
                               legendstrs=[],
                               plottype='scatter',
                               color_palette=sns.xkcd_palette(colors),  #colors: module-level list of xkcd names
                               dotsize=5,
                               shift=1):
    fontsize = 16
    num_trends = len(y)
    xlen = x.shape[0]
    assert (xlen == y[0].shape[0])
    Ns = []
    for i in range(num_trends):
        Ns.append(y[i].shape[1])
    maxN = max(Ns)

    sizes = dotsize * np.ones((1, ))
    # set up legend
    if (len(legendstrs) > 0):
        for i in range(num_trends):
            color = np.tile(np.array([color_palette[i]]), [1, 1])
            if (plottype == 'scatter'):
                plt.scatter(x[0], y[i][0, 0], np.array([dotsize]), c=color)
            elif (plottype == 'errorBar'):
                plt.scatter(x[0],
                            np.mean(y[i][0, :]),
                            np.array([dotsize]),
                            c=color)
        plt.legend(legendstrs, fontsize=fontsize)

    if (plottype == 'scatter'):
        xvals = np.zeros((num_trends * xlen * maxN, ))
        yvals = np.zeros((num_trends * xlen * maxN, ))
        colors = np.zeros((num_trends * xlen * maxN, 3))
        sizes = dotsize * np.ones((num_trends * xlen * maxN, ))
        ind = 0
        sawzorn = False
        for i in range(num_trends):
            if (plottype == 'scatter'):
                xshift_i = (i - (num_trends - 1) / 2) * shift
            else:
                xshift_i = 0
            N = Ns[i]
            for j in range(xlen):
                for n in range(N):
                    yval = y[i][j, n]
                    if (not sawzorn and (yval == 0 or np.isnan(yval))):
                        print('saw a zero or nan')
                        sawzorn = True
                        continue
                    yvals[ind] = yval
                    colors[ind, :] = np.array([color_palette[i]])
                    xvals[ind] = x[j] + xshift_i
                    ind += 1
        plt.scatter(xvals[:ind], yvals[:ind], sizes[:ind], c=colors[:ind])

    elif (plottype == 'errorBar'):
        sizes = dotsize * np.ones((xlen, ))
        means = np.zeros((num_trends, xlen))
        stds = np.zeros((num_trends, xlen))
        for i in range(num_trends):
            # make sure at the end there are no nans!
            means_i = np.nanmean(y[i], 1)
            means[i] = means_i
            stds_i = np.nanstd(y[i], 1) / np.sqrt(Ns[i])
            stds[i] = stds_i
            plt.plot(x, means_i, '-', c=color_palette[i], lw=2)
        for i in range(num_trends):
            for j in range(xlen):
                plt.plot([x[j], x[j]],
                         [means[i, j] - stds[i, j], means[i, j] + stds[i, j]],
                         '-',
                         c=color_palette[i],
                         lw=2)

    return None
Example #22
           col_wrap=3, data=all_data13, order=1,palette=palette,size=4).set(ylim=(0, 1))

#==============================================================================
# Unsupervised Learning - Cluster analysis on Shell data
#==============================================================================
from sklearn.cluster import KMeans

shell=pd.DataFrame()
shell=all_data13[all_data13['name']=='RDSB.L']
# We need to scale also oil price, so clustering is not influenced by the relative size of one axis.
shell['oil_price_scaled']=scaler.fit_transform(shell['oil_price'].to_frame())
shell['cluster'] = KMeans(n_clusters=6, random_state=1).fit_predict(shell[['share_price_scaled','oil_price_scaled']])

# The 954 most common RGB monitor colors https://xkcd.com/color/rgb/
colors = ['baby blue', 'amber', 'scarlet', 'grey','milk chocolate', 'windows blue']
palette=sns.xkcd_palette(colors)

sns.lmplot(x='oil_price', y='share_price_scaled',ci=None,palette=palette, hue='cluster',fit_reg=0 ,data=shell)

#==============================================================================
# Supervised learning linear regression
#==============================================================================

from sklearn import linear_model

# 1.- Data preparation
shell15=pd.DataFrame()
shell15=all_data13[(all_data13['name']=='RDSB.L') & (all_data13['year']>2015 )] # Extract data from years 2016/17
shell15=shell15[['share_price','oil_price']].reset_index()

# Just using 1 variable for linear regression. To try with more variables use randomforest
Example #23
def prepare_and_plot_1_1_a(count_records=True):
    path_in = os.getcwd()
    pattern = '^.*/thesis-data-anonymisation/'
    path = re.search(pattern, path_in).group(0)

    file_path_adult = path + 'data/result/safepub_test/1_1/adult'

    file_path_housing = path + 'data/result/safepub_test/1_1/housing'
    if count_records:
        filename = "num_suppressed_records_granularity.csv"
    else:
        filename = "num_suppressed_attributes_granularity.csv"

    dataset_adult = pd.read_csv(path + 'data/adult/adult.csv')
    dataset_res_adult = pd.read_csv(file_path_adult + '/' + filename)

    dataset_housing = pd.read_csv(path + 'data/housing/housing.csv')
    dataset_res_housing = pd.read_csv(file_path_housing + '/' + filename)

    if count_records:
        denom_adult = len(dataset_adult.values)
        denom_housing = len(dataset_housing.values)
        y_label = 'Suppressed records'
    else:
        denom_adult = len(dataset_adult.columns)
        denom_housing = len(dataset_housing.columns)
        y_label = 'Suppressed attributes'

    plot_path = path + "data/result/plots/1_1_a_" + y_label + ".jpg"

    frequencies = []
    epsilons = []
    datasets = []

    for eps in dataset_res_adult.columns:
        freqs_adult = dataset_res_adult[eps] / denom_adult
        frequencies += list(freqs_adult)
        epsilons += [float(eps)] * len(freqs_adult)
        datasets += ['Adult'] * len(freqs_adult)

        freqs_housing = dataset_res_housing[eps] / denom_housing
        frequencies += list(freqs_housing)
        epsilons += [float(eps)] * len(freqs_housing)
        datasets += ['Housing'] * len(freqs_housing)

    array = np.array([epsilons, frequencies]).T

    df = pd.DataFrame(array, columns=['ε', y_label])
    df['Dataset'] = datasets

    ax = sns.lineplot(x='ε',
                      y=y_label,
                      hue='Dataset',
                      data=df,
                      palette=sns.xkcd_palette(['teal', 'orange',
                                                'deep pink']))
    ax.set(ylim=(0, 1.05))
    #plt.show()
    plt.savefig(plot_path)
    plt.clf()

    return
Example #24
                pyplot.savefig('./plots/distributions_' + strict_name +
                               '/distribution_augmentation_' +
                               str(crop_metric) + kk + SMALL + '.pdf',
                               dpi=1000)

            all_nets = [experiments.opt[i + 6].name for i in range(5)]
            name_nets = [
                'Non Regularized', 'Data augment.', 'Dropout', 'Weight Decay',
                'All Regularizers'
            ]

            colors = ["amber", "greyish", "orange", "black"]

            for idx_metric, crop_metric in enumerate(crops):
                cc = itertools.cycle(sns.xkcd_palette(colors))
                fig, ax = pyplot.subplots()
                for idx_net, nets in enumerate(all_nets):
                    if SMALL == '':
                        tmp = np.load(PATH_TO_DATA + '/tmp_results_' + kk +
                                      nets + '.npy')
                    else:
                        tmp = np.load(PATH_TO_DATA + '/tmp_results_' + kk +
                                      nets + '_' + SMALL + '.npy')

                    mm = np.zeros([TOTAL])
                    for image_id in range(TOTAL):
                        mm[image_id] += tmp[idx_metric][STRICT][0][image_id][0]
                        mm[image_id] += tmp[idx_metric][STRICT][0][image_id][1]

                    if idx_net == 0:
from ActiveShapeModels import ASM, Point, Shape
import matplotlib.pyplot as plt
import seaborn as sns
import math
import numpy as np

#s1 = Shape( [ Point(200,300), Point(100, 200), Point(300, 50 ) ] )
#s2 = Shape( [ Point(150,250), Point(50, 100 ), Point(250, 0) ] )



s1 = Shape( [ Point(857, -129), Point(89,-409), Point(-404,254), Point( 96,957), Point(877,712) ])

f, ((ax1,ax2),(ax3,ax4)) = plt.subplots(2,2)

s1.draw( sns.xkcd_palette( ["light blue" ]), 0, ax1)
#s2.draw( sns.xkcd_palette( ["light blue"] ), ax2)

cmShape  = ASM.centroid( s1 )
#cmMeanShape = ASM.centroid( s2  )


ax1.scatter( cmShape.x, cmShape.y, c='r')
#ax2.scatter( cmMeanShape.x, cmMeanShape.y, c='r')
ax1.plot( [s1.shapePoints[0].x, s1.shapePoints[1].x],
         [s1.shapePoints[0].y, s1.shapePoints[1].y],
         color= 'r', ls = '-')

#ax2.plot( [s2.shapePoints[0].x, s2.shapePoints[1].x],
#         [s2.shapePoints[0].y, s2.shapePoints[1].y],
#         color= 'r', lw = 1, ls = '-')
Example #26
def plot_kdes(labels=None,
              results=None,
              category=None,
              df=None,
              label_col=None,
              result_col=None,
              colors=None,
              **kwargs):
    """
    Plots KDEs and Cumulative KDEs
    Requires seaborn for plotting

    Can either pass in arrays of labels/results or else df

    Parameters
    -----------
    labels : array_like
        categorical values
    results : array_like
        numerical values
    category : string, optional
        name of label category for plotting, e.g. 'Gender'
    df : pandas DataFrame, optional
    label_col : string, optional
        name of labels column in df
    result_col : string, optional
        name of results column in df
    colors : list of strings, optional
        takes xkcd hue labels, e.g. ['red', 'blue', 'mustard yellow']
        more here: https://xkcd.com/color/rgb/

    Returns
    --------
    ax : numpy array of matplotlib axes

    Plots
    -------
    (1,2) subplots: KDE and cumulative KDE by group in `labels`
    """
    import seaborn as sns
    if df is None:
        df = pd.DataFrame(list(zip(labels, results)),
                          columns=['label', 'result'])
    else:
        df = df.rename(columns={label_col: 'label', result_col: 'result'})
    unique_labels = df.label.dropna().unique()
    nlabels = len(unique_labels)

    # Check if there is a distribution to plot in each group
    stds = df.groupby('label')[['result']].std()
    if 0 in stds.values:
        groups = stds.index[stds['result'] == 0].values
        print('No distribution of results in groups: %s' %
              ', '.join([str(i) for i in groups]))
        return

    if not colors:
        base_colors = ['red', 'blue']
        others = list(set(sns.xkcd_rgb.keys()) - set(base_colors))
        extra_colors = list(np.random.choice(others, nlabels, replace=False))
        colors = list(base_colors + extra_colors)[:nlabels]
    sns.set_palette(sns.xkcd_palette(colors))
    fig, ax = plt.subplots(1, 2, figsize=(16, 6))
    if not category:
        category = '_vs_'.join(map(str, unique_labels))
    ax[0].set_title("%s KDEs" % category)
    ax[1].set_title("%s Cumulative KDEs" % category)
    ax[0].set_ylabel('Frequency')
    ax[1].set_ylabel('Group Fraction Below')
    ax[0].set_xlabel('Threshold')
    ax[1].set_xlabel('Threshold')
    for lab in unique_labels:

        sns.kdeplot(df.loc[df.label == lab].result,
                    shade=True,
                    label=lab,
                    ax=ax[0],
                    **kwargs)
        sns.kdeplot(df.loc[df.label == lab].result,
                    shade=False,
                    label=lab,
                    ax=ax[1],
                    cumulative=True,
                    **kwargs)

    ax0_max_y = max([max(i.get_data()[1]) for i in ax[0].get_lines()])
    ax[0].set_ylim(0, ax0_max_y * 1.1)
    plt.show()

    return ax
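
# A minimal usage sketch for plot_kdes (synthetic data, purely illustrative):
import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
demo = pd.DataFrame({
    'label': ['a'] * 500 + ['b'] * 500,
    'result': np.concatenate([rng.normal(0, 1, 500), rng.normal(1, 1, 500)]),
})
plot_kdes(df=demo, label_col='label', result_col='result', category='Demo')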
Example #27
                   "windows blue",
                   "medium green",
                   "dusty purple",
                   "orange",
                   "amber",
                   "clay",
                   "pink",
                   "greyish",
                   "light cyan",
                   "steel blue",
                   "forest green",
                   "pastel purple",
                   "mint",
                   "salmon",
                   "dark brown"]
    colors = sns.xkcd_palette(color_names)
    cmap = gradient_cmap(colors)
except:
    from matplotlib.cm import get_cmap
    colors = ['b', 'r', 'y', 'g', 'purple']
    cmap = get_cmap("jet")


from pybasicbayes.util.text import progprint_xrange
from pylds.util import random_rotation
from pyslds.models import DefaultSLDS

npr.seed(0)

# Set parameters
K = 5
import matplotlib
import matplotlib.cm as cm
import matplotlib.colors as colors
from mpl_toolkits.mplot3d import Axes3D

matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42

sns.set(color_codes=True, context="poster")
sns.set_style("white", {
    'font.family': 'serif',
    'font.serif': 'Times New Roman'
})

cc = ["light red", "cyan", "apricot"]
sns.set_palette(sns.xkcd_palette(cc), desat=.9)

GEN = 1000  # 500
EXP_NAME = "XENO_3"  #"XENO_Quad_Big"

GET_FRESH_PICKLES = False

PLOT_TRACE = True

RUNS = 100
# NUM_INDS_TO_PLOT_PER_POP = 10

N_ROWS = 10  # 5
N_COLS = 10  # 5

PICKLE_DIR = "/home/sam/Projects/research_code/evosoro/data_analysis/results/{0}_Gen_{1}".format(
Example #29
#imports used below (the snippet's original import block was truncated)
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from pandas import DataFrame


def lc_plot(train_loss_list, train_acc_list, test_loss_list, test_acc_list,
            name):
    iter_list = list(np.arange(0, len(train_loss_list))) + list(
        np.arange(0, len(test_loss_list)))
    type_list = ['Train'] * len(train_loss_list) + ['Test'
                                                    ] * len(test_loss_list)

    colors = ['windows blue', 'watermelon']
    palette = sns.xkcd_palette(colors)
    pdf = PdfPages('plot' + name + '.pdf')
    plt.figure(figsize=(20, 6.5))
    sns.set(style="whitegrid")

    ax1 = plt.subplot(1, 2, 1)

    loss_frame = {
        'Iteration': iter_list,
        'Loss': train_loss_list + test_loss_list,
        'Dataset': type_list
    }
    loss_frame = DataFrame(loss_frame)

    g = sns.lineplot(x="Iteration",
                     y="Loss",
                     hue='Dataset',
                     style='Dataset',
                     data=loss_frame,
                     legend='full',
                     err_style='bars',
                     palette=palette,
                     linewidth=2,
                     err_kws={'elinewidth': 2},
                     ax=ax1)

    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)
    plt.xlabel("Iteration", fontsize=12)
    plt.ylabel("Loss", fontsize=12)
    leg = g.legend(loc='lower left', fontsize=12)
    for legobj in leg.legendHandles:
        legobj.set_linewidth(2.0)

    ax1 = plt.subplot(1, 2, 2)

    loss_frame = {
        'Iteration': iter_list,
        'Acc': train_acc_list + test_acc_list,
        'Dataset': type_list
    }
    loss_frame = DataFrame(loss_frame)

    g = sns.lineplot(x="Iteration",
                     y="Acc",
                     hue='Dataset',
                     style='Dataset',
                     data=loss_frame,
                     legend='full',
                     err_style='bars',
                     palette=palette,
                     linewidth=2,
                     err_kws={'elinewidth': 2},
                     ax=ax1)

    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)
    plt.xlabel("Iteration", fontsize=12)
    plt.ylabel("Acc", fontsize=12)
    leg = g.legend(loc='lower right', fontsize=12)
    for legobj in leg.legendHandles:
        legobj.set_linewidth(2.0)

    pdf.savefig(bbox_inches='tight')
    pdf.close()
    plt.show()
Example #30
# generate multi level index for columns for precision
spstd = pd.concat([sstd, pstd], axis=1)
multi_cols = zip((sens_prec_names), spstd.columns)
multi_cols = pd.MultiIndex.from_tuples(multi_cols, names=['SP','Method'])
spstd.columns = multi_cols

#------------------------------------------------------------------------------
#
#                 P L O T T I N G   O F   R E S U L T S
#
#------------------------------------------------------------------------------

# setup seaborn style
colors = ["windows blue", "faded green"]
sns.set_palette(sns.xkcd_palette(colors))
sns.set_style("whitegrid", {"grid.color": ".9"})
sns.set_context("talk")

nrows = 1
ncols = 2
fig, ax = plt.subplots(nrows=nrows, ncols=ncols, sharex=True, sharey=True)
fig.set_figheight(3.5)
fig.set_figwidth(8.27)
i = 0
ratios_to_plot = range(2)
for c in range(ncols):
    ratio = ratios_to_plot[c]
    means = spmean.iloc[ratio,].T.unstack().T
    means.index.name=""
    error_bars = spstd.iloc[ratio,].T.unstack().T
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
import matplotlib.ticker as ticker
import numpy as np
import os, sys
from scipy import stats
import seaborn as sns
import statsmodels
import statsmodels.api as sm


RE ="/dors/capra_lab/projects/enhancer_ages/fantom/results/for_publication/age_breaks/"


colors = [ "amber", "faded green", "dusty purple", "windows blue","greyish"]
palette = sns.xkcd_palette(colors)
sns.palplot(palette)


shuf_colors = [ "amber", "greyish",]
shuf_pal = sns.xkcd_palette(shuf_colors)


#%% Files


path = "/dors/capra_lab/projects/enhancer_ages/fantom/data/"

enh = "%sFANTOM_enh_age_arch_full_matrix.tsv" % path
summaryEnh = "%sFANTOM_enh_age_arch_summary_matrix.tsv" % path
Example #32
def prepare_and_plot_3_1_info_loss(dataset_name):
    path_in = os.getcwd()
    pattern = '^.*/thesis-data-anonymisation/'
    result_path = re.search(pattern, path_in).group(0) + 'data/result/'

    epsilons = ['1.0', '2.0']

    record_linkage = []
    sse = []
    model = []
    epsis = []

    plot_path = result_path + '/plots/needs_editing/3_1_b_' + dataset_name + '.jpg'

    for eps in epsilons:
        sc_result_sse = pd.read_csv(result_path + 'sc_test/3_1/' +
                                    dataset_name + '/norm_result_eps-' + eps +
                                    '.csv')['sse']
        sc_result_rl = pd.read_csv(result_path + 'sc_test/3_1/' +
                                   dataset_name + '/result_eps-' + eps +
                                   '.csv')['record_linkage']

        safepub_result_sse = pd.read_csv(result_path + 'safepub_test/3_1/' +
                                         dataset_name +
                                         '/norm_result_granularity_eps-' +
                                         eps + '.csv')['sse']
        safepub_result_rl = pd.read_csv(result_path + 'safepub_test/3_1/' +
                                        dataset_name +
                                        '/result_granularity_eps-' + eps +
                                        '.csv')['record_linkage']

        record_linkage += list((sc_result_rl.values * 1000).astype(int))
        sse += list(sc_result_sse.values)
        model += ['MicroDP'] * len(sc_result_rl.values)
        epsis += [float(eps)] * len(sc_result_rl.values)
        #
        record_linkage += list((safepub_result_rl.values * 1000).astype(int))
        sse += list(safepub_result_sse.values)
        model += ['SafePub'] * len(safepub_result_rl.values)
        epsis += [float(eps)] * len(safepub_result_rl.values)

    array = np.array([record_linkage, sse, epsis]).T

    df = pd.DataFrame(array,
                      columns=['Record linkage', 'Information loss', 'ε'])
    df['Model'] = model

    ax = sns.lineplot(x='Record linkage',
                      y='Information loss',
                      hue='Model',
                      data=df,
                      palette=sns.xkcd_palette(['windows blue', 'amber']))
    # k_ax = sns.scatterplot(x='Record linkage', y='Information loss', hue='Model', data=k_df,
    #                       palette=sns.xkcd_palette(['faded green']))

    # ax.set(ylim=(0.0, 1))
    # k_ax.set(xlim=(0.0, 10))

    plt.show()
    # plt.savefig(plot_path)
    plt.clf()
Example #33
                            'axes.titlesize' : 10})

import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

from hips.plotting.layout import create_figure, create_axis_at_location


import seaborn as sns
color_names = ["windows blue",
               "amber",
               "crimson",
               "faded green",
               "dusty purple",
               "greyish"]
colors = sns.xkcd_palette(color_names)
sns.set(style="white", palette=sns.xkcd_palette(color_names))


from hips.plotting.colormaps import harvard_colors, gradient_cmap
#colors = harvard_colors()

T = 1000
D = 50
n = T // D

def sample_mixture_model(lmbda, p):
    """
    Simple mixture model example
    """
    # Simulate latent states
    l1, l2 = l_size[0], l_size[1]
    num_orbitals = file_input['NOrbitals'].value

    return num_orbitals, l1, l2


# ************************************
# keep these definitions for kite website
import seaborn as sns

mpl.rcParams['figure.dpi'] = 100
mpl.rcParams['savefig.dpi'] = 100
sns.set_style("white")
# Kite color scheme
colors = ["dusty purple", "faded green", "windows blue", "amber", "greyish"]
current_palette = sns.xkcd_palette(colors)
sns.set_palette(current_palette)
sns.set_style("ticks")
sns.set_context("talk", font_scale=1.3)

# ************************************
# read h5 just to know the number of moments, enters the name of the DOS file
file_name = 'phmag.h5'
moments_KITE, a_scale, b_scale = get_moments_and_scales(file_name)
num_orbitals, _, _ = get_size(file_name)

# this was the grid where the DOS was evaluated
num_points = 5000
energy1 = np.linspace(0.335, 0.37, num_points)
energy2 = np.linspace(-1.17, -1.2, num_points)
rcParams['xtick.major.width']   = 1.25
rcParams['xtick.minor.width']   = 1.25
rcParams['ytick.major.size']    = 2.5
rcParams['ytick.minor.size']    = 1.5
rcParams['ytick.major.width']   = 1.25
rcParams['ytick.minor.width']   = 1.25
rcParams['text.usetex']         = True
rcParams['xtick.major.pad']     = 6
rcParams['ytick.major.pad']     = 6
rcParams['ytick.direction']     = 'in'
rcParams['xtick.direction']     = 'in'
rcParams['figure.figsize']      = 3.5, 3.5/sc.golden


# Colours.
snscols = sns.xkcd_palette(["windows blue", "amber", "faded green", "greyish", "dusty purple", "pale red"])


# Colourmaps.
whblbk = sns.cubehelix_palette(light=1., dark=0.2, start=0.1, hue=1.0, rot=-0.3, as_cmap=True) 
bkblwh = sns.cubehelix_palette(light=1., dark=0.2, start=0.1, hue=1.0, rot=-0.3, as_cmap=True, reverse=True) 


# Clip the data for plotting.
def clip(arr, maxval=None, minval=None, maskNaN=None, log=False, minNaN=None, maxNaN=None):

    if log:
        arr = np.log10(arr)

    if minNaN is not None:
        maskNaN = minNaN
def drawShape( axis, shape ):
    shape.draw( sns.xkcd_palette( ["light blue"] ), 0, axis)
def create_dataCorrect_barplot(ginfo, inLCMSData):    
    #runs to loop through
    suffix_list = ["", "_C1", "_C2"]
    #labels to appear in graph legend
    list_desc = ["Original Data",
                "Correction of false positive",
                "Correction of false positive and false negative"]
    predictor_desc = "covarlist_all" # "covarlist" or "covarlist_all" or "clinOnly"
    if predictor_desc == "covarlist":
        title = "LC-MS features only"
    elif predictor_desc == "covarlist_all":
        title = "Clinical + LC-MS features"
    if predictor_desc == "clinOnly" :
            figName   = ginfo.FileNamePrefix + '_covarlist_all_' + \
                         inLCMSData + 'patients_dataCorrect' 
            tableName = ginfo.FileNamePrefix + '_covarlist_all_' + \
                        inLCMSData + 'patients.txt'
            title = "Clinical features only"
    else:
        figName   = ginfo.FileNamePrefix + '_' + predictor_desc + '_' + inLCMSData + '_dataCorrect' 
        tableName = ginfo.FileNamePrefix + '_' + predictor_desc + '_' + inLCMSData + '.txt'
    resultsDF = pd.read_csv(outDir + 'R_' + tableName, sep=",")
    #eliminate LDA+shrinkage since it behaves strangely
    #resultsDF = resultsDF[resultsDF['Unnamed: 0'] != "LDA+shrinkage"]
    alg_names = resultsDF['Unnamed: 0'] #algorithm names
    print "alg_names: " , alg_names

    initial_pos = np.arange(len(alg_names))*(
        len(suffix_list)+1)+len(suffix_list)+1
    bar_width = 1
    colors = ["taupe","teal","salmon"]
    mycolors = sns.xkcd_palette(colors)
    plt.figure(figsize=(6.7,8)) #to place next to one another
    #cycle through each patient list
    plots = []
    for counter, suffix in enumerate(suffix_list):
        if predictor_desc == "clinOnly" :
            tableName = ginfo.FileNamePrefix + '_covarlist_all_' + \
                        inLCMSData + 'patients' + suffix + '.txt'
        else:
            tableName = ginfo.FileNamePrefix + '_' + predictor_desc + '_' + \
                        inLCMSData + suffix + '.txt'
        resultsDF = pd.read_csv(outDir + 'R_' + tableName, sep=",")
        #eliminate LDA+shrinkage since it behaves strangely
        #resultsDF = resultsDF[resultsDF['Unnamed: 0'] != "LDA+shrinkage"]
        measurements = np.array(resultsDF['cvAUC'])
        z = stats.norm.ppf(.95)
        SEs = [( np.array(resultsDF['cvAUC']) - np.array(resultsDF['ci_low']) )/z, 
               ( np.array(resultsDF['ci_up']) - np.array(resultsDF['cvAUC']) )/z ]
        alg_pos = initial_pos - counter 
        print "measurements: " , measurements
        print "alg_pos: " , alg_pos
        plot = plt.barh(bottom=alg_pos, width=measurements, height=bar_width,
                        xerr=SEs, error_kw=dict(ecolor='.1', lw=1, capsize=1, capthick=1),
                        align='center', alpha=1, 
                        color=mycolors[counter], label=list_desc[counter])
        #add numeric values to plot
        xpos = np.array(resultsDF['ci_low']) -.05
        ypos = alg_pos - .3
        mytext = ["%.2f" % x for x in measurements]    
        for place, text in enumerate(mytext):
            plt.text(xpos[place], ypos[place], text, color="white", fontsize=10)
        plots.append(plot)
    plt.xlabel = "cvAUC"
    plt.title(title)
    plt.xlim(.5, 1)
    plt.ylim(0,max(initial_pos)+2)
    print "counter: " , counter
    plt.yticks(initial_pos - counter/2, alg_names)
    plt.legend(prop={'size':8})
    plt.tight_layout()
    plt.savefig(outDir + figName + '.eps', dpi=1200)
    plt.close()
Example #38
0
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns




#Takes a combined dataframe of all the results

#Gmean plot


colors = ["windows blue", "orange red", "light brown", "amber", 'purple', 'jade', 'grey']  
myPalette = sns.xkcd_palette(colors) #passing colors to xkcd_palette function

sns.set(style="white") #white background
g = sns.factorplot(x="samp_technique", y="g_mean", hue="classifier", data=combined_df1,
                   saturation=5, size=5, aspect=2.4, kind="bar",
                   palette=myPalette, legend=False)  # legend is built manually below

g.set(ylim=(0, 1)) 
g.despine(right=False) 
g.set_xlabels("") 
g.set_ylabels("G-mean Score")  
g.set_yticklabels("") 


#Matplotlib --legend creation

myLegend = plt.legend(bbox_to_anchor=(0., 1.2, 1., .102),  # left, bottom, width, height
                      prop={'size': 7.5}, loc=10, ncol=3,  # 3 columns per legend row
                      title=r'ROC Score per sampling technique and classifier')
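
# Note: in seaborn >= 0.9 factorplot was renamed catplot (and size= became
# height=); an equivalent call on current seaborn would be, roughly:
# g = sns.catplot(x="samp_technique", y="g_mean", hue="classifier",
#                 data=combined_df1, height=5, aspect=2.4, kind="bar",
#                 palette=myPalette, legend=False)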
Example #39
0
import json
import os
import h5py


import numpy as np
import subprocess as sub
import seaborn as sns
import matplotlib.pyplot as plt

from scipy.signal import savgol_filter

#rc_params = {'lines.linewidth':1.1, 'text.latex.preamble': [r'\usepackage{siunitx}', r'\sisetup{detect-all}', r'\renewcommand*\sfdefault{lcmss}', r'\usepackage{sansmath}', r'\sansmath'], 'text.usetex':True}
rc_params = {'lines.linewidth':1.5, 'text.usetex':True}


palette = sns.color_palette(sns.xkcd_palette(['denim blue','orange red', 'golden', 'medium green','fuchsia', 'aquamarine', 'burnt sienna']), n_colors=10)
sns.set(style='ticks', font='serif', palette='Set1', context='paper', font_scale=1.4, rc=rc_params)


class Trajectory(object):

  """This class defines the object of a gromacs trajectory"""

  def __init__(self, traj_path, basename, basename_trr=None, tracking_dir=os.path.expanduser('~/HiWi/WW/tracking/')):
    """Init sets the path in which the trajectory is found and will define
    respective files for python

    :traj_path: path of trajectory

    """
    if basename_trr is None:
      # The snippet is truncated here; presumably the .trr basename falls
      # back to the plain basename.
      basename_trr = basename
# -*- coding: utf-8 -*-
"""
Created on Wed Sep  8 21:03:11 2021

@author: kevin
"""

import torch
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt

import seaborn as sns
color_names = ["windows blue", "red", "amber", "faded green"]
colors = sns.xkcd_palette(color_names)
sns.set_style("white")
sns.set_context("talk")

from torch.nn.functional import binary_cross_entropy, binary_cross_entropy_with_logits
#from torch.utils.data import DataLoader
#from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from torchvision.utils import make_grid

# %% Simple rate RNN generative model
dt, T, N, tau, s = 0.01, 100, 20, 1, .5
v1, v2 = np.random.randn(N), np.random.randn(N)
Jij = s * np.sqrt(N) * np.random.randn(
    N, N)  #+ np.outer(v1,v1) + np.outer(v2,v2) +np.outer(v1,v2)
### Mask for sparsity
sparsity = 0.6
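# A plausible sketch of the masking step the comment above announces
# (assumption: 'sparsity' is the fraction of connections kept):
mask = (np.random.rand(N, N) < sparsity).astype(float)
Jij = Jij * mask
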
def evaluation(dataset, data_dir, plot_dir):
    plt.rcdefaults()

    #Styles
    sns.set_style('whitegrid', {'axes.linewidth':1.25, 'axes.edgecolor':'0.15',
                                'grid.linewidth':1.5, 'grid.color':'gray'})
    sns.set_color_codes()
    plt.rcParams['figure.figsize'] = (12.0, 9.0)
    plt.rc('text', usetex=False)
    plt.rc('font', size=14.0, family='sans-serif')

    # Data location and scenario
    preprocessor='all'

    # Load configurations
    reader = cr.ConfigReader(data_dir=data_dir, dataset=dataset)
    tdf = reader.load_validation_trajectories(preprocessor=preprocessor, load_config=True)

    # Decode number of layers
    tdf.loc[:, ('classifier','num_layers')] = tdf['classifier']['num_layers'].apply(lambda X:ord(X)-ord('a'))

    ## Plot average best architectures
    top5 = tdf.sort_values([('smac','test_performance')]).head(1)
    lays = int(np.ceil(np.array(top5['classifier']['num_layers']).mean()))

    labels_list = ['Layer_'+str(i) for i in range(1,7)]
    pre_m = top5['preprocessor']['choice'].describe().top

    activations = []
    n_layers = []
    weights = []
    for i in np.arange(1, lays):
        activations.append(top5['classifier']['activation_layer_'+str(i)].describe().top)
        n_layers.append(top5['classifier']['num_units_layer_'+str(i)].mean())
        weights.append(top5['classifier']['weight_init_'+str(i)].describe().top)

    tab = top5.classifier.T.dropna()
    table_list = ['batch_size', 'dropout_output', 'learning_rate', 'lambda2', 'number_epochs', 'solver']
    t = tab.loc[table_list]
    t = t.append(top5['preprocessor']['choice'])

    a = pd.Series(np.array(n_layers))
    bottoms = np.fabs(a.sub(a.max())) / 2

    activ_list = ['relu', 'elu', 'leaky', 'sigmoid', 'tanh', 'scaledTanh', 'linear']
    colr_list = sns.xkcd_palette(["windows blue", "pastel blue", "grey blue", "red orange", "emerald", "pine green", "amber"])
    activation_color_codes = dict(zip(activ_list,colr_list))

    bar_width = 0.1
    colors_bars = [activation_color_codes.get(i) for i in activations]
    with sns.axes_style('ticks'):
        fig_arch = plt.figure(1, figsize=(15.,9.))
        ax_arch = plt.subplot(111)
        bars = ax_arch.bar(np.arange(lays-1)-(bar_width/2), a,
                           bottom=bottoms, width=bar_width, color=colors_bars)
        sns.despine(left=True)
        ax_arch.set_ylabel('Number of units in Layer')
        ax_arch.set_yticklabels([])
        ax_arch.set_yticks([])
        ax_arch.set_xticks(np.arange(lays-1))
        ax_arch.set_xticklabels(labels_list[:lays-1])
        ax_arch = autolabel(bars, ax_arch)
        table_ax(ax_arch, t)
        ax_arch.legend([b for b in bars], activations, loc='best')
        ax_arch.set_title('Single best architecture found for dataset %s' % dataset)
        ax_arch.set_xlim(-0.5, lays-1)
        fig_arch.savefig(plot_dir + "Best_architecture_on_%s.pdf" % dataset)

    # Start filtering the error
    temp_df = tdf.copy()
    temp_df.columns = tdf.columns.droplevel(0)
    min_perf = temp_df['test_performance'].min()
    mean_perf = temp_df['test_performance'].mean()
    std_perf = temp_df['test_performance'].std()
    qtil_10 = temp_df['test_performance'].quantile(0.1)
    del temp_df

    m = tdf[('smac', 'test_performance')] <= qtil_10

    # Setting values to log scale and categorical values
    log_columns = ['beta1', 'beta2', 'gamma', 'lambda2', 'learning_rate', 'momentum','num_units_layer_1',
                   'num_units_layer_2', 'num_units_layer_3', 'num_units_layer_4', 'num_units_layer_5',
                   'num_units_layer_6', 'power', 'std_layer_1', 'std_layer_2', 'std_layer_3','std_layer_4',
                   'std_layer_5', 'std_layer_6']

    for lc in log_columns:
        try:
            tdf.loc[:, ('classifier', lc)] = np.log10(tdf.loc[:, ('classifier', lc)])
        except KeyError:
            continue

    ## After Setting the frames. Start with the plotting
    plt.clf()

    # Plot the empirical CDF
    sorted_train = (tdf['smac']['train_performance'].sort_values(ascending=True).values)
    sorted_test = (tdf['smac']['test_performance'].sort_values(ascending=True).values)
    ytrain = np.arange(len(sorted_train)) / float(len(sorted_train))
    ytest = np.arange(len(sorted_test)) / float(len(sorted_test))

    plt.step(sorted_train, ytrain, label="Train Performance", lw=2.5)
    plt.step(sorted_test, ytest, label="Test Performance", lw=2.5)
    plt.xlabel("Cross-validation error $y(x)$")
    plt.ylabel(r"Number of Configs (%)")
    plt.xlim(0.0, min(1.0, sorted_test.max()+0.01))
    plt.title("Empirical CDF of configurations based on error")
    plt.legend(loc='best')
    plt.tight_layout()
    plt.savefig(plot_dir + 'CDF_Error_%s.pdf' % dataset)

    categories=['solver','lr_policy','num_layers']
    mask_filter = tdf[('smac','test_performance')] <= qtil_10
    filtered = tdf[mask_filter]
    for category in categories:
        fig_f, axs = plt.subplots(ncols=2, nrows=1, figsize=(15.0, 10.5))
        ax0, ax1 = axs.flat
        sns.boxplot(x=('classifier', category), y=('smac','test_performance'), data=filtered.sort_values(by=[('classifier', category)]), ax=ax0)
        ax0.set_xlabel(category)
        ax0.set_ylabel('Test error performance')
        ax0.set_title('Error distribution based on %s' % category)
        sns.countplot(x=('classifier', category), data=filtered.sort_values(by=[('classifier', category)]), ax=ax1)
        ax1.set_xlabel(category)
        ax1.set_ylabel('Times used')
        ax1.set_title('Bar plot of frequency of %s' % category)
        fig_f.suptitle("Descriptive stats of %s on dataset %s using 10%% of configurations" % (category, dataset), y=0.98)
        # fig_f.tight_layout()
        fig_f.savefig(plot_dir + 'Descriptive_plots_over_%s_on_%s.pdf' % (category, dataset))
        fig_f.show()

    ## Plot distro over learning rates
    # Create the grouping of the filtered DF
    classifier_df = tdf[m]['classifier']
    solver_filt = classifier_df.groupby('solver')

    # with sns.color_palette('Set1',8):
        # for name,groups in solver_filt:
            # plt.hist(groups.learning_rate.values, alpha=0.5, bins=20, label=name)
        # plt.legend()

    col_hist = sns.color_palette('Paired',8, desat=0.8)
    rows_to_plot = int(np.ceil(len(solver_filt)/2.))
    fig2, axs = plt.subplots(nrows=rows_to_plot, ncols=2, figsize=(12.,17.))
    fig2.suptitle('Distribution of learning rate values for each\
                  solver on dataset %s \n (based on 10%% best configurations)' % dataset, y=1.02)
    for ax, (name, groups) in zip(axs.flat,solver_filt):
        ax.hist(groups.learning_rate.values, bins=5, histtype='bar', fill=True,
                label=name, alpha=0.9, color=col_hist.pop())
        ax.set_xlabel('learning rate values (log scale)')
        ax.set_ylabel('# of Configs')
        ax.legend(loc='best')

    # plt.tight_layout()
    ax = axs.flat[-1]
    ax.set_visible(False)
    fig2.savefig(plot_dir + 'Histogram_of_learning_rate_solver_on_dataset_%s.pdf' % dataset)

    ## Plot over different preprocessing methods
    # Create the grouping of the filtered DF
    prepro_filt = tdf[m].groupby([('preprocessor','choice')])

    prepro_color = sns.color_palette('Paired',14, desat=0.8)
    fig4, axs = plt.subplots(nrows=3, ncols=5, sharex='col', figsize=(22.,12.))
    fig4.suptitle('Distribution of learning rate for each preprocessor on dataset %s'% dataset, y=1.02 )
    for ax, (name, grops) in zip(axs.flat,prepro_filt):
        groups = grops['classifier']
        ax.hist(groups.learning_rate.values, bins=5, histtype='bar', fill=True, label=name,
                color=prepro_color.pop())
        ax.set_xlabel('learning rate values (log scale)')
        ax.set_ylabel('# of Configs')
        ax.legend(loc='best')
    # plt.tight_layout()
    fig4.savefig(plot_dir + 'Histogram_of_learning_rate_prepro_on_dataset_%s.pdf' % dataset)
# The definition of label_diff is truncated in this snippet; a plausible
# reconstruction from the calls below (significance bars between bar pairs):
def label_diff(i, j, text, X, means):
    y = 1.1 * max(means[i], means[j])
    props = {'arrowstyle': '-', 'linewidth': 2}
    ax.annotate(text, xy=((X[i] + X[j]) / 2, y + 0.02), ha='center')
    ax.annotate('', xy=(X[i],y), xytext=(X[j],y), arrowprops=props)

# Call the function
label_diff(0,1,'p=0.0370',X,means)
label_diff(0,2,'p<0.0001',X,means)
label_diff(0,3,'p=0.0025',X,means)
label_diff(0,4,'p=0.0000',X,means)



sns.set(font_scale = 2)
sns.set_style("whitegrid")
plt.show()
quit()
col_list = ["red", "green", "blue", "purple", "coral"]
col_list_palette = sns.xkcd_palette(col_list)
sns.set_palette(col_list_palette)
sns.despine(offset=10, trim=True)
# labels = ['1_0_0', '0.95_0.05_0', '0.85_0.10_0.05',
#           '0.70_0.20_0.10','0.50_0.30_0.20','0.25_0.50_0.25']
labels = ['1_0_0', '0.95_0.05_0', '0.85_0.10_0.05', '0.7_0.2_0.1', '0.25_0.50_0.25']
expDataLabels = ['Passage 11', 'Passage 15', 'Passage 19', 'Passage 28', 'Parental']

# ax = sns.violinplot(data = frames_LCStrans_mod, cut = 0, inner = 'box')
# ax = sns.violinplot(data = expData, cut = 0, inner = 'box')



# ax = sns.boxplot(data = expData, showfliers = False)
# ax = sns.pointplot(data = frames_LCStrans_mod, estimator= median)
# fig, ax = plt.subplots()
# CURVE_P0 = (1.0, 1.0, 1.0, 1.0)
# def sigmoid(x, a0, a1, a2, a3):
#  return (a0 + a1 * x) / (1.0 + a2 * np.exp(-a3 * x))

CURVE_A0 = np.array([1.0, 1.0, 1.0, 1.0, 1.0], dtype=float)


def sigmoid(x, a0, a1, a2, a3, a4):
    return a0 * (1.0 - np.exp(a1 - a2 * x)) / (1.0 + np.exp(a3 - a4 * x))
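
# CURVE_A0 above presumably seeds a least-squares fit of sigmoid(); a minimal
# sketch on synthetic, noiseless data (scipy.optimize.curve_fit assumed):
from scipy.optimize import curve_fit
x_demo = np.linspace(0.0, 10.0, 50)
y_demo = sigmoid(x_demo, 1.0, 0.5, 1.0, 2.0, 1.5)
popt, pcov = curve_fit(sigmoid, x_demo, y_demo, p0=CURVE_A0)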

compounds, maxisos = zip(*[('G6P', 6), ('F6P', 6), ('FBP', 6), ('DHAP', 3), ('xPG', 3), ('PEP', 3)])
compound2maxiso = dict(zip(compounds, maxisos))

colors = ["dusty purple", "windows blue", "teal green", "scarlet", "purplish pink", "orange"]
colpalette = sns.xkcd_palette(colors)
# colpalette = sns.color_palette("PuBuGn_d", len(compounds))

exps = ['eca', 'bsa', 'wt']
titles = [r'$\Delta$pfkA$\Delta$pfkB  +  pfkA from E. coli',
          r'$\Delta$pfkA$\Delta$pfkB  +  pfkA from B. subtilis',
          'Wild-type E. coli']
          
count_df = pd.read_csv('integration_results.txt', sep='\t', index_col=0)
count_df.fillna(0, inplace=True)

samples_df = pd.read_csv('samples.csv', index_col=0)

# remove time point - 45 min, seems to be a mistake
count_df = count_df.loc[samples_df['time (min)'] != 45, :]
    raw_counts = raw_counts.reindex(known_counts.index, fill_value=0)

    raw_errors.append(raw_counts - known_counts["count"])
raw_error_mean = pd.concat(raw_errors).mean()

errors = []

for uncertainty in [0, 5, 10, 20, 30]:
    u = "err-{}%".format(uncertainty) if uncertainty > 0 else "default"
    for mean, posterior_counts, known_counts in zip(snakemake.params.means, snakemake.input.get(u), all_known_counts):
        posterior_estimates = pd.read_table(posterior_counts, index_col=[0, 1])
        posterior_estimates = posterior_estimates.reindex(known_counts.index, fill_value=0)

        errors.append(pd.DataFrame({"error": posterior_estimates["expr_map"] - known_counts["count"], "mean": mean, "uncertainty": uncertainty}))

errors = pd.concat(errors)


x, y = snakemake.config["plots"]["figsize"]
plt.figure(figsize=(x * 1.5, y))
colors = sns.xkcd_palette(["light red"])
sns.violinplot(x="uncertainty", y="error", data=errors, bw=1, inner="quartile", palette=colors, linewidth=1)
plt.plot(plt.xlim(), [0, 0], "-k", linewidth=1, zorder=-5)
plt.plot(plt.xlim(), [raw_error_mean] * 2, ":k", linewidth=1, zorder=-5)
sns.despine()

plt.xlabel("error rate underestimation (%)")
plt.ylabel("predicted - truth")

plt.savefig(snakemake.output[0], bbox_inches="tight")
Example #45
0
import os.path as osp

import pandas
import matplotlib.pyplot as plt
import seaborn


def learning_curve(log_file):
    print('==> Plotting log file: %s' % log_file)

    df = pandas.read_csv(log_file)

    colors = ['red', 'green', 'blue', 'purple', 'orange']
    colors = seaborn.xkcd_palette(colors)

    plt.figure(figsize=(20, 6), dpi=500)

    row_min = df.min()
    row_max = df.max()

    # initialize DataFrame for train
    columns = [
        'epoch',
        'iteration',
        'train/loss',
        'train/acc',
        'train/acc_cls',
        'train/mean_iu',
        'train/fwavacc',
    ]
    df_train = df[columns]
    df_train = df_train.rolling(window=10).mean()
    df_train = df_train.dropna()
    iter_per_epoch = df_train.query('epoch == 1')['iteration'].values[0]
    df_train['epoch_detail'] = df_train['iteration'] / iter_per_epoch

    # initialize DataFrame for val
    columns = [
        'epoch',
        'iteration',
        'valid/loss',
        'valid/acc',
        'valid/acc_cls',
        'valid/mean_iu',
        'valid/fwavacc',
    ]
    df_valid = df[columns]
    df_valid = df_valid.dropna()
    df_valid['epoch_detail'] = df_valid['iteration'] / iter_per_epoch

    data_frames = {'train': df_train, 'valid': df_valid}

    n_row = 2
    n_col = 3
    for i, split in enumerate(['train', 'valid']):
        df_split = data_frames[split]

        # loss
        plt.subplot(n_row, n_col, i * n_col + 1)
        plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
        plt.plot(df_split['epoch_detail'], df_split['%s/loss' % split], '-',
                 markersize=1, color=colors[0], alpha=.5,
                 label='%s loss' % split)
        plt.xlim((0, row_max['epoch']))
        plt.ylim((min(row_min['train/loss'], row_min['valid/loss']),
                  max(row_max['train/loss'], row_max['valid/loss'])))
        plt.xlabel('epoch')
        plt.ylabel('%s loss' % split)

        # loss (log)
        plt.subplot(n_row, n_col, i * n_col + 2)
        plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
        plt.semilogy(df_split['epoch_detail'], df_split['%s/loss' % split],
                     '-', markersize=1, color=colors[0], alpha=.5,
                     label='%s loss' % split)
        plt.xlim((0, row_max['epoch']))
        plt.ylim((min(row_min['train/loss'], row_min['valid/loss']),
                  max(row_max['train/loss'], row_max['valid/loss'])))
        plt.xlabel('epoch')
        plt.ylabel('%s loss (log)' % split)

        # lbl accuracy
        plt.subplot(n_row, n_col, i * n_col + 3)
        plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
        plt.plot(df_split['epoch_detail'], df_split['%s/acc' % split],
                 '-', markersize=1, color=colors[1], alpha=.5,
                 label='%s accuracy' % split)
        plt.plot(df_split['epoch_detail'], df_split['%s/acc_cls' % split],
                 '-', markersize=1, color=colors[2], alpha=.5,
                 label='%s accuracy class' % split)
        plt.plot(df_split['epoch_detail'], df_split['%s/mean_iu' % split],
                 '-', markersize=1, color=colors[3], alpha=.5,
                 label='%s mean IU' % split)
        plt.plot(df_split['epoch_detail'], df_split['%s/fwavacc' % split],
                 '-', markersize=1, color=colors[4], alpha=.5,
                 label='%s fwav accuracy' % split)
        plt.legend()
        plt.xlim((0, row_max['epoch']))
        plt.ylim((0, 1))
        plt.xlabel('epoch')
        plt.ylabel('%s label accuracy' % split)

    out_file = osp.splitext(log_file)[0] + '.png'
    plt.savefig(out_file)
    print('==> Wrote figure to: %s' % out_file)
def plot_surf_label(coords, faces,
                    labels=None,
                    elev=0, azim=0,
                    cpal='bright',
                    threshold=None,
                    bg_map=None,
                    bg_on_labels=False,
                    alpha='auto',
                    darkness=1, 
                    figsize=None,
                    **kwargs):

    '''
    - labels requires a tuple of label/s, each a list/array of node indices
    - cpal takes either the name of a seaborn color palette or matplotlib color map,
      or a list of rgb values or color names from http://xkcd.com/color/rgb/
    '''

    import numpy as np
    import matplotlib.pyplot as plt
    import matplotlib.tri as tri
    from mpl_toolkits.mplot3d import Axes3D
    import seaborn as sns

    # load mesh and derive axes limits
    faces = np.array(faces, dtype=int)
    limits = [coords.min(), coords.max()]

    # set alpha if in auto mode
    if alpha == 'auto':
        if bg_map is None:
            alpha = .5
        else:
            alpha = 1

    # if cpal is given as a string, translate it to a seaborn color palette
    if type(cpal) == str:
        cpal = sns.color_palette(cpal, len(labels))
    if type(cpal) == list:
        if len(cpal) < len(labels):
            raise ValueError('There are not enough colors in the color list.')
        try:
            cpal = sns.color_palette(cpal)
        except:
            cpal = sns.xkcd_palette(cpal)

    # initiate figure and 3d axes
    if figsize is not None:
        fig = plt.figure(figsize=figsize)
    else:
        fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d', xlim=limits, ylim=limits)
    ax.view_init(elev=elev, azim=azim)
    ax.set_axis_off()

    # plot mesh without data
    p3dcollec = ax.plot_trisurf(coords[:, 0], coords[:, 1], coords[:, 2],
                                triangles=faces, linewidth=0.,
                                antialiased=False,
                                color='white')

    if bg_map is not None or labels is not None:

        face_colors = np.ones((faces.shape[0], 4))
        face_colors[:, :3] = .5*face_colors[:, :3]

        if bg_map is not None:
            bg_data = bg_map
            if bg_data.shape[0] != coords.shape[0]:
                raise ValueError('The bg_map does not have the same number '
                                 'of vertices as the mesh.')
            bg_faces = np.mean(bg_data[faces], axis=1)
            bg_faces = bg_faces - bg_faces.min()
            bg_faces = bg_faces / bg_faces.max()
            bg_faces *= darkness
            face_colors = plt.cm.gray_r(bg_faces)

        # modify alpha values of background
        face_colors[:, 3] = alpha*face_colors[:, 3]

        # color the labels, either overriding or overlaying bg_map
        if labels is not None:
            for n_label,label in enumerate(labels):
                for n_face, face in enumerate(faces):
                    count = len(set(face).intersection(set(label)))
                    if count > 1:
                        if bg_on_labels:
                            face_colors[n_face,0:3] = cpal[n_label] * face_colors[n_face,0:3]
                        else:
                            face_colors[n_face,0:3] = cpal[n_label]

        p3dcollec.set_facecolors(face_colors)

    return fig
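
# Minimal usage sketch on a synthetic tetrahedron mesh (names below are
# illustrative, not from the original project):
import numpy as np
tet_coords = np.array([[0., 0., 0.], [1., 0., 0.], [0., 1., 0.], [0., 0., 1.]])
tet_faces = np.array([[0, 1, 2], [0, 1, 3], [0, 2, 3], [1, 2, 3]])
fig = plot_surf_label(tet_coords, tet_faces, labels=([0, 1, 2],), cpal=['light blue'])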
Example #47
0
def _do_classifyplot(df, out_file, title=None, size=None):
    """Plot using classification-based plot using seaborn.
    """
    metric_labels = {"fdr": "False discovery rate",
                     "fnr": "False negative rate"}
    metrics = [("fnr", "tpr"), ("fdr", "spc")]
    colors = ["light grey", "greyish"]
    data_dict = df.set_index(["sample", "caller", "vtype"]).T.to_dict()
    plt.ioff()
    sns.set(style='white')
    vtypes = sorted(df["vtype"].unique(), reverse=True)
    callers = sorted(df["caller"].unique())
    samples = sorted(df["sample"].unique())
    fig, axs = plt.subplots(len(vtypes) * len(callers), len(metrics))
    fig.text(.5, .95, title if title else "", horizontalalignment='center', size=14)
    for vi, vtype in enumerate(vtypes):
        sns.set_palette(sns.xkcd_palette([colors[vi]]))
        for ci, caller in enumerate(callers):
            for j, (metric, label) in enumerate(metrics):
                cur_plot = axs[vi * len(callers) + ci][j]
                vals, labels = [], []
                for sample in samples:
                    cur_data = data_dict[(sample, caller, vtype)]
                    vals.append(cur_data[metric])
                    labels.append(cur_data[label])
                cur_plot.barh(np.arange(len(samples)), vals)
                all_vals = []
                for k, d in data_dict.items():
                    if k[-1] == vtype:
                        for m in metrics:
                            all_vals.append(d[m[0]])
                metric_max = max(all_vals)
                cur_plot.set_xlim(0, metric_max)
                pad = 0.1 * metric_max
                for ai, (val, label) in enumerate(zip(vals, labels)):
                    cur_plot.annotate(label, (pad + (0 if max(vals) > metric_max / 2.0 else max(vals)),
                                              ai + 0.35), va='center', size=7)
                if j == 0:
                    cur_plot.tick_params(axis='y', which='major', labelsize=8)
                    cur_plot.locator_params(nbins=len(samples) + 2, axis="y", tight=True)
                    cur_plot.set_yticklabels(samples, size=8, va="bottom")
                    cur_plot.set_title("%s: %s" % (vtype, caller), fontsize=12, loc="left")
                else:
                    cur_plot.get_yaxis().set_ticks([])
                if ci == len(callers) - 1:
                    cur_plot.tick_params(axis='x', which='major', labelsize=8)
                    cur_plot.get_xaxis().set_major_formatter(
                        FuncFormatter(lambda v, p: "%s%%" % (int(v) if round(v) == v else v)))
                    if vi == len(vtypes) - 1:
                        cur_plot.get_xaxis().set_label_text(metric_labels[metric], size=12)
                else:
                    cur_plot.get_xaxis().set_ticks([])
                    cur_plot.spines['bottom'].set_visible(False)
                cur_plot.spines['left'].set_visible(False)
                cur_plot.spines['top'].set_visible(False)
                cur_plot.spines['right'].set_visible(False)
    x, y = (6, len(vtypes) * len(callers) + 1 * 0.5 * len(samples)) if size is None else size
    fig.set_size_inches(x, y)
    fig.tight_layout(rect=(0, 0, 1, 0.95))
    plt.subplots_adjust(hspace=0.6)
    fig.savefig(out_file)
Example #48
0
def prepare_and_plot_1_1(dataset_name):

    all_epsilons = [
        2.0, 1.5, 1.25, 1.0986122886681098, 1.0, 0.75, 0.6931471805599453, 0.5,
        0.1, 0.01
    ]

    path_in = os.getcwd()
    pattern = '^.*/thesis-data-anonymisation/'
    path = re.search(pattern, path_in).group(0)

    safepub_path = path + "data/result/safepub_test/1_1/" + dataset_name
    safe_pub_match = "^norm_result_granularity_eps-"
    safepub_files = [
        file for file in os.listdir(safepub_path)
        if re.match(safe_pub_match, file)
    ]

    sc_path = path + "data/result/sc_test/1_1/" + dataset_name
    sc_match = "^norm_result_eps-"
    sc_files = [
        file for file in os.listdir(sc_path) if re.match(sc_match, file)
    ]

    sc_spec_path = path + "data/result/sc_spec_test/1_1/" + dataset_name
    sc_spec_match = "^norm_result_eps-"
    sc_spec_files = [
        file for file in os.listdir(sc_spec_path)
        if re.match(sc_spec_match, file)
    ]

    k_file = path + "data/result/k-anonym_test/1_1/" + dataset_name + "/norm_result_k5_suppression.csv"

    plot_path = path + "data/result/plots/1_1_" + dataset_name + "_spec.jpg"

    models = []
    information_loss = []
    epsilons = []

    for file in sc_files:
        epsilon = float(re.split(sc_match + "|" + ".csv", file)[1])
        df = pd.read_csv(sc_path + '/' + file)
        data = list(df['sse'])
        information_loss += data
        models += ['MicroDP'] * len(data)
        epsilons += [epsilon] * len(data)

    for file in sc_spec_files:
        epsilon = float(re.split(sc_spec_match + "|" + ".csv", file)[1])
        df = pd.read_csv(sc_spec_path + '/' + file)
        data = list(df['sse'])
        information_loss += data
        models += ['MicroDP-800'] * len(data)
        epsilons += [epsilon] * len(data)

    for file in safepub_files:
        epsilon = float(re.split(safe_pub_match + "|" + ".csv", file)[1])
        df = pd.read_csv(safepub_path + '/' + file)
        data = list(df['sse'])
        information_loss += data
        models += ['SafePub'] * len(data)
        epsilons += [epsilon] * len(data)

    df = pd.read_csv(k_file)
    information_loss += [list(df['sse'])[0]] * len(all_epsilons)
    models += ['k-anonymisation'] * len(all_epsilons)
    epsilons += all_epsilons

    array = np.array([epsilons, information_loss]).T

    df = pd.DataFrame(array, columns=['ε', 'Information loss'])
    df['Model'] = models

    ax = sns.lineplot(x='ε',
                      y='Information loss',
                      hue='Model',
                      data=df,
                      palette=sns.xkcd_palette([
                          'windows blue', 'dark blue', 'amber', 'faded green'
                      ]))
    ax.set(ylim=(0, 1.05))
    plt.show()
    #plt.savefig(plot_path)
    plt.clf()
# In[6]:

denver_loc = (-104.9903, 39.7392)
miami_loc = (-80.2089, 25.7753)

denver = daymet.get_daymet_singlepixel(longitude=denver_loc[0], latitude=denver_loc[1], 
                                   years=[2012, 2013, 2014])
miami = daymet.get_daymet_singlepixel(longitude=miami_loc[0], latitude=miami_loc[1], 
                                   years=[2012, 2013, 2014])


# In[9]:

sns.set_context("talk")
fig, ax1 = plt.subplots(1, figsize=(18, 10))
den_15day = denver.rolling(center=False,window=15).mean()
ax1.fill_between(den_15day.index, den_15day.tmin, den_15day.tmax, 
                 alpha=0.4, lw=0, label='Denver', color=sns.xkcd_palette(['faded green'])[0])

ax1.set_title('Denver vs Miami temps (15 day rolling mean)', fontsize=20)

miami_15day = miami.rolling(center=False,window=15).mean()
ax1.fill_between(miami_15day.index, miami_15day.tmin, miami_15day.tmax, 
                 alpha=0.4, lw=0, label='Miami', color=sns.xkcd_palette(['dusty purple'])[0])

ax1.set_ylabel(u'Temp. (°C)', fontsize=20)
fig.tight_layout()
plt.legend(fontsize=20)

Example #50
0
df.loc[:, 'cap_BATT'] = df['cap_ELC_DIST'] + df['cap_ELC_CENTRAL']

#-----------------------------------------------------
# Aesthetics (style + context)
# https://seaborn.pydata.org/tutorial/aesthetics.html
#-----------------------------------------------------
resolution = 1000  # Resolution (DPI - dots per inch)
style = 'white'  # options: "white", "whitegrid", "dark", "darkgrid", "ticks"
context = 'talk'  # options "paper", "notebook", "talk", "poster" (smallest -> largest)

# Series palette options
colorblind_palette = sns.color_palette(
    'colorblind')  # https://seaborn.pydata.org/tutorial/color_palettes.html
xkcd_palette = sns.xkcd_palette(
    ["royal blue", "tangerine", "greyish", "faded green",
     "raspberry"])  # https://xkcd.com/color/rgb/
custom_palette = [(0.380, 0.380, 0.380), (0.957, 0.451, 0.125),
                  (.047, 0.149, 0.361),
                  (0.847, 0.000, 0.067)]  # Custom palette

#-----------------------------------------------------
# Plotting Inputs
#-----------------------------------------------------
# x variables, all lists are expected to the same length
x_var = "year"  # Need to be columns in DataFrame
x_label = "Year (-)"  # Note: keep short
x_convert = 1.0  # Multiplier to convert to display units
x_tick = []  # Ok to leave empty
x_lim = []  # Ok to leave empty
Example #51
0
import logging
from typing import *
import seaborn

logger = logging.getLogger(__name__)

seaborn.set_palette(
    seaborn.xkcd_palette(
        ["windows blue", "amber", "faded green", "dusty purple"]))


def _init_mapping_ancestors():
    colors = ["windows blue", "amber", "faded green", "dusty purple"]
    ancestors = ["Archaea", "Actinobacteria", "Enterobacterales", "FCB group"]
    palette = seaborn.xkcd_palette(colors)
    return {x[0]: x[1] for x in zip(ancestors, palette)}

    # def _init_mapping_ancestors():
    #     colors = ["windows blue", "amber", "faded green", "dusty purple"]
    #     ancestors = ["Archaea", "Actinobacteria", "Enterobacterales", "FCB group"]
    #
    #     color_pal = seaborn.color_palette("colorblind", 6).as_hex()
    #     colors = ','.join(color_pal)
    #     palette = seaborn.color_palette(color_pal)
    #



def _init_mapping_verified():
    colors = [
Example #52
0
def prepare_and_plot_1_1_safepub(dataset_name):
    all_epsilons = [
        2.0, 1.5, 1.25, 1.0986122886681098, 1.0, 0.75, 0.6931471805599453, 0.5,
        0.1, 0.01
    ]

    path_in = os.getcwd()
    pattern = '^.*/thesis-data-anonymisation/'
    path = re.search(pattern, path_in).group(0)

    safepub_path = path + "data/result/safepub_test/1_1/" + dataset_name
    safe_pub_match = "^norm_result_granularity_eps-"
    safepub_files = [
        file for file in os.listdir(safepub_path)
        if re.match(safe_pub_match, file)
    ]

    k_file = path + "data/result/k-anonym_test/1_1/" + dataset_name + "/norm_result_k5_suppression.csv"

    plot_path = path + "data/result/plots/1_1_safepub_" + dataset_name + ".jpg"

    models = []
    information_loss = []
    epsilons = []
    metrics = []

    for file in safepub_files:
        epsilon = float(re.split(safe_pub_match + "|" + ".csv", file)[1])
        df = pd.read_csv(safepub_path + '/' + file)

        data_disc = list(df['discernibility'])
        information_loss += data_disc
        metrics += ['Discernibility'] * len(data_disc)
        models += ['SafePub'] * len(data_disc)
        epsilons += [epsilon] * len(data_disc)

        data_ent = list(df['entropy'])
        information_loss += data_ent
        metrics += ['Non-uniform entropy'] * len(data_ent)
        models += ['SafePub'] * len(data_ent)
        epsilons += [epsilon] * len(data_ent)

    df = pd.read_csv(k_file)
    information_loss += [list(df['discernibility'])[0]] * len(all_epsilons)
    metrics += ['Discernibility'] * len(all_epsilons)
    models += ['k-anonymisation'] * len(all_epsilons)
    epsilons += all_epsilons
    information_loss += [list(df['entropy'])[0]] * len(all_epsilons)
    metrics += ['Non-uniform entropy'] * len(all_epsilons)
    models += ['k-anonymisation'] * len(all_epsilons)
    epsilons += all_epsilons

    array = np.array([epsilons, information_loss]).T

    df = pd.DataFrame(array, columns=['ε', 'Information loss'])
    df['Model'] = models
    df['Metric'] = metrics

    ax = sns.lineplot(x='ε',
                      y='Information loss',
                      hue='Model',
                      style='Metric',
                      data=df,
                      palette=sns.xkcd_palette(['amber', 'faded green']))
    ax.set(ylim=(0, 1.05))
    #plt.show()
    plt.savefig(plot_path)
    plt.clf()

    return
Example #53
0
import numpy as np
import random
from matplotlib import colors
import seaborn as sns
import hilbert


color_list = ['cyan', 'goldenrod', 'seafoam green', 'light yellow', 'scarlet',
        'neon blue', 'barney purple', 'reddish orange', 'lemon', 'cerise',
        'light lime green', 'teal blue', 'bubblegum pink', 'black', 'black',
        'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black',
        'black', 'black', 'black', 'black', 'black', 'black', 'black', 'black',
        'black', 'black', 'black', 'black', 'black', 'vermillion', 'amber',
        'melon', 'purpleish', 'bright light blue', 'strawberry', 'celadon']

n_colors = len(color_list)
palette = sns.xkcd_palette(color_list)
cmap = colors.ListedColormap(palette)
bounds = range(n_colors+1)
norm = colors.BoundaryNorm(bounds, cmap.N)
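
# With this cmap/norm pair, integer cell values index directly into the xkcd
# palette; a small illustrative example (matplotlib.pyplot assumed):
import matplotlib.pyplot as plt
demo_grid = np.random.randint(0, n_colors, size=(8, 8))
plt.imshow(demo_grid, cmap=cmap, norm=norm, interpolation='nearest')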

def generate_random_color():
    idx = random.choice(range(n_colors))
    return idx


def plot_hilbert_curve(n, ax):
    locs = hilbert.generate_locations(n)
    for i in range(len(locs)-1):
        start, finish = locs[i], locs[i+1]
        xs, ys = zip(start,finish)
        ax.plot(xs, ys, 'white', alpha=0.4, lw='1')
Example #54
0
def _init_mapping_independence_conditions():
    colors = ["windows blue", "amber", "faded green"]
    conditions = ["Random", "Independent", "Fully dependent"]
    palette = seaborn.xkcd_palette(colors)
    return {x[0]: x[1] for x in zip(conditions, palette)}
Example #55
0
def _init_mapping_archea_bacteria():
    colors = ["magenta", "windows blue"]
    name = ["Archaea", "Bacteria"]
    palette = seaborn.xkcd_palette(colors)
    return {x[0]: x[1] for x in zip(name, palette)}
Example #56
0
def _init_mapping_stop_codons():
    colors = ["windows blue", "amber", "faded green"]
    conditions = ["TAG", "TGA", "TAA"]
    palette = seaborn.xkcd_palette(colors)
    return {x[0]: x[1] for x in zip(conditions, palette)}
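
# These _init_mapping_* helpers plug straight into seaborn as categorical
# palettes, e.g. (illustrative, assuming a DataFrame df with a 'codon' column):
# seaborn.countplot(x="codon", data=df, palette=_init_mapping_stop_codons())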
def plot_surf_stat_map(coords, faces, stat_map=None,
                       elev=0, azim=0,
                       cmap='coolwarm',
                       threshold=None, bg_map=None,
                       bg_on_stat=False,
                       alpha='auto',
                       darkness=1,
                       vmax=None, symmetric_cbar="auto",
                       figsize=None,
                       labels=None, label_cpal=None,
                       mask=None, mask_lenient=None,
                       **kwargs):

    import numpy as np
    import matplotlib.pyplot as plt
    import matplotlib.tri as tri
    from mpl_toolkits.mplot3d import Axes3D
    import seaborn as sns

    # load mesh and derive axes limits
    faces = np.array(faces, dtype=int)
    limits = [coords.min(), coords.max()]

    # set alpha if in auto mode
    if alpha == 'auto':
        if bg_map is None:
            alpha = .5
        else:
            alpha = 1

    # if cmap is given as string, translate to matplotlib cmap
    if type(cmap) == str:
        cmap = plt.cm.get_cmap(cmap)

    # initiate figure and 3d axes
    if figsize is not None:
        fig = plt.figure(figsize=figsize)
    else:
        fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d', xlim=limits, ylim=limits)
    ax.view_init(elev=elev, azim=azim)
    ax.set_axis_off()

    # plot mesh without data
    p3dcollec = ax.plot_trisurf(coords[:, 0], coords[:, 1], coords[:, 2],
                                triangles=faces, linewidth=0.,
                                antialiased=False,
                                color='white')

    # where mask is indices of nodes to include:
    if mask is not None:    
        cmask = np.zeros(len(coords))
        cmask[mask] = 1
        cutoff = 2 # include triangles in cortex only if ALL nodes in mask
        if mask_lenient: # include triangles in cortex if ANY are in mask
            cutoff = 0
        fmask = np.where(cmask[faces].sum(axis=1) > cutoff)[0]

    # If depth_map and/or stat_map are provided, map these onto the surface
    # set_facecolors function of Poly3DCollection is used as passing the
    # facecolors argument to plot_trisurf does not seem to work
    if bg_map is not None or stat_map is not None:

        face_colors = np.ones((faces.shape[0], 4))
        face_colors[:, :3] = .5*face_colors[:, :3]

        if bg_map is not None:
            bg_data = bg_map
            if bg_data.shape[0] != coords.shape[0]:
                raise ValueError('The bg_map does not have the same number '
                                 'of vertices as the mesh.')
            bg_faces = np.mean(bg_data[faces], axis=1)
            bg_faces = bg_faces - bg_faces.min()
            bg_faces = bg_faces / bg_faces.max()
            bg_faces *= darkness
            face_colors = plt.cm.gray_r(bg_faces)

        # modify alpha values of background
        face_colors[:, 3] = alpha*face_colors[:, 3]

        if stat_map is not None:
            stat_map_data = stat_map
            stat_map_faces = np.mean(stat_map_data[faces], axis=1)

            # Call _get_plot_stat_map_params to derive symmetric vmin and vmax
            # And colorbar limits depending on symmetric_cbar settings
            cbar_vmin, cbar_vmax, vmin, vmax = \
                _get_plot_stat_map_params(stat_map_faces, vmax,
                                          symmetric_cbar, kwargs)

            if threshold is not None:
                kept_indices = np.where(abs(stat_map_faces) >= threshold)[0]
                stat_map_faces = stat_map_faces - vmin
                stat_map_faces = stat_map_faces / (vmax-vmin)
                if bg_on_stat:
                    face_colors[kept_indices] = cmap(stat_map_faces[kept_indices]) * face_colors[kept_indices]
                else:
                    face_colors[kept_indices] = cmap(stat_map_faces[kept_indices])
            else:
                stat_map_faces = stat_map_faces - vmin
                stat_map_faces = stat_map_faces / (vmax-vmin)
                if bg_on_stat:
                    if mask is not None:
                        face_colors[fmask] = cmap(stat_map_faces)[fmask] * face_colors[fmask]
                    else:
                        face_colors = cmap(stat_map_faces) * face_colors
                else:
                    if mask is not None:
                        face_colors[fmask] = cmap(stat_map_faces)[fmask]
                    else:
                        face_colors = cmap(stat_map_faces)

        if labels is not None:
            '''
            labels requires a tuple of label/s, each a list/array of node indices
            ----------------------------------------------------------------------
            color palette for labels
            if label_cpal is None, outlines will be black
            if it's a color palette name, a different color for each label will be generated
            if it's a list of rgb or color names, these will be used
            valid color names from http://xkcd.com/color/rgb/
            '''
            if label_cpal is not None:
                if type(label_cpal) == str:
                    cpal = sns.color_palette(label_cpal, len(labels))
                if type(label_cpal) == list:
                    if len(label_cpal) < len(labels):
                        raise ValueError('There are not enough colors in the color list.')
                    try:
                        cpal = sns.color_palette(label_cpal)
                    except:
                        cpal = sns.xkcd_palette(label_cpal)

            for n_label, label in enumerate(labels):
                for n_face, face in enumerate(faces):
                    count = len(set(face).intersection(set(label)))
                    if (count > 0) & (count < 3):
                        if label_cpal is None:
                            face_colors[n_face,0:3] = sns.xkcd_palette(["black"])[0]
                        else:
                            face_colors[n_face,0:3] = cpal[n_label]

        p3dcollec.set_facecolors(face_colors)

    return fig
Example #58
0
experiment = "drosophila-4-rdpg-sbm"
run = 2
config = utils.load_config(base_path, experiment, run)
sbm_df = utils.load_pickle(base_path, experiment, run, "sbm_master_df")
tsbm_df = utils.load_pickle(base_path, experiment, run, "tsbm_df")
tsbm_df["sim_ind"] = 0
#%% [markdown]
# ### Plot the noise observed in SBM model fitting

#%%
# Plotting setup
plt.style.use("seaborn-white")
sns.set_context("talk", font_scale=1.5)
plt_kws = dict(s=75, linewidth=0, legend="brief")
sbm_cmap = sns.light_palette("purple", as_cmap=True)
rdpg_cmap = sns.xkcd_palette(["grass green"])

# Plot 1
plt.figure(figsize=(22, 12))
sns.scatterplot(
    data=sbm_df,
    x="n_params_gmm",
    y="mse",
    hue="n_block_try",
    size="n_components_try",
    alpha=0.5,
    palette=sbm_cmap,
    **plt_kws,
)
plt.xlabel("# Params (GMM params for SBMs)")
plt.ylabel("MSE")
Example #59
0
import itertools

import seaborn as sns
from matplotlib import rcParams

# These are the colors. Note the pattern:
# initialize a color cycle with
#     colorset = palette()
# and step through it with
#     color = next(colorset)
# To reset the cycle, simply create a new palette() instance;
# separate instances do not interfere with each other.

color_names = ['windows blue', "pale red", "faded green", "amber", 
          'dark green', 'dark fuchsia', 'browny orange', 
          'puke green', 'dark royal blue', 'dusty purple', 
               'red orange','dark grey','blue grey', 'bright purple', 'chocolate brown',
              'shit', 'pistachio','stone','asparagus','butter']

colors = sns.xkcd_palette(color_names)
palette = lambda: itertools.cycle(sns.xkcd_palette(color_names) )
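
# The cycling pattern described above, in action:
colorset = palette()
first_color = next(colorset)
second_color = next(colorset)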

fontsize_labels = 26    # size used in latex document
rcParams['text.latex.preamble'] = r'\usepackage[cmbright]{sfmath}'
rcParams['font.family']= 'sans-serif'
rcParams['font.sans-serif']= 'cmbright'
rcParams['font.weight'] = "light"

rcParams['text.usetex'] = True

rcParams['figure.autolayout'] = True
rcParams['font.size'] = fontsize_labels
rcParams['axes.labelsize'] = fontsize_labels
rcParams['xtick.labelsize'] = fontsize_labels
rcParams['ytick.labelsize'] = fontsize_labels
import matplotlib
matplotlib.rcParams.update({'font.sans-serif' : 'Helvetica',
                            'axes.labelsize': 10,
                            'xtick.labelsize' : 6,
                            'ytick.labelsize' : 6,
                            'axes.titlesize' : 10})
import matplotlib.pyplot as plt

import seaborn as sns
color_names = ["windows blue",
               "amber",
               "crimson",
               "faded green",
               "dusty purple",
               "greyish"]
colors = sns.xkcd_palette(color_names)
sns.set(style="white", palette=sns.xkcd_palette(color_names), color_codes = False)

# +
X_columns = ['total_travel_time',
             'total_travel_cost',
             # 'total_travel_distance',
             'cross_bay', 'household_size', 'num_kids',
             'cars_per_licensed_drivers',
             'gender']

y_column = data['mode_id']
# -