Exemplo n.º 1
0
def get_samples(depends_on, includes, burn):
    """Load the samples of every run, plot them, and return them.

    Every file in the model directory whose name contains ``samples_`` is
    read as CSV (first column as index), the rows before index label ``burn``
    are dropped, and the remainders are concatenated row-wise.  Columns whose
    name contains ``v_`` are sign-flipped.  A 2x2 grid of violin plots (one
    panel per regressor: age, group, group:age, Intercept) is then shown.

    Parameters
    ----------
    depends_on : iterable of str
        Prefixes used to build the ``<prefix>_<regressor>`` column names of
        the non-intercept panels; also forwarded to ``make_paths``.
    includes
        Forwarded unchanged to ``make_paths``.
    burn : int
        Index label of the first sample kept from each run.  The slice is
        label-based (``.loc``) and inclusive.  Previously this argument was
        ignored and 1000 was hard-coded.

    Returns
    -------
    pandas.DataFrame
        The concatenated samples, with the ``v_`` columns negated.
    """
    paths = make_paths(depends_on, includes, 0)
    model_dir = paths['model_dir']
    sample_files = [f for f in ld(model_dir) if 'samples_' in f]
    # ``.ix`` was removed from pandas; ``.loc`` is the label-based equivalent
    # for the index read from the first CSV column.
    dfs = [
        pd.read_csv(pj(model_dir, f), index_col=0).loc[burn:]
        for f in sample_files
    ]
    df = pd.concat(dfs, axis=0)

    # Flip the sign of every "v_" column.
    for vcol in [c for c in df.columns if 'v_' in c]:
        df[vcol] = -df[vcol]

    for i, iv in enumerate(['age', 'group', 'group:age', 'Intercept'], 1):
        plt.subplot(2, 2, i)
        # The intercept panel always shows the a/t/v columns; the other
        # panels show one column per entry of ``depends_on``.
        prefixes = 'atv' if iv == 'Intercept' else depends_on
        _df = df[['%s_%s' % (p, iv) for p in prefixes]]
        sns.violinplot(data=_df)
        plt.hlines(0, 0, 3)
    plt.show()

    return df
def figure2_1():
    """Draw ten randomly shifted normal reward distributions as violins.

    Opens the figure numbered by the module-level ``figureIndex`` and then
    increments that counter, so the next figure gets a fresh number.
    """
    global figureIndex
    plt.figure(figureIndex)
    figureIndex = figureIndex + 1

    # 200 unit-normal draws per column, each column shifted by its own
    # random offset (broadcast across rows).
    draws = np.random.randn(200, 10)
    offsets = np.random.randn(10)
    sns.violinplot(data=draws + offsets)

    plt.xlabel("Action")
    plt.ylabel("Reward distribution")
Exemplo n.º 3
0
    def variance_plot(self, df, item):
        """Plot the per-column spread of one item and bundle it for a report.

        Parameters
        ----------
        df : pandas.DataFrame
            Full results frame.  The whole frame is passed in because the
            column names are needed; the cross-section ``df.xs(item)`` is
            what gets plotted.
        item : str
            Key of the cross-section to plot.  Also used to look up the
            y-axis label in ``self.y_LUT`` and as the title prefix.

        Returns
        -------
        dict
            ``'data'``: the matplotlib figure, ``'title'``: the plot title,
            ``'text'``: a transposed DataFrame listing the column keys, one
            table column per numbered violin.
        """
        # ``sns.plt`` is no longer exported by seaborn; use pyplot directly.
        import matplotlib.pyplot as plt

        df = df.xs(item)

        # Set up plot
        fig = plt.figure()

        # Plot data: violins with box plots drawn over them.  The frame is
        # passed as wide-form ``data`` so each column becomes one violin/box.
        sns.violinplot(data=df, bw=.15, cut=0)
        sns.boxplot(data=df, linewidth=2)

        # Plot settings
        plt.yticks(fontsize=12)
        plt.ylabel(self.y_LUT[item], fontsize=16)

        # Columns are labelled by position; the key for each number is
        # given in the ``text`` table below.
        x_labels = list(range(len(df.columns)))
        ax = plt.subplot()
        ax.set_xticklabels(x_labels)

        # Plot info for the web page
        title = item + ' Variance after Convergence'
        text = pd.DataFrame(
            df.columns.tolist(),
            columns=df.columns.names,
            index=x_labels,
        ).transpose()

        return {'data': fig, 'title': title, 'text': text}
Exemplo n.º 4
0
def plot_distances(distance_data, filename, title, plot_variable='distance'):
    """Save one violin-plus-swarm panel per region, stacked vertically.

    Parameters
    ----------
    distance_data : pandas.DataFrame
        Needs the columns ``region``, ``cutoff`` and ``plot_variable``.
    filename : str
        Destination handed to ``plt.savefig``.
    title : str
        Title placed on the topmost panel.
    plot_variable : str
        Name of the column drawn on the y-axis.
    """
    regions = sorted(set(distance_data['region']))
    ordered = distance_data.sort_values(['region', 'cutoff'])
    sns.set()

    figure, axes_grid = plt.subplots(nrows=len(regions), ncols=1,
                                     squeeze=False)
    # squeeze=False always yields a 2-D array; flatten it row by row.
    panels = [panel for grid_row in axes_grid for panel in grid_row]

    for panel, region in zip(panels, regions):
        region_rows = ordered[ordered['region'] == region]
        # Number of observations per cutoff, attached to every row so that
        # format_cutoff can read it from the row.
        per_cutoff = region_rows.groupby(by=['cutoff'])[plot_variable].count()
        region_rows = region_rows.assign(
            count=region_rows['cutoff'].map(per_cutoff))
        region_rows['cutoff_n'] = region_rows.apply(format_cutoff,
                                                    axis='columns')

        sns.violinplot(x='cutoff_n',
                       y=plot_variable,
                       data=region_rows,
                       cut=0,
                       alpha=0.7,
                       ax=panel)
        # Raise the z-order of everything drawn so far on this panel.
        plt.setp(panel.lines, zorder=100)
        plt.setp(panel.collections, zorder=100)
        sns.swarmplot(x='cutoff_n',
                      y=plot_variable,
                      data=region_rows,
                      color='k',
                      ax=panel)
        panel.set_ylabel(region + '\n' + plot_variable)

    panels[0].set_title(title)
    plt.savefig(filename)
    def show_results(self):
        """Save a split violin plot of both distance distributions.

        The distances in ``self.dist1_distribution`` (image 1) and
        ``self.dist2_distribution`` (image 2) are multiplied by
        ``self.dim_pix`` and plotted per slice.  For ``self.dim_im == 2``
        there is a single slice 0; for ``self.dim_im == 3`` there is one
        slice per entry of ``self.distances``.  The figure is written to
        ``violin_plot.png`` in the working directory.
        """
        import seaborn as sns
        import matplotlib.pyplot as plt
        import pandas as pd

        # ``plt.hold`` was removed in matplotlib 3.0; overlaying onto the
        # current axes is the default behaviour, so no replacement is needed.
        sns.set(style="whitegrid", palette="pastel", color_codes=True)
        plt.figure(figsize=(35, 20))

        data_dist = {"distances": [], "image": [], "slice": []}

        def add_distribution(distribution, image_id, slice_id):
            """Append one distribution's rows to the long-form table."""
            data_dist["distances"].extend(
                dist * self.dim_pix for dist in distribution)
            data_dist["image"].extend(len(distribution) * [image_id])
            data_dist["slice"].extend(len(distribution) * [slice_id])

        if self.dim_im == 2:
            add_distribution(self.dist1_distribution, 1, 0)
            add_distribution(self.dist2_distribution, 2, 0)
        elif self.dim_im == 3:
            for i in range(len(self.distances)):
                add_distribution(self.dist1_distribution[i], 1, i)
                add_distribution(self.dist2_distribution[i], 2, i)

        data_dist = pd.DataFrame(data_dist)
        sns.violinplot(x="slice", y="distances", hue="image", data=data_dist,
                       split=True, inner="point", cut=0)
        plt.savefig('violin_plot.png')
Exemplo n.º 6
0
def biplot():
    """Show a violin plot of catalog price by age, split by sex.

    Reads the coupon detail, coupon list and user list CSV files from
    ``./Data``, joins the purchase details with the users and with the
    training coupons, draws the plot, and then waits for the user to press
    Enter.
    """
    # Read in all the input data.  The test coupon list is loaded as in the
    # original code although it is not used below.
    cpdtr = pd.read_csv("./Data/coupon_detail_train.csv")
    cpltr = pd.read_csv("./Data/coupon_list_train.csv")
    cplte = pd.read_csv("./Data/coupon_list_test.csv")
    ulist = pd.read_csv("./Data/user_list.csv")

    # Merge detail with user, then with the coupon attributes.
    m = pd.merge(cpdtr, ulist, on="USER_ID_hash")
    m = pd.merge(m, cpltr, on="COUPON_ID_hash")

    import seaborn as sns

    sns.violinplot(x="AGE", y="CATALOG_PRICE", hue="SEX_ID", data=m)

    plt.show()

    # ``raw_input`` only exists on Python 2; fall back to ``input`` so the
    # final pause also works on Python 3.
    try:
        wait_for_enter = raw_input
    except NameError:
        wait_for_enter = input
    wait_for_enter()
Exemplo n.º 7
0
def _load_psd_below(path, limit=1500):
    """Load a saved two-row array and return the leading part of row 1.

    The number of values kept equals the number of entries in row 0 that are
    below ``limit``.
    NOTE(review): row 0 is presumably frequency and row 1 power -- confirm.
    """
    psd = np.load(path)
    n_keep = len(psd[0][psd[0] < limit])
    return psd[1][:n_keep]


def plot_hist_algo(wave_hist_algor, pulse_hist_algor, multi_wave_hist_algor):
    """Plot the PSD-based fish sorting next to two PSD histograms.

    Left panel: violin plot of the three proportion lists (wave, multi-wave,
    pulse).  Middle and right panels: normalised 50-bin histograms of the
    data stored in ``wave_psd_data.npy`` and ``pulse_psd_data.npy`` in the
    working directory.

    Parameters
    ----------
    wave_hist_algor, pulse_hist_algor, multi_wave_hist_algor : sequence
        Values for the three violins; they may differ in length.
    """
    inch_factor = 2.54  # figure size below is given in centimetres
    sns.set_context("poster")
    # ``sns.axes_style`` only returns a dict; ``set_style`` applies it.
    sns.set_style('white')

    fig4 = plt.figure(figsize=(35. / inch_factor, 20. / inch_factor))
    ax1 = fig4.add_subplot(2, 3, (1, 4))
    # One row per class, transposed so that each class becomes a column.
    dafr = pd.DataFrame(
        [wave_hist_algor, multi_wave_hist_algor, pulse_hist_algor]
    ).transpose()
    dafr.columns = ['wave', 'multi-wave', 'pulse']
    # ``col`` is not a violinplot argument; ``palette`` assigns one colour
    # per column.
    sns.violinplot(data=dafr, ax=ax1, palette=["blue", "green", "red"])
    ax1.set_ylabel('psd_proportion')
    ax1.set_xlabel('EOD-type')
    ax1.set_title('Fishsorting based on PSD')

    wave_hist_data = _load_psd_below('wave_psd_data.npy')
    ax3 = fig4.add_subplot(2, 3, (2, 5))
    # ``normed`` was removed from matplotlib; ``density`` replaces it.
    ax3.hist(wave_hist_data, 50, color='blue', alpha=0.7, density=True)
    ax3.set_ylabel('counts in histogram bin')
    ax3.set_xlabel('amplitude of PSD')
    # NOTE(review): this panel shows the wave data but is titled
    # "pulsefish" -- looks like a copy/paste slip; confirm before changing.
    ax3.set_title('Histogram of pulsefish PSD')

    pulse_hist_data = _load_psd_below('pulse_psd_data.npy')
    ax2 = fig4.add_subplot(2, 3, (3, 6))
    ax2.hist(pulse_hist_data, 50, color='red', alpha=0.7, density=True)
    ax2.set_ylabel('counts in histogram bin')
    ax2.set_xlabel('amplitude of PSD')
    ax2.set_title('Histogram of pulsefish PSD')

    fig4.tight_layout()
Exemplo n.º 8
0
def view_distribution(df,x="type",y="rate", plt=plt):
    """Show a violin plot of ``y`` per ``x`` with the raw points on top.

    The title is built from ``y`` and the first value of ``df.symbol``.
    ``plt`` is the pyplot-like module used for the figure, title and show
    calls.
    """
    symbol = df.symbol.values[0]
    heading = y + ' distribution (' + symbol + ')'
    common = dict(x=x, y=y, data=df)

    plt.figure(1, figsize=(15, 15))
    # Violin outlines only; the individual observations go on top.
    sns.violinplot(inner=None, **common)
    sns.stripplot(jitter=True, color="white", edgecolor="gray", **common)
    plt.title(heading)
    plt.show()
Exemplo n.º 9
0
    def build_image(self):
        """Build a two-row figure of split violin plots and return it.

        The records yielded by ``generate()`` are split over two rows of
        axes according to the fixed lists of ``day`` values below.  Each
        axes gets the y ticks and labels defined by ``RANGES``, a y range of
        0-100, no axis labels and no legend.

        Returns
        -------
        matplotlib.figure.Figure
        """
        fig, ax = plt.subplots(nrows=2, ncols=1)

        data = pd.DataFrame(list(generate()))

        state_rows = [
            ['WA', 'SA', 'Tas', 'NSW'],
            ['Qld', 'NT', 'Vic']
        ]

        # Column 1 of RANGES gives the tick positions, column 0 their
        # labels; both are used in reverse row order.
        tick_positions = list(map(int, RANGES[::, 1][::-1]))
        tick_labels = RANGES[::, 0][::-1]

        for subax, states in zip(ax, state_rows):
            to_display = data[data.day.isin(states)]
            sns.violinplot(
                ax=subax,
                x="day",
                y="total_bill",
                hue="sex",
                data=to_display,
                palette="Set2",
                split=True,
                scale="count"
            )
            subax.set_ylabel('')
            subax.set_xlabel('')
            # Fix the tick positions first: matplotlib documents that
            # set_yticklabels should only be used after set_yticks.
            subax.set_yticks(tick_positions)
            subax.set_yticklabels(tick_labels)
            subax.legend_.remove()

            subax.set_ylim(0, 100)

        return fig
Exemplo n.º 10
0
def plotResults(tr, resultKey='resultInputPsf', doRates=False, title='', asHist=False, doPrint=True, actuallyPlot=True):
    """Summarise and optionally plot detection counts per method.

    Parameters
    ----------
    tr : list
        One result per run.  Entries that are ``None`` or whose
        ``resultKey`` item is falsy are skipped.  Each remaining entry must
        provide ``t[resultKey][method]['FN' | 'FP' | 'TP']`` for the methods
        'ALstack', 'ZOGY', 'SZOGY' and 'ALstack_decorr'.
    resultKey : str
        Key selecting which result set of each run to use.
    doRates : bool
        If true, divide FN, FP and TP by the per-run total ``FN + TP``.
    title : str
        Title placed on both panels.
    asHist : bool
        Draw distribution plots instead of violin/swarm/box plots.
    doPrint : bool
        Print the per-method means of FN, FP and TP.
    actuallyPlot : bool
        If false, return right after the (optional) printing.  The plotting
        libraries are not imported in that case.

    Returns
    -------
    (TP, FP, FN) : tuple of pandas.DataFrame
        One row per run, one column per method.
    """
    methods = ['ALstack', 'ZOGY', 'SZOGY', 'ALstack_decorr']
    tr = [t for t in tr if t is not None and t[resultKey]]

    def collect(stat):
        """Return a runs-by-methods frame of one statistic."""
        return pd.DataFrame({key: np.array([t[resultKey][key][stat] for t in tr])
                             for key in methods})

    FN = collect('FN')
    FP = collect('FP')
    TP = collect('TP')
    title_suffix = 's'
    if doRates:
        # Compute the denominator once.  Previously FN was normalised in
        # place first, so FP and TP were divided by (FN_rate + TP) instead
        # of (FN + TP).
        n_total = FN + TP
        FN = FN / n_total
        FP = FP / n_total
        TP = TP / n_total
        title_suffix = ' rate'
    if doPrint:
        # Single-argument calls print the same text on Python 2 and 3.
        print('FN: \n%s' % (FN.mean(),))
        print('FP: \n%s' % (FP.mean(),))
        print('TP: \n%s' % (TP.mean(),))

    if not actuallyPlot:
        return TP, FP, FN

    # Plotting libraries are only needed from here on.
    import matplotlib.pyplot as plt
    import matplotlib
    matplotlib.style.use('ggplot')

    import seaborn as sns
    sns.set(style="whitegrid", palette="pastel", color_codes=True)

    matplotlib.rcParams['figure.figsize'] = (18.0, 6.0)
    fig, axes = plt.subplots(nrows=1, ncols=2)

    panels = [
        (axes[0], TP, 'True positive', dict(bw=0.25, alpha=0.5)),
        (axes[1], FP, 'False positive', dict(bw=0.5)),
    ]

    if not asHist:
        for ax, counts, label, violin_kwargs in panels:
            sns.violinplot(data=counts, cut=True, linewidth=0.3, scale='width',
                           ax=ax, **violin_kwargs)
            # A swarm of individual runs is only drawn for small samples.
            if counts.shape[0] < 500:
                sns.swarmplot(data=counts, color='black', size=3, alpha=0.3, ax=ax)
            # Transparent box without whiskers or fliers, drawn on top.
            sns.boxplot(data=counts, saturation=0.5, boxprops={'facecolor': 'None'},
                        whiskerprops={'linewidth': 0}, showfliers=False, ax=ax)
            plt.setp(ax, alpha=0.3)
            ax.set_ylabel(label + title_suffix)
            ax.set_title(title)
    else:
        for ax, counts, label, _ in panels:
            for method in counts:
                sns.distplot(counts[method], label=method, norm_hist=False, ax=ax)
            ax.set_xlabel(label + title_suffix)
            ax.set_title(title)
            ax.legend(loc='upper left', shadow=True)

    return TP, FP, FN
Exemplo n.º 11
0
def _plot_categorical_and_continuous(df, xlabel, ylabel, x_keys, y_keys, ax,
                                     cmap, n_cat=5, plottype="box"):
    """
    Draw a categorical column against a continuous column with seaborn.

    Supported plot types are box, violin, strip and swarm plots.

    Parameters
    ----------
    df : pd.DataFrame
        A pandas DataFrame with the data

    xlabel : str
        The column name for the variable on the x-axis

    ylabel : str
        The column name for the variable on the y-axis

    x_keys, y_keys :
        Exactly how these are built is decided by the caller.  If ``x_keys``
        is the very same object as ``xlabel``, ``y_keys`` is used as the
        category order; otherwise, if ``y_keys`` is the very same object as
        ``ylabel``, ``x_keys`` is used.  Any other combination is an error.

    ax : matplotlib.Axes object
        The matplotlib.Axes object to plot into

    cmap : matplotlib.cm.colormap
        Passed to ``sns.color_palette`` to build the palette

    n_cat : int
        The number of colours requested from the palette

    plottype : {"box" | "violin" | "strip" | "swarm"}
        The type of plot to produce; default is a box plot

    Returns
    -------
    ax : matplotlib.Axes object
        The same matplotlib.Axes object for further manipulation

    """
    # Identity (not equality) decides which axis carries the categories.
    if x_keys is not xlabel and y_keys is not ylabel:
        raise Exception("Something went terribly, horribly wrong!")
    category_order = y_keys if x_keys is xlabel else x_keys

    colours = sns.color_palette(cmap, n_cat)

    # Plot type -> name of the seaborn function; resolved lazily so only the
    # requested function has to exist.
    plotters = (
        ("box", "boxplot"),
        ("strip", "stripplot"),
        ("swarm", "swarmplot"),
        ("violin", "violinplot"),
    )
    for kind, function_name in plotters:
        if plottype == kind:
            getattr(sns, function_name)(x=xlabel, y=ylabel, data=df,
                                        order=category_order,
                                        palette=colours, ax=ax)
            break
    else:
        raise Exception("plottype not recognized!")

    return ax
Exemplo n.º 12
0
    def plot(self, event, logliks, logsumexps, modality_colors,
             renamed=''):
        """Draw the three diagnostic panels for one event.

        Parameters
        ----------
        event : pandas.Series
            Values of the event; NaNs are dropped before plotting.  Its
            ``name`` is used in the figure caption.
        logliks : mapping or pandas.DataFrame
            Maps each modality name to its log-likelihood values; each is
            drawn as one line on ``self.ax_loglik``.
        logsumexps : pandas.Series
            One value per modality; the largest one determines the guessed
            modality, and all are drawn as bars on ``self.ax_bayesfactor``.
        modality_colors : mapping
            Maps modality name to plot colour.
        renamed : str
            Alternative event name shown in parentheses in the caption.

        Returns
        -------
        self
        """
        modality = logsumexps.idxmax()

        sns.violinplot(event.dropna(), bw=0.2, ax=self.ax_violin,
                       color=modality_colors[modality])

        self.ax_violin.set_ylim(0, 1)
        self.ax_violin.set_title('Guess: {}'.format(modality))
        self.ax_violin.set_xticks([])
        self.ax_violin.set_yticks([0, 0.5, 1])

        # ``iteritems`` was removed in pandas 2.0; ``items`` is available on
        # pandas objects and on plain dicts alike.
        for name, loglik in logliks.items():
            self.ax_loglik.plot(loglik, 'o-', label=name,
                                color=modality_colors[name])
            self.ax_loglik.legend(loc='best')
        self.ax_loglik.set_title('Log likelihoods at different '
                                 'parameterizations')
        self.ax_loglik.grid()
        # White x label: invisible on a white background.
        self.ax_loglik.set_xlabel('phantom', color='white')

        for i, (name, height) in enumerate(logsumexps.items()):
            self.ax_bayesfactor.bar(i, height, label=name,
                                    color=modality_colors[name])
        # Raw string: '\l' is not a valid escape sequence.
        self.ax_bayesfactor.set_title(r'$\log$ Bayes factors')
        self.ax_bayesfactor.set_xticks([])
        self.ax_bayesfactor.grid()
        self.fig.tight_layout()
        self.fig.text(0.5, .025, '{} ({})'.format(event.name, renamed),
                      fontsize=10, ha='center', va='bottom')
        sns.despine()
        return self
Exemplo n.º 13
0
def violin_nocomp(lst_for_exclusion, binary_data_frame, tipo, xentry, df_name):
    """Compare row sums of listed and unlisted rows and save a violin plot.

    For every index label of ``binary_data_frame`` the row sum is computed
    and assigned to the class ``'<tipo> miRNAs'`` if the label is in
    ``lst_for_exclusion``, otherwise to ``'Non-<tipo> miRNAs'``.  The mean,
    the median and the Mann-Whitney U result of the two classes are
    printed, and a violin plot is written to
    ``figures/nocomp_violin_<df_name>.pdf``.

    Parameters
    ----------
    lst_for_exclusion : container
        Index labels that belong to the ``tipo`` class.
    binary_data_frame : pandas.DataFrame
        Frame whose rows are summed.
    tipo : str
        Class name used in the category labels.
    xentry : str
        Column name (and y-axis label) for the row sums.
    df_name : str
        Used in the output file name.  If it contains ``'tis'`` the y range
        is set to 0-20; if it contains ``'tar'``, to 0-1000.
    """
    yes = []
    no = []
    datalst = []

    for alpha in binary_data_frame.index:
        # Sum the row once and reuse it for both the table and the stats.
        row_total = sum(binary_data_frame.loc[alpha].tolist())
        if alpha in lst_for_exclusion:
            datalst.append([row_total, '%s miRNAs' % (tipo)])
            yes.append(row_total)
        else:
            datalst.append([row_total, 'Non-%s miRNAs' % (tipo)])
            no.append(row_total)

    # Single-argument print calls give the same output on Python 2 and 3.
    print('%s %s' % (mean(yes), mean(no)))
    print('%s %s' % (median(yes), median(no)))
    print(mannwhitneyu(yes, no))

    data_master = pd.DataFrame(datalst, columns=[xentry, 'miRNA Class'])
    sns.violinplot(x='miRNA Class', y=xentry, data=data_master, cut=0)
    if 'tis' in df_name:
        plt.gca().set_ylim([0, 20])
    if 'tar' in df_name:
        plt.gca().set_ylim([0, 1000])
    plt.savefig('figures/nocomp_violin_%s.pdf' % (df_name), bbox_inches='tight')
    plt.close()
def stripplot_mean_score(df, save_path, atlas=None, suffix=None, x=None,
                         y=None, hue=None, style='whitegrid', fontsize=14,
                         jitter=.2, figsize=(9, 3), leg_pos=2, axx=None):
    """Draw a violin plot with a strip plot on top on the axes ``axx``.

    The ``y`` column of ``df`` is relabelled ('no' / 'yes' become the long
    "Without / With regions extracted" captions), the ``dataset`` column is
    upper-cased, and positive x tick labels get a leading '+'.

    ``save_path``, ``suffix``, ``style``, ``figsize`` and ``leg_pos`` are
    accepted but not used in this function.

    NOTE(review): ``key``, ``ax`` and ``datasets_palette`` are not defined
    in this function; presumably they are module-level names set by the
    calling script -- confirm.
    """
    # The same captions are used whatever ``atlas`` is.
    new_names = {'no': 'Without\n regions extracted',
                 'yes': 'With\n regions extracted'}
    aliases = {'kmeans': 'K-Means',
               'ica': 'GroupICA',
               'dictlearn': 'Dictionary Learning',
               'basc': 'BASC'}

    def relabel(row, column):
        # Replace the short code in ``column`` by its long caption.
        row[column] = new_names[row[column]]
        return row

    df = df.apply(lambda record: relabel(record, y), axis=1)

    # Dataset names are shown in upper case.
    df['dataset'] = df['dataset'].str.upper()

    rc('xtick', labelsize=12)
    rc('ytick', labelsize=16)
    rc('axes', labelweight='bold')
    rc('legend', fontsize=fontsize)

    # Kept from the original: the result is not used below (the strip plot
    # takes ``datasets_palette`` instead).
    dataset_count = len(df['dataset'].unique())
    palette = color_palette(dataset_count)

    # Reference line at x=0 spanning the full y range.
    axx.axvline(x=0, linewidth=4, zorder=0, color='0.6')

    sns.violinplot(data=df, x=x, y=y, fliersize=0, linewidth=2,
                   boxprops={'facecolor': '0.5', 'edgecolor': '.0'},
                   width=0.5, ax=axx)

    sns.stripplot(data=df, x=x, y=y, hue=hue, edgecolor='gray',
                  size=3, split=True, palette=datasets_palette, jitter=jitter,
                  ax=axx)

    axx.set_xlabel('')
    axx.set_ylabel('')
    plt.text(.5, 1.02, aliases[key], transform=ax.transAxes, size=15, ha='center')

    # Prefix positive tick values with "+".
    tick_captions = [
        '+' + str(tick) if tick > 0 else str(tick)
        for tick in axx.get_xticks()
    ]
    axx.set_xticklabels(tick_captions)
Exemplo n.º 15
0
def plot_balanced_accuracy_violin(balanced_accuracy_samples, ax=None):
    """ Make a violin plot of the balanced posterior accuracy.

        Parameters
        ----------
        balanced_accuracy_samples : dict
            The keys are the classifier names and each value is an array of
            sample points from which an empirical pdf can be approximated.

        ax : Matplotlib Axes object, optional
            Axes to draw on.  Defaults to the current axes.

        Returns
        -------
        ax : Matplotlib Axes object
            The matplotlib Axes instance where the figure is drawn.
    """

    # Test for None explicitly rather than relying on the truthiness of an
    # Axes object.
    if ax is None:
        ax = plt.gca()

    sns.violinplot(data=balanced_accuracy_samples, ax=ax, inner='box', cut=2)

    def format_as_percent_plot(x, pos):
        """Tick formatter: show the fraction ``x`` as a percentage."""
        return "{:.1f}%".format(x * 100)

    ax.get_yaxis().set_major_formatter(FuncFormatter(format_as_percent_plot))

    return ax
Exemplo n.º 16
0
    def plot_against_y(self, function=None, y_margin=0.1, lim=10, context="talk"):
        """Plot each variable of interest against the target ``self.y``.

        The columns of ``self.df[self.vars_of_interest]`` are classified by
        ``cat_cont_time`` and drawn side by side with a shared y-axis:
        continuous columns as regression plots, categorical columns as
        violin plots, and the remaining (time) columns as line plots.

        Parameters
        ----------
        function :
            Not used.
        y_margin : float
            Fraction of the target's range added below and above the
            y limits.
        lim : int
            Maximum number of columns plotted (categorical first, then
            continuous, then time).
        context : str
            Seaborn plotting context.

        Returns
        -------
        matplotlib.figure.Figure
        """
        cat, cont, time = cat_cont_time(self.df[self.vars_of_interest])
        # Honour ``lim`` (it used to be ignored in favour of a literal 10).
        cols = (cat + cont + time)[:lim]
        sns.set_context(context)
        # squeeze=False keeps ``axs`` an array even for a single column, so
        # ``axs.flat`` always works.
        fig, axs = plt.subplots(nrows=1, ncols=len(cols), sharey=True,
                                squeeze=False)
        for ax, col in zip(axs.flat, cols):
            if col in cont:
                sns.regplot(x=col, y=self.y, data=self.df, ax=ax)
            elif col in cat:
                sns.violinplot(x=col, y=self.y, data=self.df, ax=ax)
            else:
                # Time series: select the two columns (the original called
                # the DataFrame instead of indexing it) and draw them on
                # this subplot rather than in a new figure.
                self.df[[self.y, col]].plot(ax=ax)
        y_min, y_max = self.df[self.y].min(), self.df[self.y].max()
        y_range = y_max - y_min
        plt.ylim(y_min - y_margin * y_range, y_max + y_margin * y_range)
        return fig
 def run(self):
     """
     Run the experiment.

     Sweeps the linear speed over ``arange(min_vel, max_vel, step)`` with
     ``step = abs(max_vel - min_vel) / num_speeds``.  For each speed a Twist
     with that linear x velocity and ``self.radial_vel`` as angular z is
     published repeatedly until ``self.errors`` holds ``self.num_samples``
     entries.  On every iteration the errors collected so far are copied
     into a table (one column per speed) and drawn as a violin plot.
     ``spin()`` is called once the sweep is finished.

     NOTE(review): nothing in this method appends to ``self.errors``;
     presumably a callback defined elsewhere fills it -- confirm.
     """
     speeds = arange(self.min_vel, self.max_vel, abs(self.max_vel-self.min_vel)/self.num_speeds)
     # One column per speed (labelled with two decimals), one row per
     # sample, pre-filled with NaN.
     observations = DataFrame(empty((self.num_samples, len(speeds)))*NaN, columns=(["%.2f" % sp for sp in speeds]))
     for speed in speeds:
         # Start a fresh error list for this speed.
         self.errors = []
         self.curr_vel = speed
         # initialize the speed
         _twist = Twist()
         _twist.linear.x = speed
         _twist.angular.z = self.radial_vel
         # keep publishing that speed until we have enough samples
         while len(self.errors) < self.num_samples:
             plt.cla()
             upper = min(len(self.errors), self.num_samples)
             # copy up to self.num_samples into dataframe
             observations["%.2f" % speed][0:upper] = self.errors[0:upper]
             # plot dataframe
             self.ax.set_xlabel('Linear Speed (m/s)',fontsize=16)
             self.ax.set_ylabel('Error (mm/s)',fontsize=16)
             self.ax.set_title('Currently driving at: %.2f m/s' % speed)
             violinplot(data=observations)
             set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})
             # Redraw the canvas, then publish the command and pause briefly.
             self.my_mpl.canvas.draw()
             self.pub.publish(_twist)
             sleep(0.01)
     spin()
Exemplo n.º 18
0
def plot():
    """Plot EUI by region as box plot, per-region histograms and violins.

    Reads ``csv/eui.csv`` from the current working directory (columns
    ``EUI`` and ``Region`` are used) and writes the following PNG files to
    ``plot2/`` under the working directory:

    * ``EUIbyRegion.png`` -- box plot of EUI grouped by region,
    * ``Region-<name>-EUIdistribution.png`` -- one distribution plot per
      region,
    * ``EUIbyRegionViolin.png`` -- violin plot of EUI by region.
    """
    # read eui table
    df = pd.read_csv(os.getcwd() + '/csv/eui.csv')
    logger.debug('finished reading file eui.csv')

    df.boxplot(column='EUI', by='Region')
    plt.ylabel('EUI')
    plt.xlabel('Region')
    plt.title('EUI by Region')
    P.savefig(os.getcwd() + '/plot2/EUIbyRegion.png')
    plt.close()

    import seaborn as sns
    grouped = df.groupby('Region')
    for name, group in grouped:
        sns.distplot(group['EUI'])
        plt.xlabel('EUI')
        plt.title('EUI Distribution')
        P.savefig(os.getcwd() + '/plot2/Region-' + str(name) + '-EUIdistribution.png')
        plt.close()

    # ``DataFrame.sort`` was removed from pandas; ``sort_values`` is its
    # replacement.
    df.sort_values(by='Region', inplace=True)
    sns.violinplot(x='Region', y='EUI', data=df)
    plt.ylabel('EUI')
    plt.xlabel('Region')
    plt.title('EUI by Region Violin Plot')
    P.savefig(os.getcwd() + '/plot2/EUIbyRegionViolin.png')
    plt.close()
Exemplo n.º 19
0
def violin_subplot(ax, df, p, ylab):
    """Draw a gender-split violin plot of column ``p`` per group on ``ax``.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes to draw on.
    df : pandas.DataFrame
        Needs the columns ``group``, ``gender`` and ``p``.
    p : str
        Name of the column plotted on the y-axis.
    ylab : str
        Label for the y-axis.
    """
    # ``axis=1`` is not a violinplot argument and has been dropped.  The
    # returned axes is the one that was actually drawn on.
    ax = sns.violinplot(x='group', y=p, hue='gender', data=df,
                        split=True, inner="quart", ax=ax)
    # Style the axes that was drawn on instead of whichever axes pyplot
    # currently considers active.
    plt.setp(ax.get_xticklabels(), rotation=10)
    ax.legend(loc=2)
    ax.set_xlabel('')
    ax.set_ylabel(ylab)
Exemplo n.º 20
0
def violinplot(data_pd, feature_names):
    """Draw one violin plot per feature, four to a figure.

    Each feature column of ``data_pd`` is plotted against the ``class``
    column.  A new 10x10 figure with a 2x2 grid is started for every fourth
    feature.
    """
    for position, feature in enumerate(feature_names):
        slot = position % 4
        if slot == 0:
            # First slot of a grid: open a new figure.
            plt.figure(figsize=(10, 10))
        plt.subplot(2, 2, slot + 1)
        sb.violinplot(x='class', y=feature, data=data_pd)
Exemplo n.º 21
0
    def CheckShannonIndex(self, labels=None, condition_dict=None, fig_title=None):
        """Compute the Shannon entropy of every sample and optionally plot it.

        The entropy of each row of ``self.abun_df`` is stored, together with
        the columns of ``self.meta_df``, in ``self.SDI`` (column ``'SDI'``).

        Parameters
        ----------
        labels : list of str, optional
            Columns of the metadata to group by.  Each is converted to a
            categorical column of ``self.SDI``; if ``fig_title`` is given, a
            violin plot and a box plot per label are saved as well.
        condition_dict :
            Not used.
        fig_title : str, optional
            Output file name including an extension, e.g. ``'sdi.png'``.
            Figures are saved as ``<stem>_all.violinplot.<ext>``,
            ``<stem>_all.boxplot.<ext>``, ``<stem>_<label>.violinplot.<ext>``
            and ``<stem>_<label>.boxplot.<ext>``.  Without it nothing is
            plotted.  (Previously, passing ``labels`` without ``fig_title``
            raised an error when building the file name.)
        """
        def ShannonIndex(numList):
            """Return the Shannon entropy (natural log) of the counts."""
            SU = sum(numList)
            SDI = 0.0
            for num in numList:
                freq = float(num) / SU
                # Zero frequencies contribute nothing to the sum.
                if freq > 0:
                    SDI = SDI - freq * np.log(freq)
            return SDI

        print('Making Shannon Diversity boxplot for all samples')

        # Calculate shannon entropy for each sample
        SDIs = pd.DataFrame(index=self.abun_df.index, columns=['SDI'])
        for sample in self.abun_df.index:
            SDIs.loc[sample, 'SDI'] = ShannonIndex(self.abun_df.loc[sample])
        # Add metadata labels to the df containing SDIs
        SDIs = pd.concat([SDIs, self.meta_df], axis=1)
        SDIs['SDI'] = SDIs['SDI'].astype('float64')
        self.SDI = SDIs

        # Split the requested file name into stem and extension.
        if fig_title:
            fig_ext = fig_title.rsplit('.', 1)[1]
            fig_title = fig_title.rsplit('.', 1)[0]

            # First plot the SDI of all samples: violin plot ...
            ax = sb.violinplot(x=SDIs['SDI'], inner=None, saturation=0.35)
            ax = sb.stripplot(x=SDIs['SDI'], jitter=True, size=5, linewidth=0.6)
            fig = ax.get_figure()
            fig.savefig(fig_title + '_all.violinplot.' + fig_ext)
            plt.close()
            # ... and box plot.
            ax = sb.boxplot(x=SDIs['SDI'])
            ax = sb.stripplot(x=SDIs['SDI'], jitter=True, size=5, linewidth=0.6)
            fig = ax.get_figure()
            fig.savefig(fig_title + '_all.boxplot.' + fig_ext)
            plt.close()

        if labels:
            print('Making boxplots separated by labels: ')
            for label in labels:
                print(label + '...')
                SDIs[label] = SDIs[label].astype('category')
                # Without a file name there is nowhere to save the figures.
                if not fig_title:
                    continue
                # Violin plot
                ax = sb.violinplot(x=label, y='SDI', data=SDIs, saturation=0.35, inner=None)
                ax = sb.stripplot(x=label, y='SDI', data=SDIs, jitter=True, size=5, linewidth=0.6)
                fig = ax.get_figure()
                fig.savefig(fig_title + '_' + label + '.violinplot.' + fig_ext)
                plt.close(fig)
                # Boxplot
                ax = sb.boxplot(x=label, y='SDI', data=SDIs, saturation=0.35)
                ax = sb.stripplot(x=label, y='SDI', data=SDIs, jitter=True, size=5, linewidth=0.6)
                fig = ax.get_figure()
                fig.savefig(fig_title + '_' + label + '.boxplot.' + fig_ext)
                plt.close()
Exemplo n.º 22
0
def main():
    """Produce a series of example plots for univariate data.

    Scatter plot, histograms, cumulative frequency, box plots, error bars
    and a violin plot are drawn one after the other.  All but the
    horizontal box plot (which is shown on screen) are handed to
    ``mystyle.printout`` together with an output file name.
    """
    # --- Univariate data: normally distributed random numbers ---
    samples = randn(500)
    n_samples = len(samples)

    # Font and style settings
    sns.set_context('poster')
    sns.set_style('ticks')

    # Scatter plot
    scatter(arange(n_samples), samples)
    xlim([0, n_samples])
    mystyle.printout('scatterPlot.png', xlabel='x', ylabel='y', title='Scatter')

    # Histogram, default binning
    hist(samples)
    mystyle.printout('histogram_plain.png', xlabel='Data Values', ylabel='Frequency', title='Histogram, default settings')

    # Histogram, 25 bins
    hist(samples, 25)
    mystyle.printout('histogram.png', xlabel='Data Values', ylabel='Frequency', title='Histogram, 25 bins')

    # Cumulative frequency
    bin_count = 20
    cumulative = stats.cumfreq(samples, bin_count)[0]
    plot(cumulative)
    mystyle.printout('CumulativeFrequencyFunction.png', xlabel='Data Values', ylabel='Cumulative Frequency')

    # Box plot: the box consists of the first, second (middle) and third
    # quartile
    boxplot(samples, sym='*')
    mystyle.printout('boxplot.png', xlabel='Values', title='Boxplot')

    # Same data, horizontal; shown on screen
    boxplot(samples, sym='*', vert=False)
    title('Boxplot, horizontal')
    xlabel('Values')
    show()

    # Error bars
    positions = arange(5)
    measurements = positions**2
    spread = positions/2
    errorbar(positions, measurements, yerr=spread, fmt='o', capsize=5, capthick=3)
    xlim([-0.2, 4.2])
    ylim([-0.2, 19])
    mystyle.printout('Errorbars.png', xlabel='Data Values', ylabel='Measurements', title='Errorbars')

    # Violin plot of two normal samples with different location and scale
    standard_normal = stats.norm
    girls = standard_normal.rvs(size=(100))

    shifted_normal = stats.norm(loc=3, scale=1.5)
    boys = shifted_normal.rvs(size=(100))

    # pandas and seaborn are used for the violin plot
    frame = pd.DataFrame({'Girls': girls, 'Boys': boys})
    sns.violinplot(frame)

    mystyle.printout('violinplot.png')
Exemplo n.º 23
0
    def sns_violinplot(x, y, hue, bw, scale, inner, split, orient, color, saturation): # pragma: no cover
        """Draw a seaborn violin plot from the selected widget values.

        ``data`` and ``kwargs`` come from the enclosing scope.
        NOTE(review): ``ut.widget2py`` presumably maps widget values to
        plain Python values -- confirm against its definition.
        """
        x, y, hue, inner, orient, color = ut.widget2py(x, y, hue, inner, orient, color)
        # Only the axes is needed; the helper returns three values.
        ax, _fig, _plt = ut.get_ax_fig_plt()

        sns.violinplot(
            data=data, x=x, y=y, hue=hue,
            order=None, hue_order=None,
            bw=bw, cut=2,
            scale=scale, scale_hue=True,
            gridsize=100, width=0.8,
            inner=inner, split=split, orient=orient,
            linewidth=None, color=color, palette=None,
            saturation=saturation,
            ax=ax,
            **kwargs
        )
Exemplo n.º 24
0
 def make_plot(self):
     """Draw ``self.plotdata`` as a violin plot on ``self.ax``.

     ``self.ax`` is made the current axes, seaborn draws the violins with
     the keyword arguments in ``self.snskwargs``, then
     ``self._configureaxes()`` is called and the figure is redrawn.
     """
     import seaborn as sns

     target_axes = self.ax
     plt.sca(target_axes)

     violin_input = self.plotdata
     violin_options = self.snskwargs
     sns.violinplot(violin_input, **violin_options)

     self._configureaxes()
     plt.draw()
Exemplo n.º 25
0
def twoviolins_nooutliers(df, dset='bcrp', model='logreg3', feats='ecfps1', pos_proportions_min=0.4,
                          pos_proportions_max=0.8):
    """Violin plot of ``auc`` against ``lso`` for one experiment.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``dset``, ``model``, ``feats``,
        ``pos_proportion``, ``auc`` and ``lso``.
    dset, model, feats : str
        Values selecting the rows of one experiment.
    pos_proportions_min, pos_proportions_max : float
        Only rows whose ``pos_proportion`` lies strictly between these two
        bounds are plotted; the rest are treated as too imbalanced.
    """
    # Filter out outliers or degenerate cases: "too imbalanced"
    df = df[(df.dset == dset) & (df.model == model) & (df.feats == feats)]
    balanced = df[(df['pos_proportion'] > pos_proportions_min) & (df['pos_proportion'] < pos_proportions_max)]
    plt.figure()
    # x and y must be passed by keyword: current seaborn accepts only
    # ``data`` positionally.
    sns.violinplot(x=balanced.auc, y=balanced.lso)
    plt.draw()
Exemplo n.º 26
0
def plot_quantile_returns_violin(return_by_q,
                                 ylim_percentiles=None,
                                 ax=None):
    """
    Draw period-wise returns per factor quantile as violin plots.

    Parameters
    ----------
    return_by_q : pd.DataFrame - MultiIndex
        DataFrame with date and quantile as rows MultiIndex,
        forward return windows as columns, returns as values.
    ylim_percentiles : tuple of integers
        Percentiles of observed data to use as y limits for plot.
    ax : matplotlib.Axes, optional
        Axes upon which to plot.

    Returns
    -------
    ax : matplotlib.Axes
        The axes that were plotted on.
    """
    returns = return_by_q.copy()

    # y limits in basis points, or None to let matplotlib choose.
    ymin = ymax = None
    if ylim_percentiles is not None:
        observed = returns.values
        ymin = np.nanpercentile(observed, ylim_percentiles[0]) * DECIMAL_TO_BPS
        ymax = np.nanpercentile(observed, ylim_percentiles[1]) * DECIMAL_TO_BPS

    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(18, 6))

    # Convert to basis points and reshape to long form: one row per
    # (index levels, forward period) with the value in column 'return'.
    returns_bps = returns.multiply(DECIMAL_TO_BPS)
    returns_bps.columns = returns_bps.columns.set_names('forward_periods')
    long_returns = returns_bps.stack()
    long_returns.name = 'return'
    long_returns = long_returns.reset_index()

    sns.violinplot(data=long_returns,
                   x='factor_quantile',
                   y='return',
                   hue='forward_periods',
                   orient='v',
                   cut=0,
                   inner='quartile',
                   ax=ax)
    ax.set(xlabel='',
           ylabel='Return (bps)',
           title="Period Wise Return By Factor Quantile",
           ylim=(ymin, ymax))

    # Zero-return reference line.
    ax.axhline(0.0, linestyle='-', color='black', lw=0.7, alpha=0.6)

    return ax
Exemplo n.º 27
0
def bar_box_violin_dot_plots(data, category_col, numeric_col, axes,
                             file_name=None):
    """Draw bar, strip, box and violin plots of one numeric column by category.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding both columns.
    category_col : str
        Column used for the x axis (the grouping variable).
    numeric_col : str
        Column used for the y axis.
    axes : indexable of matplotlib Axes
        Needs at least four entries: ``axes[0]`` gets the bar plot,
        ``axes[1]`` the strip plot, ``axes[2]`` the box plot and ``axes[3]``
        the violin plot.
    file_name : optional
        Accepted for backward compatibility; this function does not use it.
    """
    # x/y are passed by keyword: recent seaborn releases treat the first
    # positional argument as ``data``, so the positional form breaks there.
    sns.barplot(x=category_col, y=numeric_col, data=data, ax=axes[0])
    # The box plot is drawn from the rows where the numeric value is present.
    sns.boxplot(x=category_col, y=numeric_col,
                data=data[data[numeric_col].notnull()], ax=axes[2])
    # ``kind='violin'`` was dropped: it is an option of the figure-level
    # wrappers, not of violinplot itself.
    sns.violinplot(x=category_col, y=numeric_col, data=data,
                   inner="quartile", scale='count', split=True,
                   ax=axes[3])
    sns.stripplot(x=category_col, y=numeric_col, data=data, jitter=True,
                  ax=axes[1])
    sns.despine(left=True)
def FacetGrid():
    """Show a 2x2 dashboard of movie plots in a window titled 'Facet Grid'.

    ``Kde(0, axes)`` and ``Kde(1, axes)`` are called first (``Kde`` is
    defined elsewhere; presumably it fills the top row). The bottom-left
    panel is a violin plot of ``BudgetMillions`` per ``Year`` and the
    bottom-right panel overlays two KDE plots of critic vs. audience rating.
    Reads the module-level ``movies`` table.
    """
    sns.set_style("dark", {"axes.facecolor": "black"})
    _fig, axes = plt.subplots(2, 2, figsize=(12, 8))

    for panel in range(0, 2):
        Kde(panel, axes)

    sns.violinplot(data=movies,
                   x='Year',
                   y='BudgetMillions',
                   ax=axes[1, 0],
                   palette="YlOrRd")

    # Filled density first, then the contour lines on the same axes.
    sns.kdeplot(movies.CriticRating,
                movies.AudienceRating,
                shade=True,
                shade_lowest=False,
                cmap='Blues_r',
                ax=axes[1, 1])
    sns.kdeplot(movies.CriticRating,
                movies.AudienceRating,
                cmap='gist_gray_r',
                ax=axes[1, 1])

    plt.gcf().canvas.set_window_title('Facet Grid')
    plt.show()
            aspect=2)

# In[36]:

# Count plot: number of rides per cab type, split by product name.
# The data vector is passed as ``x=`` because newer seaborn releases no
# longer accept it positionally.
sns.countplot(x=df_merged['cab_type'], hue=df_merged['name'], palette='plasma')

# In[37]:

# Price distribution per pickup location.
sns.boxplot(data=df_merged, x='source', y='price', palette='Blues')

# In[38]:

# Price distribution per destination; every violin is drawn equally wide.
sns.violinplot(data=df_merged,
               x='destination',
               y='price',
               palette="Set3",
               scale="width")

# In[39]:

# Joint plot of price against rain (hexagonal bins); x/y given by keyword
# for the same reason as above.
sns.jointplot(x="price", y="rain", data=df_merged, kind="hex", bins=15)

# In[40]:

# One price line per product name, indexed by date.
df_merged.set_index('date').groupby('name')['price'].plot(legend=True)

# In[41]:

sns.distplot(df_merged.price,
Exemplo n.º 30
0
    new_df = new_df[new_df.Signal != 0]
    new_df['lognorm'] = np.log(new_df['Signal'])
    df_list.append(new_df)
    
# Combine the per-sample frames collected in df_list into one long table.
df = pd.concat(df_list)
a4_dims = (15.7, 8.27)
fig, ax = plt.subplots(figsize=a4_dims)

# Group table; its 'Color' column supplies the plot palette below.
groups = pd.read_csv('inputs/Groups.csv')

# Force a fixed colour for the rows whose group is 'Blank'.
groups.loc[groups['Group'] == 'Blank', 'Color'] = '#FF35E7'




# Horizontal violins of the log-transformed signal, one per sample.
# NOTE(review): the palette is taken in the row order of Groups.csv —
# presumably that matches the sample order in df; verify.
ax = sns.violinplot(x="lognorm", y="Sample", data=df,scale="count",inner='box',palette=groups['Color'].tolist())
ax.set_title('Violin Plot - lognorm')
ax.set_ylabel('Sample')
ax.set_xlabel('Normalized Intensity')
plt.savefig(results_folder+'QC/plot.distribution.png',dpi=400)


# Column sums of full_matrix, with the index copied into a second column;
# both columns are then renamed for plotting.
sum_intensity = pd.DataFrame(full_matrix.sum())
sum_intensity['sample'] = sum_intensity.index
sum_intensity.columns = ['Sum Signal','Sample']

a4_dims = (11.7, 8.27)
fig, ax = plt.subplots(figsize=a4_dims)
# Horizontal bar chart of the summed signal per sample.
ax = sns.barplot(x="Sum Signal", y="Sample", data=sum_intensity,palette=groups['Color'].tolist())
ax.set_title('Sample Sum Intensities')
ax.set_ylabel('Sample')
Exemplo n.º 31
0
#    attr = df[i]
#    sns.distplot(attr)

#for column in df:
#    print(column)
#    columnSeriesObj = df[column]
#    print(columnSeriesObj)
x.plot.hist(bins=4)  # histogram of x with 4 bins
print("test")
Attr5 = df['Attr 5']
Attr5.plot.hist(bins=5)
    
# Horizontal violin plot of the "Attributes" column.
# NOTE(review): sns.load_dataset looks the name up among seaborn's example
# datasets — confirm that a dataset called "Frequency" is actually available.
sns.set(style="whitegrid")
tips = sns.load_dataset("Frequency")
ax = sns.violinplot(x=tips["Attributes"])

# Scatter matrix of every pair of columns in df.
pd.plotting.scatter_matrix(df, alpha=0.5, figsize=(15, 15))
plt.show()

# Covariance table.
# NOTE(review): df is rebound to 1000x9 random normal values here, so the
# covariance and correlation below describe synthetic noise, not the data
# plotted above — confirm this is intended.
np.random.seed(42)
df=pd.DataFrame(np.random.randn(1000,9),columns=['Attr 4','Attr 5', 'Attr 6','Attr 7', 'Attr 8', 'Attr 9', 'Attr 10', 'Attr 11','Attr 12'])
df.cov()

# Correlation table; its values are kept in data1 for the heat map.
df_corr = df.corr()
print(df_corr.head())
data1 = df_corr.values
fig1 = plt.figure()
Exemplo n.º 32
0
# Restrict the data to the years after 2012.
all_data13=all_data[all_data['year']>2012]
palette=sns.cubehelix_palette(5, start=2, rot=0, dark=0, light=.95, reverse=False)
# Pair plot for RDSB.L only, coloured by year, without the scaled share price.
# NOTE(review): `size=` was renamed to `height=` in seaborn 0.9 — confirm the
# seaborn version this script targets.
sns.pairplot(all_data13[all_data13['name']=='RDSB.L'].drop(['share_price_scaled'],axis=1),
             hue='year',palette=palette,size=4,markers='o',
             plot_kws=dict(s=50, edgecolor='b', linewidth=0))

#==============================================================================
# Violin plot: oil price per year (RDSB.L rows)
#==============================================================================

sns.set_style('whitegrid')
palette=sns.cubehelix_palette(5, start=2.8, rot=0, dark=0.2, light=0.8, reverse=False)

# NOTE(review): `trim` is not a documented violinplot parameter (`cut` is the
# documented way to limit the density range) — verify it has any effect.
sns.violinplot(x='year', y='oil_price', data=all_data13[all_data13['name']=='RDSB.L'],
               inner='quart', palette=palette, trim=True)

#==============================================================================
# Violin plots: scaled share price per year, one panel per company
#==============================================================================

# NOTE(review): `factorplot` was renamed to `catplot` in seaborn 0.9 — confirm
# the seaborn version this script targets.
sns.factorplot(x='year', y='share_price_scaled', col='name', col_wrap=3,kind='violin',
               split=True, data=all_data13,inner='quart', palette=palette, trim=True,size=4,aspect=1.2)
sns.despine(left=True)

#==============================================================================
# Joint plot of oil price vs. share price for Premier Oil (PMO.L)
#==============================================================================

# KDE joint plot, with a second kdeplot layer drawn underneath (zorder=0).
sns.jointplot('oil_price', 'share_price',data=all_data13[all_data13['name']=='PMO.L'],kind='kde',
              hue='year',size=6,ratio=2,color='red').plot_joint(sns.kdeplot, zorder=0, n_levels=20)
Exemplo n.º 33
0
# The other part of the plot, the "whiskers", shows the extent of the points beyond the center of the distribution. Individual circles beyond *that* are outliers.
# 
# This boxplot shows us that although all five wines receive broadly similar ratings, Bordeaux-style wines tend to be rated a little higher than a Chardonnay.
# 
# Boxplots are great for summarizing the shape of many datasets. They also don't have a limit in terms of numeracy: you can place as many boxes in the plot as you feel comfortable squeezing onto the page.
# 
# However, they only work for interval variables and nominal variables with a large number of possible values; they assume your data is roughly normally distributed (otherwise their design doesn't make much sense); and they don't carry any information about individual values, only treating the distribution as a whole.
# 
# I find the slightly more advanced `violinplot` to be more visually enticing, in most cases:

# In[ ]:


# Violin plot of points per variety, restricted to the five varieties with
# the most reviews (the first five entries of value_counts()).
sns.violinplot(
    x='variety',
    y='points',
    data=reviews[reviews.variety.isin(reviews.variety.value_counts()[:5].index)]
)

# A `violinplot` cleverly replaces the box in the boxplot with a kernel density estimate for the data. It shows basically the same data, but is harder to misinterpret and much prettier than the utilitarian boxplot.

# ## Why seaborn?
# 
# Having now seen both `pandas` plotting and the `seaborn` library in action, we are now in a position to compare the two and decide when to use which for what.
# 
# Recall the data we've been working with in this tutorial is in:

# In[ ]:


reviews.head()
Exemplo n.º 34
0
import matplotlib.pyplot as plt
import seaborn as sns
# Read one integer per line from the input file.
with open('./data/met/diff_met.bed', 'r') as f:
    data = [int(line) for line in f]

# Draw a single violin of the values and save the figure next to the input.
fig = plt.figure()
sns.set(style="whitegrid")
ax = sns.violinplot(x=data)
fig.savefig('./data/met/met_windows.png')
Exemplo n.º 35
0
# Finish the current figure: size, axis cosmetics and title.
f = plt.gcf()
f.set_size_inches(wd, ht)
sns.despine()
plt.ylim(0, 1.1)
plt.yticks([0, .5, 1])
plt.xticks(rotation=45)
plt.title(
    'Pillow model does better longer time\nPillow has slow derivative only\nSTM has 3 derivatives'
)
plt.tight_layout()
# ==========================
# Split violins of the Pearson correlation per kernel set, one half per model.
f = plt.figure(figsize=(wd, ht))
# `legend_out` is a FacetGrid option rather than a violinplot one, so it was
# dropped from this call.
sns.violinplot(data=df_melt,
               x='kernels',
               y='Pearson Correlation',
               hue='model',
               split=True,
               inner='quartile',
               palette='Set2')
sns.despine()
# Horizontal grid lines only; the switch is passed as a boolean.
plt.grid(True, axis='y')
plt.yticks([0, .5, 1])
plt.xticks(rotation=45)
plt.tight_layout()
# ======================
# plot pct_diff (how much better the pillow is)
wd = figsize[0] / 1.5
ht = figsize[0] / 1.5
# Attach the 'full' column of df_pillow_drops by matching id, kernels and
# stim_responsive.
df_merged = df_merged.merge(
    df_pillow_drops[['full', 'id', 'kernels', 'stim_responsive']],
    on=['id', 'kernels', 'stim_responsive'])
Exemplo n.º 36
0

output = "/neurospin/brainomics/2016_schizConnect/2018_analysis_2ndpart_clinic/\
results/clustering/nudast_only_clustering/correction_age_sex_site/3_clusters_solution"


# One-way ANOVA of the "vocabsca" score across the three clusters, plus a
# violin plot of the score per cluster saved into `output`.
df = pd.DataFrame()
# The builtin float replaces the np.float alias, which newer NumPy removed.
score = df_scores["vocabsca"].astype(float).values
# Keep only the subjects whose score is not NaN (labels and scores alike).
df["labels"] = labels_cluster[~np.isnan(score)]
LABELS_DICT = {0: "cluster 1", 1: "cluster 2", 2: "cluster 3"}
df["labels_name"] = df["labels"].map(LABELS_DICT)
df["vocabsca"] = score[~np.isnan(score)]
T, p = scipy.stats.f_oneway(df[df["labels"] == 0]["vocabsca"],
                            df[df["labels"] == 1]["vocabsca"],
                            df[df["labels"] == 2]["vocabsca"])
ax = sns.violinplot(x="labels_name", y="vocabsca", data=df,
                    order=["cluster 1", "cluster 2", "cluster 3"])
plt.title("ANOVA: t = %s, and  p= %s"%(T,p))
plt.savefig(os.path.join(output,"vocabsca.png"))




df = pd.DataFrame()
score = df_scores["cvlfps"].astype(np.float).values
df["labels"]=labels_cluster[np.array(np.isnan(score)==False)]
df["labels"]=labels_cluster[np.array(np.isnan(score)==False)]
LABELS_DICT = {0: "cluster 1", 1: "cluster 2", 2: "cluster 3"}
df["labels_name"]  = df["labels"].map(LABELS_DICT)
df["cvlfps"] =  score[np.array(np.isnan(score)==False)]
T, p = scipy.stats.f_oneway(df[df["labels"]==0]["cvlfps"],\
                     df[df["labels"]==1]["cvlfps"],\
    listkey.append(key)
    listval.append(val)
    # print key, val, u' ',

# Bar chart of the collected values (listval), indexed by their keys.
df = pd.DataFrame(listval, columns=[u'次数'])
df.index = listkey
df.plot(kind='bar')
plt.title(u'词频统计')
plt.show()

# Number of whitespace-separated words in each text.
dataset["num_words"] = dataset["text"].apply(lambda x: len(str(x).split()))
# Cap the counts at 1000 for better visuals. A single .loc call on the frame
# is used so the assignment reliably reaches `dataset` instead of a
# temporary copy of the column.
dataset.loc[dataset['num_words'] > 1000, 'num_words'] = 1000
plt.figure(figsize=(12, 8))
sns.violinplot(x='first_class', y='num_words', data=dataset)
plt.xlabel('First Class', fontsize=12)
plt.ylabel('Number of words in text', fontsize=12)
plt.title("Number of words in First Class", fontsize=15)
plt.show()

# Global length distribution.
plt.figure(figsize=(12, 8))
# `density=True` replaces the `normed` keyword, which matplotlib removed.
plt.hist(dataset["num_words"],
         bins=200,
         range=[10, 1000],
         color=pal[1],
         density=True,
         label='train')
plt.title('Normalised histogram of words count in text', fontsize=15)
plt.legend()
#
# ### But we can see from the table that winning a round is equally distributed on the maps Overpass and Train.
# ### Interesting...!

# Rounds won per side, split by whether the bomb was planted.
plt.figure(figsize=(10, 12))
sns.countplot(x='winner_side',
              hue='is_bomb_planted',
              data=df,
              palette='RdBu_r')

# After planting the bomb there is more probability of CT side to win the round.

# Rounds won per side, split by round type.
plt.figure(figsize=(10, 12))
sns.countplot(x='winner_side', hue='round_type', data=df, palette='coolwarm')

# Number of rounds per map, split by round type.
plt.figure(figsize=(10, 12))
sns.countplot(x='map', hue='round_type', data=df)

# Force buy and eco work well in Mirage..and works worst in inferno or overpass depending upon the frequency of matches you play.

# Rounds won per side, split by round number.
plt.figure(figsize=(10, 12))
sns.countplot(x='winner_side', hue='round', data=df)

# Average match rank per map, first as box plots and then as violins.
plt.figure(figsize=(8, 10))
sns.boxplot(x='map', y='avg_match_rank', data=df, palette='magma')

plt.figure(figsize=(10, 12))
sns.violinplot(x='map', y='avg_match_rank', data=df, palette='coolwarm')

# Regression of average match rank on round number, one fit per map.
sns.lmplot(x='round', y='avg_match_rank', data=df, hue='map')
#
# 1. Start by creating a variable `ax` and setting it equal to `sns.violinplot()`. This will instantiate a figure and give us access to the axes through the variable name `ax`.
# 2. Use `sns.violinplot()` and pass in the following arguments:
# + The `Quarter` column as the `x` values
# + The `Price` column as your `y` values
# + The `netflix_stocks_quarterly` dataframe as your `data`
# 3. Improve the readability of the chart by adding a title of the plot. Add `"Distribution of 2017 Netflix Stock Prices by Quarter"` by using `ax.set_title()`
# 4. Change your `ylabel` to "Closing Stock Price"
# 5. Change your `xlabel` to "Business Quarters in 2017"
# 6. Be sure to show your plot!
#

# In[105]:

# Violin plot of the Netflix stock price for each quarter.
from matplotlib.ticker import StrMethodFormatter

plt.figure(figsize=(10, 7))
ax = sns.violinplot(data=netflix_stocks_quarterly, x='Quarter', y='Price')
ax.set_title('Netflix \'17 Stock Price Distribution',
             color='#400090',
             fontsize=25,
             fontweight='bold',
             pad=20)
ax.set_xlabel('Quarters')
ax.set_ylabel('Stock Price')
# Format the y ticks as whole dollars with thousands separators. A formatter
# is used instead of fixed tick labels so the text always matches the tick
# positions, even if matplotlib recomputes them before saving.
ax.yaxis.set_major_formatter(StrMethodFormatter('${x:,.0f}'))

plt.savefig('Netflix 2017 Stock Price Dist by Quarter.png')

# ## Graph Literacy
# - What are your first impressions looking at the visualized data?
#
Exemplo n.º 40
0
axes[1].set(xlabel='')
axes[0].set(xlabel='')
axes[0].set_title('Indoor Water Use')

# #### Shower events duration compared to RWEUS2016 Study
# + Residential Water End Use Study (RWEUS2016) URL: https://www.circleofblue.org/wp-content/uploads/2016/04/WRF_REU2016.pdf
# + Using Violinplot with Seaborn and Matplotlib
# + A violin plot is a combination of box and kernel density plots
# + The width of the violin represents the probability; skinnier sections represent a lower probability
# + Add a horizontal line that represents the average shower duration from the RWEUS2016 study

# In[8]:

# Keep only the events labelled "shower".
ShowerEvents = Events[Events.Label == "shower"]
ax = sns.violinplot(x="Label",
                    y="Duration(min)",
                    data=ShowerEvents,
                    palette="colorblind")
sns.despine(right=True)
ax.set(xlabel='', ylabel='Duration(min)')
# Reference line at 8 minutes (the study average mentioned above).
ax.axhline(y=8, c='red')

# #### Daily and hourly water use

# In[9]:

# Aggregate pulses by hour of day and calculate the average number of pulses for each hour
Use_Hour = RawData.groupby(RawData.index.hour).mean()
Use_Hour.Pulses = Use_Hour.Pulses * 0.041619 * 15 * 60  # where 0.041619 is the meter resolution, 15 is the number of 4-second intervals in one minute (60/4)
# and 60 is the number of minutes in an hour

# In[10]:
Exemplo n.º 41
0
def plot_pm(report):
    """Show a violin plot of the ``pm2.5`` column of *report*."""
    pm_values = report["pm2.5"]
    sns.violinplot(x=pm_values)
    plt.show()
def E1_vs_insulation_scatterplor(E1, E1_resolution, k, averaged,
                                 boundary_index, domains):
    """Compare the E1 change across TAD boundaries with a background level.

    The observed value is ``|averaged[:, 0] - averaged[:, 1]|``. The expected
    value is computed from sliding windows of ``2 * k + 1`` consecutive E1
    bins that do not overlap a TAD boundary: the mean of the bins before
    ``boundary_index`` minus the mean of the bins after it, in absolute
    value. Both distributions are compared with a two-sided Mann-Whitney U
    test, summarised in ``<figure>.stats.txt`` and drawn as a split violin.

    Relies on the module-level names ``domains_file`` (used to build the
    output path), ``redraw_figs`` and ``shortname``. Nothing is done when the
    figure already exists and ``redraw_figs`` is false.

    Parameters
    ----------
    E1 : pandas.DataFrame
        Needs the columns ``chr``, ``start``, ``end`` and ``E1``. A
        ``contains_TAD_boundary`` column is added to the object passed in.
    E1_resolution : number
        Bin size; a boundary is widened by ``k * E1_resolution`` per side.
    k : int
        Number of bins taken on each side of the window centre.
    averaged : numpy.ndarray
        2-D array; columns 0 and 1 hold the two values that are compared at
        each TAD boundary.
    boundary_index : int
        Position inside a window that separates the two sides.
    domains : pandas.DataFrame
        Needs the columns ``chr`` and ``start``.
    """

    figure_path = "results/" + os.path.basename(
        domains_file) + ".Insulation_violinplot.png"
    if os.path.isfile(figure_path) and not redraw_figs:
        return

    # first compute genome-wide average of E1 differences
    def doesE1overlapDomain(e1, boundaries):
        # True when the bin overlaps any boundary interval on its chromosome.
        overlap = boundaries.loc[e1.chr].index.overlaps(
            pd.Interval(e1.start, e1.end, closed="both"))
        return np.any(overlap)

    # remove E1 overlapping TAD boundaries
    TADboundaries = pd.DataFrame({"chr": domains.chr, "vals": domains.chr})
    TADboundaries["intervals"] = pd.arrays.IntervalArray.from_arrays(
        domains.start - k * E1_resolution,
        domains.start + k * E1_resolution,
        closed="both")
    TADboundaries.index = pd.MultiIndex.from_frame(
        TADboundaries[["chr", "intervals"]])
    E1["contains_TAD_boundary"] = E1.apply(doesE1overlapDomain,
                                           boundaries=TADboundaries,
                                           axis="columns")
    print(sum(E1["contains_TAD_boundary"].values), " out of ",
          len(E1["contains_TAD_boundary"]),
          "E1 bins are located near TAD boundary")

    E1 = pd.DataFrame(E1)  # copy E1 dataframe
    # Column j of `temp` is E1 shifted by 2k - j bins, so every row is a
    # window of 2k + 1 consecutive values.
    temp = [
        E1["E1"].shift(periods=i).values for i in np.arange(0, 2 * k + 1)[::-1]
    ]
    temp = np.vstack(temp).T
    # Drop windows with missing values and rows flagged as near a boundary.
    temp = temp[np.logical_and(~np.isnan(temp).any(axis=1),
                               ~E1["contains_TAD_boundary"].values)]
    print("After filtering, ", len(temp),
          " bins left to compute expected E1 diff")
    expected_E1_average = np.vstack((np.average(temp[:, :boundary_index],
                                                axis=1),
                                     np.average(temp[:, boundary_index + 1:],
                                                axis=1))).T
    expected_E1_diff = np.abs(
        np.subtract(expected_E1_average[:, 0], expected_E1_average[:, 1]))

    E1diff = np.abs(np.subtract(averaged[:, 0], averaged[:, 1]))
    from scipy.stats import mannwhitneyu
    with open(figure_path + ".stats.txt", "w") as fout:
        fout.write("Obseved average: " + str(np.average(E1diff)) + "\n")
        # The observed average used to be written a second time here; the
        # expected average is reported instead so both sides of the test
        # appear in the file.
        fout.write("Expected average: " + str(np.average(expected_E1_diff)) +
                   "\n")
        statistic, pval = mannwhitneyu(E1diff,
                                       expected_E1_diff,
                                       alternative="two-sided")
        fout.write("mannwhitneyu 2-sided test: " + str(pval) + "\n")
        print("mannwhitneyu 2-sided test: " + str(pval))
    print("--Drowing violinplot")
    # Long-form table: one row per value, labelled by distribution; the
    # constant "x" column puts both halves on the same violin.
    plot_data = {
        "label": ["Expected cePC1 diff"] * len(expected_E1_diff) +
        ["TAD boundaries cePC1 diff"] * len(E1diff),
        "|cePC1_left-cePC1_right|":
        expected_E1_diff.tolist() + E1diff.tolist(),
        "x": [shortname] * (len(expected_E1_diff) + len(E1diff))
    }
    plot_data = pd.DataFrame(plot_data)
    fig, ax = plt.subplots(figsize=(4, 8))
    vp = sns.violinplot(ax=ax,
                        x="x",
                        y="|cePC1_left-cePC1_right|",
                        hue="label",
                        data=plot_data,
                        split=True,
                        inner="quartile")
    vp.legend_.remove()
    vp.set_xlabel("")
    plt.savefig(figure_path, dpi=300)
    plt.clf()
    return
    # Uncomment following to draw scatterplot
    """
def EDA(df,
        labels,
        target_variable_name,
        data_summary_figsize=(12, 12),
        corr_matrix_figsize=(12, 12),
        data_summary_figcol="Reds_r",
        corr_matrix_figcol='Blues',
        corr_matrix_annot=False,
        pairplt_col='all',
        pairplt=False,
        feature_division_figsize=(12, 12)):
    """Run an exploratory data analysis of ``df`` and save the figures.

    PNG files are written to ``../figures/``: a missing-value map, summary
    statistics, the correlation matrix, box plots, one distribution/violin
    figure per numeric column, an optional pair plot, the target proportions
    and the proportions of each categorical column.

    ``df`` is modified in place: string class labels in the target column
    are replaced by integer codes, and the columns that ``nullFind`` reports
    as null in more than half of the rows are dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to analyse; must contain ``target_variable_name``.
    labels : sequence
        Wedge labels for the target-variable pie chart.
    target_variable_name : str
        Name of the target column.
    data_summary_figsize, corr_matrix_figsize, feature_division_figsize : tuple
        Figure sizes of the summary heat map, the correlation heat map and
        the count/pie figures.
    data_summary_figcol, corr_matrix_figcol : str
        Colormaps of the two heat maps.
    corr_matrix_annot : bool
        Whether to annotate the correlation heat map.
    pairplt_col : 'all' or list
        Columns shown in the pair plot; ``'all'`` uses every numeric column.
    pairplt : bool
        Whether to draw the pair plot.

    Returns
    -------
    tuple
        ``(df_wo_null, numeric_column_names, object_column_names)``.
    """
    out_folder = '../figures/'
    start_time = timeit.default_timer()

    # Replace string class labels by integer codes (order of appearance).
    if df[target_variable_name].dtype == 'object':
        class_labels = [
            x for x in df[target_variable_name].unique().tolist()
            if isinstance(x, str) and x != 'nan'
        ]

        for i, class_label in enumerate(class_labels):
            # One .loc call so the write reaches df itself, not a copy.
            df.loc[df[target_variable_name] == class_label,
                   target_variable_name] = i

    df_orig = df

    # Missing values
    ax1 = sns.heatmap(pd.isnull(df_orig),
                      cmap=sns.diverging_palette(240, 0, as_cmap=True))
    plt.title("Missing Values Summary", fontsize=(15), color="blue")
    fig1 = ax1.get_figure()
    fig1.savefig(f"{out_folder}Missing_Values_Summary.png")

    # Descriptive statistics (every describe() row except the count)
    fig2 = plt.figure(figsize=data_summary_figsize)
    sns.heatmap(df_orig.describe()[1:].transpose(),
                annot=True,
                fmt=".1f",
                linecolor="black",
                linewidths=0.3,
                cmap=data_summary_figcol)
    plt.title("Data Summary", fontsize=(15), color="blue")
    fig2.savefig(f"{out_folder}Summary_Statistics.png")

    # Columns with more nulls than this fraction of the rows are dropped.
    null_cutoff = 0.5

    numerical = numericalCategoricalSplit(df_orig)[0]
    categorical = numericalCategoricalSplit(df_orig)[1]
    null_numerical = nullFind(numerical)[0]
    null_categorical = nullFind(categorical)[1]
    null = pd.concat([null_numerical, null_categorical])
    null_df = pd.DataFrame({
        'Null_in_Data': null
    }).sort_values(by=['Null_in_Data'], ascending=False)
    null_df_many = (null_df.loc[(null_df.Null_in_Data >
                                 null_cutoff * len(df_orig))])
    null_df_few = (
        null_df.loc[(null_df.Null_in_Data != 0)
                    & (null_df.Null_in_Data < null_cutoff * len(df_orig))])

    many_null_col_list = null_df_many.index
    few_null_col_list = null_df_few.index

    # remove many null columns
    df_orig.drop(many_null_col_list, axis=1, inplace=True)

    df_wo_null = (removeNullRows(df_orig, few_null_col_list))

    if df_wo_null[target_variable_name].dtype == 'object':
        df_wo_null[target_variable_name] = df_wo_null[
            target_variable_name].astype(str).astype(int)

    df = df_wo_null[df_wo_null.select_dtypes(exclude=['object']).columns]

    # Correlation matrix; the upper triangle (with the diagonal) is masked.
    corr = df.corr()
    mask = np.zeros_like(corr, dtype=bool)
    mask[np.triu_indices_from(mask)] = True

    fig3 = plt.figure(figsize=corr_matrix_figsize)
    sns.heatmap(corr,
                mask=mask,
                cmap=corr_matrix_figcol,
                annot=corr_matrix_annot)
    plt.tight_layout()
    fig3.savefig(f"{out_folder}Correlation_Matrix.png")

    col = df.columns.values
    number_of_columns = len(col)
    # Only used as the figure height. Operator precedence makes this
    # len(col) - 1/len(col), i.e. slightly less than len(col); left unchanged
    # so the figure keeps its size.
    number_of_rows = len(col) - 1 / number_of_columns

    # Outliers: one horizontal box plot per numeric column, two per row.
    fig4 = plt.figure(figsize=(number_of_columns, number_of_rows))

    # Integer row count; (n + 1) // 2 covers both even and odd n.
    subplot_rows = (number_of_columns + 1) // 2
    for i, column in enumerate(col):
        plt.subplot(subplot_rows, 2, i + 1)
        sns.set_style('whitegrid')
        sns.boxplot(x=df[column], color='green', orient='h')
        plt.tight_layout()
    fig4.savefig(f"{out_folder}Outliers.png")

    # Distribution and skewness, plus a violin split by target
    for column in col:
        fig, axis = plt.subplots(1, 2, figsize=(16, 5))
        sns.distplot(df_orig[column], kde=True, ax=axis[0])
        axis[0].axvline(df_orig[column].mean(),
                        color="k",
                        linestyle="dashed",
                        label="MEAN")
        axis[0].legend(loc="upper right")
        axis[0].set_title('distribution of {}. Skewness = {:.4f}'.format(
            column, df_orig[column].skew()))

        sns.violinplot(x=target_variable_name,
                       y=column,
                       data=df_orig,
                       ax=axis[1],
                       inner='quartile')
        axis[1].set_title('violin of {}, split by target'.format(column))
        fig.savefig(f"{out_folder}Distribution Skewness of {column}.png")

    # Pair plot. pairplot returns a grid object rather than an Axes, so the
    # grid's own savefig is used for both column selections.
    if pairplt:
        use_all = isinstance(pairplt_col, str) and pairplt_col == 'all'
        pair_vars = df.columns.values if use_all else pairplt_col
        pair_grid = sns.pairplot(data=df,
                                 vars=pair_vars,
                                 hue=target_variable_name)
        pair_grid.savefig(f"{out_folder}Pair plot.png")

    # Proportion of target variable in dataset
    st = df[target_variable_name].value_counts().sort_index()

    fig5 = plt.figure(figsize=feature_division_figsize)
    plt.subplot(121)
    ax = sns.countplot(y=df_orig[target_variable_name],
                       linewidth=1,
                       edgecolor="k" * 2)
    for i, j in enumerate(st):
        ax.text(.7, i, j, weight="bold", fontsize=27)
    plt.title("Count for target variable in datset")

    plt.subplot(122)
    plt.pie(st,
            labels=labels,
            autopct="%.2f%%",
            wedgeprops={
                "linewidth": 2,
                "edgecolor": "white"
            })
    my_circ = plt.Circle((0, 0), .7, color="white")
    plt.gca().add_artist(my_circ)
    plt.subplots_adjust(wspace=.2)
    plt.title("Proportion of target variable in dataset")
    fig5.savefig(f"{out_folder}Outcome Variable.png")

    numerical_columns = df_wo_null.select_dtypes(
        exclude=['object']).columns.tolist()
    categorical_columns = df_wo_null.select_dtypes(
        include=['object']).columns.tolist()

    # Proportion of categorical variables in dataset
    for cat_feat in categorical_columns:
        cat_values = df_wo_null[cat_feat]
        ct = cat_values.value_counts().sort_values(ascending=False)
        print('\nThe categorical variable is divided into: \n',
              ct)  # how many rows belong to each category

        if (ct.index.size) < 50:
            fig_cat = plt.figure(figsize=feature_division_figsize)
            plt.subplot(121)
            # Bars are ordered like `ct`, so the count written next to each
            # bar belongs to that bar.
            ax = sns.countplot(y=cat_values,
                               order=ct.index,
                               linewidth=1,
                               edgecolor="k" * 2)
            for i, j in enumerate(ct):
                ax.text(.7, i, j, weight="bold", fontsize=27)
            plt.title("Count for categorical variable in datset")

            plt.subplot(122)
            # Labels come from `ct` itself so each wedge is named after the
            # category whose count it shows.
            plt.pie(ct,
                    labels=ct.index.tolist(),
                    autopct="%.2f%%",
                    wedgeprops={
                        "linewidth": 2,
                        "edgecolor": "white"
                    })
            my_circ = plt.Circle((0, 0), .7, color="white")
            plt.gca().add_artist(my_circ)
            plt.subplots_adjust(wspace=.2)
            plt.title("Proportion of categorical variable in dataset")
            # NOTE(review): every categorical column is saved under the same
            # file name, so only the last one survives — confirm whether the
            # column name should be part of the file name.
            fig_cat.savefig(f"{out_folder}Categorical Variable.png")
        else:
            print(
                '\nThe categorical variable %s has too many divisions to plot \n'
                % cat_feat)
    elapsed = timeit.default_timer() - start_time
    print('\nExecution Time for EDA: %.2f minutes' % (elapsed / 60))

    return df_wo_null, numerical_columns, categorical_columns
Exemplo n.º 44
0
import seaborn as sns
import matplotlib.pyplot as plt

# Apply seaborn's default theme, then load the "tips" example dataset.
sns.set()
df = sns.load_dataset("tips")

# Total bill per day, one violin half per sex.
sns.violinplot(
    data=df,
    x="day",
    y="total_bill",
    hue="sex",
    split=True,
)
plt.show()
Exemplo n.º 45
0
plt.subplot(131)
# Scatter plot for every pair of distinct feature columns (columns 1..5 of
# df), coloured by species; each pair is shown on its own.
for a in range(5):
    for b in range(5):
        if a+1<b+1:
            scatter_plot_by_category('species',df.columns[a+1],df.columns[b+1])
            plt.xlabel(df.columns[a+1])
            plt.ylabel(df.columns[b+1])
            plt.title('species')
            plt.show()

plt.figure(figsize=(20, 10)) # use seaborn to plot each feature for the different species
for column_index, column in enumerate(df.columns):
    if column == 'species':
        continue
    plt.subplot(3,2, column_index + 1)
    sb.violinplot(x='species',y=column,data=df)
plt.show()

# First split the data into a training set and a test set
from sklearn.model_selection import train_test_split # sklearn helper that splits data into training and test sets
all_inputs = df[['alcohol', 'malic_acid', 'ash', 'alcalinity ash', 'magnesium']].values
all_species = df['species'].values

(X_train,
 X_test,
 Y_train,
 Y_test) = train_test_split(all_inputs, all_species, train_size=0.7, random_state=1)# 70% of the data is used as the training set



Exemplo n.º 46
0
            depth2 = len(eles[6].split(','))
            eles[15] = log(float(eles[15]))
            eles[13] = log(float(eles[13]))
            eles[14] = log(float(eles[14]))
            eles[18] = log(float(eles[18]))
            eles[16] = log(float(eles[16]))
            eles[17] = log(float(eles[17]))

            print >> outh, "\t".join(
                ["wt", eles[9], eles[11], eles[15], eles[13], eles[14]])
            print >> outh, "\t".join(
                ["ko", eles[10], eles[12], eles[18], eles[16], eles[17]])

fh.close()
outh.close()
'''
violin plot
df = pd.read_table (sys.argv[2])
fontsize = 10
fig, axes = plt.subplots()
header = list(df)
del header[0]

fontsize = 10
fig, axes = plt.subplots()
sns.violinplot(header[0],'deletions',data=df,ax=axes)
axes.set_xlabel(header[0])
axes.set_ylabel('deletions')
plt.savefig ("5.pdf",format="pdf")
'''
Exemplo n.º 47
0
    def pushButtonClicked(self):
        code = self.lineEdit.text()

        if code == "0":
            '''
            self.axex.clear()
            self.fig = sns.kdeplot(df["NOX"], df["LSTAT"])
            self.canvas.draw()
            '''
            self.fig1.clear()
            self.fig1.clear()
            self.fig1 = sns.kdeplot(df["NOX"], df["LSTAT"])
            self.fig1.clear()
            self.fig1 = sns.kdeplot(df["NOX"], df["LSTAT"])
            self.canvas.draw()

        elif code == "1":
            self.fig1 = sns.violinplot(x="RM_int", y="MEDV", data=df)
            self.fig1.clear()
            self.fig1 = sns.violinplot(x="RM_int", y="MEDV", data=df)
            self.canvas.draw()

        elif code == "2":
            self.fig1 = sns.violinplot(x="RM_int",
                                       y="MEDV",
                                       hue="CHAS",
                                       data=df,
                                       orient="v")
            self.fig1.clear()
            self.fig1 = sns.violinplot(x="RM_int",
                                       y="MEDV",
                                       hue="CHAS",
                                       data=df,
                                       orient="v")
            self.canvas.draw()

        elif code == "3":
            self.fig1 = sns.violinplot(x="RM_int",
                                       y="MEDV",
                                       hue="CHAS",
                                       split=True,
                                       data=df)
            self.fig1.clear()
            self.fig1 = sns.violinplot(x="RM_int",
                                       y="MEDV",
                                       hue="CHAS",
                                       split=True,
                                       data=df)
            self.canvas.draw()

        elif code == "4":
            self.fig1 = sns.violinplot(x="RM_int",
                                       y="MEDV",
                                       hue="CHAS",
                                       split=True,
                                       data=df)
            self.fig1.clear()
            self.fig1 = plt.scatter(df["CRIM"], df["MEDV"])
            plt.xlabel("Per capita crime rate by town (CRIM)")
            plt.ylabel("Housing Price")
            plt.title("Relationship between CRIM and Price")
            self.canvas.draw()

        elif code == "5":
            self.fig1 = sns.violinplot(x="RM_int",
                                       y="MEDV",
                                       hue="CHAS",
                                       split=True,
                                       data=df)
            self.fig1.clear()
            self.fig1 = plt.scatter(df["RM"], df["MEDV"])
            plt.xlabel("Average number of rooms per dwelling(RM)")
            plt.ylabel("Housing Price")
            plt.title("Relationship between RM and Price")
            self.canvas.draw()

        elif code == "6":
            self.fig1 = sns.violinplot(x="RM_int",
                                       y="MEDV",
                                       hue="CHAS",
                                       split=True,
                                       data=df)
            self.fig1.clear()
            self.fig1 = plt.scatter(df["PTRATIO"], df["MEDV"])
            plt.xlabel("Pupil-teacher ratio by town(PTRATIO)")
            plt.ylabel("Housing Price")
            plt.title("Relationship between PTRATIO and Price")
            self.canvas.draw()

        elif code == "7":
            self.fig1 = sns.violinplot(x="RM_int",
                                       y="MEDV",
                                       hue="CHAS",
                                       split=True,
                                       data=df)
            self.fig1.clear()
            self.fig1 = plt.scatter(df["ZN"], df["MEDV"])
            plt.xlabel(
                "proportion of residential land zoned for lots over 25,000 sq.ft.(ZN)"
            )
            plt.ylabel("Housing Price")
            plt.title("Relationship between ZN and Price")
            self.canvas.draw()

        elif code == "8":
            self.fig1 = sns.violinplot(x="RM_int",
                                       y="MEDV",
                                       hue="CHAS",
                                       split=True,
                                       data=df)
            self.fig1.clear()
            self.fig1 = plt.scatter(df["INDUS"], df["MEDV"])
            plt.xlabel(
                "proportion of non-retail business acres per town(INDUS)")
            plt.ylabel("Housing Price")
            plt.title("Relationship between INDUS and Price")
            self.canvas.draw()

        elif code == "9":
            self.fig1 = sns.violinplot(x="RM_int",
                                       y="MEDV",
                                       hue="CHAS",
                                       split=True,
                                       data=df)
            self.fig1.clear()
            self.fig1 = plt.scatter(df["NOX"], df["MEDV"])
            plt.xlabel(
                "nitric oxides concentration (parts per 10 million(NOX)")
            plt.ylabel("Housing Price")
            plt.title("Relationship between NOX and Price")
            self.canvas.draw()

        elif code == "10":
            self.fig1 = sns.violinplot(x="RM_int",
                                       y="MEDV",
                                       hue="CHAS",
                                       split=True,
                                       data=df)
            self.fig1.clear()
            self.fig1 = sns.regplot(y="MEDV", x="RM", data=df, fit_reg=True)
            self.canvas.draw()
print(
    "no. The discrepencies between the life expectancy at birth in each country is much higher than the GDP in each country."
)

# ## Step 6. Violin Plots To Compare Life Expectancy Distributions

# Another way to compare two datasets is to visualize the distributions of each and to look for patterns in the shapes.
#
# We have added the code to instantiate a figure with the correct dimensions to observe detail.
# 1. Create an `sns.violinplot()` for the dataframe `df` and map `Country` and `LEABY` as its respective `x` and `y` axes.
# 2. Be sure to show your plot

# In[12]:

# NOTE: plt.subplots() returns a (figure, axes) pair, so `fig` is that tuple.
fig = plt.subplots(figsize=(15, 10))
sns.violinplot(x="Country", y="LEABY", data=df)
# Save BEFORE showing: once plt.show() returns, the current figure may
# already have been closed, in which case savefig() writes an empty image.
plt.savefig('ViolinplotofLEABYbycountry.png')
plt.show()

# What do you notice about this distribution? Which country's life expectancy has changed the most?

#

# ## Step 7. Bar Plots Of GDP and Life Expectancy over time
#
# We want to compare the GDPs of the countries over time, in order to get a sense of the relationship between GDP and life expectancy.
#
# First, we can plot the progression of GDP over the years by country in a barplot using Seaborn.
# We have set up a figure with the correct dimensions for your plot. Under that declaration:
# 1. Save `sns.barplot()` to a variable named `ax`
# 2. Chart `Country` on the x axis, and `GDP` on the `Y` axis on the barplot. Hint: `ax = sns.barplot(x="Country", y="GDP")`
Exemplo n.º 49
0
                                                 trainX,
                                                 trainy,
                                                 cv=kfold,
                                                 scoring=scoring)
    results.append(cv_results)
    names.append(name)

    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)

# Compare the algorithms: one violin per entry of `results` (the
# cross-validation scores gathered above), labelled with the matching
# entry of `names`.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
fig.suptitle('Algorithm Comparison')
sns.violinplot(ax=ax, data=results)
ax.set_xticklabels(names)
plt.show()

# Fit a linear discriminant analysis classifier on the training split and
# predict the held-out split.
clf = LinearDiscriminantAnalysis()
clf.fit(trainX, trainy)
predictions = clf.predict(testX)

# Report accuracy, then the confusion matrix as computed by sklearn.
print('accuracy score', accuracy_score(testy, predictions))
print('Confusion matrix from sklearn\n')
print(confusion_matrix(testy, predictions))
# custom confusion matrix
plt.figure()
plt_cnf_matrix(confusion_matrix(testy, predictions),
Exemplo n.º 50
0
# Grid appearance shared by every plot in this part of the script.
_grid_style = {"linestyle": "dotted", "color": "gray", "alpha": 0.7}

# Whole-dataset overviews, described as (seaborn plotter, title, output file).
# The boxplot [17] and the violinplot [19] are both drawn from the entire
# iris dataset with seaborn's built-in 'colorblind' palette [18].
for _plotter, _title, _outfile in (
        (sns.boxplot, "Boxplot of Iris Variables",
         "data-visualizations/boxplot - iris.png"),
        (sns.violinplot, "Violinplot of Iris Variables",
         "data-visualizations/violinplot - iris.png"),
):
    plt.rc("grid", **_grid_style)
    plt.grid()
    _plotter(data=iris, palette="colorblind")
    # bold title, then save into the designated subfolder before the
    # plot is displayed to the user in a pop-up window
    plt.title(_title, weight="bold")
    plt.savefig(_outfile)
    plt.show()

#########################################################################################################################
# 6. Violinplots of Each Variable
#########################################################################################################################

# Same recipe as the overview plots, except that individual numeric
# variables are plotted by species instead of the entire dataset.
# No palette is passed, so the default colours are used.

# violinplot of sepal length
plt.rc("grid", **_grid_style)
plt.grid()
Exemplo n.º 51
0
# Mean numeric_class of the students in each Relation group.
relation_grade_ave = [
    sum(grades) / float(len(grades))
    for grades in (data[data.Relation == i].numeric_class for i in relation)
]
ax = sns.barplot(x=relation, y=relation_grade_ave)
plt.title('Relation with father or mother affects success of students')

# * Having a relation with the mum has a positive effect on these students
# * Students who have a relation with their mum are more successful

# In[ ]:

# Let's look at how many times the students participate in discussion groups
discussion = data.Discussion
discussion_ave = sum(discussion) / len(discussion)
ax = sns.violinplot(y=discussion, split=True, inner='quart')
# Overlay every student in black, then the `unsuccess` students in red.
for _points, _colour in ((discussion, 'black'),
                         (unsuccess.Discussion, 'red')):
    ax = sns.swarmplot(y=_points, color=_colour)
plt.title('Discussion group participation')

# * These two students are below the discussion average.
# * The average is 43. Therefore, participating in discussion groups can be
#   important for the success of these two students

# In[ ]:

# Lastly: mean numeric_class for each distinct StudentAbsenceDays value
absence_day = data.StudentAbsenceDays.unique()
absense_day_ave = [
    sum(grades) / float(len(grades))
    for grades in (data[data.StudentAbsenceDays == i].numeric_class
                   for i in absence_day)
]
Exemplo n.º 52
0
    x="labels_name",
    y="score",
    hue="Feature",
    data=df_complete,
    order=["Controls", "SCZ Cluster 1", "SCZ Cluster 2", "SCZ Cluster 3"])
plt.legend(loc='lower left')
plt.savefig(os.path.join(output, "cluster_weights.png"))

# Bar chart of age per group, saved as age.png.
plt.figure()
sns.set_style("whitegrid")
ax = sns.barplot(
    x="labels_name",
    y="age",
    data=df,
    order=["Controls", "SCZ Cluster 1", "SCZ Cluster 2", "SCZ Cluster 3"])
plt.savefig(os.path.join(output, "age.png"))

#############################################################################

# ANOVA on age
# One-way ANOVA across the rows whose "labels" value is 0, 1 or 2.
# NOTE(review): the plots list four groups but only three labels are
# compared here -- confirm which label codes are meant to be included.
T, p = scipy.stats.f_oneway(
    df[df["labels"] == 0]["age"],
    df[df["labels"] == 1]["age"],
    df[df["labels"] == 2]["age"])

# Start a fresh figure: without it the violins are drawn on top of the bar
# chart above and both end up in age_anova.png.
plt.figure()
ax = sns.violinplot(
    x="labels_name",
    y="age",
    data=df,
    order=["Controls", "SCZ Cluster 1", "SCZ Cluster 2", "SCZ Cluster 3"])
# NOTE(review): f_oneway returns an F statistic although the title labels
# it "t"; the title text is left unchanged.
plt.title("ANOVA patients: t = %s, and  p= %s" % (T, p))
plt.savefig(os.path.join(output, "age_anova.png"))
# Bivariate kernel density estimates. The first one is shaded; the style is
# switched to 'dark' before the two budget-based estimates are drawn.
k1 = sns.kdeplot(movies.Rotten_ratings, movies.Audience_ratings, shade=True)
sns.set_style('dark')
k2 = sns.kdeplot(movies.Budget_millions, movies.Audience_ratings)
k3 = sns.kdeplot(movies.Budget_millions, movies.Rotten_ratings)

# Subplots: the same two budget estimates side by side, sharing both axes.
f, axes = plt.subplots(1, 2, figsize=(12, 6), sharex=True, sharey=True)
k2 = sns.kdeplot(movies.Budget_millions, movies.Audience_ratings, ax=axes[0])
k3 = sns.kdeplot(movies.Budget_millions, movies.Rotten_ratings, ax=axes[1])
k2.set_xlim(-20, 160)

# Boxplot vs. violinplot of the critic rating per genre.
v = sns.violinplot(data=movies, x='Genre', y='Rotten_ratings')
w = sns.boxplot(data=movies, x='Genre', y='Rotten_ratings')
# Violinplot restricted to the 'Drama' genre, broken down by year.
v2 = sns.violinplot(data=movies[movies.Genre == 'Drama'],
                    x='Year',
                    y='Rotten_ratings')

# FacetGrid: a Genre x Year grid is created and populated in one step
# (both parts must run together).
kws = {'s': 50, 'linewidth': 0.5, 'edgecolor': 'black'}
g = sns.FacetGrid(movies, row='Genre', col="Year", hue='Genre').map(
    plt.scatter, 'Rotten_ratings', 'Audience_ratings', **kws)
# Facet grids can be populated with any type of chart, e.g. histograms.
g = sns.FacetGrid(movies, row='Genre', col="Year", hue='Genre').map(
    plt.hist, 'Budget_millions')

#controlling axes and adding diagonals
Exemplo n.º 54
0
sizes = [104, 109, 93, 99, 64]
colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue', 'purple']
explode = (0, 0.1, 0, 0, 0)  # pull the 2nd slice out of the pie

# Plot
plt.pie(sizes, explode=explode, labels=labels, colors=colors,
        autopct='%1.1f%%', shadow=True, startangle=140)

plt.axis('equal')
plt.title("Pie chart for Position")
plt.show()


# violin plot
# NOTE: set_title() returns the title Text object, so that is what `fig`
# holds here -- it is not a Figure.
fig = sns.violinplot(y=DF["Position"], x=DF["PointsPS"]).set_title("Violin plots for points")
plt.xlabel("Points per Second")
plt.ylabel("Position")
# plt.show() accepts only the keyword argument `block`; passing `fig`
# positionally is rejected by current matplotlib, so call it without arguments.
plt.show()

# correlogram
DF1 = DF[["FieldGoalsMadePS", "FreeThrowsMadePS", "ThreePointersMadePS", "TwoPointersMadePS", "Position"]]
DF1
sns.pairplot(DF1, kind="scatter", hue="Position")

# box
fig = sns.boxplot(x="Position", y="StealsPS", data=DF).set_title("Steals boxplot")
plt.ylabel("Steals per Second")
plt.show()

#Marginal Plots
Exemplo n.º 55
0
         y='features',
         orient='h',
         data=dd,
         hue='Culture',
         ax=ax,
         notch=True,
         flierprops=flierprops,
         palette="hls"
     )  #Set2 is also somewhat okay, #sns.hls_palette(8, l=.3, s=.8))
 else:
     sns.violinplot(
         x='value',
         y='features',
         orient='h',
         data=dd,
         hue='Culture',
         ax=ax,
         notch=True,
         flierprops=flierprops,
         palette="hls"
     )  #Set2 is also somewhat okay, #sns.hls_palette(8, l=.3, s=.8))
 ax.get_legend().remove()
 ax.tick_params(axis="x", direction="in")
 ax.tick_params(axis="y", direction="in", pad=-2)
 ax.xaxis.set_major_locator(MultipleLocator(1))
 if idx < 18:
     ax.set_title(r'$\bf{}$'.format(value_vars[0].replace('_mean', '')),
                  loc='right',
                  position=(1.0, 0.7),
                  size=10)
     ax.yaxis.set_ticklabels(['mean', 'std'])
Exemplo n.º 56
0
# Standard deviation of the goal and pledged columns, to two decimals.
print("")
print("Std Goal and Pledged values")
print(df_kick[["goal", "pledged"]].std().round(2))

# <h2>Looking the State variable</h2>
# - pledge log by state
# - goal log by state
# - goal log x pledged log

# In[9]:

plt.figure(figsize=(12, 8))
plt.subplots_adjust(hspace=0.75, top=0.75)

# Top-left panel: pledged (log) distribution per state.
ax1 = sns.violinplot(x="state",
                     y="pledge_log",
                     data=df_kick,
                     palette="hls",
                     ax=plt.subplot(2, 2, 1))
ax1.set_xticklabels(ax1.get_xticklabels(), rotation=45)
ax1.set_title("Understanding the Pledged values by state", fontsize=15)
ax1.set_ylabel("Pledged Values(log)", fontsize=12)
ax1.set_xlabel("State Description", fontsize=12)

# Top-right panel: goal (log) distribution per state.
ax2 = sns.violinplot(x="state",
                     y="goal_log",
                     data=df_kick,
                     ax=plt.subplot(2, 2, 2))
ax2.set_xticklabels(ax2.get_xticklabels(), rotation=45)
ax2.set_title("Understanding the Goal values by state", fontsize=15)
ax2.set_ylabel("Goal Values(log)", fontsize=12)
ax2.set_xlabel("State Description", fontsize=12)

# Bottom panel (full width): regression of pledged (log) on goal (log).
ax0 = sns.regplot(x="goal_log",
                  y="pledge_log",
                  data=df_kick,
                  x_jitter=False,
                  ax=plt.subplot(2, 1, 2))
ax0.set_title("Better view of Goal x Pledged values", fontsize=15)
Exemplo n.º 57
0
def plotParamsdf(df=None, number_points=0, box=False):
    """Plot the distribution of each model parameter and save it as a PDF.

    One subplot is drawn per parameter on a 4x3 grid, as a violin plot or,
    when ``box`` is true, as a box plot. The figure is written to the
    ``results_robustness`` directory and then shown.

    Args:
        df: Table indexable by the LaTeX parameter labels listed below
            (e.g. ``df[r"$\\gamma_L$"]``). When ``None``, it is obtained
            from ``getParamDistrib(number_points)``.
        number_points: Forwarded to ``getParamDistrib`` when ``df`` is
            ``None``; unused otherwise.
        box: Draw box plots (saved as ``params_distrib_sns_box.pdf``)
            instead of violin plots (``params_distrib_sns.pdf``).
    """
    # The previous check, `not type(df)`, could never be true, so the
    # default df=None was never replaced and indexing it failed below.
    if df is None:
        df = getParamDistrib(number_points)

    param_names = [
        r"$\gamma_L$",
        r"$\eta_x$",
        r"$\gamma_x$",
        r"$\theta_L$",
        r"$\omega_x$",
        r"$\theta_x$",
        r"$\delta_L$",
        r"$\delta_x$",
        #r"$\delta_y$", #to remove
        r"$\rho_x$",
        r"$n_y$",
        r"$m_x$"
    ]
    # Units paired positionally with param_names; "" means no unit is shown.
    units = [
        r"$nM/min$", r"$nM/min$", r"$nM/min$", r"$nM^{-1}$", r"$nM^{-1}$",
        r"$nM^{-1}$", r"$min^{-1}$", r"$min^{-1}$", r"$min^{-1}$", "", ""
    ]

    fig, axes = plt.subplots(4, 3)

    for i, (param_name, unit) in enumerate(zip(param_names, units)):
        # An empty label leaves its grid cell untouched.
        if not param_name:
            continue
        ax = axes.flat[i]
        if box:
            sns.boxplot(data=df[param_name], ax=ax)
        else:
            sns.violinplot(data=df[param_name],
                           ax=ax,
                           cut=0,
                           color="#3274a1")
        # The parameter is named on the y axis, so x ticks are dropped.
        ax.set_xticks([])
        if unit:
            ax.set_ylabel(param_name + " [" + unit + "]")
        else:
            ax.set_ylabel(param_name)

    fig.set_size_inches([15, 12])
    filename = 'params_distrib_sns_box.pdf' if box else 'params_distrib_sns.pdf'
    plt.savefig(os.path.join('results_robustness', filename),
                bbox_inches='tight')
    plt.show()
Exemplo n.º 58
0
# Cohort summary: sex counts and age as mean +/- standard deviation.
print('Males: {m} , Females: {f}'.format(f=np.sum(sex == 'F'),
                                         m=np.sum(sex == 'M')))
print('Age : {m:.2f} +/- {s:.2f}'.format(s=np.std(age), m=np.mean(age)))

# Exposure expressed as current * time / 1000, reported as mAs below.
_exposure_mas = current * time / 1000

# (min,max) range of each acquisition parameter.
for _template, _values in (
        ('X-ray Tube Current: ({a},{b})', current),
        ('KVP: ({a},{b})', kvp),
        ('Exposure Time: ({a},{b})', time),
        ('Exposure (mAs) min/max : ({a},{b})', _exposure_mas),
):
    print(_template.format(a=np.min(_values), b=np.max(_values)))
print('Exposure (mAs) mean/std : ({a:.2f},{b:.2f})'.format(
    a=np.mean(_exposure_mas), b=np.std(_exposure_mas)))
print('-----------------------------')

# Visualization
# Exposure distribution per diagnosis.
plt.figure()
sns_plot = sns.violinplot(x='Diagnosis', y='mAs', data=csv_data, split=False)
sns_plot.set_ylabel('Exposure (mAs)')
sns_plot.set_xlabel('Disease')
fig = sns_plot.get_figure()
# fig.savefig("mas.png",dpi=300)

# Number of cases per diagnosis, split by patient sex.
plt.figure()
sns_plot = sns.countplot(x='Diagnosis', data=csv_data, hue='Patient Sex')
sns_plot.set_ylabel('Number of Cases')
sns_plot.set_xlabel('Disease')
fig = sns_plot.get_figure()
# fig.savefig("sex.png",dpi=300)

# Age distribution per diagnosis.
plt.figure()
sns_plot = sns.boxplot(x='Diagnosis', y='Age', data=csv_data)
sns_plot.set_ylabel('Age (year)')