def on_draw(self):
    """Redraw the canvas with either a hint message or the correlation plot.

    Reads ``self.df`` and ``self.cmap``, replaces ``self.ax`` with a fresh
    axes on the cleared figure, and finishes by calling ``self.draw()``.
    """
    # Select the previous axes first so clf() acts on the current figure.
    plt.sca(self.ax)
    plt.clf()
    self.ax = plt.axes()

    if self.df is not None:
        plt.sca(self.ax)
        corrplot_options = dict(annot=False,
                                sig_stars=True,
                                cmap_range="full",
                                diag_names=False,
                                sig_corr=False,
                                cmap=self.cmap,
                                ax=self.ax,
                                cbar=True)
        sns.corrplot(self.df, **corrplot_options)
    else:
        # No data frame yet: show a centred hint instead of a plot.
        hint = "Select two or more variables from list"
        self.ax.text(0.5, 0.5, hint,
                     horizontalalignment='center',
                     verticalalignment='center',
                     fontsize=16)

    plt.tight_layout()
    self.draw()
Пример #2
0
def res_matrix(mark,state,cut_off=40):
    """Build a per-position signal matrix, plot its correlations and dump it.

    Reads the tab-separated file ``"<mark> in <state>-<cut_off>.csv"`` from
    the ``tmp`` folder of ``get_data_dir()``, groups consecutive rows that
    share the same ``chromMiddle`` into one vector (one slot per entry of
    ``get_full_EID_list()``), shows a correlation plot of the collected
    vectors and writes them, transposed, to the matching ``..._diff.csv``.

    Parameters
    ----------
    mark, state : values formatted into the input/output file names.
    cut_off : value formatted into the file names (default 40).
    """
    path = os.path.join(get_data_dir(), "tmp", "{0} in {1}-{2}.csv".format(mark, state,cut_off))
    DF = pd.read_csv(path, sep='\t')
    Full_EID_list = get_full_EID_list()
    n_eids = len(Full_EID_list)

    rows = []
    current = [0.] * n_eids
    for i in range(len(DF.index)):
        try:
            if DF.chromMiddle[i-1] == DF.chromMiddle[i]:
                current[Full_EID_list.index(DF.EID[i])] = DF.signalValue[i]
            else:
                # NOTE(review): the row that opens a new group is not stored
                # in the fresh vector, and the last group is never appended.
                # Kept as in the original; confirm whether this is intended.
                rows.append(current)
                current = [0.] * n_eids
        except (KeyError, IndexError, ValueError):
            # Best-effort skip: i == 0 has no previous row (label -1 lookup
            # fails) and an EID missing from Full_EID_list cannot be placed.
            continue

    f, ax = plt.subplots(figsize=(15, 15))
    cmap = sns.diverging_palette(210, 10, as_cmap=True)
    sns.corrplot(np.array(rows), annot=False, sig_stars=False,   # .T??
             diag_names=False, cmap=cmap, ax=ax)
    f.tight_layout()
    plt.show()

    path2 = os.path.join(get_data_dir(), "tmp","{0} in {1}-{2}_diff.csv".format(mark,state,cut_off))
    # One output line per EID slot, one tab-terminated field per vector.
    with open(path2, 'w') as out:
        for column in zip(*rows):
            out.write("".join(str(value) + "\t" for value in column))
            out.write("\n")
Пример #3
0
def make_plot(X_train, y_train, X, y, test_data, model, model_name, features, response):
    """Fit ``model`` and save three diagnostic figures for it.

    Figure 1 (2x2 grid, saved as ``<model_name>_<feature>``): regression
    plot, box plot and squared-residual plot for the fifth column of ``X``,
    plus an interaction plot of the fourth and fifth columns when
    ``model_name`` is ``'linear'`` or ``'logistic'``.
    Figure 2 (``<model_name>_multi-variable_correlation``): correlation plot
    of ``test_data``.
    Figure 3 (``<model_name>_coefficient_effects``): coefficient plot of
    ``diagnosis ~ <features joined with ' + '>``.

    ``model.fit(X_train, y_train)`` is called as a side effect. ``response``
    is accepted but not used by this function.
    """
    feature = X.columns
    f, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, sharey=False)
    sns.regplot(X[feature[4]], y, test_data, ax=ax1)
    sns.boxplot(X[feature[4]], y, color="Blues_r", ax=ax2)
    model.fit(X_train, y_train)
    sns.residplot(X[feature[4]], (model.predict(X) - y) ** 2, color="indianred", lowess=True, ax=ax3)
    # Compare by value: `is` on string literals only works by accident of
    # interning and fails for names built at runtime.
    if model_name == 'linear':
        sns.interactplot(X[feature[3]], X[feature[4]], y, ax=ax4, filled=True, scatter_kws={"color": "dimgray"}, contour_kws={"alpha": .5})
    elif model_name == 'logistic':
        pal = sns.blend_palette(["#4169E1", "#DFAAEF", "#E16941"], as_cmap=True)
        levels = np.linspace(0, 1, 11)
        # Bind to ax4 explicitly, matching the linear branch and the title below.
        sns.interactplot(X[feature[3]], X[feature[4]], y, ax=ax4, levels=levels, cmap=pal, logistic=True)
    ax1.set_title('Regression')
    ax2.set_title(feature[4]+' Value')
    ax3.set_title(feature[4]+' Residuals')
    ax4.set_title('Two-value Interaction')
    f.tight_layout()
    plt.savefig(model_name+'_'+feature[4], bbox_inches='tight')

    # Multi-variable correlation significance level
    f, ax = plt.subplots(figsize=(10, 10))
    cmap = sns.blend_palette(["#00008B", "#6A5ACD", "#F0F8FF",
                              "#FFE6F8", "#C71585", "#8B0000"], as_cmap=True)
    sns.corrplot(test_data, annot=False, diag_names=False, cmap=cmap)
    ax.grid(False)
    ax.set_title('Multi-variable correlation significance level')
    plt.savefig(model_name+'_multi-variable_correlation', bbox_inches='tight')

    # Complete coefficient plot (the original author believed this only
    # applies to linear regression).
    sns.coefplot("diagnosis ~ "+' + '.join(features), test_data, intercept=True)
    plt.xticks(rotation='vertical')
    plt.savefig(model_name+'_coefficient_effects', bbox_inches='tight')
Пример #4
0
def make_corr_plot(d, title="plot"):
    """Draw a correlation plot of ``d`` on a 9x9 figure and save it as ``title``."""
    figure, axis = plt.subplots(figsize=(9, 9))
    palette = sns.diverging_palette(220, 10, as_cmap=True)
    plot_options = dict(annot=False, sig_stars=False,
                        diag_names=False, cmap=palette, ax=axis)
    sns.corrplot(d, **plot_options)
    figure.tight_layout()
    # The title is applied after the layout pass, then the figure is written
    # to a file named after the title.
    plt.title(title)
    figure.savefig(title)
def computeCorrelation(dataFrame, candidatesList,name):
    """Plot the correlation matrix of the selected columns and save it as PNG.

    The columns in ``candidatesList`` are taken from ``dataFrame``; the file
    name is built from the first candidate.
    """
    fig, axes = plt.subplots(figsize=(12,12))
    selected = dataFrame[candidatesList]
    palette_colors = ["#6B229F", "#FD3232", "#F66433",
                      "#E78520", "#FFBB39"]
    cmap = sb.blend_palette(palette_colors, as_cmap=True)
    sb.corrplot(selected, annot=False, sig_stars=False,
                diag_names=False, cmap=cmap)
    axes.set_title("Correlation Matrix - " + name )
    output_name = 'Correlation_'+candidatesList[0]+'_.png'
    plt.savefig(output_name)
Пример #6
0
def corrplot(mod_dis):
    """Correlation plot of the strict upper triangles of each matrix in ``mod_dis``.

    ``mod_dis`` maps a label to a square array; every array contributes one
    column (its entries above the diagonal) named after its key.
    """
    upper_triangles = [
        matrix[np.triu_indices(matrix.shape[0], k=1)]
        for matrix in mod_dis.values()
    ]
    stacked = np.array(upper_triangles).T
    frame = pandas.DataFrame(stacked, columns=mod_dis.keys())

    sns.corrplot(frame)
Пример #7
0
def correlateRDMs(allrsasimspaces, models):
    """Plot and return the correlations between the models' flattened matrices.

    For each name in ``models`` the ``'simmat_across'`` entry of
    ``allrsasimspaces[name]`` is flattened into one column; the columns are
    ordered as in ``models``. Returns ``DataFrame.corr()`` of that frame.
    """
    flattened = np.array([
        np.array(allrsasimspaces[m]['simmat_across']).flatten()
        for m in models
    ])
    by_model = {}
    for position, model_name in enumerate(models):
        by_model[model_name] = flattened[position]
    spaces = pd.DataFrame(data=by_model)
    spaces = spaces[models]
    plt.subplots(figsize=[12, 12])
    sns.corrplot(spaces, diag_names=False, sig_stars=False)
    return spaces.corr()
Пример #8
0
def correlations(data, X):
    """Save a correlation plot of ``data`` under ``visuals/``.

    The file name is the column names of ``X`` joined with underscores plus
    the ``_correlation`` suffix; the same path is printed.
    """
    target = 'visuals/' + "_".join(X.columns.tolist()) + '_correlation'
    figure, axis = plt.subplots(figsize=(10, 10))
    gradient = ["#00008B", "#6A5ACD", "#F0F8FF",
                "#FFE6F8", "#C71585", "#8B0000"]
    cmap = sns.blend_palette(gradient, as_cmap=True)
    sns.corrplot(data, annot=False, diag_names=False, cmap=cmap)
    axis.grid(False)
    plt.savefig(target)
    print(target)
    plt.close()
Пример #9
0
def seaborn_plot(df,plot_type='pairplot',columns=False):
    """Show a pair plot or a correlation plot of ``df``.

    ``columns`` optionally restricts the plotted columns; a falsy value
    plots the whole frame. Unknown ``plot_type`` values draw nothing before
    the figure is shown.
    """
    sns.set()
    mpl.rc("figure", figsize=(16, 8.65))
    if columns:
        plotting_df = df[columns]
    else:
        plotting_df = df
    if plot_type == 'corr_plot':
        sns.corrplot(plotting_df)
    elif plot_type == 'pairplot':
        sns.pairplot(plotting_df)
    sns.plt.show()
Пример #10
0
def seaborn_plot(df, columns, plot_type='pairplot'):
    """Show a pair plot or a correlation plot of ``df[columns]``.

    Unknown ``plot_type`` values draw nothing before the figure is shown.
    """
    sns.set()
    mpl.rc("figure", figsize=(16, 8.65))
    subset = df[columns]
    if plot_type == 'corr_plot':
        sns.corrplot(subset)
    elif plot_type == 'pairplot':
        sns.pairplot(subset)
    sns.plt.show()
Пример #11
0
def visualize_correlations(training_data):
    """
    Draw a correlation-matrix heat map of ``training_data``.

    Annotations and diagonal names are only shown when the frame has fewer
    than 30 columns.
    """
    fig, ax = plt.subplots(figsize=(16, 10))
    colormap = sb.blend_palette(sb.color_palette('coolwarm'), as_cmap=True)
    detailed = len(training_data.columns) < 30
    sb.corrplot(training_data, annot=detailed, sig_stars=False,
                diag_names=detailed, cmap=colormap, ax=ax)
    fig.tight_layout()
Пример #12
0
def l_reg(input_path):
    """Save a correlation plot of the transposed data in ``input_path``.

    The CSV is loaded, its ``gene_id`` column is dropped, and the transposed
    value matrix is passed to ``sns.corrplot``. Both axes are hidden and the
    figure is written to ``tmp/H3K27me3_corrplot.png`` under
    ``get_data_dir()``.
    """
    DF = pd.read_csv(input_path)
    DF.drop('gene_id', axis=1, inplace=True)
    f, ax = plt.subplots(figsize=(20, 20))
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    # `.values` replaces DataFrame.as_matrix(), which was removed in
    # pandas 1.0; both give the same ndarray.
    sns.corrplot(DF.values.T, annot=False, sig_stars=False,
             diag_names=False, cmap=cmap, ax=ax)
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    plt.savefig(os.path.join(get_data_dir(), "tmp", "H3K27me3_corrplot.png"))
Пример #13
0
def corrplot(ax):
    """Draw a demo correlation plot of five synthetic variables on ``ax``.

    The data come from a ``RandomState`` seeded with 0, so the plot is
    reproducible.
    """
    rng = np.random.RandomState(0)
    first, second = rng.randn(2, 60)
    mean = [0, 0]
    covariance = [(1, -.5), (-.5, 1)]
    third, fourth = rng.multivariate_normal(mean, covariance, 60).T
    third += first / 8
    fifth = second + rng.randn(60) * 2
    samples = np.column_stack((first, second, third, fourth, fifth))

    sns.corrplot(samples, ax=ax)
    ax.set_title("corrplot()", verticalalignment="top")
def plot_pt_corr(df):
    """
    Plot the Spearman correlation matrix of the columns of ``df``
    (described by the original author as parameter posteriors).
    """
    figure, axis = P.subplots(figsize=(9, 9))
    gradient = ["#00008B", "#6A5ACD", "#F0F8FF",
                "#FFE6F8", "#C71585", "#8B0000"]
    cmap = sns.blend_palette(gradient, as_cmap=True)
    options = dict(annot=True, sig_stars=True, method='spearman',
                   diag_names=True, cmap=cmap, ax=axis)
    sns.corrplot(df, **options)
    figure.tight_layout()
Пример #15
0
def corrplot(ax):
    """Render the example correlation plot (five seeded random series) on ``ax``."""
    generator = np.random.RandomState(0)
    series_a, series_b = generator.randn(2, 60)
    correlated_pair = generator.multivariate_normal(
        [0, 0], [(1, -.5), (-.5, 1)], 60)
    series_c, series_d = correlated_pair.T
    # Mix a little of the first series into the third one.
    series_c += series_a / 8
    series_e = series_b + generator.randn(60) * 2
    table = np.column_stack(
        (series_a, series_b, series_c, series_d, series_e))

    sns.corrplot(table, ax=ax)
    ax.set_title("corrplot()", verticalalignment="top")
Пример #16
0
 def get_cor_matrix( self,method="pearson" ):
     """Compute the column correlation matrix and save its plot as a PDF.

     Stores ``method`` on ``self.method`` and the correlation values on
     ``self.cor_mat``. The output name is ``self.infile`` without its last
     two dot-separated parts, followed by ``.corMat.<method>.pdf``.
     """
     self.method = method

     stem = ".".join( self.infile.split(".")[:-2] )
     out_cor_file = "%s.corMat.%s.pdf" % ( stem, self.method )

     frame = pd.DataFrame( self.mat.matrix )
     frame.columns = self.mat.colname
     frame.index = self.mat.rowname
     self.cor_mat = frame.corr( self.method ).values

     sns.set(style="darkgrid")
     figure, axis = plt.subplots(figsize=(9, 9))
     palette = sns.diverging_palette(220, 10, as_cmap=True)
     sns.corrplot(frame,
                  annot=False,
                  sig_stars=False,
                  diag_names=False,
                  cmap=palette,
                  ax=axis,
                  cmap_range=(0.0, 1.0),
                  method=self.method)
     figure.savefig( out_cor_file,format="pdf" )
Пример #17
0
def plot_correlations(df, include_numbers, filename):
    '''
    Save a correlation plot of the columns of ``df`` to ``filename``.

    ``include_numbers`` is forwarded as the ``annot`` option of the plot.
    '''
    plt.figure()
    sns.set()
    sns.corrplot(df, annot=include_numbers)
    plt.savefig(filename)
    plt.close()


#=====================================================================
# EOF
#=====================================================================
Пример #18
0
def corrplot_example():
    """
        Show correlation heat maps for the "titanic" and "attention" example
        datasets.  Per the original note, corrplot also runs a permutation
        test for p values, which is slow and of little use on huge datasets.
    """
    titanic = sns.load_dataset("titanic").dropna()
    attention = sns.load_dataset("attention")
    sns.set_context(rc={"figure.figsize": (8, 8)})
    # Both plots are drawn before the single show() call.
    for dataset in (titanic, attention):
        sns.corrplot(dataset)
    plt.show()
Пример #19
0
def corrplot_example():
    """
        Correlation-matrix heat maps of two bundled seaborn datasets
        ("titanic" with missing rows dropped, and "attention").  The
        original note warns that the permutation test for p values takes a
        while on large data.
    """
    figure_size = {"figure.figsize": (8,8)}
    titanic = sns.load_dataset("titanic").dropna()
    attention = sns.load_dataset("attention")
    sns.set_context(rc=figure_size)
    sns.corrplot(titanic)
    sns.corrplot(attention)
    plt.show()
Пример #20
0
def plot_corr_matrix(X) :
    """
    Plot the correlation matrix of the radar features in ``X``.

    Columns 2 onward of ``X`` are labelled with the 22 names below, so ``X``
    must be a 2-D array with 24 columns; the first two are ignored.
    """
    import numpy as np
    import pandas as pd
    import seaborn as sns
    import matplotlib.pyplot as plt

    sns.set(style="white")

    labels = ["radar dist",
              "Ref",
              "Ref 5x5 10th",
              "Ref 5x5 50th",
              "Ref 5x5 90th",
              "RefComposite",
              "RefComposite 5x5 10th",
              "RefComposite 5x5 50th",
              "RefComposite 5x5 90th",
              "Rho_HV",
              "Rho_HV 5x5 10th",
              "Rho_HV 5x5 50th",
              "Rho_HV 5x5 90th",
              "Zdr",
              "Zdr 5x5 10th",
              "Zdr 5x5 50th",
              "Zdr 5x5 90th",
              "Kdp",
              "Kdp 5x5 10th",
              "Kdp 5x5 50th",
              "Kdp 5x5 90th",
              "Expected"]

    d = pd.DataFrame(data=X[:,2:].copy(), columns=labels)

    # Set up the matplotlib figure
    f, ax = plt.subplots(figsize=(11, 9))

    # corrplot computes the correlations itself, so the unused d.corr()
    # result of the original was dropped.
    sns.corrplot(d)
    ax.set_title('Correlation Matrix for Radar Features and Output Variable')
    ax.set_xlabel('Features (along diagonal)')
    ax.set_ylabel('Correlation Values (upper triangle)')
    f.tight_layout()
    plt.show()
Пример #21
0
def preplot(df):
    """Show a pair plot of five columns of the first 50 rows, then a correlation plot."""
    pair_columns = [8, 11, 12, 14, 19]
    sns.pairplot(df[:50], vars=pair_columns, hue='class', size=1.5)
    plt.show()

    plt.figure(figsize=(12, 10))
    sns.corrplot(df, annot=False)
    plt.show()
def build_corrmatrix_dashboard(train_pre):
    """Plot the correlation matrix of ``train_pre`` and save it as a PNG.

    The image is written to ``figures/corr_matrix.png``; the ``figures``
    directory is not created here.
    """
    # Other callers in this file use the corrplot result as an Axes
    # (`ax.figure.savefig`). An Axes has no savefig(), so save through its
    # figure, and do not shadow the `plt` module name.
    ax = sns.corrplot(train_pre, annot=False)

    # Parenthesised single-argument print works on both Python 2 and 3.
    print('Saving correlation matrix in figures/.')
    ax.figure.savefig("figures/corr_matrix.png")
def build_corrmatrix_dashboard(train_pre):
    """Draw the correlation plot of ``train_pre`` and write it to disk.

    Output path: ``figures/corr_matrix.png`` (the directory must exist).
    """
    # The corrplot result is treated as an Axes elsewhere in this file;
    # saving goes through its parent figure because Axes has no savefig().
    axes = sns.corrplot(train_pre,annot=False)

    # Single-argument print call: valid on Python 2 and Python 3.
    print('Saving correlation matrix in figures/.')
    axes.figure.savefig("figures/corr_matrix.png")
Пример #24
0
def attribute_correlations(df, img_file='attr_correlations.png'):
    """Save a pairwise correlation plot of the columns of ``df`` to ``img_file``.

    With many attributes the plot can be simplified by passing
    ``annot=False, sig_stars=False, diag_names=False`` to corrplot.
    """
    logging.debug('Plotting attribute pairwise correlations')
    # A fixed size in inches controls the relative font size.
    figure, axis = plt.subplots(figsize=(10, 10))
    # Three-step diverging palette between hues 250 and 10.
    palette = sns.diverging_palette(250, 10, n=3, as_cmap=True)
    sns.corrplot(df, ax=axis, cmap=palette)
    # Trim the white borders before saving.
    figure.tight_layout()
    figure.savefig(img_file)
    plt.close(figure)
Пример #25
0
def display_corr_matrix():
    '''
    Show a correlation-matrix heat map of the module-level DF,
    used to look for collinearity.
    '''
    global DF

    sns.set(color_codes=True)
    figure, axis = plt.subplots(figsize=(9, 9))
    gradient = ["#00008B", "#6A5ACD", "#F0F8FF",
                "#FFE6F8", "#C71585", "#8B0000"]
    heat_cmap = sns.blend_palette(gradient, as_cmap=True)
    sns.corrplot(DF, annot=False, sig_stars=False,
                 diag_names=False, cmap=heat_cmap, ax=axis)
    sns.plt.title('Figure 1: Correlation Matrix Heatmap')
    figure.tight_layout()
    sns.despine()
    sns.plt.show()
Пример #26
0
def attribute_correlations(df, img_file='attr_correlations.png'):
    """Write the attribute correlation plot of ``df`` to ``img_file``.

    For frames with many attributes, corrplot's ``annot``, ``sig_stars``
    and ``diag_names`` options can be switched off to reduce clutter.
    """
    logging.debug('Plotting attribute pairwise correlations')
    size_in_inches = (10, 10)  # also sets the relative font size
    fig, ax = plt.subplots(figsize=size_in_inches)
    diverging = sns.diverging_palette(250, 10, n=3, as_cmap=True)
    sns.corrplot(df, ax=ax, cmap=diverging)
    fig.tight_layout()  # removes the white borders
    fig.savefig(img_file)
    plt.close(fig)
Пример #27
0
def plotSocialMedia(df):
    """Save a correlation plot and four per-network bar charts for ``df``.

    Columns are converted to numeric where possible, ``df.info()`` is
    printed, and each chart is written to its own PNG in the working
    directory. Every bar chart is grouped by ``userType`` with
    ``socialNetwork`` on the y axis.
    """
    df = df.apply(pd.to_numeric, errors='ignore')
    print(df.info())

    fig, ax = plt.subplots(figsize=(10, 10))
    sns.corrplot(df, ax=ax)
    plt.savefig('socialNetworkCorrelations.png', tight_layout=True)
    plt.close()

    # (metric column, ascending sort, x label, extra axis settings, file)
    bar_charts = (
        ("avgSessionDuration", False, 'Average Session Duration', {},
         'socialNetworkAvgDuration.png'),
        ("bounceRate", True, 'Bounce Rate', {},
         'socialNetworkBounceRate.png'),
        ("avgTimeOnPage", False, 'Average Time on Page', {},
         'socialNetworkavgTimeOnPage.png'),
        ("goalCompletionsAll", False, 'Goal Completions', {'xscale': 'log'},
         'socialNetworkgoalCompletionsAll.png'),
    )
    for metric, ascending, label, axis_extras, out_file in bar_charts:
        df.sort_values(metric, ascending=ascending, inplace=True)
        chart = sns.barplot(y='socialNetwork', x=metric, hue='userType', data=df)
        chart.set(xlabel=label, ylabel='', **axis_extras)
        plt.subplots_adjust(left=.17)
        sns.despine(left=True, bottom=True)
        plt.savefig(out_file)
        plt.close()
Пример #28
0
def get_feature_corr(df_k, keepers, scale_it=True):
    """Return a figure with the correlation plot of the training features.

    ``df_k`` is converted to a float array (passed through ``scale`` when
    ``scale_it`` is true) and its columns are labelled with ``keepers``.
    """
    values = np.array(df_k, dtype=float)
    if scale_it:
        values = scale(values)
    features = pd.DataFrame(values)
    features.columns = keepers

    sns.set(style="darkgrid")
    fig, ax = plt.subplots(figsize=(15, 15))
    palette = sns.diverging_palette(220, 10, as_cmap=True)
    options = dict(annot=True, sig_stars=True, diag_names=False,
                   cmap=palette, ax=ax)
    sns.corrplot(features, **options)
    ax.set_title('Correlation between training features')
    fig.tight_layout()
    return fig
def fig_correlations(data, aly_title, fig_save = True):
    """ Plot the correlations of ``data`` and show the figure.

    Parameters
    ----------
    data : pd.DataFrame
    aly_title : str
        Used as the plot title and handed to ``_save_fig`` when saving.
    fig_save : bool, optional
        Pass False to skip saving the figure.

    """
    folders = file_folder_specs()

    plt.figure()
    sns.corrplot(data, diag_names = False)
    plt.title(aly_title)
    if fig_save:
        _save_fig(aly_title, folders['fig'])
    plt.show()
    plt.close()
Пример #30
0
def visualize_correlations(training_data):
    """
    Generate a correlation-matrix heat map.

    Frames with fewer than 30 columns get annotations and diagonal names;
    wider frames are drawn without them.
    """
    fig, ax = plt.subplots(figsize=(16, 10))
    colormap = sb.blend_palette(sb.color_palette('coolwarm'), as_cmap=True)
    show_details = len(training_data.columns) < 30
    sb.corrplot(training_data,
                annot=show_details,
                sig_stars=False,
                diag_names=show_details,
                cmap=colormap,
                ax=ax)
    fig.tight_layout()
Пример #31
0
def tech_summary():
    """Plot comparisons of four tech stocks and return their percentage changes.

    Adjusted closing prices for AAPL, GOOG, MSFT and AMZN are read from the
    'yahoo' source between the module-level ``start`` and ``end``. The
    return value is ``pct_change()`` of those prices.
    """
    tickers = ['AAPL', 'GOOG', 'MSFT', 'AMZN']
    closing_df = DataReader(tickers, 'yahoo', start, end)['Adj Close']
    tech_rets = closing_df.pct_change()

    # A stock against itself, then against another stock.
    sns.jointplot('GOOG', 'GOOG', tech_rets, kind='scatter', color='seagreen')
    sns.jointplot('GOOG', 'MSFT', tech_rets, kind='scatter')
    sns.pairplot(tech_rets.dropna())

    # Same custom grid for the returns and for the closing prices:
    # scatter plots above the diagonal, KDE below it, histograms on it.
    for frame in (tech_rets.dropna(), closing_df):
        returns_fig = sns.PairGrid(frame)
        returns_fig.map_upper(plt.scatter, color='purple')
        returns_fig.map_lower(sns.kdeplot, cmap='cool_d')
        returns_fig.map_diag(plt.hist, bins=30)

    # Quick annotated correlation plot of the returns.
    sns.corrplot(tech_rets.dropna(), annot=True)

    return tech_rets
Пример #32
0
    def get_cor_matrix(self, method="pearson"):
        """Store the correlation matrix of ``self.mat`` and save its plot.

        ``method`` is kept on ``self.method`` and the correlation values on
        ``self.cor_mat``. The PDF name is ``self.infile`` minus its last two
        dot-separated parts, plus ``.corMat.<method>.pdf``.
        """
        self.method = method

        base_name = ".".join(self.infile.split(".")[:-2])
        out_cor_file = "%s.corMat.%s.pdf" % (base_name, self.method)

        table = pd.DataFrame(self.mat.matrix)
        table.columns = self.mat.colname
        table.index = self.mat.rowname
        self.cor_mat = table.corr(self.method).values

        sns.set(style="darkgrid")
        fig, axis = plt.subplots(figsize=(9, 9))
        diverging = sns.diverging_palette(220, 10, as_cmap=True)
        plot_options = dict(annot=False,
                            sig_stars=False,
                            diag_names=False,
                            cmap=diverging,
                            ax=axis,
                            cmap_range=(0.0, 1.0),
                            method=self.method)
        sns.corrplot(table, **plot_options)
        fig.savefig(out_cor_file, format="pdf")
Пример #33
0
def explorer(data, name, hue=None, trel=True, corr=True):
    """
    Draw and save a Trellis plot and/or a correlation map of ``data``.

    The Trellis plot has scatter plots in the upper triangle, kernel
    densities in the lower triangle and histograms on the diagonal. The
    original author notes this is slow on big data.

    Args:
        data: DataFrame. Input data arrays.
        name: str. Output file name; the figures are saved as
            'trel_<name>' and 'corr_<name>'. A name ending in '.pdf'
            switches matplotlib to the PDF backend.
        hue: str, optional. Name of the variable used as hue.
        trel: bool. Draw the Trellis plot.
        corr: bool. Draw the correlation map.
    Return:
        None. The figures are only written to disk.
    """

    if name.endswith('.pdf'):
        mpl.use('PDF')
    # Imported after the backend choice above.
    import matplotlib.pyplot as plt
    if trel:
        # Parenthesised single-argument print works on Python 2 and 3.
        print('Plotting Trellis plots.')
        g = sns.PairGrid(data, hue=hue)
        g.map_lower(sns.kdeplot, cmap="Purples", shade=True)
        g.map_diag(plt.hist)
        g.map_upper(plt.scatter, s=10, alpha=.05)

        g.savefig('trel_' + name, dpi=300)
        plt.close()

    if corr:
        print('Plotting correlation map.')
        plt.figure()
        ax = sns.corrplot(data)
        ax.figure.savefig('corr_' + name, dpi=300)
        plt.close()
Пример #34
0
def explorer(data, name, hue=None, trel=True, corr=True):
    """
    Save a Trellis (pair-grid) plot and a correlation map of ``data``.

    Trellis plot: scatter plots above the diagonal, shaded kernel densities
    below it and histograms on it. The original author warns that this takes
    a long time with big data.

    Args:
        data: DataFrame. Input data arrays.
        name: str. Output file name, prefixed with 'trel_' and 'corr_' for
            the two figures. If it ends in '.pdf' the PDF backend is chosen.
        hue: str, optional. Name of the variable used as hue.
        trel: bool. Whether to draw the Trellis plot.
        corr: bool. Whether to draw the correlation map.
    Return:
        None. Nothing is returned; both figures are saved to files.
    """

    if name.endswith('.pdf'):
        mpl.use('PDF')
    # pyplot is imported only after the backend has been chosen.
    import matplotlib.pyplot as plt
    if trel:
        # Single-argument print call: same output on Python 2 and 3.
        print('Plotting Trellis plots.')
        g = sns.PairGrid(data, hue=hue)
        g.map_lower(sns.kdeplot, cmap="Purples",shade=True)
        g.map_diag(plt.hist)
        g.map_upper(plt.scatter, s=10, alpha=.05)

        g.savefig('trel_'+name, dpi = 300)
        plt.close()

    if corr:
        print('Plotting correlation map.')
        plt.figure()
        ax = sns.corrplot(data)
        ax.figure.savefig('corr_'+name, dpi = 300)
        plt.close()
Пример #35
0
def plotpair(df):
    """Draw an un-annotated correlation plot of ``df``; the result is discarded."""
    _ = sns.corrplot(df, annot=False)
Пример #36
0
    print rawdata.cov()

    print rawdata[['Age', 'Ca']].corr()
    pd.DataFrame.corr(rawdata)
    plt.show()

    # define colors list, to be used to plot survived either red (=0) or green (=1)
    colors = ['red', 'green']

    # make a scatter plot

    # rawdata.info()

    from scipy import stats
    import seaborn as sns  # just a conventional alias, don't know why
    sns.corrplot(rawdata)  # compute and plot the pair-wise correlations
    # save to file, remove the big white borders
    #plt.savefig('attribute_correlations.png', tight_layout=True)
    plt.show()

    attr = rawdata['Age']
    sns.distplot(attr)
    plt.show()

    sns.distplot(attr, kde=False, fit=stats.gamma)
    plt.show()

    # Two subplots, the axes array is 1-d
    plt.figure(1)
    plt.title('Histogram of Age')
    plt.subplot(211)  # 21,1 means first one of 2 rows, 1 col
Пример #37
0
def make_correlation_plot(df):
    """Draw an annotated correlation plot of ``df`` on a new 12x12 figure."""
    _, axis = plt.subplots(figsize=(12, 12))
    options = dict(annot=True, sig_stars=False,
                   diag_names=False, ax=axis)
    sns.corrplot(df, **options)
Пример #38
0
# NOTE(review): excerpt of a larger gallery script; `gs`, `ax`, `x1`, `x2`
# and `y` on the first line are defined before this excerpt.
sns.interactplot(x1, x2, y, colorbar=False, ax=ax)


# Correlation matrix
# ------------------

# Axes taken from grid rows 4 onward, column 0.
ax = plt.subplot(gs[4:, 0])

# Five synthetic series from a generator seeded with 0.
rs = np.random.RandomState(0)
x0, x1 = rs.randn(2, 60)
x2, x3 = rs.multivariate_normal([0, 0], [(1, -.5), (-.5, 1)], 60).T
x2 += x0 / 8
x4 = x1 + rs.randn(60) * 2
# Stack the series as the columns of one array.
data = np.c_[x0, x1, x2, x3, x4]

sns.corrplot(data, ax=ax)
ax.set_title("corrplot()", verticalalignment="top")


# Beta distributions
# ------------------

sns.set(style="nogrid")

# Axes at grid row 4, column 1; x range fixed to [0, 1], tick labels hidden.
ax = plt.subplot(gs[4, 1])
plt.title("distplot()")
plt.xlim(0, 1)
ax.set_xticklabels([])

# Three desaturated "Set2" colours; the middle one is discarded.
g, _, p = sns.color_palette("Set2", 3, desat=.75)
n = 1000
# NOTE(review): `tech_rets` and `closing_df` are defined outside this excerpt.
# Pair plot of tech_rets with rows containing NaN dropped.
sns.pairplot(tech_rets.dropna())
plt.show()

# Custom pair grid for tech_rets: scatter plots in the upper triangle,
# KDE plots in the lower triangle, 30-bin histograms on the diagonal.
returns_fig = sns.PairGrid(tech_rets.dropna())
returns_fig.map_upper(plt.scatter, color='purple')
returns_fig.map_lower(sns.kdeplot, cmap='cool_d')
returns_fig.map_diag(plt.hist, bins=30)
plt.show()

# The same grid layout for closing_df.
returns_fig = sns.PairGrid(closing_df)
returns_fig.map_upper(plt.scatter, color='purple')
returns_fig.map_lower(sns.kdeplot, cmap='cool_d')
returns_fig.map_diag(plt.hist, bins=30)
plt.show()

# Annotated correlation plots of both frames.
sns.corrplot(tech_rets.dropna(), annot=True)
plt.show()

sns.corrplot(closing_df, annot=True)
plt.show()

''' analyze the risk of a stock '''
# Column mean (x, "Expected Return") against column standard deviation
# (y, "Risk"), one marker per column.
rets = tech_rets.dropna()
area = np.pi * 20
plt.scatter(rets.mean(), rets.std(), s = area)
plt.xlabel('Expected Return')
plt.ylabel('Risk')

for label, x, y in zip(rets.columns, rets.mean(), rets.std()):
    plt.annotate(
        label,
# Load the dataset into a frame and append its target as a 'target' column.
dataset = load_boston()
df = pd.DataFrame(dataset.data, columns=dataset.feature_names)
df['target'] = dataset.target

# Pearson correlation matrix, rows sorted by their correlation with 'target'
corr  = df.corr(method='pearson')

corr.sort_values(by = 'target', inplace = True)

# find pairs with high correlations

# NOTE(review): within this excerpt `plt` is used on the next lines before
# the `import matplotlib.pyplot as plt` further down; it presumably comes
# from an earlier import outside the excerpt - confirm.
import seaborn as sns # conventional alias for seaborn
fig, ax = plt.subplots(figsize=(10, 10))

sns.corrplot(df, ax = ax)

# NOTE(review): `attr` is only assigned two statements below, so this call
# fails unless `attr` already exists when the script is run top to bottom.
fig, ax = plt.subplots(figsize=(10, 10))
sns.distplot(attr)

import matplotlib.pyplot as plt
attr = df.target
plt.hist(attr)


# Target against the 'LSTAT' column: plain scatter, then joint plots.
plt.scatter(df.target, df['LSTAT'])
sns.jointplot(df.target, df['LSTAT'], kind='scatter')
sns.jointplot(df.target, df['LSTAT'], kind='hex')


### explore some diagnostic plots: QQ
# In[16]:

# NOTE(review): `tech_rets` and `closing_df` come from cells outside this excerpt.
sns.pairplot(tech_rets.dropna())


# In[17]:


# Pair grid over closing_df.
returns_fig = sns.PairGrid(closing_df)


# Upper triangle: purple scatter plots.
returns_fig.map_upper(plt.scatter,color='purple')

# Lower triangle: KDE plots drawn with the 'cool_d' colour map.
returns_fig.map_lower(sns.kdeplot,cmap='cool_d')

# Diagonal: 30-bin histograms.
returns_fig.map_diag(plt.hist,bins=30)


# In[18]:

# Annotated correlation plot of tech_rets with NaN rows dropped.
sns.corrplot(tech_rets.dropna(),annot=True)


# In[ ]:



    i+=1

# NOTE(review): `pr_piv`, `OutputFolder`, `heatmap`, `rvalue` and `pvalue`
# are defined outside this excerpt.
# Democratic candidates: the most anti-correlated pair.
sns_plot = sns.jointplot('Hillary Clinton','Bernie Sanders',pr_piv,kind='scatter')
sns_plot.savefig(OutputFolder+'HillaryClinton_BernieSanders_joinplot.png')


# Primary results assume a choice among Democratic candidates only or among
# Republican candidates only, so comparing Democrats to Republicans on these
# results does not make much sense.
# Still, let's look at the picture as a whole.
heatmap(rvalue,'rvalue.png')
# Seaborn quick correlation plot (per the original note, equivalent to
# pandas.DataFrame.corr('pearson')).
f, ax = plt.subplots(figsize=(15, 15))
sns_plot = sns.corrplot(pr_piv,annot=True, ax=ax)
plt.savefig(OutputFolder+'corrplot.png')
# Now look at the p-values of the correlations between Democratic and
# Republican candidates; per the note above, these results cannot be trusted.
heatmap(pvalue,'pvalue.png')

# The standard error of the correlation can be inspected as well:
#heatmap(stderr,'stderr.png')




# Hillary Clinton against a Republican candidate
sns_plot = sns.jointplot('Hillary Clinton','Donald Trump',pr_piv,kind='scatter')
sns_plot.savefig(OutputFolder+'HillaryClinton_DonaldTrump_joinplot.png')
Пример #43
0
def extra_viz(loansData):
    f, ax = plt.subplots(figsize=(10, 10))
    sns.corrplot(loansData, ax=ax)
    plt.savefig('../figs/loan_corr_matrix.png')
Пример #44
0
## Try correlation and corrplot to see which features stand out:
df.corr(method='pearson')

pearson = df.corr(method='pearson')
# NOTE(review): the original comment says the target attribute is the last
# one, but the row selected is the 7th from the end - confirm which is
# intended. The final entry of that row is dropped.
corr_with_target = pearson.ix[-7][:-1]
# Parenthesised single-argument print works on Python 2 and 3.
print(corr_with_target)

# correlations ordered by absolute value, largest first
corr_with_target[abs(corr_with_target).argsort()[::-1]]

# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(20, 15))
sns.corrplot(df)  # compute and plot the pair-wise correlations
# save to file, remove the big white borders
plt.savefig('attribute_correlations.png', tight_layout=True)


##Use scikit-learn's SelectKBest feature selection:
def get_k_best(enron_data, features_list, k):
    """ runs scikit-learn's SelectKBest feature selection
        returns dict where keys=features, values=scores
    """
    data = featureFormat(enron_data, features_list)
    labels, features = targetFeatureSplit(data)

    k_best = SelectKBest(k=k)
    k_best.fit(features, labels)
    scores = k_best.scores_
Пример #45
0
import numpy as np
import pandas as pd
from numpy.random import randn
import matplotlib.pyplot as plt
import seaborn as sns
from pandas import Series,DataFrame
# Small demo frame and its summary statistics.
array = np.array([[1,3,4,4],[1,4,5,5]])
dframe1 = DataFrame(array,index=list('AB'),columns=list('abcd'))
print(dframe1)

print(dframe1.describe())

import pandas.io.data as pdweb
import datetime

# Download the oil-company data once and take both fields from it, instead
# of issuing two identical requests.
oil_data = pdweb.get_data_yahoo(['CVX','XOM','BP'],start=datetime.datetime(2013,1,1),end=datetime.datetime(2016,1,1))
prices_oils = oil_data['Adj Close']

print(prices_oils.head())

prices_volume = oil_data['Volume']
prices_volume.head()

rets = prices_oils.pct_change()

# Call corr(): the original bound the method object, not the matrix.
corr = rets.corr()

prices_oils.plot()
sns.corrplot(rets,annot=False,diag_names=False)
# NOTE(review): `tech_rets` and `closing_df` come from cells outside this excerpt.
# We can simply call pairplot on our DataFrame for an automatic visual analysis of all the comparisons
sns.pairplot(tech_rets.dropna())


# In[38]:

# Pair grid for tech_rets: scatter above the diagonal, KDE below it,
# 30-bin histograms on it.
returns_fig = sns.PairGrid(tech_rets.dropna())
returns_fig.map_upper(plt.scatter,color='purple')
returns_fig.map_lower(sns.kdeplot,cmap='cool_d')
returns_fig.map_diag(plt.hist,bins=30)


# In[42]:

# Annotated correlation plot of tech_rets (NaN rows dropped).
sns.corrplot(tech_rets.dropna(),annot=True)


# In[43]:

# Annotated correlation plot of closing_df.
sns.corrplot(closing_df,annot=True)


# ## Risk Analysis

# In[48]:

# Let's start by defining a new DataFrame as a cleaned version of the original tech_rets DataFrame
rets = tech_rets.dropna()
area = np.pi*20
# Column means on x against column standard deviations on y.
plt.scatter(rets.mean(), rets.std(),alpha = 0.5,s =area)
# NOTE(review): `tech_rets` and `closing_df` come from cells outside this excerpt.
# Pair grid for tech_rets: scatter above the diagonal, KDE below it,
# 30-bin histograms on it.
returns_fig = sns.PairGrid(tech_rets.dropna())
returns_fig.map_upper(plt.scatter, color='purple')
returns_fig.map_lower(sns.kdeplot, cmap='cool_d')
returns_fig.map_diag(plt.hist, bins=30)

# In[31]:

# The same grid layout for closing_df.
returns_fig = sns.PairGrid(closing_df)
returns_fig.map_upper(plt.scatter, color='purple')
returns_fig.map_lower(sns.kdeplot, cmap='cool_d')
returns_fig.map_diag(plt.hist, bins=30)

# In[32]:

# Annotated correlation plot of tech_rets (NaN rows dropped).
sns.corrplot(tech_rets.dropna(), annot=True)

# In[ ]:

# Risk analysis

# In[36]:

# Let's start by defining a new DataFrame as a cleaned version of the original tech_rets DataFrame
rets = tech_rets.dropna()

area = np.pi * 20

# Column means on x against column standard deviations on y.
plt.scatter(rets.mean(), rets.std(), alpha=0.5, s=area)

# Set the x and y limits of the plot (optional, remove this if you don't see anything in your plot)
Пример #48
0
''' FacetGrid. '''
# A FacetGrid draws several Axes that all show the same relationship, each one
# conditioned on a different level of some variable.

# Grid faceted by zone (columns) and homebr (rows), coloured by Stories,
# with a point plot of pricePerSqft in every facet.
myimg = sns.FacetGrid(
    dframe,
    hue='Stories',
    col='zone',
    row='homebr',
).map(sns.pointplot, 'pricePerSqft')

# Wide grid with one row per zone, coloured by Stories, showing a shaded KDE
# of homeprice; the legend and axis label are added step by step.
myimg = sns.FacetGrid(dframe, hue='Stories', row='zone', aspect=4)
myimg = myimg.map(sns.kdeplot, 'homeprice', shade=True)
myimg = myimg.add_legend()
myimg = myimg.set_axis_labels("Home Prices")

''' Correlation Visualization '''
# Also known as correlation plots.
sns.pairplot(dlyReturns_df)

# Hand-built pair grid: scatter above the diagonal, KDE below, histograms on it.
dlyReturns_fig = sns.PairGrid(dlyReturns_df, size=5, aspect=2)
dlyReturns_fig.map_upper(plt.scatter, color='darkblue')
dlyReturns_fig.map_lower(sns.kdeplot, cmap='cool_d')
dlyReturns_fig.map_diag(plt.hist, bins=30)

sns.corrplot(dlyReturns_df, annot=True)

###############################################################
###															###
###															###
###   				Importing stock prices					###
###															###
###															###
###############################################################

import pandas.io.data as pdweb
import datetime
from pandas.io.data import DataReader
from datetime import datetime
from __future__ import division # dont have to worry about division complications with python 2.7
                groupby('Stock').resample('M', how='sum')
# Flatten the index back into columns and keep only Date and Volume
AEV_vol_comb = AEV_vol_comb.reset_index().reindex(columns=['Date','Volume'])
AEV_vol_comb['Month'] = AEV_vol_comb['Date'].dt.month  # add a Month column taken from the Date column
# Bar plot of Volume against Month
sns.barplot("Month", y="Volume",data=AEV_vol_comb,
            palette="BuGn_d")

# Pairwise comparison of all columns of myport_rets using seaborn
sns.pairplot(myport_rets.dropna())
# Pair grid over myport_Close (closing prices of all stock tickers):
# scatter above the diagonal, KDE below it, histograms on it
returns_fig = sns.PairGrid(myport_Close.dropna())
returns_fig.map_upper(plt.scatter,color='purple')
returns_fig.map_lower(sns.kdeplot,cmap='cool_d')
returns_fig.map_diag(plt.hist,bins=30)
# Correlation plot between the daily returns of all stock tickers
sns.corrplot(myport_rets.dropna(),annot=True)

#sns.jointplot(myport_rets['URC'],myport_rets['CEB'])#joint plot of both datasets
#sns.jointplot(myport_rets['URC'],myport_rets['CEB'],kind='hex')#plot using hex
#sns.jointplot(myport_rets['URC'],myport_rets['JGS'])#joint plot of both datasets
#sns.jointplot(myport_rets['URC'],myport_rets['JGS'],kind='hex')#plot using hex
#sns.jointplot(myport_rets['JGS'],myport_rets['CEB'])#joint plot of both datasets
#sns.jointplot(myport_rets['JGS'],myport_rets['CEB'],kind='hex')#plot using hex

# Correlation plot between the closing prices of all stock tickers
sns.corrplot(myport_Close.dropna(),annot=True)
#######################################
# RISK ANALYSIS
# (A) There are many ways we can quantify risk, one of the most basic ways
# using the info. we've gathered on daily percentage returns is by
# comparing the expected return with the standard deviation of the
Пример #50
0
    data[col] = data[col].astype('float')

# Plot the distribution of each predictor as a histogram, one image file per
# column (the column name is used as the file name).
for col in columns:
    fig, ax = plt.subplots()
    data[col].hist(ax=ax)
    ax.set_title(col + ' distribution')
    fig.savefig(col)
    # Close each figure once it is saved. Otherwise figures accumulate for the
    # life of the script, and the last histogram stays "current", so the
    # correlation matrix below would be drawn on top of it.
    plt.close(fig)

# Plot the correlation matrix of the dataset on its own figure to see whether
# some predictors are more correlated with the response than others.
corr_fig, corr_ax = plt.subplots()
seaborn.corrplot(data,
                 sig_stars=True,
                 annot=False,
                 sig_tail='both',
                 sig_corr=False,
                 cmap=None,
                 cmap_range=None,
                 cbar=True,
                 diag_names=True,
                 method='spearman',
                 ax=corr_ax)
corr_fig.savefig('correlation_spearman_matrix.png')

# Separate the response from the features.
y = data['loan_status']
data.drop('loan_status', inplace=True, axis=1)


def Logistic_Regression(X, y, fold):
    # shuffle and split training and test sets
    gini = 0
Пример #51
0
# -*- coding: UTF-8 -*-
# NumPy: scientific computing toolbox
import numpy as np
# Use make_classification to build 1000 samples with 20 features each
from sklearn.datasets import make_classification
X, y = make_classification(n_samples=1000,
                           n_features=20,
                           n_informative=2,
                           n_redundant=2,
                           n_classes=2,
                           random_state=0)
# Store features and label together as a DataFrame. `list(range(20))` keeps
# the column list valid on Python 3, where `range` is not a list and cannot
# be concatenated with one.
from pandas import DataFrame
df = DataFrame(np.hstack((X, y[:, None])),
               columns=list(range(20)) + ["class"])

# Function-call form of print works on both Python 2 and Python 3.
print(df[:6])

import matplotlib.pyplot as plt
import seaborn as sns
# Use pairplot to see how the first 50 samples are distributed for pairs of
# selected feature dimensions, coloured by class
_ = sns.pairplot(df[:50], vars=[8, 11, 12, 14, 19], hue="class", size=1.5)
plt.show()

import matplotlib.pyplot as plt
# Correlation plot over all columns, including the class label
plt.figure(figsize=(12, 10))
_ = sns.corrplot(df, annot=False)
plt.show()
Пример #52
0
def inspect_correlations(ModelTrains, filedir='data/plots', FIGWIDTH=FIGWIDTH, FIGHEIGHT=FIGHEIGHT):
    '''Save correlation-matrix plots for every trainer in ``ModelTrains``.

    For each ``trainer`` in ``ModelTrains.trainers`` two images may be written:

    * ``<filedir>/<trainer.name>/correlations/matrix_<name>_<id>.png`` --
      attempted for every trainer on a best-effort basis; a failure is
      reported as a warning and does not stop the loop.
    * ``<filedir>/<trainer.name>/corrplot_<name>_<id>.png`` -- written only
      when ``trainer.now`` contains no missing values; errors here propagate.

    Args:
        ModelTrains: object exposing a ``trainers`` iterable; each trainer
            provides ``name``, ``id``, ``shape`` and a DataFrame ``now``.
        filedir: root directory for the output images.
        FIGWIDTH: figure width passed to ``plt.subplots``.
        FIGHEIGHT: figure height for the best-effort plot.

    Returns:
        None.
    '''
    import warnings

    plt.close('all')
    for trainer in ModelTrains.trainers:
        save_this_directory = filedir + '/{}'.format(trainer.name)
        save_this_here = save_this_directory + '/correlations'
        # Creating the deepest directory also creates filedir and the
        # per-trainer directory above it.
        _ensure_dir(save_this_here)

        # Best-effort plot: the data may contain missing values, so a failure
        # is reported as a warning instead of aborting the run.
        try:
            plt.close('all')
            fig, axs = plt.subplots(figsize=(FIGWIDTH, FIGHEIGHT))
            sns.corrplot(trainer.now, annot=False, diag_names=False)
            doc = save_this_here + '/matrix_{}_{}.png'.format(trainer.name, trainer.id)
            t = '\n{}\n(Anti)Correlation Matrix\n{}'.format(trainer.name,
                                                            trainer.shape)
            plt.title(t, fontsize=12)
            plt.tight_layout()
            fig.savefig(doc)
        except Exception as err:
            warnings.warn('Skipping correlation matrix for {}: {}'.format(
                trainer.name, err))
        finally:
            plt.close('all')

        # Second plot only for trainers whose data has no missing values.
        if trainer.now.isnull().sum().sum() == 0:
            plt.close('all')
            # Square figure: FIGWIDTH is used for both dimensions.
            fig, axs = plt.subplots(figsize=(FIGWIDTH, FIGWIDTH))

            sns.corrplot(trainer.now, annot=False, diag_names=False)
            t = "\n{}\nNon-Missing Only Correlation Matrix,\n{}".format(trainer.name,
                                                                        trainer.shape)
            doc = '{}/{}.png'.format(save_this_directory,
                                     'corrplot_{}_{}'.format(trainer.name,
                                                             trainer.id))

            plt.title(t)
            plt.tight_layout()
            fig.savefig(doc)
            plt.close('all')
    plt.close('all')


def _ensure_dir(path):
    '''Create ``path`` and any missing parents; warn if that is impossible.'''
    import warnings

    try:
        os.makedirs(path)
    except OSError as err:
        # An already-existing directory is the normal case, not an error.
        if not os.path.isdir(path):
            warnings.warn('Could not create directory {}: {}'.format(path, err))
Пример #53
0
import pandas
import seaborn
import numpy
from matplotlib import pyplot

# Load the data and take the hurricane-count column out of the frame, so only
# the remaining columns take part in the correlation plot.
df = pandas.read_csv("df.csv")
counts = df.pop('hur.count')
df = df.rename(columns=lambda name: name.replace(".data", ""))
seaborn.set(style="darkgrid")

rs = numpy.random.RandomState(33)

# Correlation plot of the remaining columns with a diverging colour map.
cmap = seaborn.diverging_palette(220, 10, as_cmap=True)
f, ax = pyplot.subplots(figsize=(9, 9))
seaborn.corrplot(df, ax=ax, cmap=cmap,
                 annot=False, sig_stars=False, diag_names=False)
f.tight_layout()
f.savefig("corr.png")

# Start a fresh figure for the distribution of the yearly counts.
pyplot.close()
pyplot.figure(figsize=(9, 9))
seaborn.distplot(counts)
pyplot.xlim(0, 20)
pyplot.title("Histogram of Annual Hurricane Counts")
pyplot.xlabel("Hurricanes")
pyplot.savefig("histogram.png")
Пример #54
0
def visualize(training_data, X, y, pca):
    """
    Computes statistics describing the data and creates some visualizations
    that attempt to highlight the underlying structure.

    Three groups of figures are created:

    * histograms of every column of ``training_data``, 16 per figure in a
      4x4 grid (column 0 is drawn from ``y``, column ``k`` from ``X[:, k-1]``);
    * a correlation plot of ``training_data``;
    * when ``pca`` is given, scatter plots of consecutive principal-component
      pairs (1-2, 2-3, 3-4), one colour per class. A pair is skipped when the
      transformed data has too few components for it.

    Args:
        training_data: DataFrame whose columns name the response followed by
            the features.
        X: 2-D feature array; must support ``X[:, k]`` indexing.
        y: response / class labels; must support ``y == value`` masking when
            ``pca`` is given.
        pca: fitted transformer exposing ``transform``, or ``None`` to skip
            the principal-component plots.

    Note: Use '%matplotlib inline' and '%matplotlib qt' at the IPython console
    to switch between display modes.
    """
    rows, cols = 4, 4
    per_figure = rows * cols

    print('Generating individual feature histograms...')
    num_features = len(training_data.columns)
    # Ceiling division with integers only: `/` yields a float on Python 3,
    # which neither range() nor array indexing accepts.
    num_plots = (num_features + per_figure - 1) // per_figure
    for i in range(num_plots):
        fig, ax = plt.subplots(rows, cols, figsize=(20, 10))
        for j in range(per_figure):
            index = (i * per_figure) + j
            if index >= num_features:
                break  # last figure may be only partly filled
            # Column 0 of training_data is the response, the rest map to X.
            values = y if index == 0 else X[:, index - 1]
            cell = ax[j // cols, j % cols]
            cell.hist(values, bins=30)
            cell.set_title(training_data.columns[index])
            cell.set_xlim((min(values), max(values)))
        fig.tight_layout()

    print('Generating correlation matrix...')
    fig2, ax2 = plt.subplots(figsize=(16, 10))
    colormap = sb.blend_palette(
        ["#00008B", "#6A5ACD", "#F0F8FF", "#FFE6F8", "#C71585", "#8B0000"],
        as_cmap=True)
    sb.corrplot(training_data,
                annot=False,
                sig_stars=False,
                diag_names=False,
                cmap=colormap,
                ax=ax2)
    fig2.tight_layout()

    if pca is not None:
        print('Generating principal component plots...')
        X = pca.transform(X)
        # Counts the distinct non-zero labels; this matches the 1-based class
        # indices used when plotting. NOTE(review): assumes labels are 1..k.
        class_count = np.count_nonzero(np.unique(y))
        component_pairs = [
            (0, 1, 'First & Second Principal Components'),
            (1, 2, 'Second & Third Principal Components'),
            (2, 3, 'Third & Fourth Principal Components'),
        ]
        for first, second, title in component_pairs:
            if second >= X.shape[1]:
                break  # not enough components for this pair or later ones
            _scatter_component_pair(X, y, class_count, first, second, title)


def _scatter_component_pair(X, y, class_count, first, second, title):
    """Scatter two columns of ``X`` against each other, one colour per class."""
    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'w']
    fig, ax = plt.subplots(figsize=(16, 10))
    for i in range(class_count):
        class_idx = i + 1  # add 1 if class labels start at 1 instead of 0
        members = y == class_idx
        ax.scatter(X[members, first],
                   X[members, second],
                   # wrap around so more than len(colors) classes cannot
                   # index past the end of the palette
                   c=colors[i % len(colors)],
                   label=class_idx)
    ax.set_title(title)
    ax.legend()
    fig.tight_layout()
Пример #55
0
# -*- coding: utf-8 -*-
"""
Created on Fri Jan  9 12:43:14 2015

@author: davekensinger
"""

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="darkgrid")

# 100 draws of 30 variables from a normal distribution, seeded generator.
rs = np.random.RandomState(33)
d = rs.normal(size=(100, 30))

# Diverging colour map for the correlation plot, drawn on a 9x9 figure.
cmap = sns.diverging_palette(220, 10, as_cmap=True)
f, ax = plt.subplots(figsize=(9, 9))
sns.corrplot(d, ax=ax, cmap=cmap,
             annot=False, sig_stars=False, diag_names=False)
f.tight_layout()
Пример #56
0
# Two-column frame built from the x and y samples.
df = pd.DataFrame(np.transpose([x, y]), columns=["X", "Y"])
sns.regplot("X", 'Y', df)

# Same regression with ci=None and a different colour.
sns.regplot("X", 'Y', df, ci=None, color='slategray')


def r2(x, y):
    """Return the squared Pearson correlation coefficient of x and y.

    NOTE(review): assumes `stats` is `scipy.stats`, whose function is
    `pearsonr` (there is no `pearson`); the first element of its result is
    the correlation coefficient.
    """
    return stats.pearsonr(x, y)[0] ** 2


# Pass r2 as the correlation function and label it as R^2.
sns.regplot('X', 'Y', df, corr_func=r2, func_name='$R^2$', color='seagreen')


# Load the tips dataset and derive boolean columns so they can take part in
# the correlation plots.
tips = pd.read_csv("https://raw.github.com/mwaskom/seaborn/master/examples/tips.csv")
tips["big_tip"] = tips["tip"] > (.2 * tips["total_bill"])
tips["smoker"] = tips["smoker"] == "Yes"
tips["female"] = tips["sex"] == "Female"

# Correlation plots on 7x7 figures: defaults, without significance stars,
# and upper-tailed with a custom colour map and range.
mpl.rc("figure", figsize=(7, 7))
for corrplot_options in (
        {},
        {"sig_stars": False},
        {"sig_tail": 'upper', "cmap": 'PuRd', "cmap_range": (-.2, .8)},
):
    sns.corrplot(tips, **corrplot_options)

# Linear-model plots on 5x5 figures.
mpl.rc('figure', figsize=(5, 5))
sns.lmplot('total_bill', 'tip', tips)
sns.lmplot('total_bill', 'tip', tips, color='time')
sns.lmplot('total_bill', 'tip', tips, color='day', palette='muted', ci=None)

# Squared tip, fitted with order=2.
tips['tip_sqr'] = tips["tip"] ** 2
sns.lmplot('total_bill', 'tip_sqr', tips, order=2)

# big_tip against size: plain, jittered, and jittered with logistic=True.
sns.lmplot('size', 'big_tip', tips)
sns.lmplot('size', 'big_tip', tips, x_jitter=0.3, y_jitter=0.075)
sns.lmplot('size', 'big_tip', tips, x_jitter=0.3, y_jitter=0.075, logistic=True, n_boot=1000)
sns.lmplot('total_bill', 'tip', tips, col='sex')
Пример #57
0
def make_correlation_plot(df):
    f, ax = plt.subplots(figsize=(12, 12))
    sns.corrplot(df, annot=True, sig_stars=False,
             diag_names=False, ax=ax)