Exemplos de jointplot em Python, exemplos de seaborn.jointplot em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: plot.py Projeto: DBlackKat/Sentiment-Data-Analysis

def plotTradeVsNews(tickName):
    """Plot log trading volume against two news measures for one ticker.

    For the ``NewsBuz`` column and for the log of the ``NewsVol`` column a
    hexbin joint plot (annotated with Kendall's tau) and a KDE joint plot are
    drawn against the log of ``tradingVol``; all figures are then shown.

    Args:
        tickName: ticker identifier forwarded to ``getNewsNTradingVol``.
    """
    path2 = "resultsMKII"
    # The original passed the undefined name ``tick_Name`` here, which raised
    # NameError on every call.
    frame = getNewsNTradingVol(tickName, path2)

    # Vectorised replacement for the former element-by-element append loop.
    news_buz = np.asarray(frame['NewsBuz'].values)
    trading_vol = np.log(frame['tradingVol'].values)
    news_vol = np.log(frame['NewsVol'].values)

    sns.set(style="ticks")
    for x, x_label in ((news_buz, "News Buz"), (news_vol, "News Volume")):
        hex_grid = sns.jointplot(x=x, y=trading_vol, kind="hex",
                                 stat_func=kendalltau, color="#4CB391")
        hex_grid.set_axis_labels(xlabel=x_label, ylabel="Trading Volume")

        kde_grid = sns.jointplot(x=x, y=trading_vol, kind="kde", size=7, space=0)
        kde_grid.set_axis_labels(xlabel=x_label, ylabel="Trading Volume")

    # A single show() displays every figure created above; the second call in
    # the original had nothing left to display.
    sns.plt.show()

Exemplo n.º 2

0

Exibir arquivo

Arquivo: doplot_stan.py Projeto: aasensio/axial_ratio

    def doplot(self, name):
        """
        Load a pickled trace and show a corner plot, a scatter plot and a
        KDE joint plot of it.

        Args:
            name: path of the pickle file holding the trace. The loaded
                object is stored on ``self.trace`` and is indexed with the
                keys ``'muCB'``, ``'sdCB'`` and ``'CB'``.
        """
        # NOTE(security): pickle can execute arbitrary code while loading;
        # only open trace files that come from a trusted source.
        with open(name, "rb") as handle:
            self.trace = pickle.load(handle)

        var = np.vstack([self.trace['muCB'][:,0], self.trace['muCB'][:,1], self.trace['sdCB'][:,0], self.trace['sdCB'][:,1]]).T

        # Raw strings: '\m' and '\s' are invalid escape sequences in normal
        # string literals (the resulting text is unchanged).
        corner.corner(var, labels=[r'$\mu_C$', r'$\mu_B$', r'$\sigma_C$', r'$\sigma_B$'], show_titles=True)

        pl.show()

        # Take a random subsample of (up to) 200 entries along the first axis.
        # One shared permutation is used for both components so that every C
        # value stays paired with the B value of the same entry; shuffling the
        # two slices independently (as before) destroyed that pairing and
        # also reordered self.trace['CB'] in place.
        CB = self.trace['CB']
        keep = np.random.permutation(len(CB))[:200]
        C_slice = CB[keep, :, 0].flatten()
        B_slice = CB[keep, :, 1].flatten()

        # First option
        pl.plot(B_slice, C_slice, '.', alpha=0.002)
        pl.show()

        # KDE joint plot
        sns.jointplot(x=C_slice, y=B_slice, kind='kde')
        pl.show()

Exemplo n.º 3

0

Exibir arquivo

Arquivo: seaborn1.py Projeto: EnriqueU/M-L

def seaborn_join():
    """Show a hexbin joint plot of 2000 correlated bivariate-normal samples."""
    data = np.random.multivariate_normal([0, 0], [[5, 2], [2, 2]], size=2000)
    data = pd.DataFrame(data, columns=['x', 'y'])
    with sns.axes_style('white'):
        # Keyword arguments: current seaborn releases no longer accept
        # x / y / data positionally, while keywords work on every version.
        sns.jointplot(x="x", y="y", data=data, kind='hex')

    plt.show()

Exemplo n.º 4

0

Exibir arquivo

Arquivo: beam_source.py Projeto: b-r-oleary/acme

 def histogram(self,x=None, y=None, l=None, t=None, **kwargs):
     """
     Shortcut for several histogram-style plots of the table returned by
     ``self.to_dataframe(l=l, t=t, latex=True)``.

     - neither x nor y given: pair plot of every pair of columns, with the
       x tick labels rotated by 90 degrees
     - only x given: 1d histogram of that column
     - x and y given: joint plot of the two columns
     - only y given: nothing is drawn

     Extra keyword arguments are forwarded to the seaborn call that is used.
     """
     table = self.to_dataframe(l=l, t=t, latex=True)

     if x is None:
         if y is None:
             pair_grid = sns.pairplot(table, **kwargs)
             for axis in pair_grid.axes.flat:
                 plt.setp(axis.xaxis.get_majorticklabels(), rotation=90)
         return

     x_label = self._reformat_label(x)
     if y is None:
         sns.distplot(table[x_label], **kwargs)
         plt.xlabel(x_label)
     else:
         y_label = self._reformat_label(y)
         sns.jointplot(x=x_label, y=y_label, data=table, **kwargs)

Exemplo n.º 5

0

Exibir arquivo

Arquivo: plotUtils.py Projeto: tarlen5/pisa

def make_scatter_plot(frame, name, **kwargs):
    """
    Make joint scatter plots of column `name` against the other columns.

    The columns 'hypo', 'llh' and 'mctrue' are skipped. A column called
    'deltam31' is multiplied by 100 and any column whose name contains
    'theta' is converted from radians to degrees before plotting.

    Args:
        frame: data frame holding the columns; it is not modified.
        name: column plotted on the x axis of every figure.
        **kwargs: forwarded to ``sns.jointplot``.

    Returns:
        List with one matplotlib figure per plotted column pair.
    """
    column_x = frame[name]
    # Rescale into a new object: the former in-place ``*=`` could write
    # through to the caller's frame, compounding the factor on every call.
    if name == 'deltam31':
        column_x = column_x * 100.0

    exclude = {'hypo', 'llh', 'mctrue'}
    params = list(set(frame.columns).difference(exclude))

    figs = []
    # Plot correlation scatter plot for all other systematics
    for p in params:
        if p == name:
            continue
        column_y = frame[p]
        if p == 'deltam31':
            column_y = column_y * 100.0
        if 'theta' in p:
            column_y = np.rad2deg(column_y)

        with sns.axes_style("whitegrid"):
            sns.jointplot(x=column_x, y=column_y, size=8, color='b',
                          **kwargs)
            plt.tight_layout()
            figs.append(plt.gcf())

    return figs

Exemplo n.º 6

0

Exibir arquivo

Arquivo: plot_poi.py Projeto: mamsdiallo/ud120-projects

def plotBonusvsSalary(df):
    """Draw a bonus-vs-salary joint plot, save it as a PNG and show it.

    The figure is resized to 18.5 x 10.5 inches and written to
    'bonusVSsalary.png' at 100 dpi before being displayed.
    """
    sns.jointplot(data=df, x="bonus", y="salary")

    figure = plt.gcf()
    figure.set_size_inches(18.5, 10.5)
    figure.savefig('bonusVSsalary.png', dpi=100)

    plt.show()

Exemplo n.º 7

0

Exibir arquivo

Arquivo: MovementStatisticsGenerator.py Projeto: BioroboticsLab/bb_analysis

	def plot_seaborn( self ):
		"""Show a KDE joint plot of two columns of 'movement.csv'.

		Only rows whose first column equals 0 and whose two selected
		columns lie strictly inside (-limit, limit) are plotted.
		"""

		# https://stanford.edu/~mwaskom/software/seaborn/tutorial/distributions.html

		# DataFrame.as_matrix() no longer exists in current pandas; .values
		# returns the same array and is available on every version.
		data = pd.read_csv( 'movement.csv' ).values

		# 1/2 3/4 5/6 7/8
		x_column = 3
		y_column = 4

		limit = 100
		data = data[
			  ( data[:,0] == 0)
			& ( data[:,x_column] > -limit )
			& ( data[:,x_column] < limit )
			& ( data[:,y_column] > -limit )
			& ( data[:,y_column] < limit )
		]

		x = data[:,x_column]
		y = data[:,y_column]

		with sns.axes_style( 'white' ):
			sns.jointplot( x=x, y=y, kind='kde' )  # scatter, reg, resid, hex, kde

		# NOTE(review): ``sns.plt`` only exists in old seaborn releases;
		# confirm the pinned version before upgrading.
		sns.plt.show()

Exemplo n.º 8

0

Exibir arquivo

Arquivo: matmat_original.py Projeto: thran/experiments2.0

def skill_vs_speed(prediction_mode, time_model, data):
    """Draw a KDE joint plot of student skill against student speed.

    A ``TimeCombiner`` of the two models is first evaluated on ``data``
    (``force_run=True``); the per-student values returned by each model's
    ``get_skills`` are then plotted against each other.
    """
    model = TimeCombiner(prediction_mode, time_model)
    Evaluator(data, model).get_report(force_run=True)
    students = data.get_students()
    skills = prediction_mode.get_skills(students)
    fastness = time_model.get_skills(students)
    # Keyword arguments: current seaborn releases reject positional x / y.
    grid = sns.jointplot(x=pd.Series(skills), y=pd.Series(fastness),
                         kind='kde', space=0)
    grid.set_axis_labels("skill", "speed")

Exemplo n.º 9

0

Exibir arquivo

Arquivo: read_musicXML.py Projeto: rb-roomba/music

def show_graph(data):
    """ Show time series graph of given data.

    Each item of ``data`` contributes one point: its first element is the
    time and ``height`` applied to the remaining elements is the height.
    Points are sorted by time before plotting.
    """
    height_list = sorted(([p[0], height(p[1:])] for p in data),
                         key=lambda point: point[0])
    # Naming the columns in the constructor also works for an empty list,
    # where assigning ``df.columns`` afterwards raised a length mismatch.
    df = pd.DataFrame(height_list, columns=["time", "height"])
    # Keyword arguments: current seaborn releases reject positional x / y.
    seaborn.jointplot(x='time', y='height', data=df)
    plt.show()

Exemplo n.º 10

0

Exibir arquivo

Arquivo: joint_sampler.py Projeto: low-sky/colira

def sbratio(sampler):
    """Draw a KDE joint plot of the 'theta' and 'phi' columns of a chain.

    ``sampler.flatchain`` is read as a 2-D array with six columns. Columns 2
    and 4 are replaced by their absolute values in place on the object that
    ``flatchain`` returns.
    """
    chain = sampler.flatchain
    chain[:,2]=np.abs(chain[:,2])
    chain[:,4]=np.abs(chain[:,4])
    dd = pd.DataFrame(data=chain,
                      columns=['theta','phi','scatter','badfrac','badsig','badmn'])
    with sns.axes_style("white"):
        # The original passed the undefined name ``data`` here instead of the
        # frame built above, so the call could never succeed.
        sns.jointplot(x="theta", y="phi", data=dd, kind="kde")

Exemplo n.º 11

0

Exibir arquivo

Arquivo: instance.py Projeto: ansteh/multivariate

 def plot(self, samples, columns=None):
     """Draw a joint plot of two-column samples.

     The axes are named "x" and "y" unless ``columns`` is given, in which
     case its first two entries are used as the column names.
     """
     if columns is None:
         x_name, y_name = "x", "y"
     else:
         x_name, y_name = columns[0], columns[1]

     df = pd.DataFrame(samples, columns=[x_name, y_name])
     sns.jointplot(x=x_name, y=y_name, data=df)

Exemplo n.º 12

0

Exibir arquivo

Arquivo: my_script.py Projeto: mosayebi/SAGE

def plot_scatter_hist_sns(x, y):
    """Write a hexbin joint plot of ``x`` against ``y`` to 'plot4.pdf'.

    The axes are labelled with the LaTeX symbols phi (x) and theta (y); the
    current figure is closed once it has been saved.
    """
    sns.set(style="ticks")
    grid = sns.jointplot(np.array(x), np.array(y), kind="hex", size=4, stat_func=None)
    # Raw strings: "\p" is an invalid escape sequence in a normal literal.
    # The label text itself is unchanged.
    grid.set_axis_labels(r"$\phi$", r"$\theta$")
    with PdfPages('plot4.pdf') as pdf:
         pdf.savefig()
    sns.plt.close()

Exemplo n.º 13

0

Exibir arquivo

Arquivo: treesearch.py Projeto: MaxwellRebo/disco-dop

def plot(data, total, title, width=800.0, unit='', dosort=True,
		target=None, target2=None):
	"""A HTML bar plot given a dictionary and max value.

	With more than 30 items and a ``target`` series, a seaborn figure is
	rendered instead and returned as an HTML ``<img>`` tag with the PNG
	embedded as base64: a regression plot for a numeric target, a bar plot
	otherwise, optionally coloured by ``target2``.

	In every other case the result is a plain HTML bar plot in which each
	bar is ``width * value / total`` pixels wide.

	:param data: mapping of key to numeric value.
	:param total: value that corresponds to the full ``width``.
	:param title: caption of the plot (and column name of the values).
	:param width: pixel width of a bar whose value equals ``total``.
	:param unit: suffix printed after each value.
	:param dosort: when true, list bars by descending value.
	:param target: optional pandas series indexed by the keys of ``data``.
	:param target2: optional second series used as hue.
	:returns: a string of HTML."""
	if len(data) > 30 and target is not None:
		df = pandas.DataFrame(index=data)
		df[title] = pandas.Series(data, index=df.index)
		# ``.ix`` no longer exists in current pandas; reindex() keeps its
		# label-based lookup, yielding NaN for keys missing from the target.
		df[target.name] = target.reindex(df.index)
		if target2 is not None:
			df[target2.name] = target2.reindex(df.index)
		# ``dtype == numpy.number`` is not a reliable numeric test (it never
		# matched integer dtypes); ask pandas instead. Booleans stay on the
		# categorical branch.
		is_numeric = (pandas.api.types.is_numeric_dtype(target.dtype)
				and not pandas.api.types.is_bool_dtype(target.dtype))
		if is_numeric:
			if target2 is None:
				seaborn.jointplot(x=target.name, y=title, data=df,
						kind='reg')
			else:
				seaborn.lmplot(x=target.name, y=title, data=df,
						hue=target2.name)
		else:  # X-axis is categorical
			df.sort_values(by=target.name, inplace=True)
			if target2 is None:
				seaborn.barplot(x=target.name, y=title, data=df)
			else:
				seaborn.barplot(x=target.name, y=title, data=df,
						hue=target2.name)
			fig = plt.gcf()
			fig.autofmt_xdate()

		# Convert to PNG
		figfile = io.BytesIO()
		plt.savefig(figfile, format='png')
		result = '<div><img src="data:image/png;base64, %s"/></div>' % (
				base64.b64encode(figfile.getvalue()).decode('utf8'))
		plt.clf()
		return result

	result = ['<div class=barplot>',
			('<text style="font-family: sans-serif; font-size: 16px; ">'
			'%s</text>' % title)]
	if target is not None:
		data = OrderedDict([(key, data[key]) for key in
				target.sort_values().index if key in data])
	# Keys are grouped by the part before the first underscore (or by their
	# first character); with at most five groups each gets its own colour.
	keys = {key.split('_')[0] if '_' in key else key[0] for key in data}
	color = {}
	if len(keys) <= 5:
		color.update(zip(keys, range(1, 6)))
	keys = list(data)
	if dosort:
		keys.sort(key=data.get, reverse=True)
	for key in keys:
		result.append('<br><div style="width:%dpx;" class=b%d></div>'
				'<span>%s: %g %s</span>' % (
				int(round(width * data[key] / total)) if data[key] else 0,
				color.get(key.split('_')[0] if '_' in key else key[0], 1)
					if data[key] else 0,
				htmlescape(key), data[key], unit,))
	result.append('</div>\n')
	return '\n'.join(result)

Exemplo n.º 14

0

Exibir arquivo

Arquivo: mplotter.py Projeto: dantrim/supersusy

def make_JointPlot(plot, region, data, backgrounds) :
    """Draw a seaborn joint plot for one sample and save it as an .eps file.

    The sample whose ``name`` equals ``plot.sample`` is looked up first in
    ``data`` and, failing that, in ``backgrounds``. Its tree is converted to
    arrays of ``plot.x_var`` and ``plot.y_var`` using the selection built
    from ``region.tcut``, the event weight and the sample scale factor. The
    saved file is then moved to ``indir + "/plots/" + outdir``.

    The process exits with a non-zero status when no unique sample matches
    or when ``plot.cmap`` is not one of the supported values.
    """

    sample_to_plot = []
    if data.name == plot.sample : sample_to_plot.append(data)
    if not sample_to_plot :
        for bk in backgrounds :
            if bk.name == plot.sample : sample_to_plot.append(bk)
    if len(sample_to_plot) != 1 :
        msg('ERROR make_JointPlot received %d samples to plot for plot with name %s'%(len(sample_to_plot), plot.name))
        # A bare sys.exit() reports success (status 0) to the caller.
        sys.exit(1)

    # turn this tree into an array :)
    sample_to_plot = sample_to_plot[0]
    selection_ = '(' + region.tcut + ') * eventweight * ' + str(sample_to_plot.scale_factor)
    tree_array = tree2rec(sample_to_plot.tree, branches=[plot.x_var, plot.y_var],
                            selection=selection_)
    tree_array.dtype.names = (plot.x_var, plot.y_var)
    x_arr = tree_array[plot.x_var]
    y_arr = tree_array[plot.y_var]

    sns.set(style="white")

    # Statistic annotated on the plot: Kendall's tau on request, Pearson's r
    # when nothing is configured, none for any other setting.
    stat_func_ = None
    if plot.stat_func == "kendalltau" :
        from scipy.stats import kendalltau
        stat_func_ = kendalltau
    elif plot.stat_func is None :
        from scipy.stats import pearsonr
        stat_func_ = pearsonr

    # Arguments shared by every colour-map variant below.
    common = dict(kind=plot.kind, stat_func=stat_func_,
                  ylim=[plot.y_range_min, plot.y_range_max],
                  xlim=[plot.x_range_min, plot.x_range_max])

    if plot.cmap is None or plot.cmap == "default" :
        j_plot_grid = sns.jointplot(x_arr, y_arr, color=plot.color,
                                    linewidth=plot.line_width, **common)
    elif plot.cmap == "cubehelix" :
        cmap_ = sns.cubehelix_palette(as_cmap=True, dark=0, light=1, reverse = True)
        j_plot_grid = sns.jointplot(x_arr, y_arr, linewidth=plot.line_width,
                                    joint_kws={"cmap":cmap_, "n_levels":plot.n_levels, "shade":True},
                                    **common)
    elif plot.cmap == "blues" :
        j_plot_grid = sns.jointplot(x_arr, y_arr, linewidth=1.0,
                                    joint_kws={"cmap":"Blues", "n_levels":plot.n_levels, "shade":True, "shade_lowest":False},
                                    **common)
    else :
        msg("cmap attribute of joint plot not yet added")
        sys.exit(1)

    j_plot_grid.fig.suptitle(plot.title)
    # Leave room above the axes for the title.
    j_plot_grid.fig.subplots_adjust(top=0.935)
    j_plot_grid.set_axis_labels(plot.x_label, plot.y_label)


    # save the plot to file
    outname = plot.name + ".eps"
    j_plot_grid.savefig(outname)
    out = indir + "/plots/" + outdir 
    utils.mv_file_to_dir(outname, out, True)
    fullname = out + "/" + outname
    msg("%s saved to : %s"%(outname, os.path.abspath(fullname)))

Exemplo n.º 15

0

Exibir arquivo

Arquivo: plotter.py Projeto: verajohne/SEP_autoencoder

def plot_approx_posterior(cov, means, index):
	"""Draw a KDE joint plot of 200 samples from a combined 2-D Gaussian.

	``means[index]`` and ``cov`` are combined with a zero-mean,
	identity-covariance Gaussian through ``util.product_gaussians``
	(presumably the product of the two densities - confirm against util).
	Samples are drawn from the result and plotted within three standard
	deviations of the mean on each axis.
	"""
	mean = means[index]
	# The parenthesised form prints the same text under Python 2 and 3.
	print(mean.shape)
	mean, cov = util.product_gaussians(mean, np.zeros(2), cov, np.identity(2))
	data = np.random.multivariate_normal(mean, cov, 200)
	df = pd.DataFrame(data, columns=["x", "y"])
	x_spread = 3*np.sqrt(cov[0][0])
	y_spread = 3*np.sqrt(cov[1][1])
	xlim = (mean[0] - x_spread, mean[0] + x_spread)
	ylim = (mean[1] - y_spread, mean[1] + y_spread)
	sns.jointplot(x="x", y="y", data=df, kind="kde", stat_func= None, xlim = xlim, ylim = ylim)

Exemplo n.º 16

0

Exibir arquivo

Arquivo: show_graph.py Projeto: rb-roomba/music

def plot_var(times, pitches, ends, var_n):
    """ Show time series graph of variation [var_n].

    Only the (time, pitch) pairs whose time lies in the half-open interval
    (ends[var_n], ends[var_n + 1]] are plotted.
    """
    # var_n: 0 to 30 (0: Aria)
    lower, upper = ends[var_n], ends[var_n + 1]
    # A real list instead of a lazy ``filter`` object, which not every
    # pandas version accepts as DataFrame input under Python 3.
    n_data = [(time, pitch) for time, pitch in zip(times, pitches)
              if lower < time <= upper]
    # seaborn
    # Naming the columns in the constructor also works for an empty list.
    df = pd.DataFrame(n_data, columns=["time", "height"])
    seaborn.jointplot(x='time', y='height', data=df)
    plt.show()

Exemplo n.º 17

0

Exibir arquivo

Arquivo: decay.py Projeto: z01nl1o02/tests

 def show(self):
     """Show a joint plot of ``self._lams`` (x) against ``self._pr`` (y).

     ``self._pr`` is flattened into a single row first. The current axes are
     titled with the sum of the element-wise products of the two sequences.
     """
     probabilities = np.reshape(self._pr, (1, -1)).tolist()[0]
     lams = self._lams

     sns.jointplot(x='x', y='y',
                   data=pd.DataFrame({'x': lams, 'y': probabilities}))

     weighted_sum = (np.asarray(lams) * np.asarray(probabilities)).sum()
     sns.plt.title('mean %f' % weighted_sum)
     sns.plt.show()

Exemplo n.º 18

0

Exibir arquivo

Arquivo: plotting.py Projeto: schevalier/MJHMC

def hist_2d(distribution, nsamples, **kwargs):
    """
    Plots a 2d KDE joint plot of samples drawn from distribution.

    ``distribution`` is instantiated with ``ndims=2`` and sampled
    ``nsamples`` times with ``MarkovJumpHMC``; extra keyword arguments go to
    the sampler. The first two entries of the sample array are plotted.
    """
    target = distribution(ndims=2)
    hmc = MarkovJumpHMC(target.Xinit, target.E, target.dEdX, **kwargs)
    draws = hmc.sample(nsamples)

    first_dim = draws[0]
    second_dim = draws[1]
    with sns.axes_style("white"):
        sns.jointplot(first_dim, second_dim, kind="kde", stat_func=None)

Exemplo n.º 19

0

Exibir arquivo

Arquivo: data_analysis.py Projeto: AndrewCr0w/ml-playground

def pairwise_joint_plots(df, cols):
    """Save a hexbin joint plot for every unordered pair of columns.

    Each plot is written to 'joint_<colA>_<colB>.png' with colA < colB.

    Args:
        df: data frame holding the columns.
        cols: iterable of column names; duplicates are ignored.
    """
    from itertools import combinations

    logging.debug('Plotting pairwise joint distributions')
    for colA, colB in combinations(sorted(set(cols)), 2):
        out_path = 'joint_{}_{}.png'.format(colA, colB)
        logging.debug('joint plot: %s', out_path)
        # jointplot creates its own figure. The plt.figure() call that used
        # to precede it left an extra empty figure open on every iteration,
        # because plt.close() only closed the joint plot.
        grid = sns.jointplot(x=df[colA], y=df[colB], kind='hex')
        grid.savefig(out_path)
        plt.close(grid.fig)

Exemplo n.º 20

0

Exibir arquivo

Arquivo: KernelDensity.py Projeto: ryscet/TopDown

def AnalyzeAllElectrodes(path='/Users/ryszardcetnarski/Desktop/Nencki/Badanie_NFB/Dane/wszystkie_elektrody_jacek.csv'):
    """From Jacek

    Read the all-electrodes CSV and, for each frequency band, plot the
    'przed' values against the 'po' and 'roznica' values.

    For every band a '<band>_po' column is added as the sum of the
    '<band>_przed' and '<band>_roznica' columns, two regression joint plots
    are drawn, and ``GeneralModel`` is called on the przed/po pair with a
    single 'mixed_conditions' condition for every row.

    Args:
        path: CSV file to read; defaults to the original hard-coded
            location so existing callers keep working.

    Returns:
        The data frame, including the added '<band>_po' columns.
    """
    db = pd.read_csv(path)

    for band in ['theta', 'alpha','smr', 'beta1', 'beta2']:
        before, after, change = band + '_przed', band + '_po', band + '_roznica'
        db[after] = db[before] + db[change]

        # Keyword arguments: current seaborn releases reject positional x / y.
        sns.jointplot(x=before, y=after, data=db, kind="reg")
        sns.jointplot(x=before, y=change, data=db, kind="reg")

        conditions_str = ['mixed_conditions'] * len(db)
        conditions = [0] * len(db)
        GeneralModel( db[before] ,  db[after] , band, conditions, conditions_str)

    return db







#Kde using sklearn, returns object
   # kde = KernelDensity(kernel='tophat', bandwidth = 3).fit(initial[:, np.newaxis])
   # log_dens = kde.score_samples(x[:, np.newaxis])

    #Plot sklearn kernel estimate
   # kernel.plot(x, np.exp(log_dens), 'g')
    #Plot original data histogram


  #followUp = np.random.random_sample(100)
    #followUp= np.random.normal(20,10, 100)

    #followUp = np.random.normal(20,10, 100)#initial + np.random.normal(0,100,100)
    #followUp = np.random.random_sample(100)#initial + np.random.normal(0,100,100)
    #hist.hist(initial)

    #initial = np.random.normal(20,10, 100)
    #initial = np.random.random_sample(100)


    #Add noise to each observation
    #initial = #initial *0.95 + np.random.normal(100,100,100)
    #Make a follow up by adding noise a second time to the same population

Exemplo n.º 21

0

Exibir arquivo

Arquivo: housing_prices.py Projeto: tarlen5/coursera_ml

def plotCorrelation(frame):
    """Show joint plots of "price" against "bedrooms" and against "size".

    Both figures are created first; the call then blocks in ``plt.show()``
    until they are closed.
    """
    # One joint plot per explanatory variable, each with a tight layout.
    for feature in ("bedrooms", "size"):
        sns.jointplot(feature, "price", frame, size=8)
        plt.tight_layout()

    print("PAUSED...close figures to continue...")
    plt.show()

Exemplo n.º 22

0

Exibir arquivo

Arquivo: plotting.py Projeto: schevalier/MJHMC

def gauss_2d(nsamples=1000):
    """
    Another simple test plot: a 1d gaussian is sampled with two different
    samplers and the two sample sets are drawn as a hexbin joint plot.

    ``Control`` supplies the x values and ``ContinuousTimeHMC`` the y values;
    the first entry of each sampler's output is plotted.
    """
    gaussian = misc.distributions.TestGaussian(ndims=1)
    sampler_args = (gaussian.Xinit, gaussian.E, gaussian.dEdX)
    control_sampler = Control(*sampler_args)
    experimental_sampler = ContinuousTimeHMC(*sampler_args)

    with sns.axes_style("white"):
        control_draws = control_sampler.sample(nsamples)[0]
        experimental_draws = experimental_sampler.sample(nsamples)[0]
        sns.jointplot(control_draws, experimental_draws, kind="hex", stat_func=None)

Exemplo n.º 23

0

Exibir arquivo

Arquivo: drawPlot.py Projeto: WQ-huziang/WQ-Testcode

 def drawJointPlot(self, se1, se2):
     """
     Draw a linear-regression joint plot showing the correlation between
     series 1 and series 2.
         :param self: the instance itself
         :param se1: series 1
         :param se2: series 2
     """
     # Keyword arguments: current seaborn releases reject positional x / y.
     sns.jointplot(x=se1, y=se2, kind='reg', color=self.linecolors[0])
     # plt.title(self.title)
     plt.legend()
     plt.show()

Exemplo n.º 24

0

Exibir arquivo

Arquivo: analysis.py Projeto: EhsanTadayon/alleninf

def fixed_effects(data, labels):
    """Report and plot the Pearson correlation between two columns.

    Args:
        data: table indexed by column label.
        labels: sequence whose first two entries name the x and y columns.

    Returns:
        Tuple ``(corcoeff, p_val)`` as returned by ``pearsonr``.
    """
    x_label, y_label = labels[0], labels[1]

    corcoeff, p_val = pearsonr(data[x_label], data[y_label])
    # Single-argument print(...) behaves the same under Python 2 and 3.
    print("Pearson correlation between %s and %s across all donors is %g (two tailed p value = %g)"%(x_label, y_label, corcoeff, p_val))

    # The hexbin plot is drawn first so the regression plot can reuse its
    # axis limits. Keyword arguments: current seaborn releases reject
    # positional x / y / data.
    grid = sns.jointplot(x=x_label, y=y_label, data=data, kind="hex")
    sns.jointplot(x=x_label, y=y_label, data=data, kind="reg",
                  xlim=grid.ax_joint.get_xlim(),
                  ylim=grid.ax_joint.get_ylim())
    plt.show()

    return corcoeff, p_val

Exemplo n.º 25

0

Exibir arquivo

Arquivo: assembly.py Projeto: Hensonmw/jcvi

def covlen(args):
    """
    %prog covlen covfile fastafile

    Plot coverage vs length. `covfile` is two-column listing contig id and
    depth of coverage.
    """
    import numpy as np
    import pandas as pd
    import seaborn as sns
    from jcvi.formats.base import DictFile

    p = OptionParser(covlen.__doc__)
    p.add_option("--maxsize", default=1000000, type="int", help="Max contig size")
    # The help text used to be a copy of the --maxsize one.
    p.add_option("--maxcov", default=100, type="int", help="Max contig coverage")
    p.add_option("--color", default='m', help="Color of the data points")
    p.add_option("--kind", default="scatter",
                 choices=("scatter", "reg", "resid", "kde", "hex"),
                 help="Kind of plot to draw")
    opts, args, iopts = p.set_image_options(args, figsize="8x8")

    if len(args) != 2:
        sys.exit(not p.print_help())

    covfile, fastafile = args
    cov = DictFile(covfile, cast=float)
    s = Sizes(fastafile)
    maxsize, maxcov = opts.maxsize, opts.maxcov
    # Keep (size, coverage) of every contig within both limits; contigs
    # missing from covfile count as coverage 0.
    data = []
    for ctg, size in s.iter_sizes():
        c = cov.get(ctg, 0)
        if size > maxsize or c > maxcov:
            continue
        data.append((size, c))

    # zip(*[]) cannot be unpacked into two names; fail with a clear message.
    if not data:
        logging.error("No contigs within --maxsize/--maxcov; nothing to plot")
        sys.exit(1)

    x, y = zip(*data)
    x = np.array(x)
    y = np.array(y)
    logging.debug("X size {0}, Y size {1}".format(x.size, y.size))

    df = pd.DataFrame()
    xlab, ylab = "Length", "Coverage of depth (X)"
    df[xlab] = x
    df[ylab] = y
    sns.jointplot(x=xlab, y=ylab, kind=opts.kind, data=df,
                  xlim=(0, maxsize), ylim=(0, maxcov),
                  stat_func=None, edgecolor="w", color=opts.color)

    figname = covfile + ".pdf"
    savefig(figname, dpi=iopts.dpi, iopts=iopts)

Exemplo n.º 26

0

Exibir arquivo

Arquivo: movie_data_handle_v1.py Projeto: fzhurd/fzwork

def main():
    """Explore the movie metadata CSV and plot profit relationships.

    Prints summary information about the raw data, derives a 'profit'
    percentage column ((gross - budget) / gross * 100) on the rows without
    missing values, then shows a correlation heat map and two joint plots
    (profit vs. title year, profit vs. IMDB score).
    """
    # Every print below takes one argument, so the parenthesised form prints
    # the same text under Python 2 and Python 3.
    movie_raw_data = pd.read_csv('../input/movie_metadata.csv')
    print(movie_raw_data.head(3))

    print(movie_raw_data.isnull().sum())

    print(movie_raw_data.shape)
    # copy(): a column is added below, and writing into the object returned
    # by dropna() triggers pandas' SettingWithCopy warning.
    movie_raw_data_dropna = movie_raw_data.dropna().copy()
    print(movie_raw_data_dropna.shape)
    print(movie_raw_data.dtypes)

    movie_filterd_imdbscore_first = movie_raw_data.loc[movie_raw_data['imdb_score'] > 5]
    movie_filterd_imdbscore_from_raw = movie_raw_data.loc[movie_raw_data['imdb_score'] < 8]

    print(movie_filterd_imdbscore_first.shape)

    # Build the mask from the frame being filtered rather than from the raw
    # frame, whose index is not the same.
    movie_filterd_imdbscore_second = movie_filterd_imdbscore_first.loc[
        movie_filterd_imdbscore_first['imdb_score'] < 8]

    print(movie_filterd_imdbscore_second.shape)
    print(movie_filterd_imdbscore_from_raw.shape)

    print('*********************************')

    print(movie_raw_data_dropna.head(3))
    gross = movie_raw_data_dropna['gross'].values
    budget = movie_raw_data_dropna['budget'].values
    profit = ((gross - budget) / gross) * 100
    print(profit)

    movie_raw_data_dropna['profit'] = profit
    print(movie_raw_data_dropna.shape)
    print(movie_raw_data_dropna.head(3))

    # Restrict to numeric columns explicitly; current pandas raises when
    # corr() meets the text columns instead of silently skipping them.
    corr = movie_raw_data_dropna.select_dtypes(include='number').corr()
    print(corr)

    f, ax = plt.subplots(figsize=(11, 9))
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    sns.heatmap(corr, cmap=cmap, vmax=1,
            square=True,
            linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)

    g = sns.jointplot(x="title_year", y="profit",kind='scatter',size=10,ylim = [0,110],xlim=[1980,2020],data=movie_raw_data_dropna)
    h = sns.jointplot(x="imdb_score", y="profit",kind='reg',size=10,ylim = [0,110],data=movie_raw_data_dropna)

    plt.show()

Exemplo n.º 27

0

Exibir arquivo

Arquivo: c5.py Projeto: 3774257/abu

def sample_54_1():
    """
    5.4 Visualizing data with seaborn

    Shows, one after another, a histogram of 'p_change', a box plot of
    'p_change' per 'date_week' and a joint plot of 'high' against 'low',
    all taken from the module-level ``tsla_df``.
    :return:
    """
    sns.distplot(tsla_df['p_change'], bins=80)
    plt.show()

    sns.boxplot(x='date_week', y='p_change', data=tsla_df)
    plt.show()

    # Keyword arguments: current seaborn releases reject positional x / y.
    sns.jointplot(x=tsla_df['high'], y=tsla_df['low'])
    plt.show()

Exemplo n.º 28

0

Exibir arquivo

Arquivo: analysis_windfield_game1.py Projeto: chili-epfl-cellulo/cellulo_log_analysis-DEPRECATED

def occupationAnalysis():
    """Draw a KDE joint plot of the grid cells visited by the three robots.

    The position logs of the green, orange and blue robots are read with
    ``readLog``. Each log line is comma separated: field 0 is printed as
    progress output, and fields 1 and 2 are scaled by the module-level
    ``nbcols`` and ``nbrows`` and truncated to integer cell indices.

    The original accumulated a 2-D count array and passed it to
    ``jointplot`` together with the column names "x" and "y", which cannot
    work because the array has no named columns. The visited cell
    coordinates are now passed directly. The unused background image, empty
    figure and colour dictionary were dropped.
    """
    log_paths = (
        "./csv/windfield_game1_green_withindex_position.csv",
        "./csv/windfield_game1_orange_withindex_position.csv",
        "./csv/windfield_game1_blue_withindex_position.csv",
    )

    cell_x = []
    cell_y = []
    for log_path in log_paths:
        for robotp in readLog(log_path):
            fields = robotp.split(',')
            print(fields[0])
            cell_x.append(int(float(fields[1]) * nbcols))
            cell_y.append(int(float(fields[2]) * nbrows))

    grid = sns.jointplot(x=np.array(cell_x), y=np.array(cell_y), kind="kde")
    grid.set_axis_labels("x", "y")

Exemplo n.º 29

0

Exibir arquivo

Arquivo: visualise.py Projeto: vianziro/msc-thesis

def performance_vs_coverage(db, output=None, max_values=250, **kwargs):
    """Draw a joint plot of performance against coverage from ``param_stats``.

    The ``coverage`` column is shown on the x axis under the label
    "Legality"; both axes are limited to [0, 1]. The figure is handed to
    ``viz.finalise`` together with ``output`` and any extra keyword
    arguments.

    ``max_values`` is accepted for interface compatibility but is not used.
    """
    rows = list(
        db.execute(
            "SELECT "
            "    performance AS performance, "
            "    coverage "
            "FROM param_stats"
        )
    )
    frame = pandas.DataFrame(rows, columns=("Performance", "Legality"))
    # Keyword arguments: current seaborn releases reject positional x / y.
    sns.jointplot(x="Legality", y="Performance", data=frame,
                  xlim=(0, 1), ylim=(0, 1))
    viz.finalise(output, **kwargs)

Exemplo n.º 30

0

Exibir arquivo

Arquivo: determine-vs-probability.py Projeto: z01nl1o02/tests

 def show(self):
     # Print the most probable hypotheses, then show a joint plot of
     # hypothesis index (x) against probability (y).
     # NOTE(review): self.pr is indexed as [0, k] throughout, so it looks
     # like a single-row 2-D array - confirm against the code that fills it.

     # argsort is ascending, so the last 20 indices of row 0 are the (up to)
     # 20 largest probabilities, printed from smallest to largest.
     pos = np.argsort(self.pr)[0][-20:]
     for k in pos:
         print self.hypos[k],self.pr[0,k]
     # argmax returns a flat index; it matches the column index only when
     # self.pr has a single row.
     pos = np.argmax(self.pr)
     print 'max',self.hypos[pos],'pr=',self.pr[0,pos]
     # x values are simply the positions of the hypotheses in self.hypos.
     X = []
     for idx,hypo in enumerate(self.hypos):
         # Unpacked but unused; every hypothesis must be a 2-item sequence.
         N,f = hypo
         X.append(idx)
     # First row of self.pr as a plain list.
     Y = self.pr.tolist()[0]
     df = pd.DataFrame({'x':X,'y':Y})
     sns.jointplot(x='x',y='y',data=df)
     sns.plt.show()

Exemplo n.º 31

0

Exibir arquivo

# In[25]:

# Compare how the target classes are distributed between men and women.
fig, ax = plt.subplots(figsize=(10, 5))
sns.countplot(x=df['target'], hue=df['sex'], ax=ax)
plt.xlabel('target')
# The y axis of a count plot is the number of rows, not the 'sex' column.
plt.ylabel('count')
plt.xticks(rotation=50)
# The original wrote ``plt.show`` without parentheses, which never called it.
plt.show()

# In[26]:

# Regression joint plot of each listed column against the target.
# 'trestbps' appeared twice in the original list and was plotted twice.
nums = ['age', 'sex', 'trestbps', 'chol', 'target']
for i in nums:
    # jointplot creates its own figure, so the plt.figure() call that used to
    # precede it only left an empty window behind.
    grid = sns.jointplot(x=df[i], y=df['target'], kind='reg')
    # Label and grid the joint axes explicitly; the pyplot calls acted on the
    # current axes, which after jointplot is one of the marginal axes.
    grid.set_axis_labels(i, 'response')
    grid.ax_joint.grid()
    plt.show()

# In[8]:

# Bar chart of age per target value.
plt.bar(df['target'], df['age'], alpha=.5, width=0.8, label='chart')
plt.show()

# In[62]:

# Box plot of target per sex, split by 'fbs'. Keyword arguments: current
# seaborn releases reject positional x / y.
sns.catplot(x='sex', y='target', data=df, kind='box', hue='fbs')

# In[53]:

Exemplo n.º 32

0

Exibir arquivo

Arquivo: HousePriceProject_H_S.py Projeto: himalithaker/Predicting-House-Cost

# Print the summary statistics of the house cost, one line per statistic.
for template, value in (
    ("Minimum Cost: ${}", _min_cost),
    ("Maximum Cost: ${}", _max_cost),
    ("Mean Cost: ${}", _mean_cost),
    ("Median Cost ${}", _median_cost),
    ("Standard deviation of Cost: ${}", _stddev_cost),
):
    print(template.format(value))


# Bar chart of how many houses have each bedroom count.
bedroom_counts = _housedata['bedrooms'].value_counts()
bedroom_counts.plot(kind='bar')
plt.title('Total number of Bedroom')
plt.xlabel('Bedrooms')
plt.ylabel('Count of Bedrooms')
plt.show()
#sns.despine

# Joint plot of house latitude (x) against longitude (y).
# jointplot creates its own figure (sized by ``size``), so the plt.figure()
# call that used to precede it only produced an extra empty window.
grid = sns.jointplot(x=_housedata.lat.values, y=_housedata.long.values, size=10)
# Label the joint axes through the grid; plt.xlabel / plt.ylabel act on the
# current axes, which after jointplot is one of the marginal axes.
grid.set_axis_labels('Latitude of House', 'Longitude of House', fontsize=12)
plt.show()
#plt1 = plt()
#sns.despine

# Scatter plots of price against living area and against longitude, each
# shown in its own figure.
for column, heading in (
    (_housedata.sqft_living, "Price of House vs Square Feet of House"),
    (_housedata.long, "Price of House vs Location of the house area"),
):
    plt.scatter(_housedata.price, column)
    plt.title(heading)
    plt.show()

# Price against latitude; this excerpt ends before any title or show call.
plt.scatter(_housedata.price,_housedata.lat)

Exemplo n.º 33

0

Exibir arquivo

    names = ['variance','skewness','curtosis','entropy','class'])

# Quick inspection of the data. These expressions only display something
# when run interactively, one per notebook cell.
data.head(3)
data.describe()
data.shape
data.isna().any()
data.dtypes
data['class'].unique()

# Class balance, then the distribution of each feature.
sns.countplot(x='class', data= data)
for column in ('curtosis', 'entropy', 'variance', 'skewness'):
    sns.violinplot(y=data[column])
p1=sns.kdeplot(data['curtosis'], shade=True, color="r")
p1=sns.kdeplot(data['variance'], shade=True, color="b")

# Pairwise hexbin joint plots of selected features.
sns.jointplot(x=data['curtosis'], y=data['entropy'], kind='hex', linewidth = 2)
sns.jointplot(x=data['skewness'], y=data['variance'], kind='hex', color = 'skyblue', linewidth = 2)
sns.jointplot(x=data['curtosis'], y=data['variance'], kind='hex', linewidth = 2)

# Feature matrix and target.
# NOTE(review): y is a one-column DataFrame, not a 1-D series; confirm the
# code that fits the model expects that shape.
X = data[['variance', 'skewness' ,'curtosis', 'entropy']]
y = data[['class']]

from sklearn.model_selection import train_test_split # Support Vector Machine
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

from sklearn.svm import SVC
# The original also built a second SVC() whose result was discarded.
svc = SVC()

# Creating a dictionary of parameters

parameters = {

Exemplo n.º 34

0

Exibir arquivo

Arquivo: Langevin-MC.py Projeto: XanderJC/MCMC-Project

    def kde(self, n=0):
        """Draw a KDE joint plot of two components of ``self.samples``.

        Index ``n`` is selected on the second axis; components 0 and 1 of
        the third axis become the x and y values.
        """
        x_values = self.samples[:, n, 0]
        y_values = self.samples[:, n, 1]
        sns.jointplot(x=x_values, y=y_values, kind="kde")

Exemplo n.º 35

0

Exibir arquivo

Arquivo: experiment_graphsize_distribution.py Projeto: scott198510/toulouse-road-network-dataset

def generate_plots(plot_type=""):
    r"""
    Plot how graph sizes (|V| and |E|) are distributed in each dataset split.

    For every split ("test", "valid", "train") the node and edge counts of
    each graph are collected and their min/mean/max are printed; the same
    statistics are printed once more for all splits pooled together.
    Figures are written under the ``plots/`` directory.

    :param plot_type: type of plot in {"histograms", "marginal_E", "marginal_V", "joint"};
        any other value (including the default "") fails the assertion below
    """
    assert plot_type in {"histograms", "marginal_E", "marginal_V", "joint"}

    tot_n_nodes = []
    tot_n_edges = []
    for split_name in ("test", "valid", "train"):
        dataset = ToulouseRoadNetworkDataset(split=split_name,
                                             step=0.001,
                                             max_prev_node=8)
        loader = DataLoader(dataset,
                            batch_size=1,
                            shuffle=False,
                            collate_fn=custom_collate_fn)

        node_counts = []
        edge_counts = []
        for datapoint in loader:
            # Each datapoint is a 7-tuple; only the target adjacency and the
            # sequence length are needed here.
            _, _, y_adj, _, _, seq_len, _ = datapoint
            edge_counts.append(int(y_adj.view(-1).sum().item()))
            # NOTE(review): the "- 2" presumably discounts two special
            # sequence entries -- confirm against the dataset definition.
            node_counts.append(int(seq_len[0] - 2))

        # Pool the raw counts before converting this split's lists to arrays.
        tot_n_edges.extend(edge_counts)
        tot_n_nodes.extend(node_counts)
        node_counts = np.array(node_counts)
        edge_counts = np.array(edge_counts)

        print(f"{split_name} min/mean/max len nodes", np.min(node_counts),
              np.mean(node_counts), np.max(node_counts))
        print(f"{split_name} min/mean/max len edges", np.min(edge_counts),
              np.mean(edge_counts), np.max(edge_counts))

        if plot_type == "histograms":
            # Same recipe for |V| then |E|: the bin count equals the size of
            # the integer range covered by the counts.
            for counts, symbol in ((node_counts, "|V|"), (edge_counts, "|E|")):
                n_bins = np.max(counts) - np.min(counts) + 1
                plt.hist(counts, bins=n_bins)  # arguments are passed to np.histogram
                plt.title(f"Histogram of {symbol} for {split_name}")
                plt.savefig(f"plots/histogram_{symbol}_{split_name}.png")
                plt.clf()
        elif plot_type == "marginal_V":
            # The returned axes is reused after the loop for labels and saving.
            nodes_ax = sns.kdeplot(node_counts, bw=.5, shade=True, label=split_name)
        elif plot_type == "marginal_E":
            edges_ax = sns.kdeplot(edge_counts, bw=.5, shade=True, label=split_name)
        else:
            # "joint": one log10-log10 KDE figure per split.
            grid = sns.jointplot(np.log10(node_counts),
                                 np.log10(edge_counts),
                                 kind="kde",
                                 bw=.05,
                                 marginal_kws={"kernel": "gau", "bw": .02})
            joint_ax = grid.ax_joint
            joint_ax.set_xlabel("log10 |V|", fontsize=15)
            joint_ax.set_ylabel("log10 |E|", fontsize=15)
            grid.ax_marg_x.set_title(split_name, fontsize=20)
            joint_ax.set_xlim(0.6, 1.2)
            joint_ax.set_ylim(0.4, 1.2)
            grid.savefig(f"plots/joint_{split_name}.png")

    tot_n_nodes = np.array(tot_n_nodes)
    tot_n_edges = np.array(tot_n_edges)
    print("min/mean/max len nodes", np.min(tot_n_nodes), np.mean(tot_n_nodes),
          np.max(tot_n_nodes))
    print("min/mean/max len edges\n", np.min(tot_n_edges),
          np.mean(tot_n_edges), np.max(tot_n_edges))

    # The marginal plots are finalised only once every split has been drawn.
    if plot_type == "marginal_V":
        nodes_ax.set_xlabel("|V|")
        nodes_ax.set_ylabel("p(x)")
        nodes_ax.set_title("Distributions of |V|")
        nodes_ax.legend()
        nodes_ax.figure.savefig("plots/marginal_|V|.png")
        nodes_ax.figure.clf()

    if plot_type == "marginal_E":
        edges_ax.set_xlabel("|E|")
        edges_ax.set_ylabel("p(x)")
        edges_ax.set_title("Distributions of |E|")
        edges_ax.legend()
        edges_ax.figure.savefig("plots/marginal_|E|.png")
        edges_ax.figure.clf()

    print("Done!")

Exemplo n.º 36

0

Exibir arquivo

Arquivo: magic_formula.py Projeto: steve84/finanzlabor.blog

createFigure(
    figure_data_without_zynex, 'EY_ROC', EARNINGS_YIELD, 'Return On Capital (%)',
    'Earnings Yield (%)', 'ey_roc.png', 'lower right',
    vscaling=1.2, hscaling=2)
createFigure(
    figure_data, 'total_rank', 'EY_rank', 'Rank Return On Capital',
    'Rank Earnings Yield', 'ey_roc_rank.png', 'upper right',
    number_format='%d', vscaling=1.2, hscaling=2)

# Drop outliers
df_capped = df[df[EARNINGS_YIELD].between(
    df[EARNINGS_YIELD].quantile(0.05), df[EARNINGS_YIELD].quantile(0.95))]
df_capped = df_capped[df_capped['ROC'].between(
    df_capped['ROC'].quantile(0.05), df_capped['ROC'].quantile(0.95))]

# Save density plot
ax = sb.jointplot(EARNINGS_YIELD, 'ROC', data=df_capped, kind='kde', color="g")
ax.set_axis_labels('Earnings Yield (%)', 'Return On Capital (%)')
plt.tight_layout()
plt.savefig('density_plot.png', format='png')

plt.clf()

# Create industry histogram
ax = sb.countplot(x=SECTOR, data=figure_data, palette='Blues_d')
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
ax.set_ylabel('Amount')
ax.set_xlabel('Industry')
plt.tight_layout()
plt.savefig('industry_histogram.png', format='png')

Exemplo n.º 37

0

Exibir arquivo

Arquivo: tp5.py Projeto: anouel/cours-2017-2018

#%%
# Histogram of price
seaborn.distplot(ordis.price)

#%%
# Box plot of price
# NOTE(review): factorplot was renamed catplot in seaborn 0.9 -- confirm
# which seaborn version this script targets.
seaborn.factorplot("price", data=ordis, kind="box")

#%%
# Violin plot of price
seaborn.factorplot("price", data=ordis, kind="violin")

#%%
# Relationship between price and the quantitative variables (speed, hd)
seaborn.factorplot("speed", "price", data=ordis)
seaborn.jointplot("hd", "price", data=ordis, kind="reg")

#%%
# Relationship between price and the qualitative variables (ram, cd, premium, screen)
seaborn.factorplot("ram", "price", data=ordis, kind="box")
seaborn.factorplot("cd", "price", data=ordis, kind="box")
seaborn.factorplot("premium", "price", data=ordis, kind="box")
seaborn.factorplot("screen", "price", data=ordis, kind="box")

#%%
# price ~ speed and hd: mean price for each (hd bin, speed) pair, with hd cut
# into 6 bins, shown as a heat map
t = pandas.crosstab(pandas.cut(ordis.hd, 6, precision=0),
                    ordis.speed,
                    values=ordis.price,
                    aggfunc=numpy.mean)
seaborn.heatmap(t, cmap="Blues", cbar_kws={'label': 'mean price'})

Exemplo n.º 38

0

Exibir arquivo

# In[18]:

sns.pairplot(sub_task_summary_Output, hue='EV', palette='Set1')

# In[20]:

# SIMPLE LINE PLOT
sub_task_summary_Output['EV'].plot(figsize=(20, 12))

# In[26]:

# In[65]:

plt.figure(figsize=(12, 8))

sns.jointplot(x='SPI', y='EV', data=sub_task_summary_Output, color='hotpink')
sns.jointplot(x='CPI', y='EV', data=sub_task_summary_Output, color='red')
sns.jointplot(x='EAC', y='EV', data=sub_task_summary_Output, color='blue')

#

# In[41]:

# In[55]:

# In[56]:

# In[66]:

# In[67]:

Exemplo n.º 39

0

Exibir arquivo

Arquivo: plotsV1.py Projeto: bjonnh/AMBER

def heatscatter_sns(x, y, figsize=(8, 8)):
    """Draw a sky-blue KDE joint plot of ``x`` against ``y``.

    Changes seaborn's global settings as a side effect: first the
    ``figure.figsize`` rc value is set to ``figsize``, then the "white"
    style with colour codes is applied. Nothing is returned.

    NOTE(review): jointplot appears to size its own square figure through
    its ``height`` argument, so ``figsize`` may have no visible effect on
    the result -- confirm.
    """
    rc_overrides = {'figure.figsize': figsize}
    sns.set(rc=rc_overrides)
    sns.set(style="white", color_codes=True)
    sns.jointplot(x=x, y=y, kind='kde', color="skyblue")

Exemplo n.º 40

0

Exibir arquivo

plt.figure(figsize=(10, 25))
sns.countplot(y='country', data=dataset, alpha=alpha)
plt.title('Data by country')
plt.show()

# Between Genders Male vs Female
plt.figure(figsize=(7, 7))
sex = sns.countplot(x='sex', data=dataset)

# Corelation between the Data
plt.figure(figsize=(16, 7))
cor = sns.heatmap(dataset.corr(), annot=True)

g = sns.jointplot(dataset.year,
                  dataset.suicides_no,
                  kind="kde",
                  color="#bfa9e0",
                  size=7)
plt.savefig('graph.png')

# Visualizing which age of people Suicide the most
plt.figure(figsize=(16, 7))
bar_age = sns.barplot(x='sex', y='suicides_no', hue='age', data=dataset)

# Visualizing which Generation of people Suicide the most
plt.figure(figsize=(16, 7))
bar_gen = sns.barplot(x='sex', y='suicides_no', hue='generation', data=dataset)

cat_accord_year = sns.catplot('sex',
                              'suicides_no',
                              hue='age',

Exemplo n.º 41

0

Exibir arquivo

# Build a DataFrame from the iris feature matrix and append the class labels
# as a 'target' column.
df = DataFrame(iris.data,columns = iris.feature_names)
df['target'] = iris.target
print(df)

# Data visualisation
import pandas as pd
from scipy import stats,integrate
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(color_codes = True)
# Visualising the data distribution: histogram and density function.
# distplot() draws both the histogram and the density estimate by default.
sns.distplot(df['petal length (cm)'],bins = 15)

# jointplot() draws a scatter plot together with the marginal histograms
sns.jointplot(x = 'sepal length (cm)',y = 'sepal width (cm)',data = df,size =8)


# Grouped scatter plot:
# seaborn.FacetGrid colours the points by class through hue='target'
sns.FacetGrid(df,hue = 'target',size =8).map(plt.scatter,'sepal length (cm)','sepal width (cm)').add_legend()


# Hexbin plot
# NOTE(review): the return value of axes_style() is discarded here, so this
# call probably does not change the style of the plot below -- confirm.
sns.axes_style('white')
sns.jointplot(x = 'sepal length (cm)',y = 'sepal width (cm)',data = df,kind = 'hex',color = 'r')

# Two-dimensional kernel density estimate
g = sns.jointplot(x = 'sepal length (cm)',y = 'sepal width (cm)',data = df,kind = 'kde',color = 'm')
# Overlay the raw observations as white '+' markers on the joint axes
g.plot_joint(plt.scatter,c='w',s=30,linewidth=1,marker='+')

Exemplo n.º 42

0

Exibir arquivo

Arquivo: pythonbasics_visualisation.py Projeto: mmudgal33/python-projects-codes

# Distribution of 'age': normalised histogram first, then the density curve.
sns.distplot(bd['age'], kde=False, norm_hist=True, bins=10)
sns.distplot(bd['age'], hist=False)
# Keep the Axes returned by distplot so that its figure can be saved below.
myplot = sns.distplot(bd['age'], hist=False)

myimg = myplot.get_figure()
myimg.savefig('distplot.png')

sns.kdeplot(bd['age'])  # other distribution plot, less used
sns.kdeplot(bd['age'], shade=True)  # shade area
sns.kdeplot(bd['pdays'], shade=True)

myplot = sns.boxplot(y='age', data=bd)
myimg = myplot.get_figure()
myimg.savefig('boxplot.png')

# jointplot returns a JointGrid rather than an Axes; the grid has no
# get_figure(), so the image is written through the grid's own savefig().
myplot = sns.jointplot(x='age', y='balance', data=bd.iloc[:500, :])
myplot.savefig('jointplot.png')
# Hexbin variant on the first 100 rows: lighter colour means lower density.
myplot = sns.jointplot(x='age',
                       y='balance',
                       data=bd.iloc[:100, :],
                       kind='hex',
                       size=10)
help(sns.jointplot)
# KDE variant of age against duration on the first 100 rows.
sns.jointplot(x='age',
              y='duration',
              data=bd.iloc[:100, :],
              kind='kde',
              size=10)
# Scatter with a fitted regression line on rows 1..9.
myplot = sns.lmplot(x='age', y='balance', data=bd.iloc[1:10, :])

Exemplo n.º 43

0

Exibir arquivo

Arquivo: Mar 26 - Inclass assignment 2.py Projeto: rufus95/Data-viz-Python

sns.distplot(data['x'])
sns.distplot(data['y'])

# In[9]:

for col in 'xy':
    sns.kdeplot(data[col], shade=True)

# In[10]:

sns.kdeplot(data)

# In[12]:

with sns.axes_style('white'):
    sns.jointplot("x", "y", data, kind='kde')

# In[13]:

with sns.axes_style('white'):
    sns.jointplot("x", "y", data, kind='hex')

# In[14]:

sns.pairplot(data)

# In[20]:

import plotly.graph_objs as go
import numpy as np
x = np.random.randn(2000)

Exemplo n.º 44

0

Exibir arquivo

Arquivo: testseaborn.py Projeto: Alafazam/seabornplots

    5, 5, 5, 5, 5, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 1, 11, 10, 10, 10, 10,
    10, 10, 10, 8, 3, 7, 3, 2, 2, 2, 11, 7, 7, 11, 11, 9, 9, 8, 8, 8, 8, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 6, 12, 12, 12, 11, 11, 11, 9, 9, 9, 9, 9, 11, 11, 10,
    1, 12, 12, 12, 3, 2, 12, 11, 11, 11, 11, 11, 11, 11, 10, 3, 11, 11, 2, 2,
    1, 1, 1, 12, 12, 12, 12, 12, 12, 12, 6, 6
]
y = [
    30, 29, 29, 24, 19, 11, 9, 8, 7, 3, 57, 54, 52, 34, 30, 29, 8, 1, 49, 44,
    33, 31, 29, 29, 28, 27, 2, 6, 5, 52, 41, 36, 18, 27, 26, 46, 32, 35, 33,
    15, 14, 10, 0, 51, 49, 44, 43, 28, 27, 26, 19, 16, 56, 21, 19, 16, 49, 43,
    39, 25, 23, 22, 21, 13, 23, 1, 13, 17, 59, 55, 54, 10, 59, 1, 59, 57, 27,
    25, 22, 21, 4, 49, 59, 31, 30, 5, 0, 8, 6, 0, 39, 37, 35, 31, 27, 25, 18,
    11, 9
]

# print rs
# x = rs.gamma(12, size=60)
# y = 2 + rs.gamma(60,size=60)
# x = rs.gamma(2, size=1000)

# print 'y = '+ str(y)

graph = sns.jointplot(x, y, kind="hex", stat_func=kendalltau, color="#4CB391")

# x = np.random.normal(size=100)
# print 'x = '+ str(x)
# graph = sns.distplot(x);

sns.plt.savefig(__main__.__file__ + ".png")
# graph.pyplot.show()
sns.plt.show()

Exemplo n.º 45

0

Exibir arquivo

print("Kurtosis:")
print(data_set['T_MAX'].kurtosis())

## Graph T MAX / CO & O3
df = data_set.sort_values(['T_MAX', 'CO'], ascending=True)
plt.plot(df['T_MAX'], df['CO'])
plt.title("La concentración de CO frente a la temperatura máxima")
plt.show()

df = data_set.sort_values(['T_MAX', 'O3'], ascending=True)
plt.plot(df['T_MAX'], df['O3'])
plt.title("La concentración de Ozono frente a la temperatura máxima")
plt.show()

## Pairplot
sns.jointplot(data_set['T_MAX'], data_set['CO'], kind="reg")
plt.show()
plt.close()
sns.jointplot(data_set['T_MAX'], data_set['O3'], kind="reg")
plt.show()
plt.close()

## Correlation Matrix
data_set_corr = data_set
data_set_corr['Mes'] = data_set_corr['Mes'].map({
    'ENE': 1,
    'FEB': 2,
    'MAR': 3,
    'ABR': 4,
    'MAY': 5,
    'JUN': 6,

Exemplo n.º 46

0

Exibir arquivo

ax_histx = plt.axes(rect_histx)
ax_histx.tick_params(direction='in', labelbottom=False)
ax_histy = plt.axes(rect_histy)
ax_histy.tick_params(direction='in', labelleft=False)

# the scatter plot:
ax_scatter.scatter(x, y)

# now determine nice limits by hand:
binwidth = 0.25
lim = np.ceil(np.abs([x, y]).max() / binwidth) * binwidth
ax_scatter.set_xlim((-lim, lim))
ax_scatter.set_ylim((-lim, lim))

bins = np.arange(-lim, lim + binwidth, binwidth)
ax_histx.hist(x, bins=bins)
ax_histy.hist(y, bins=bins, orientation='horizontal')

ax_histx.set_xlim(ax_scatter.get_xlim())
ax_histy.set_ylim(ax_scatter.get_ylim())

plt.show()

# Seaborn version
import numpy as np
import seaborn as sns
#sns.set(style="ticks")

sns.jointplot(x, y)
sns.jointplot(x, y, kind="hex", color="#4CB391")

Exemplo n.º 47

0

Exibir arquivo

Arquivo: Getting_Started_With_Data_Analysis.py Projeto: tonomuniz/Python-in-Quantitative-Finance

#Visulization
matplotlib.rcdefaults()

plt.show(df.plot(kind = 'box'))

pd.options.display.mpl_style = 'default' # Sets the plotting display theme to ggplot2
df.plot(kind = 'box')

sns.boxplot(data=df,width=0.5)
sns.violinplot(df,width=3.5)

plt.show(sns.distplot(df.ix[:,2], rug = True, bins = 15))

with sns.axes_style("white"):
    plt.show(sns.jointplot(df.ix[:,1],df.ix[:,2], kind = "kde"))

plt.show(sns.lmplot("Benguet","Ifugao",df))

#Creating custom function
def add_2int(x,y):
    """Return the result of ``x + y``."""
    total = x + y
    return total
print(add_2int(2,2))

# an algorithm example
def case(n=10,mu=3,sigma=np.sqrt(5),p=0.025,rep=100):
    m=np.zeros((rep,4))

    for i in range(rep):
        norm = np.random.normal(loc = mu, scale = sigma, size = n)
        xbar = np.mean(norm)

Exemplo n.º 48

0

Exibir arquivo

Arquivo: RS.py Projeto: afcarl/PythonDS-MLBootcamp

df.head()

import matplotlib.pyplot as plt
import seaborn as sns

df.groupby('title')['rating'].mean().sort_values(ascending=False).head()
df.groupby('title')['rating'].count().sort_values(ascending=False).head()

ratings = pd.DataFrame(df.groupby('title')['rating'].mean())

ratings['numRatings'] = pd.DataFrame(df.groupby('title')['rating'].count())
ratings.head()

ratings['numRatings'].hist(bins=100, figsize=(10, 6))
ratings['rating'].hist(bins=100, figsize=(10, 6))
sns.jointplot(x='rating', y='numRatings', data=ratings, alpha=0.6)
# as the number of ratings goes up, so does the average rating

moviemat = df.pivot_table(index='user_id', columns='title', values='rating')

moviemat.head()

ratings.sort_values('numRatings', ascending=False).head(10)

starwars_user_ratings = moviemat['Star Wars (1977)']
liarliar_user_ratings = moviemat['Liar Liar (1997)']

# This will show how people who have seen star wars rate other movies
similar_to_starwars = moviemat.corrwith(starwars_user_ratings)
similar_to_liarliar = moviemat.corrwith(liarliar_user_ratings)

Exemplo n.º 49

0

Exibir arquivo

Arquivo: JointDistributions.py Projeto: alketcecaj12/datavizinpython

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Joint distribution of horsepower and fuel economy.
# (No residual plot is drawn in this script.)

# Load the data from auto.csv in the working directory
auto = pd.read_csv('auto.csv')


# Generate a joint plot of 'hp' (x axis) and 'mpg' (y axis)
sns.jointplot(x = 'hp', y = 'mpg', data = auto)

# Display the plot
plt.show()

Exemplo n.º 50

0

Exibir arquivo

axes1 = fig.add_axes([0.1, 0.1, 0.8, 0.8])
axes1.scatter(j_day, dw_solar_everyday, label='Observed dw_solar', color='red')
axes1.scatter(j_day, ghi_everyday, label='Clear Sky GHI', color='green')

axes1.set_xlabel('Days')
axes1.set_ylabel('Solar Irradiance (Watts /m^2)')
axes1.set_title('Solar Irradiance - Test Year 2009')
axes1.legend(loc='best')

fig.savefig('RNN Paper Results/Exp2_1/' + test_location + '/' + test_year +
            'Figure 2.jpg',
            bbox_inches='tight')

# In[525]:

sns.jointplot(x=dw_solar_everyday, y=ghi_everyday, kind='reg')
plt.xlabel('Observed global downwelling solar (Watts/m^2)')
plt.ylabel('Clear Sky GHI (Watts/m^2)')
plt.savefig('RNN Paper Results/Exp2_1/' + test_location + '/' + test_year +
            'Figure 3',
            bbox_inches='tight')

# ### making the Kt (clear sky index at time t) column by first removing rows with ghi==0

# In[526]:

if run_train:
    # TRAIN dataset
    df_train = df_train[df_train['ghi'] != 0]
    df_train['Kt'] = df_train['dw_solar'] / df_train['ghi']
    df_train.reset_index(inplace=True)

Exemplo n.º 51

0

Exibir arquivo

Arquivo: PS05.py Projeto: konstantin-boss/Quantitative-Macro-2019

#mu = np.array([-0.5, -2.5])
# Number of draws.
size = 1000000 # at 10 million my RAM is overloaded

### If a vector X is normally distributed with parameters (mu, cov), then exp(X) is
### lognormally distributed with those same parameters. Note that mu and cov are the
### mean and covariance of the logs, not of the levels.

# Draw the logs jointly, then exponentiate to obtain the levels.
log_data = np.random.multivariate_normal(mu,cov, size=size)
level_data = np.exp(log_data)
# Column 1 is used as capital (k) and column 0 as productivity (z), matching the axis labels below.
k = level_data[:,1]
z = level_data[:,0]
lnk = log_data[:,1]
lnz = log_data[:,0]


### Plotting the joint density functions for levels and for logs
## First levels
sns.jointplot(k,z,kind="hex").set_axis_labels("Capital", "Productivity")
plt.show()

## Then logs
sns.jointplot(lnk,lnz,kind="hex").set_axis_labels("Log Capital", "Log Productivity")
plt.show()
'''
## Plotting the raw joint density of lognormal variables does not make much sense as in 10,000,000 observations there will be massive outliers
### I attempt to get rid of these outliers for plotting purposes

meank = np.mean(k)
sdk = np.std(k)
final_k = [x for x in k if (x > meank - 2 * sdk)]
final_k = [x for x in final_k if (x < meank + 2 * sdk)]


meanz = np.mean(z)

Exemplo n.º 52

0

Exibir arquivo

Arquivo: Regression_pca.py Projeto: Prashantpiyush11/DSSreni

def viz_cont_cont(df, features, target):
    """Draw one seaborn joint plot of ``target`` against each column in ``features``.

    :param df: data passed to ``sns.jointplot`` as ``data``
    :param features: iterable of column names, each used in turn as the x variable
    :param target: column name used as the y variable in every plot
    """
    for column in features:
        sns.jointplot(data=df, x=column, y=target)

Exemplo n.º 53

0

Exibir arquivo

Arquivo: Movie_Project_Detailed_Data_Processing.py Projeto: mzhou356/dc_ds_06_03_mod1_project

# Distribution of popularity before any filtering
merged_df['popularity'].plot.hist(bins=50, color='green')
# Distribution of vote_average: looks close to a normal distribution
merged_df['vote_average'].plot.hist(bins=50, color='red')
# To reduce bias in popularity, drop rows whose vote_count is below 10
low_vote_mask = merged_df['vote_count'] < 10
merged_df = merged_df[~low_vote_mask]
# Replot popularity after the filter: appears to be better
merged_df['popularity'].plot.hist(bins=50, color='blue', alpha=0.5)
# The gross columns go through log10 below, so rows with a zero gross are
# removed first
for gross_col in ('domestic_gross', 'worldwide_gross'):
    merged_df = merged_df[~(merged_df[gross_col] == 0)]
merged_df.to_pickle('budget_popularity.pkl')
# Regression joint plots of each candidate metric against each log10 gross
# column, annotated with hf.r2
for metric_col in ('popularity', 'vote_average'):
    for gross_col in ('domestic_gross', 'worldwide_gross'):
        sns.jointplot(merged_df[metric_col],
                      np.log10(merged_df[gross_col]),
                      kind="reg",
                      stat_func=hf.r2)
# popularity is R2 is 0.3 while vote_average is 0.051, we will use popularity as a metric to estimate gross income
# we will use popularity to estimate how well genres perform using tmdb data frame

Exemplo n.º 54

0

Exibir arquivo

Arquivo: make_scatterplots.py Projeto: Keesiu/meta-kaggle

# For every (feature, plottable) pair write one image under /data/:
# a bar chart of value counts for object-typed features, otherwise a hexbin
# joint plot, with a plain histogram as the fallback when that fails.
for a, b in product(features, plottables):
    msg('Making %s %s' % (a, b))
    x = with_elo[a]
    y = with_elo[b]
    msg('type = %s' % x.dtype)
    if x.dtype == 'object':
        plt.figure()
        x.value_counts().plot(kind='bar')
        plt.savefig('/data/' + a + '_hist.png')
        plt.close('all')
    else:
        try:
            # Clip both axes to the 1st-99th percentile range.
            xlim = tuple(np.percentile(x, [1, 99]))
            ylim = tuple(np.percentile(y, [1, 99]))
            with sns.axes_style("white"):
                sns.jointplot(x, y, kind="hex", xlim=xlim, ylim=ylim)
            plt.savefig('/data/scatter_' + a + '_' + b + '.png')
            plt.close('all')
        except Exception as err:
            # Best-effort fallback. Catching Exception rather than using a
            # bare except lets KeyboardInterrupt/SystemExit still stop the
            # run, and the reason for the fallback is logged.
            msg('Joint plot of %s vs %s failed (%s); plotting histogram instead'
                % (a, b, err))
            plt.figure()
            x.plot(kind='hist')
            plt.savefig('/data/' + a + '_hist.png')
            plt.close('all')

do_indivs = True
if do_indivs:
    for a, b in product(features, plottables):
        msg('Making %s %s' % (a, b))

Exemplo n.º 55

0

Exibir arquivo

Arquivo: stability_class.py Projeto: lalcayag/PhD_repository_v_1

# Directory prefix for the saved figures.
file_out_figures = 'C:/Users/lalc/Documents/Old Documents folder/PhD/Meetings/July 2020/'
# One file-name prefix per entry of `limits` (indexed with the loop counter below).
file = ['U','UN','N','SN']
# NOTE: this first five-interval list is overwritten by the next line,
# so only the four-interval version is used in the loop.
limits = [[-np.inf,-.1], [-.1,-.01], [-.01,.01], [.01,.21], [.21,np.inf]]
limits = [[-np.inf,-.1], [-.1,-.01], [-.01,.01], [.01,.21]]

# Rows of L30min1 whose relscan exceeds 0.25.
relind = L30min1.relscan>.25
# Column of Ri1 used for the classification (second to last).
j = -2
# One KDE joint plot of L_{h,x_1} against L_{h,x_2} per (lower, upper) interval of Ri1[:, j].
for i,l in enumerate(limits):
    # Rows whose Ri1 value lies strictly inside the current interval.
    stabind = ((Ri1[:,j]>l[0]) & (Ri1[:,j]<l[1]))
    # Rename the first six columns to LaTeX labels and keep the rest.
    # The result is the same on every iteration.
    cols = np.r_[['$L_{u_1,x_1}$', '$L_{u_1,x_2}$','$L_{v_1,x_1}$', '$L_{v_1,x_2}$','$L_{h,x_1}$', '$L_{h,x_2}$'], L30min1.columns [6:]]
    L30min1.columns = cols
    # Upper axis limits; the lower limits are 0.
    xlim = 5*200
    ylim = 5*200
    # `ind1` is defined outside this excerpt.
    g = sns.jointplot(x ='$L_{h,x_1}$', y = '$L_{h,x_2}$', data=L30min1.loc[relind & stabind & ind1],
                            height = 8, kind="kde", cmap="jet", xlim = (0,xlim), ylim = (0,ylim),
                            color='k')#,cbar=True, cbar_kws={"format": formatter, "label": '$Density$'})
    g.set_axis_labels('$L_{h,x_1}$', '$L_{h,x_2}$', fontsize = 24)
    # Dashed diagonal from (0, 0) to (xlim, ylim) as a 1:1 reference line.
    g.ax_joint.plot([0,xlim],[0,ylim],'--k', linewidth = 2)
    # Overlay the selected observations as translucent black dots.
    g.ax_joint.plot(L30min1.loc[relind & stabind & ind1]['$L_{h,x_1}$'].values,L30min1.loc[relind & stabind & ind1]['$L_{h,x_2}$'].values,'o', color = 'k', alpha=.2)
    # Annotate the plot with the interval, formatted as "lower<Ri_f<upper".
    g.ax_joint.text(100, 800,'$'+'%.2f' % l[0] +'<Ri_f<'+'%.2f' % l[1] +'$',fontsize=30,color='r')
    plt.tight_layout()
    plt.savefig(file_out_figures+file[i]+'_phase_1.png')


file = ['U','UN','N','SN','VS']       
relind = L30min2.relscan>.25
for i,l in enumerate(limits):
    stabind = ((Ri2[:,-2]>l[0]) & (Ri2[:,-2]<l[1]))
    cols = np.r_[['$L_{u_1,x_1}$', '$L_{u_1,x_2}$','$L_{v_1,x_1}$', '$L_{v_1,x_2}$','$L_{h,x_1}$', '$L_{h,x_2}$'], L30min2.columns [6:]]
    L30min2.columns = cols

Exemplo n.º 56

0

Exibir arquivo

# (continues a comment from above) ...that runs through the values;
# it is switched off like this (kde=False)

sns.distplot(tips['total_bill'],kde=False)
plt.show()

# The number of bins (the bars) can be changed through the bins
# parameter by passing an int; take care with the resulting
# bin size

sns.distplot(tips['total_bill'],kde=False,bins=40)
plt.show()

# jointplot compares two columns of a dataset

sns.jointplot(x='total_bill',y='tip',data=tips)
plt.show()

# It can be drawn in several ways through the kind parameter,
# using: hex, reg, kde

# pairplot shows a grid of plots comparing every column with
# every other one: a column against itself gives a histogram,
# and against another column a two-variable plot like jointplot()

sns.pairplot(tips)
plt.show()

# To split the information of each plot by another column, for
# example by sex, use the hue parameter; it takes a categorical
# column rather than a numeric-valued one (comment continues below)

Exemplo n.º 57

0

Exibir arquivo

Next compare the distributions of the positive and negative examples over a few features. 
Good questions to ask yourself at this point are:

	* Do these distributions make sense?
		+ Yes. You've normalized the input and these are mostly concentrated in the +/- 2 range.
	* Can you see the difference between the distributions?
		+ Yes the positive examples contain a much higher rate of extreme values.
-----------------------------------------------------------------------------------------
'''
# Split the training features into positive and negative examples using the
# boolean label mask, keeping the column names of train_df.
feature_names = train_df.columns
pos_df = pd.DataFrame(train_features[bool_train_labels], columns=feature_names)
neg_df = pd.DataFrame(train_features[~bool_train_labels], columns=feature_names)

# Both classes use the same hexbin settings and axis limits so that the two
# figures can be compared directly.
hexbin_options = dict(kind='hex', xlim=(-5, 5), ylim=(-5, 5))

sns.jointplot(pos_df['V5'], pos_df['V6'], **hexbin_options)
plt.suptitle("Positive distribution")

sns.jointplot(neg_df['V5'], neg_df['V6'], **hexbin_options)
_ = plt.suptitle("Negative distribution")

Exemplo n.º 58

0

Exibir arquivo

Arquivo: histograms and density plots.py Projeto: souviksaha97/Data-Science-Lab


# Histogram of petal length (the KDE curve is switched off)
sns.distplot(a = iris_data['Petal Length (cm)'], kde=False)



# Kernel density estimate (KDE)
# This is a smoothed version of the histogram

# One-dimensional KDE plot of petal length, with the area under the curve shaded
sns.kdeplot(data=iris_data['Petal Length (cm)'], shade=True)


# Two-dimensional KDE plot: petal length against sepal width
sns.jointplot(x=iris_data['Petal Length (cm)'],
              y=iris_data['Sepal Width (cm)'], kind='kde')








# Load each species from its own CSV file (indexed by the "Id" column) so
# that the differences between species can be examined

iris_set_data = pd.read_csv('data/iris_setosa.csv', index_col="Id")
iris_ver_data = pd.read_csv('data/iris_versicolor.csv', index_col="Id")
iris_vir_data = pd.read_csv('data/iris_virginica.csv', index_col="Id")

Exemplo n.º 59

0

Exibir arquivo

def explore_global_plot(data, label='label', n_feats=50, id=None, task='classification'):
    '''
    Show a sequence of exploratory plots for a labelled DataFrame.

    In order: a 2-D projection coloured by label (classification only), a
    correlation heat map, printed outlier rows per numeric feature,
    missing-value matrix/bar plots and a joint plot of the number of missing
    attributes per row.

    :param data: DataFrame
    :param label: label column name in the data
    :param n_feats: the number of leading feature columns used in the
        missing-value plots
    :param id: optional name of an identifier column; it is checked for
        duplicates, excluded from the features and dropped from ``data``
        IN PLACE (the caller's DataFrame is modified)
    :param task: regression or classification
    :return: None
    '''
    columns = data.columns.tolist()
    columns.remove(label)

    if id is not None:
        # The duplicate check must look at the id column of the DataFrame;
        # indexing the list of column names with a column name would raise.
        if data[id].duplicated().sum():
            print('{} is duplicated !!!'.format(id))

        columns.remove(id)
        # NOTE(review): in-place drop kept for backward compatibility, since
        # callers may rely on the id column being removed from their frame.
        data.drop(id, axis=1, inplace=True)

    # Split the feature columns into numeric (integer/float dtype) and the rest.
    numeric_names = [
        name for name, dtype in zip(columns, data[columns].dtypes)
        if ptypes.is_integer_dtype(dtype)
        or ptypes.is_int64_dtype(dtype)
        or ptypes.is_float_dtype(dtype)
    ]
    numeric_set = set(numeric_names)
    # Keep the original column order so the plots are reproducible.
    category_names = [name for name in columns if name not in numeric_set]

    if task == 'classification':
        if len(category_names):
            # Mixed numeric/categorical data: project the complete rows with
            # FAMD and colour them by class.
            new_data = data.dropna(axis=0)
            famd = prince.FAMD(
                n_components=2,
                n_iter=3,
                copy=True,
                check_input=True,
                engine='auto',
                random_state=42
            )
            famd = famd.fit(new_data[columns])
            ax = famd.plot_row_coordinates(
                new_data,
                ax=None,
                x_component=0,
                y_component=1,
                labels=new_data.index,
                color_labels=['{}'.format(t) for t in new_data[label]],
                ellipse_outline=False,
                ellipse_fill=True,
                show_points=True
            )
            plt.show()
        else:
            # Purely numeric data: a two-component PCA of the complete rows.
            # `seed` is expected to be defined at module level.
            new_data = data.dropna(axis=0)
            pca = PCA(n_components=2, random_state=seed)
            X_pca = pca.fit_transform(new_data[columns])
            sns.scatterplot(x=X_pca[:, 0], y=X_pca[:, 1], hue=label, data=new_data)
            plt.show()

    # sort features for correlation plot
    sorted_feat_name = numeric_names
    if len(numeric_names) > 6:
        n_clusters = 3
        new_data = data[[label] + numeric_names].dropna(axis=0)
        new_data_feat = new_data[numeric_names]
        new_data_stand = StandardScaler().fit_transform(new_data_feat)
        kmean_init = KMeans(n_clusters=n_clusters, random_state=seed)
        # Cluster the FEATURES, so each row handed to KMeans must be one
        # feature's standardized values across all samples. That is the
        # transpose; reshape() would mix values of different features.
        kmean_init.fit(new_data_stand.T)
        # Features sharing a cluster label end up adjacent in the heat map.
        sorted_feat = sorted(zip(numeric_names, kmean_init.labels_), key=lambda x: x[1])
        sorted_feat_name = [i[0] for i in sorted_feat]

    # correlation plot for all features
    sns.heatmap(data[[label] + sorted_feat_name + category_names].corr())
    plt.show()

    # outlier detection just for numeric features
    # (mad_based_outlier is defined elsewhere and is applied column-wise; its
    # result is used below as a boolean row mask per column)
    outlier = data[numeric_names].apply(mad_based_outlier)
    for i, column in enumerate(outlier.columns):
        print('outlier:\n {}'.format(data[[column]][outlier.iloc[:, i]]))

    # missing value pattern plot for all features
    msno.matrix(data[columns[:n_feats]])
    plt.show()

    msno.bar(data[columns[:n_feats]])
    plt.show()

    # Number of missing attributes per row, sorted ascending and plotted
    # against the rank of the row in that ordering.
    miss_data = data[columns[:n_feats]].isnull().sum(axis=1)
    miss_data = miss_data.to_frame()
    miss_data.columns = ['number_of_missing_attributes']
    miss_data.sort_values('number_of_missing_attributes', inplace=True)
    miss_data['index'] = list(range(0, miss_data.shape[0]))
    sns.jointplot(x="index", y="number_of_missing_attributes", data=miss_data)
    plt.show()

Exemplo n.º 60

0

Exibir arquivo

def analyze_zN(z, outdir, vg, skip_umap: bool = False, num_pcs: int = 2, num_ksamples: int = 20):
    """Run PCA, k-means and (optionally) UMAP on `z`, writing volumes and plots under `outdir`.

    Args:
        z: 2-D array; `z.shape[1]` is read as the latent dimensionality.
        outdir: Prefix for every output path built below. Only
            `{outdir}/kmeans{K}` is created here; `outdir` itself and the
            `{outdir}/pc{i}` directories are presumably created elsewhere
            (e.g. by `vg.gen_volumes`) -- TODO confirm.
        vg: Object exposing `gen_volumes(path, z_values)`.
        skip_umap: If true, the UMAP embedding and every UMAP plot are
            skipped. They are also skipped whenever `z` has 2 or fewer columns.
        num_pcs: Number of leading principal components for which volumes
            and per-component plots are produced.
        num_ksamples: Value passed as `K` to `analysis.cluster_kmeans`
            (presumably the number of clusters -- confirm against that helper).
    """
    zdim = z.shape[1]

    # Principal component analysis
    log('Perfoming principal component analysis...')
    pc, pca = analysis.run_pca(z)  
    log('Generating volumes...')
    for i in range(num_pcs):
        # Restrict the traversal to the 5th-95th percentile range of this component.
        start, end = np.percentile(pc[:,i],(5,95))
        # Components are numbered from 1 in helper arguments and output names.
        # NOTE(review): the literal 10 is presumably the number of points along
        # the trajectory -- confirm in analysis.get_pc_traj.
        z_pc = analysis.get_pc_traj(pca, z.shape[1], 10, i+1, start, end)
        vg.gen_volumes(f'{outdir}/pc{i+1}', z_pc)

    # kmeans clustering
    log('K-means clustering...')
    K = num_ksamples
    kmeans_labels, centers = analysis.cluster_kmeans(z, K)
    # `centers` is overwritten with the helper's result; presumably each centroid
    # is replaced by its nearest row of z and `centers_ind` holds those row
    # indices -- confirm in analysis.get_nearest_point.
    centers, centers_ind = analysis.get_nearest_point(z, centers)
    if not os.path.exists(f'{outdir}/kmeans{K}'): 
        os.mkdir(f'{outdir}/kmeans{K}')
    utils.save_pkl(kmeans_labels, f'{outdir}/kmeans{K}/labels.pkl')
    np.savetxt(f'{outdir}/kmeans{K}/centers.txt', centers)
    np.savetxt(f'{outdir}/kmeans{K}/centers_ind.txt', centers_ind, fmt='%d')
    log('Generating volumes...')
    vg.gen_volumes(f'{outdir}/kmeans{K}', centers)

    # UMAP -- slow step
    # `umap_emb` is only bound inside this branch; every later use is guarded
    # by the same `zdim > 2 and not skip_umap` condition.
    if zdim > 2 and not skip_umap:
        log('Running UMAP...')
        umap_emb = analysis.run_umap(z)
        utils.save_pkl(umap_emb, f'{outdir}/umap.pkl')

    # Make some plots
    log('Generating plots...')
    # NOTE(review): seaborn's jointplot appears to build its own figure, so the
    # numbered figures opened by plt.figure(N) below are probably left empty --
    # confirm before relying on figure numbers.
    plt.figure(1)
    g = sns.jointplot(x=pc[:,0], y=pc[:,1], alpha=.1, s=2)
    g.set_axis_labels('PC1','PC2')
    plt.tight_layout()
    plt.savefig(f'{outdir}/z_pca.png')
    
    plt.figure(2)
    g = sns.jointplot(x=pc[:,0], y=pc[:,1], kind='hex')
    g.set_axis_labels('PC1','PC2')
    plt.tight_layout()
    plt.savefig(f'{outdir}/z_pca_hexbin.png')

    if zdim > 2 and not skip_umap:
        plt.figure(3)
        g = sns.jointplot(x=umap_emb[:,0], y=umap_emb[:,1], alpha=.1, s=2)
        g.set_axis_labels('UMAP1','UMAP2')
        plt.tight_layout()
        plt.savefig(f'{outdir}/umap.png')

        plt.figure(4)
        g = sns.jointplot(x=umap_emb[:,0], y=umap_emb[:,1], kind='hex')
        g.set_axis_labels('UMAP1','UMAP2')
        plt.tight_layout()
        plt.savefig(f'{outdir}/umap_hexbin.png')

    # Same projections again with the k-means representatives annotated; these
    # files go into the kmeans{K} sub-directory.
    analysis.scatter_annotate(pc[:,0], pc[:,1], centers_ind=centers_ind, annotate=True)
    plt.xlabel('PC1')
    plt.ylabel('PC2')
    plt.savefig(f'{outdir}/kmeans{K}/z_pca.png')

    # The hex variant's return value is used like the jointplot grids above
    # (set_axis_labels), unlike scatter_annotate whose axes are labelled via plt.
    g = analysis.scatter_annotate_hex(pc[:,0], pc[:,1], centers_ind=centers_ind, annotate=True)
    g.set_axis_labels('PC1','PC2')
    plt.tight_layout()
    plt.savefig(f'{outdir}/kmeans{K}/z_pca_hex.png')

    if zdim > 2 and not skip_umap:
        analysis.scatter_annotate(umap_emb[:,0], umap_emb[:,1], centers_ind=centers_ind, annotate=True)
        plt.xlabel('UMAP1')
        plt.ylabel('UMAP2')
        plt.savefig(f'{outdir}/kmeans{K}/umap.png')

        g = analysis.scatter_annotate_hex(umap_emb[:,0], umap_emb[:,1], centers_ind=centers_ind, annotate=True)
        g.set_axis_labels('UMAP1','UMAP2')
        plt.tight_layout()
        plt.savefig(f'{outdir}/kmeans{K}/umap_hex.png')

    # One UMAP scatter per principal component, passing that component's values
    # to scatter_color (presumably used as the point colours -- confirm);
    # saved next to the volumes in pc{i+1}.
    for i in range(num_pcs):
        if zdim > 2 and not skip_umap:
            analysis.scatter_color(umap_emb[:,0], umap_emb[:,1], pc[:,i], label=f'PC{i+1}')
            plt.xlabel('UMAP1')
            plt.ylabel('UMAP2')
            plt.tight_layout()
            plt.savefig(f'{outdir}/pc{i+1}/umap.png')