Exemplo n.º 1
0
def plotTradeVsNews(tickName):
    """Plot log trading volume against news buzz and log news volume.

    Loads the frame for *tickName* through ``getNewsNTradingVol`` (reading
    from the ``resultsMKII`` directory) and draws, for each predictor, a
    hexbin joint plot followed by a KDE joint plot, then shows all figures.

    :param tickName: ticker identifier forwarded to ``getNewsNTradingVol``.
    """
    path2 = "resultsMKII"
    # Fixed: the original referenced the undefined name ``tick_Name``.
    frame = getNewsNTradingVol(tickName, path2)

    # Vectorised replacement for the element-by-element append loop.
    news_buzz = np.array(frame['NewsBuz'].values)
    log_trading_vol = np.log(np.asarray(frame['tradingVol'].values, dtype=float))
    log_news_vol = np.log(np.asarray(frame['NewsVol'].values, dtype=float))

    sns.set(style="ticks")
    y = log_trading_vol
    predictors = ((news_buzz, "News Buz"), (log_news_vol, "News Volume"))
    for x, x_label in predictors:
        hex_grid = sns.jointplot(x=x, y=y, kind="hex", stat_func=kendalltau,
                                 color="#4CB391")
        hex_grid.set_axis_labels(xlabel=x_label, ylabel="Trading Volume")

        kde_grid = sns.jointplot(x=x, y=y, kind="kde", size=7, space=0)
        kde_grid.set_axis_labels(xlabel=x_label, ylabel="Trading Volume")

    # One show() is enough; the original called it twice in a row.
    sns.plt.show()
Exemplo n.º 2
0
    def doplot(self, name):
        """
        Load a pickled trace and show corner, scatter and KDE joint plots.

        :param name: path of the pickle file; the unpickled object is stored
            in ``self.trace`` and must expose the keys 'muCB', 'sdCB' and 'CB'.
        """
        # NOTE(review): pickle.load can execute arbitrary code; only open
        # trace files that come from a trusted source.
        with open(name, "rb") as handle:
            self.trace = pickle.load(handle)

        mu = self.trace['muCB']
        sd = self.trace['sdCB']
        var = np.vstack([mu[:, 0], mu[:, 1], sd[:, 0], sd[:, 1]]).T

        # Raw strings: '\m' and '\s' are invalid escape sequences otherwise.
        corner.corner(var,
                      labels=[r'$\mu_C$', r'$\mu_B$', r'$\sigma_C$', r'$\sigma_B$'],
                      show_titles=True)

        pl.show()

        # Take a random subsample of at most 200 draws. A single permutation
        # is applied to both components so that each C value stays paired
        # with the B value of the same draw, and the stored trace is never
        # shuffled in place (the original shuffled C and B independently).
        cb = np.asarray(self.trace['CB'])
        picked = np.random.permutation(len(cb))[:200]
        C_slice = cb[picked, :, 0].flatten()
        B_slice = cb[picked, :, 1].flatten()

        # First option: plain scatter
        pl.plot(B_slice, C_slice, '.', alpha=0.002)
        pl.show()

        # KDE joint plot
        sns.jointplot(C_slice, B_slice, kind='kde')
        pl.show()
Exemplo n.º 3
0
def seaborn_join():
    """Show a hexbin joint plot of 2000 draws from a correlated 2-D Gaussian."""
    mean = [0, 0]
    covariance = [[5, 2], [2, 2]]
    samples = np.random.multivariate_normal(mean, covariance, size=2000)
    frame = pd.DataFrame(samples, columns=['x', 'y'])

    # The 'white' axes style is applied only while the plot is created.
    with sns.axes_style('white'):
        sns.jointplot(x="x", y="y", data=frame, kind='hex')

    plt.show()
Exemplo n.º 4
0
 def histogram(self,x=None, y=None, l=None, t=None, **kwargs):
     """
     Shortcut for drawing histograms of the table returned by
     ``self.to_dataframe(l=l, t=t, latex=True)``.

     - neither x nor y given: a seaborn pair plot of the whole table, with
       the x tick labels rotated by 90 degrees
     - only x given: a 1d distribution plot of that column
     - x and y given: a joint plot of the two columns
     - only y given: nothing is drawn

     Extra keyword arguments are forwarded to the seaborn call. Returns None.
     """
     table = self.to_dataframe(l=l, t=t, latex=True)

     if x is None:
         if y is None:
             grid = sns.pairplot(table, **kwargs)
             for axis in grid.axes.flat:
                 plt.setp(axis.xaxis.get_majorticklabels(), rotation=90)
         return

     x_label = self._reformat_label(x)
     if y is None:
         sns.distplot(table[x_label], **kwargs)
         plt.xlabel(x_label)
         return

     y_label = self._reformat_label(y)
     sns.jointplot(x=x_label, y=y_label, data=table, **kwargs)
     return
Exemplo n.º 5
0
def make_scatter_plot(frame, name, **kwargs):
    """
    Make joint scatter plots of column *name* against every other column.

    Columns 'hypo', 'llh' and 'mctrue' are skipped. 'deltam31' values are
    multiplied by 100 and y columns whose name contains 'theta' are converted
    from radians to degrees before plotting.

    :param frame: DataFrame holding the columns to plot.
    :param name: column used on the x axis of every plot.
    :param kwargs: forwarded to ``sns.jointplot``.
    :return: list of the matplotlib figures, one per plotted column, in the
        order the columns appear in *frame*.
    """
    # Build scaled copies instead of using ``*=``: the in-place form wrote
    # the scaling back into the caller's DataFrame on every call.
    column_x = frame[name]
    if name == 'deltam31':
        column_x = column_x * 100.0

    exclude = {'hypo', 'llh', 'mctrue'}
    # Column order (rather than set order) keeps the returned list stable.
    params = [col for col in frame.columns if col not in exclude]

    figs = []
    # Plot correlation scatter plot for all other systematics
    for p in params:
        if p == name:
            continue
        column_y = frame[p]
        if p == 'deltam31':
            column_y = column_y * 100.0
        if 'theta' in p:
            column_y = np.rad2deg(column_y)

        with sns.axes_style("whitegrid"):
            sns.jointplot(column_x, column_y, size=8, color='b',
                          **kwargs)
            plt.tight_layout()
            figs.append(plt.gcf())

    return figs
Exemplo n.º 6
0
def plotBonusvsSalary(df):
    """Draw a bonus-vs-salary joint plot, save it as a PNG and show it.

    :param df: DataFrame with 'bonus' and 'salary' columns.
    """
    sns.jointplot(x="bonus", y="salary", data=df)

    # Resize the figure that jointplot left as the current one, then save it.
    figure = plt.gcf()
    figure.set_size_inches(18.5, 10.5)
    figure.savefig('bonusVSsalary.png', dpi=100)

    plt.show()
	def plot_seaborn( self ):
		"""Show a KDE joint plot of two columns of 'movement.csv'.

		Only rows whose first column equals 0 and whose two plotted columns
		lie strictly inside (-limit, limit) are kept.
		"""

		# https://stanford.edu/~mwaskom/software/seaborn/tutorial/distributions.html

		# DataFrame.as_matrix() was removed in pandas 1.0; .values returns
		# the same ndarray and works on every pandas version.
		data = pd.read_csv( 'movement.csv' ).values

		# Column pairs in the file: 1/2 3/4 5/6 7/8
		x_column = 3
		y_column = 4

		limit = 100
		keep = (
			  ( data[:,0] == 0)
			& ( data[:,x_column] > -limit )
			& ( data[:,x_column] < limit )
			& ( data[:,y_column] > -limit )
			& ( data[:,y_column] < limit )
		)
		data = data[keep]

		x = data[:,x_column]
		y = data[:,y_column]

		with sns.axes_style( 'white' ):
			sns.jointplot( x=x, y=y, kind='kde' )  # scatter, reg, resid, hex, kde

		sns.plt.show()
Exemplo n.º 8
0
def skill_vs_speed(prediction_mode, time_model, data):
    """Plot a KDE joint plot of student skill against student speed.

    A ``TimeCombiner`` of the two models is evaluated on *data* first (with
    ``force_run=True``); the skills of both models are then read for every
    student returned by ``data.get_students()``.
    """
    combined = TimeCombiner(prediction_mode, time_model)
    Evaluator(data, combined).get_report(force_run=True)

    students = data.get_students()
    skills = prediction_mode.get_skills(students)
    fastness = time_model.get_skills(students)

    grid = sns.jointplot(x=pd.Series(skills), y=pd.Series(fastness),
                         kind='kde', space=0)
    grid.set_axis_labels("skill", "speed")
Exemplo n.º 9
0
def show_graph(data):
    """ Show a joint plot of height against time for the given data.

    Each item of *data* contributes one row: its first element is the time
    and ``height`` is applied to the remaining elements.
    """
    rows = [[point[0], height(point[1:])] for point in data]
    rows.sort(key=lambda row: row[0])  # order by time (stable sort)

    df = pd.DataFrame(rows)
    df.columns = ["time","height"]
    seaborn.jointplot(x='time', y='height', data=df)
    plt.show()
Exemplo n.º 10
0
def sbratio(sampler):
    """Draw a KDE joint plot of 'theta' against 'phi' from a sampler chain.

    Columns 2 and 4 of ``sampler.flatchain`` are replaced by their absolute
    values (in place on the array returned by ``flatchain``) before the
    chain is wrapped in a DataFrame.

    :param sampler: object exposing a six-column ``flatchain`` array.
    """
    chain = sampler.flatchain
    chain[:, 2] = np.abs(chain[:, 2])
    chain[:, 4] = np.abs(chain[:, 4])
    dd = pd.DataFrame(data=chain,
                      columns=['theta', 'phi', 'scatter', 'badfrac', 'badsig', 'badmn'])
    with sns.axes_style("white"):
        # Fixed: the original passed the undefined name ``data`` here.
        sns.jointplot(x="theta", y="phi", data=dd, kind="kde")
Exemplo n.º 11
0
 def plot(self, samples, columns=None):
     """Draw a joint plot of two-column *samples*.

     The columns are named "x" and "y" unless *columns* is given, in which
     case its first two entries are used as the names.
     """
     if columns is None:
         x_name, y_name = "x", "y"
     else:
         x_name, y_name = columns[0], columns[1]

     df = pd.DataFrame(samples, columns=[x_name, y_name])
     sns.jointplot(x=x_name, y=y_name, data=df)
Exemplo n.º 12
0
def plot_scatter_hist_sns(x, y):
    """Save a hexbin joint plot of *x* against *y* to 'plot4.pdf'.

    The axes are labelled with the LaTeX symbols phi (x) and theta (y); the
    figure is closed after it has been written.
    """
    sns.set(style="ticks")
    grid = sns.jointplot(np.array(x), np.array(y), kind="hex", size=4,
                         stat_func=None)
    # Raw strings: "\p" is an invalid escape sequence in a normal literal.
    grid.set_axis_labels(r"$\phi$", r"$\theta$")
    with PdfPages('plot4.pdf') as pdf:
        pdf.savefig()
    sns.plt.close()
Exemplo n.º 13
0
def plot(data, total, title, width=800.0, unit='', dosort=True,
		target=None, target2=None):
	"""Return an HTML bar plot for a dictionary of values and a max value.

	When there are more than 30 entries and a *target* series is given, a
	seaborn figure is rendered instead and returned as an inline base64 PNG:
	a regression joint plot (or ``lmplot`` coloured by *target2*) for a
	numeric target, a bar plot otherwise.

	:param data: mapping of key -> numeric value.
	:param total: value that corresponds to the full bar *width*.
	:param title: heading of the plot (and name of the value column).
	:param width: width in pixels of a bar whose value equals *total*.
	:param unit: suffix printed after each value.
	:param dosort: sort the bars by descending value.
	:param target: optional pandas Series indexed by the keys of *data*.
	:param target2: optional second Series, used as hue.
	:returns: a string of HTML.
	"""
	if len(data) > 30 and target is not None:
		df = pandas.DataFrame(index=data)
		df[title] = pandas.Series(data, index=df.index)
		# Series.ix was removed in pandas 1.0; reindex() is the label-based
		# equivalent and yields NaN for keys missing from the target.
		df[target.name] = target.reindex(df.index)
		if target2 is not None:
			df[target2.name] = target2.reindex(df.index)
		# ``dtype == numpy.number`` is not a reliable numeric test; ask
		# pandas instead (booleans are kept on the categorical branch).
		numeric = (pandas.api.types.is_numeric_dtype(target.dtype)
				and not pandas.api.types.is_bool_dtype(target.dtype))
		if numeric:
			if target2 is None:
				seaborn.jointplot(x=target.name, y=title, data=df, kind='reg')
			else:
				seaborn.lmplot(x=target.name, y=title, data=df,
						hue=target2.name)
		else:  # X-axis is categorical
			df.sort_values(by=target.name, inplace=True)
			if target2 is None:
				seaborn.barplot(x=target.name, y=title, data=df)
			else:
				seaborn.barplot(x=target.name, y=title, data=df,
						hue=target2.name)
			fig = plt.gcf()
			fig.autofmt_xdate()

		# Convert the current figure to an inline PNG
		figfile = io.BytesIO()
		plt.savefig(figfile, format='png')
		result = '<div><img src="data:image/png;base64, %s"/></div>' % (
				base64.b64encode(figfile.getvalue()).decode('utf8'))
		plt.clf()
		return result

	result = ['<div class=barplot>',
			('<text style="font-family: sans-serif; font-size: 16px; ">'
			'%s</text>' % title)]
	if target is not None:
		# Keep only keys known to the target, ordered by target value.
		data = OrderedDict([(key, data[key]) for key in
				target.sort_values().index if key in data])
	# A key's colour group is its prefix before '_' (or its first character).
	keys = {key.split('_')[0] if '_' in key else key[0] for key in data}
	color = {}
	if len(keys) <= 5:
		color.update(zip(keys, range(1, 6)))
	keys = list(data)
	if dosort:
		keys.sort(key=data.get, reverse=True)
	for key in keys:
		result.append('<br><div style="width:%dpx;" class=b%d></div>'
				'<span>%s: %g %s</span>' % (
				int(round(width * data[key] / total)) if data[key] else 0,
				color.get(key.split('_')[0] if '_' in key else key[0], 1)
					if data[key] else 0,
				htmlescape(key), data[key], unit,))
	result.append('</div>\n')
	return '\n'.join(result)
Exemplo n.º 14
0
def make_JointPlot(plot, region, data, backgrounds):
    """Draw the seaborn joint plot described by *plot* and save it as EPS.

    The sample named ``plot.sample`` is looked up first in *data*, then in
    *backgrounds*; exactly one match is required. Its tree is converted to a
    record array with the region cut, the event weight and the sample scale
    factor applied, and ``plot.x_var`` is plotted against ``plot.y_var``.
    The file ``<plot.name>.eps`` is written and then moved into
    ``indir + "/plots/" + outdir`` (both module-level names).

    Exits the process with status 1 when the sample lookup is ambiguous or
    empty, or when ``plot.cmap`` is not a supported value.
    """
    if data.name == plot.sample:
        candidates = [data]
    else:
        candidates = [bk for bk in backgrounds if bk.name == plot.sample]
    if len(candidates) != 1:
        msg('ERROR make_JointPlot received %d samples to plot for plot with name %s'%(len(candidates), plot.name))
        # Non-zero status: this is an error path (the original exited with 0).
        sys.exit(1)

    # turn this tree into an array :)
    sample_to_plot = candidates[0]
    selection_ = '(' + region.tcut + ') * eventweight * ' + str(sample_to_plot.scale_factor)
    tree_array = tree2rec(sample_to_plot.tree, branches=[plot.x_var, plot.y_var],
                            selection=selection_)
    tree_array.dtype.names = (plot.x_var, plot.y_var)
    x_arr = tree_array[plot.x_var]
    y_arr = tree_array[plot.y_var]

    sns.set(style="white")

    # Statistic annotated on the plot: Kendall's tau when requested,
    # Pearson's r when unspecified, none for any other value.
    stat_func_ = None
    if plot.stat_func == "kendalltau":
        from scipy.stats import kendalltau
        stat_func_ = kendalltau
    elif plot.stat_func is None:
        from scipy.stats import pearsonr
        stat_func_ = pearsonr

    # Arguments shared by every colour-map variant.
    common = dict(kind=plot.kind, stat_func=stat_func_,
                  xlim=[plot.x_range_min, plot.x_range_max],
                  ylim=[plot.y_range_min, plot.y_range_max])

    if plot.cmap is None or plot.cmap == "default":
        j_plot_grid = sns.jointplot(x_arr, y_arr, color=plot.color,
                                    linewidth=plot.line_width, **common)
    elif plot.cmap == "cubehelix":
        cmap_ = sns.cubehelix_palette(as_cmap=True, dark=0, light=1, reverse=True)
        j_plot_grid = sns.jointplot(
            x_arr, y_arr, linewidth=plot.line_width,
            joint_kws={"cmap": cmap_, "n_levels": plot.n_levels, "shade": True},
            **common)
    elif plot.cmap == "blues":
        j_plot_grid = sns.jointplot(
            x_arr, y_arr, linewidth=1.0,
            joint_kws={"cmap": "Blues", "n_levels": plot.n_levels,
                       "shade": True, "shade_lowest": False},
            **common)
    else:
        msg("cmap attribute of joint plot not yet added")
        sys.exit(1)

    j_plot_grid.fig.suptitle(plot.title)
    # Leave room above the marginal axes for the title.
    j_plot_grid.fig.subplots_adjust(top=0.935)
    j_plot_grid.set_axis_labels(plot.x_label, plot.y_label)

    # save the plot to file
    outname = plot.name + ".eps"
    j_plot_grid.savefig(outname)
    out = indir + "/plots/" + outdir
    utils.mv_file_to_dir(outname, out, True)
    fullname = out + "/" + outname
    msg("%s saved to : %s"%(outname, os.path.abspath(fullname)))
Exemplo n.º 15
0
def plot_approx_posterior(cov, means, index):
	"""Draw a KDE joint plot of samples from an approximate 2-D posterior.

	``means[index]`` and *cov* are combined with a zero-mean, identity-
	covariance Gaussian through ``util.product_gaussians``; 200 samples of
	the result are plotted within three standard deviations of its mean.
	"""
	mean = means[index]
	# print() with a single argument behaves the same on Python 2 and 3.
	print(mean.shape)
	mean, cov = util.product_gaussians(mean, np.zeros(2), cov, np.identity(2))
	data = np.random.multivariate_normal(mean, cov, 200)
	df = pd.DataFrame(data, columns=["x", "y"])

	# Axis limits: mean +/- 3 standard deviations along each dimension.
	half_x = 3 * np.sqrt(cov[0][0])
	half_y = 3 * np.sqrt(cov[1][1])
	xlim = (mean[0] - half_x, mean[0] + half_x)
	ylim = (mean[1] - half_y, mean[1] + half_y)
	sns.jointplot(x="x", y="y", data=df, kind="kde", stat_func=None,
			xlim=xlim, ylim=ylim)
Exemplo n.º 16
0
def plot_var(times, pitches, ends, var_n):
    """ Show a joint plot of the notes of variation [var_n].

    :param times: note times.
    :param pitches: note heights, parallel to *times*.
    :param ends: end time of each section; variation ``var_n`` covers times
        in the half-open interval (ends[var_n], ends[var_n + 1]].
    :param var_n: variation index, 0 to 30 (0: Aria).
    """
    start, stop = ends[var_n], ends[var_n + 1]
    # A list rather than a lazy filter(): on Python 3 filter() returns an
    # iterator, which not every pandas version accepts as DataFrame input.
    n_data = [(time, pitch) for time, pitch in zip(times, pitches)
              if start < time <= stop]
    # seaborn
    df = pd.DataFrame(n_data)
    df.columns = ["time", "height"]
    seaborn.jointplot(x='time', y='height', data=df)
    plt.show()
Exemplo n.º 17
0
 def show(self):
     """Show a joint plot of ``self._pr`` (y) against ``self._lams`` (x).

     The figure title reports the sum of the element-wise product of the
     two, labelled 'mean'.
     """
     probs = np.reshape(self._pr, (1, -1)).tolist()[0]  # flattened to a list
     frame = pd.DataFrame({'x': self._lams, 'y': probs})
     sns.jointplot(x='x', y='y', data=frame)

     weighted_sum = (np.asarray(self._lams) * np.asarray(probs)).sum()
     sns.plt.title('mean %f' % weighted_sum)
     sns.plt.show()
Exemplo n.º 18
0
def hist_2d(distribution, nsamples, **kwargs):
    """
    Plot a 2d KDE joint plot of samples drawn from *distribution*.

    *distribution* is instantiated with ``ndims=2`` and sampled *nsamples*
    times with ``MarkovJumpHMC``; extra keyword arguments go to the sampler.
    """
    target = distribution(ndims=2)
    hmc = MarkovJumpHMC(target.Xinit, target.E, target.dEdX, **kwargs)
    draws = hmc.sample(nsamples)

    first_dim, second_dim = draws[0], draws[1]
    with sns.axes_style("white"):
        sns.jointplot(first_dim, second_dim, kind="kde", stat_func=None)
Exemplo n.º 19
0
def pairwise_joint_plots(df, cols):
    """Save a hexbin joint plot for every unordered pair of columns.

    :param df: DataFrame holding the data.
    :param cols: column names; each pair (a, b) with a < b is plotted and
        written to ``joint_<a>_<b>.png`` in the working directory.
    """
    logging.debug('Plotting pairwise joint distributions')
    cols = sorted(cols)
    for colA, colB in [(a, b) for a in cols for b in cols if a < b]:
        filename = 'joint_{}_{}.png'.format(colA, colB)
        logging.debug('joint plot: %s', filename)
        # jointplot creates its own figure; the extra plt.figure() of the
        # original was never drawn on nor closed, leaking one per pair.
        grid = sns.jointplot(x=df[colA], y=df[colB], kind='hex')
        grid.fig.savefig(filename)
        plt.close(grid.fig)
Exemplo n.º 20
0
def AnalyzeAllElectrodes(path='/Users/ryszardcetnarski/Desktop/Nencki/Badanie_NFB/Dane/wszystkie_elektrody_jacek.csv'):
    """Plot before/after relations for each EEG band (data from Jacek).

    For every band a '<band>_po' column is added as '<band>_przed' +
    '<band>_roznica'. Two regression joint plots are drawn per band
    ('przed' vs 'po' and 'przed' vs 'roznica') and ``GeneralModel`` is
    called with every row assigned to a single 'mixed_conditions' group.

    :param path: CSV file to read; defaults to the original hard-coded path.
    :return: the DataFrame including the added '<band>_po' columns.
    """
    db = pd.read_csv(path)

    n_rows = len(db)
    for band in ['theta', 'alpha', 'smr', 'beta1', 'beta2']:
        before, after, change = band + '_przed', band + '_po', band + '_roznica'
        db[after] = db[before] + db[change]

        sns.jointplot(x=before, y=after, data=db, kind="reg")
        sns.jointplot(x=before, y=change, data=db, kind="reg")

        conditions_str = ['mixed_conditions'] * n_rows
        conditions = [0] * n_rows
        GeneralModel(db[before], db[after], band, conditions, conditions_str)

    return db







#Kde using sklearn, returns object
   # kde = KernelDensity(kernel='tophat', bandwidth = 3).fit(initial[:, np.newaxis])
   # log_dens = kde.score_samples(x[:, np.newaxis])

    #Plot sklearn kernel estimate
   # kernel.plot(x, np.exp(log_dens), 'g')
    #Plot original data histogram


  #followUp = np.random.random_sample(100)
    #followUp= np.random.normal(20,10, 100)

    #followUp = np.random.normal(20,10, 100)#initial + np.random.normal(0,100,100)
    #followUp = np.random.random_sample(100)#initial + np.random.normal(0,100,100)
    #hist.hist(initial)

    #initial = np.random.normal(20,10, 100)
    #initial = np.random.random_sample(100)


    #Add noise to each observation
    #initial = #initial *0.95 + np.random.normal(100,100,100)
    #Make a follow up by adding noise a second time to the same population
Exemplo n.º 21
0
def plotCorrelation(frame):
    """Show joint plots of 'price' against 'bedrooms' and against 'size'.

    Blocks on ``plt.show()`` until the figures are closed.
    """
    # One figure per predictor, to visualise each dimension against price.
    for predictor in ("bedrooms", "size"):
        sns.jointplot(x=predictor, y="price", data=frame, size=8)
        plt.tight_layout()

    print("PAUSED...close figures to continue...")
    plt.show()
    return
Exemplo n.º 22
0
def gauss_2d(nsamples=1000):
    """
    Another simple test plot.

    A 1d test Gaussian is sampled once with ``Control`` and once with
    ``ContinuousTimeHMC``; the two sample sets are shown against each other
    as a hexbin joint plot.
    """
    target = misc.distributions.TestGaussian(ndims=1)
    sampler_args = (target.Xinit, target.E, target.dEdX)
    control = Control(*sampler_args)
    experimental = ContinuousTimeHMC(*sampler_args)

    with sns.axes_style("white"):
        control_draws = control.sample(nsamples)[0]
        experimental_draws = experimental.sample(nsamples)[0]
        sns.jointplot(control_draws, experimental_draws, kind="hex",
                      stat_func=None)
Exemplo n.º 23
0
 def drawJointPlot(self, se1, se2):
     """
     Draw a regression joint plot showing how series 1 and series 2 correlate.
         :param self: the instance itself
         :param se1: series 1
         :param se2: series 2
     """
     line_color = self.linecolors[0]
     sns.jointplot(se1, se2, kind='reg', color=line_color)
     plt.legend()
     plt.show()
Exemplo n.º 24
0
def fixed_effects(data, labels):
    """Report and plot the Pearson correlation between two columns.

    :param data: DataFrame (or mapping) holding both columns.
    :param labels: sequence whose first two entries name the columns.
    :return: ``(corcoeff, p_val)`` as returned by ``pearsonr``.
    """
    x_label, y_label = labels[0], labels[1]
    corcoeff, p_val = pearsonr(data[x_label], data[y_label])
    # print() of a single formatted string works on Python 2 and 3 alike.
    print("Pearson correlation between %s and %s across all donors is %g (two tailed p value = %g)"%(x_label, y_label, corcoeff, p_val))

    # The hexbin plot fixes the axis limits that the regression plot reuses.
    grid = sns.jointplot(x_label, y_label, data, kind="hex")
    sns.jointplot(x_label, y_label, data, kind="reg",
                  xlim=grid.ax_joint.get_xlim(),
                  ylim=grid.ax_joint.get_ylim())
    plt.show()

    return corcoeff, p_val
Exemplo n.º 25
0
def covlen(args):
    """
    %prog covlen covfile fastafile

    Plot coverage vs length. `covfile` is two-column listing contig id and
    depth of coverage.
    """
    import numpy as np
    import pandas as pd
    import seaborn as sns
    from jcvi.formats.base import DictFile

    p = OptionParser(covlen.__doc__)
    p.add_option("--maxsize", default=1000000, type="int", help="Max contig size")
    # Fixed help text: this option caps the coverage, not the contig size.
    p.add_option("--maxcov", default=100, type="int", help="Max contig coverage")
    p.add_option("--color", default='m', help="Color of the data points")
    p.add_option("--kind", default="scatter",
                 choices=("scatter", "reg", "resid", "kde", "hex"),
                 help="Kind of plot to draw")
    opts, args, iopts = p.set_image_options(args, figsize="8x8")

    if len(args) != 2:
        sys.exit(not p.print_help())

    covfile, fastafile = args
    cov = DictFile(covfile, cast=float)
    s = Sizes(fastafile)
    maxsize, maxcov = opts.maxsize, opts.maxcov

    # Keep contigs within both limits; contigs missing from covfile count
    # as coverage 0.
    data = []
    for ctg, size in s.iter_sizes():
        c = cov.get(ctg, 0)
        if size > maxsize or c > maxcov:
            continue
        data.append((size, c))

    if not data:
        # zip(*[]) cannot be unpacked into x, y; report instead of crashing.
        logging.error("No contig has size <= %d and coverage <= %d",
                      maxsize, maxcov)
        return

    x, y = zip(*data)
    x = np.array(x)
    y = np.array(y)
    logging.debug("X size {0}, Y size {1}".format(x.size, y.size))

    df = pd.DataFrame()
    xlab, ylab = "Length", "Coverage of depth (X)"
    df[xlab] = x
    df[ylab] = y
    sns.jointplot(xlab, ylab, kind=opts.kind, data=df,
                  xlim=(0, maxsize), ylim=(0, maxcov),
                  stat_func=None, edgecolor="w", color=opts.color)

    figname = covfile + ".pdf"
    savefig(figname, dpi=iopts.dpi, iopts=iopts)
Exemplo n.º 26
0
def main():
    """Explore '../input/movie_metadata.csv' and plot profit relations.

    Prints summaries of the raw and NaN-free data, adds a 'profit' column
    ((gross - budget) / gross * 100) to the NaN-free frame, then shows a
    correlation heat map and joint plots of profit against release year and
    against IMDB score.
    """
    movie_raw_data = pd.read_csv('../input/movie_metadata.csv')
    print(movie_raw_data.head(3))

    print(movie_raw_data.isnull().sum())

    print(movie_raw_data.shape)
    # Explicit copy: a 'profit' column is added to this frame below.
    movie_raw_data_dropna = movie_raw_data.dropna().copy()
    print(movie_raw_data_dropna.shape)
    print(movie_raw_data.dtypes)

    movie_filterd_imdbscore_first = movie_raw_data.loc[movie_raw_data['imdb_score'] > 5]
    movie_filterd_imdbscore_from_raw = movie_raw_data.loc[movie_raw_data['imdb_score'] < 8]

    print(movie_filterd_imdbscore_first.shape)

    # Build the mask from the frame being filtered; the original took it
    # from the unfiltered frame and relied on index alignment.
    movie_filterd_imdbscore_second = movie_filterd_imdbscore_first.loc[
        movie_filterd_imdbscore_first['imdb_score'] < 8]

    print(movie_filterd_imdbscore_second.shape)
    print(movie_filterd_imdbscore_from_raw.shape)

    print('*********************************')

    print(movie_raw_data_dropna.head(3))
    gross = movie_raw_data_dropna['gross'].values
    budget = movie_raw_data_dropna['budget'].values
    profit = ((gross - budget) / gross) * 100
    print(profit)

    movie_raw_data_dropna['profit'] = profit
    print(movie_raw_data_dropna.shape)
    print(movie_raw_data_dropna.head(3))

    corr = movie_raw_data_dropna.corr()
    print(corr)

    f, ax = plt.subplots(figsize=(11, 9))
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    sns.heatmap(corr, cmap=cmap, vmax=1,
                square=True,
                linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)

    g = sns.jointplot(x="title_year", y="profit", kind='scatter', size=10,
                      ylim=[0, 110], xlim=[1980, 2020],
                      data=movie_raw_data_dropna)
    h = sns.jointplot(x="imdb_score", y="profit", kind='reg', size=10,
                      ylim=[0, 110], data=movie_raw_data_dropna)

    plt.show()
Exemplo n.º 27
0
Arquivo: c5.py Projeto: 3774257/abu
def sample_54_1():
    """
    5.4 Visualising data with seaborn
    :return:
    """
    # Distribution of the daily percentage change
    daily_change = tsla_df['p_change']
    sns.distplot(daily_change, bins=80)
    plt.show()

    # Percentage change grouped by weekday
    sns.boxplot(x='date_week', y='p_change', data=tsla_df)
    plt.show()

    # Daily high against daily low
    highs, lows = tsla_df['high'], tsla_df['low']
    sns.jointplot(highs, lows)
    plt.show()
def occupationAnalysis():
    """Draw a KDE joint plot of the grid cells occupied by the robots.

    Reads the green, orange and blue position logs of 'windfield_game1'
    with ``readLog``. Each log line is split on commas; field 0 is printed,
    and fields 1 and 2 are scaled by the module-level ``nbcols`` / ``nbrows``
    and truncated to integer cell coordinates.

    NOTE(review): the original accumulated the cells into a 2-D count array
    and passed that array as ``data=`` together with the column names
    "x"/"y", which a plain ndarray cannot provide. The per-sample cell
    coordinates are plotted instead; confirm this is the intended figure.
    """
    log_files = (
        "./csv/windfield_game1_green_withindex_position.csv",
        "./csv/windfield_game1_orange_withindex_position.csv",
        "./csv/windfield_game1_blue_withindex_position.csv",
    )

    cells_x = []
    cells_y = []
    # One pass per log replaces the three copy-pasted loops of the original.
    for log_file in log_files:
        for robotp in readLog(log_file):
            fields = robotp.split(',')
            print(fields[0])
            cells_x.append(int(float(fields[1]) * nbcols))
            cells_y.append(int(float(fields[2]) * nbrows))

    sns.jointplot(x=np.array(cells_x), y=np.array(cells_y), kind="kde")
Exemplo n.º 29
0
def performance_vs_coverage(db, output=None, max_values=250, **kwargs):
    """Plot performance against coverage for every row of ``param_stats``.

    :param db: object whose ``execute`` runs the SQL query and yields rows.
    :param output: forwarded to ``viz.finalise``.
    :param max_values: not used by this function.
    :param kwargs: forwarded to ``viz.finalise``.
    """
    query = (
        "SELECT "
        "    performance AS performance, "
        "    coverage "
        "FROM param_stats"
    )
    rows = list(db.execute(query))

    # The coverage column is shown under the name "Legality".
    frame = pandas.DataFrame(rows, columns=("Performance", "Legality"))
    sns.jointplot(x="Legality", y="Performance", data=frame,
                  xlim=(0, 1), ylim=(0, 1))
    viz.finalise(output, **kwargs)
Exemplo n.º 30
0
 def show(self):
     """Print the most probable hypotheses and plot probability by index.

     Prints the (up to) 20 hypotheses with the highest values in row 0 of
     ``self.pr`` in ascending order, then the single best one, and shows a
     joint plot of the probabilities against the hypothesis index.
     """
     top = np.argsort(self.pr)[0][-20:]
     for k in top:
         # A single formatted string prints identically on Python 2 and 3.
         print("%s %s" % (self.hypos[k], self.pr[0, k]))
     best = np.argmax(self.pr)
     print("max %s pr= %s" % (self.hypos[best], self.pr[0, best]))

     # x is simply the position of each hypothesis.
     X = list(range(len(self.hypos)))
     Y = self.pr.tolist()[0]
     df = pd.DataFrame({'x': X, 'y': Y})
     sns.jointplot(x='x', y='y', data=df)
     sns.plt.show()
Exemplo n.º 31
0
# In[25]:

# Compare the target counts of men and women.
fig, ax = plt.subplots(figsize=(10, 5))
sns.countplot(x=df['target'], hue=df['sex'], ax=ax)
plt.xlabel('target')
plt.ylabel('sex')
plt.xticks(rotation=50)
# Fixed: the original wrote ``plt.show`` without calling it.
plt.show()

# In[26]:

# NOTE(review): 'trestbps' is listed twice, so it is plotted twice; confirm
# whether another column was intended.
nums = ['age', 'sex', 'trestbps', 'chol', 'trestbps', 'target']
for i in nums:
    # jointplot creates its own figure, so no plt.figure() is opened first
    # (the original left an empty 20x10 figure behind for every column).
    grid = sns.jointplot(x=df[i], y=df['target'], kind='reg')
    # Label and grid the joint axes explicitly instead of relying on
    # whichever axes happens to be current.
    grid.set_axis_labels(i, 'response')
    grid.ax_joint.grid()
    plt.show()

# In[8]:

plt.bar(df['target'], df['age'], alpha=.5, width=0.8, label='chart')
plt.show()

# In[62]:

sns.catplot(x='sex', y='target', data=df, kind='box', hue='fbs')

# In[53]:
# Summary statistics of the house cost.
for _template, _value in (
    ("Minimum Cost: ${}", _min_cost),
    ("Maximum Cost: ${}", _max_cost),
    ("Mean Cost: ${}", _mean_cost),
    ("Median Cost ${}", _median_cost),
    ("Standard deviation of Cost: ${}", _stddev_cost),
):
    print(_template.format(_value))


# Number of houses per bedroom count.
_housedata['bedrooms'].value_counts().plot(kind='bar')
plt.title('Total number of Bedroom')
plt.xlabel('Bedrooms')
plt.ylabel('Count of Bedrooms')
plt.show()

# jointplot creates its own figure; the plt.figure(figsize=(10, 10)) that
# preceded it only produced an extra empty window.
_location_grid = sns.jointplot(x=_housedata.lat.values,
                               y=_housedata.long.values, size=10)
# Label the joint axes explicitly rather than the implicit current axes.
_location_grid.set_axis_labels('Latitude of House', 'Longitude of House',
                               fontsize=12)
plt.show()

plt.scatter(_housedata.price, _housedata.sqft_living)
plt.title("Price of House vs Square Feet of House")
plt.show()

plt.scatter(_housedata.price, _housedata.long)
plt.title("Price of House vs Location of the house area")
plt.show()

plt.scatter(_housedata.price, _housedata.lat)
Exemplo n.º 33
0
    names = ['variance','skewness','curtosis','entropy','class'])

# Quick inspection of the frame (the bare expressions only display output
# when run in a notebook cell).
data.head(3)
data.describe()
data.shape
data.isna().any()
data.dtypes
data['class'].unique()

# Class balance and per-feature distributions.
sns.countplot(x='class', data=data)
for _feature in ('curtosis', 'entropy', 'variance', 'skewness'):
    sns.violinplot(y=data[_feature])
for _feature, _shade_color in (('curtosis', "r"), ('variance', "b")):
    p1 = sns.kdeplot(data[_feature], shade=True, color=_shade_color)

# Pairwise hexbin joint plots; only the second one overrides the colour.
for _x_name, _y_name, _extra in (
    ('curtosis', 'entropy', {}),
    ('skewness', 'variance', {'color': 'skyblue'}),
    ('curtosis', 'variance', {}),
):
    sns.jointplot(x=data[_x_name], y=data[_y_name], kind='hex',
                  linewidth=2, **_extra)

# Feature matrix and (single-column frame) label.
X = data[['variance', 'skewness', 'curtosis', 'entropy']]
y = data[['class']]

from sklearn.model_selection import train_test_split # Support Vector Machine
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

from sklearn.svm import SVC
SVC()
svc = SVC()

# Creating a dictionary of parameters

parameters = {
Exemplo n.º 34
0
    def kde(self, n=0):
        """Draw a KDE joint plot of the two coordinates stored at index *n*.

        :param n: index along the second axis of ``self.samples``.
        """
        first_coord = self.samples[:, n, 0]
        second_coord = self.samples[:, n, 1]
        sns.jointplot(x=first_coord, y=second_coord, kind="kde")
def generate_plots(plot_type=""):
    r"""
    Plot how graph sizes (|V| and |E|) are distributed in each dataset split.

    The "test", "valid" and "train" splits are loaded in turn; min/mean/max
    statistics are printed per split and overall, and the requested figures
    are written under ``plots/``.

    :param plot_type: type of plot in {"histograms", "marginal_E", "marginal_V", "joint"}
    """
    assert plot_type in {"histograms", "marginal_E", "marginal_V", "joint"}

    def _save_histogram(counts, symbol, split):
        # One bin per integer value between the min and max of the counts.
        plt.hist(counts, bins=np.max(counts) - np.min(counts) + 1)
        plt.title(f"Histogram of {symbol} for {split}")
        plt.savefig(f"plots/histogram_{symbol}_{split}.png")
        plt.clf()

    all_nodes = []
    all_edges = []
    for split_name in ("test", "valid", "train"):
        dataset = ToulouseRoadNetworkDataset(split=split_name,
                                             step=0.001,
                                             max_prev_node=8)
        loader = DataLoader(dataset,
                            batch_size=1,
                            shuffle=False,
                            collate_fn=custom_collate_fn)

        split_nodes = []
        split_edges = []
        for _, _, y_adj, _, _, seq_len, _ in loader:
            split_edges.append(int(y_adj.view(-1).sum().item()))
            split_nodes.append(int(seq_len[0] - 2))

        all_edges.extend(split_edges)
        all_nodes.extend(split_nodes)
        n_nodes = np.array(split_nodes)
        n_edges = np.array(split_edges)

        print(f"{split_name} min/mean/max len nodes", np.min(n_nodes),
              np.mean(n_nodes), np.max(n_nodes))
        print(f"{split_name} min/mean/max len edges", np.min(n_edges),
              np.mean(n_edges), np.max(n_edges))

        if plot_type == "histograms":
            _save_histogram(n_nodes, "|V|", split_name)
            _save_histogram(n_edges, "|E|", split_name)
        elif plot_type == "marginal_V":
            # Every split is drawn on the same axes; saved after the loop.
            marginal_ax = sns.kdeplot(n_nodes, bw=.5, shade=True,
                                      label=split_name)
        elif plot_type == "marginal_E":
            marginal_ax = sns.kdeplot(n_edges, bw=.5, shade=True,
                                      label=split_name)
        else:
            joint_grid = sns.jointplot(np.log10(n_nodes),
                                       np.log10(n_edges),
                                       marginal_kws=dict(kernel="gau", bw=.02),
                                       kind="kde",
                                       bw=.05)
            joint_axes = joint_grid.ax_joint
            joint_axes.set_xlabel("log10 |V|", fontsize=15)
            joint_axes.set_ylabel("log10 |E|", fontsize=15)
            joint_grid.ax_marg_x.set_title(split_name, fontsize=20)
            joint_axes.set_xlim(0.6, 1.2)
            joint_axes.set_ylim(0.4, 1.2)
            joint_grid.savefig(f"plots/joint_{split_name}.png")

    tot_n_nodes = np.array(all_nodes)
    tot_n_edges = np.array(all_edges)
    print("min/mean/max len nodes", np.min(tot_n_nodes), np.mean(tot_n_nodes),
          np.max(tot_n_nodes))
    print("min/mean/max len edges\n", np.min(tot_n_edges),
          np.mean(tot_n_edges), np.max(tot_n_edges))

    # Finish and save the shared marginal figure, if one was requested.
    symbol = {"marginal_V": "|V|", "marginal_E": "|E|"}.get(plot_type)
    if symbol is not None:
        marginal_ax.set_xlabel(symbol)
        marginal_ax.set_ylabel("p(x)")
        marginal_ax.set_title(f"Distributions of {symbol}")
        marginal_ax.legend()
        marginal_ax.figure.savefig(f"plots/marginal_{symbol}.png")
        marginal_ax.figure.clf()

    print("Done!")
Exemplo n.º 36
0
# Scatter figures: raw values first, then the ranks.
createFigure(
    figure_data_without_zynex,
    'EY_ROC',
    EARNINGS_YIELD,
    'Return On Capital (%)',
    'Earnings Yield (%)',
    'ey_roc.png',
    'lower right',
    vscaling=1.2,
    hscaling=2,
)
createFigure(
    figure_data,
    'total_rank',
    'EY_rank',
    'Rank Return On Capital',
    'Rank Earnings Yield',
    'ey_roc_rank.png',
    'upper right',
    number_format='%d',
    vscaling=1.2,
    hscaling=2,
)

# Drop outliers: keep the 5%-95% quantile range of the earnings yield, then
# the 5%-95% range of ROC computed on the rows that remain.
_ey_low = df[EARNINGS_YIELD].quantile(0.05)
_ey_high = df[EARNINGS_YIELD].quantile(0.95)
df_capped = df[df[EARNINGS_YIELD].between(_ey_low, _ey_high)]
_roc_low = df_capped['ROC'].quantile(0.05)
_roc_high = df_capped['ROC'].quantile(0.95)
df_capped = df_capped[df_capped['ROC'].between(_roc_low, _roc_high)]

# Save density plot
density_grid = sb.jointplot(x=EARNINGS_YIELD, y='ROC', data=df_capped,
                            kind='kde', color="g")
density_grid.set_axis_labels('Earnings Yield (%)', 'Return On Capital (%)')
plt.tight_layout()
plt.savefig('density_plot.png', format='png')

plt.clf()

# Create industry histogram
ax = sb.countplot(x=SECTOR, data=figure_data, palette='Blues_d')
_tick_labels = ax.get_xticklabels()
ax.set_xticklabels(_tick_labels, rotation=45, ha='right')
ax.set_ylabel('Amount')
ax.set_xlabel('Industry')
plt.tight_layout()
plt.savefig('industry_histogram.png', format='png')
Exemplo n.º 37
0
#%%
# Histogram of the price
seaborn.distplot(ordis.price)

#%%
# Box plot of the price
seaborn.factorplot(x="price", data=ordis, kind="box")

#%%
# Violin plot of the price
seaborn.factorplot(x="price", data=ordis, kind="violin")

#%%
# Relation between price and the quantitative variables (speed, hd)
seaborn.factorplot(x="speed", y="price", data=ordis)
seaborn.jointplot(x="hd", y="price", data=ordis, kind="reg")

#%%
# Relation between price and the qualitative variables (ram, cd, premium, screen)
for _qualitative in ("ram", "cd", "premium", "screen"):
    seaborn.factorplot(x=_qualitative, y="price", data=ordis, kind="box")

#%%
# price ~ speed and hd: mean price per (hd bin, speed) cell
_hd_bins = pandas.cut(ordis.hd, 6, precision=0)
t = pandas.crosstab(_hd_bins,
                    ordis.speed,
                    values=ordis.price,
                    aggfunc=numpy.mean)
seaborn.heatmap(t, cmap="Blues", cbar_kws={'label': 'mean price'})
Exemplo n.º 38
0
# In[18]:

# Pairwise plots of every column, with points colored by the EV column.
sns.pairplot(data=sub_task_summary_Output, hue='EV', palette='Set1')

# In[20]:

# Simple line plot of the EV column against the frame's index.
sub_task_summary_Output.loc[:, 'EV'].plot(figsize=(20, 12))

# In[26]:

# In[65]:

# NOTE(review): jointplot opens its own figure, so this 12x8 figure looks
# like it stays empty - confirm whether it is still wanted.
plt.figure(figsize=(12, 8))

# One joint plot of EV against each index column, each in its own color.
for index_column, point_color in (('SPI', 'hotpink'),
                                  ('CPI', 'red'),
                                  ('EAC', 'blue')):
    sns.jointplot(x=index_column, y='EV', data=sub_task_summary_Output,
                  color=point_color)

#

# In[41]:

# In[55]:

# In[56]:

# In[66]:

# In[67]:
Exemplo n.º 39
0
def heatscatter_sns(x, y, figsize=(8, 8)):
    """Draw a bivariate KDE joint plot of ``x`` against ``y``.

    The seaborn theme is switched globally to the "white" style with color
    codes enabled, as before.

    :param x: values for the horizontal axis
    :param y: values for the vertical axis
    :param figsize: ``(width, height)`` of the resulting figure in inches
    :return: the seaborn grid object produced by ``sns.jointplot``
    """
    sns.set(rc={'figure.figsize': figsize})
    sns.set(style="white", color_codes=True)
    grid = sns.jointplot(x=x, y=y, kind='kde', color="skyblue")
    # jointplot sizes its own square figure from its `height` argument and
    # does not consult the figure.figsize rcParam, so the requested size has
    # to be applied to the grid's figure explicitly.
    grid.fig.set_size_inches(*figsize)
    return grid
Exemplo n.º 40
0
# Row count per country, drawn as horizontal bars on a tall figure.
plt.figure(figsize=(10, 25))
sns.countplot(y='country', data=dataset, alpha=alpha)
plt.title('Data by country')
plt.show()

# Row count per sex (male vs. female)
plt.figure(figsize=(7, 7))
sex = sns.countplot(x='sex', data=dataset)

# Correlation between the columns of the data, annotated with the values
plt.figure(figsize=(16, 7))
cor = sns.heatmap(dataset.corr(), annot=True)

# Joint KDE plot of year against suicides_no.
# NOTE(review): x/y are passed positionally and the figure size through
# `size`; newer seaborn expects x=/y= keywords and `height` - confirm which
# seaborn version this script targets before changing it.
g = sns.jointplot(dataset.year,
                  dataset.suicides_no,
                  kind="kde",
                  color="#bfa9e0",
                  size=7)
# Saves whichever figure is current at this point.
plt.savefig('graph.png')

# Suicide counts by sex, split by age group
plt.figure(figsize=(16, 7))
bar_age = sns.barplot(x='sex', y='suicides_no', hue='age', data=dataset)

# Suicide counts by sex, split by generation
plt.figure(figsize=(16, 7))
bar_gen = sns.barplot(x='sex', y='suicides_no', hue='generation', data=dataset)

cat_accord_year = sns.catplot('sex',
                              'suicides_no',
                              hue='age',
Exemplo n.º 41
0
# Build a frame from the iris feature matrix and append the class labels.
df = DataFrame(iris.data,columns = iris.feature_names)
df['target'] = iris.target
print(df)

# Data visualization
import pandas as pd
from scipy import stats,integrate
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(color_codes = True)
# Visualize the data distribution: histogram and density function.
# By default distplot() draws both the histogram and the density estimate.
sns.distplot(df['petal length (cm)'],bins = 15)

# jointplot() draws a scatter plot together with the marginal histograms
# NOTE(review): `size` is the older spelling of `height` in seaborn -
# confirm the seaborn version in use.
sns.jointplot(x = 'sepal length (cm)',y = 'sepal width (cm)',data = df,size =8)


# Grouped scatter plot
# Use seaborn.FacetGrid to mark the different classes by color
sns.FacetGrid(df,hue = 'target',size =8).map(plt.scatter,'sepal length (cm)','sepal width (cm)').add_legend()


# Hexbin plot
# NOTE(review): the return value of axes_style() is discarded here;
# presumably sns.set_style('white') or a `with` block was intended - confirm.
sns.axes_style('white')
sns.jointplot(x = 'sepal length (cm)',y = 'sepal width (cm)',data = df,kind = 'hex',color = 'r')

# Two-dimensional kernel density estimate plot
g = sns.jointplot(x = 'sepal length (cm)',y = 'sepal width (cm)',data = df,kind = 'kde',color = 'm')
# Overlay the observations as white '+' markers on the joint axes
g.plot_joint(plt.scatter,c='w',s=30,linewidth=1,marker='+')
# Distribution of age: normalized histogram without KDE, then KDE curves only.
sns.distplot(bd['age'], kde=False, norm_hist=True, bins=10)
sns.distplot(bd['age'], hist=False)
# Keep the returned axes so the figure can be saved below; `myplot` was
# otherwise not bound anywhere in the visible part of the script.
myplot = sns.distplot(bd['age'], hist=False)

myimg = myplot.get_figure()
myimg.savefig('distplot.png')

sns.kdeplot(bd['age'])  # other distribution plot, less used
sns.kdeplot(bd['age'], shade=True)  # shade area
sns.kdeplot(bd['pdays'], shade=True)

# boxplot returns a matplotlib Axes, so get_figure() is available here.
myplot = sns.boxplot(y='age', data=bd)
myimg = myplot.get_figure()
myimg.savefig('boxplot.png')

# jointplot returns a JointGrid, which has no get_figure(); the grid is
# saved through its own savefig() method instead.
myplot = sns.jointplot(x='age', y='balance', data=bd.iloc[:500, :])
myplot.savefig('jointplot.png')
myplot = sns.jointplot(x='age',
                       y='balance',
                       data=bd.iloc[:100, :],
                       kind='hex',
                       size=10)
# In the hex plot a lighter color means lower density.
help(sns.jointplot)
sns.jointplot(x='age',
              y='duration',
              data=bd.iloc[:100, :],
              kind='kde',
              size=10)
# Linear-model plot on rows 1..9 only.
myplot = sns.lmplot(x='age', y='balance', data=bd.iloc[1:10, :])
# Histogram + KDE of each of the two columns, drawn one after the other.
sns.distplot(data['x'])
sns.distplot(data['y'])

# In[9]:

# Shaded KDE curve for column 'x' and for column 'y'.
for col in 'xy':
    sns.kdeplot(data[col], shade=True)

# In[10]:

# The whole frame is passed to kdeplot in one call.
sns.kdeplot(data)

# In[12]:

# Joint KDE plot of x against y, drawn with the "white" axes style.
# NOTE(review): x, y and data are passed positionally; newer seaborn
# expects keywords - confirm the seaborn version in use.
with sns.axes_style('white'):
    sns.jointplot("x", "y", data, kind='kde')

# In[13]:

# Same pair of columns as a hexbin joint plot.
with sns.axes_style('white'):
    sns.jointplot("x", "y", data, kind='hex')

# In[14]:

# Pairwise plots of every column of the frame.
sns.pairplot(data)

# In[20]:

import plotly.graph_objs as go
import numpy as np
# 2000 draws from the standard normal distribution.
x = np.random.randn(2000)
Exemplo n.º 44
0
    5, 5, 5, 5, 5, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 1, 11, 10, 10, 10, 10,
    10, 10, 10, 8, 3, 7, 3, 2, 2, 2, 11, 7, 7, 11, 11, 9, 9, 8, 8, 8, 8, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 6, 12, 12, 12, 11, 11, 11, 9, 9, 9, 9, 9, 11, 11, 10,
    1, 12, 12, 12, 3, 2, 12, 11, 11, 11, 11, 11, 11, 11, 10, 3, 11, 11, 2, 2,
    1, 1, 1, 12, 12, 12, 12, 12, 12, 12, 6, 6
]
# Hard-coded y values for the hexbin plot below; the x list is defined
# just above this point.
y = [
    30, 29, 29, 24, 19, 11, 9, 8, 7, 3, 57, 54, 52, 34, 30, 29, 8, 1, 49, 44,
    33, 31, 29, 29, 28, 27, 2, 6, 5, 52, 41, 36, 18, 27, 26, 46, 32, 35, 33,
    15, 14, 10, 0, 51, 49, 44, 43, 28, 27, 26, 19, 16, 56, 21, 19, 16, 49, 43,
    39, 25, 23, 22, 21, 13, 23, 1, 13, 17, 59, 55, 54, 10, 59, 1, 59, 57, 27,
    25, 22, 21, 4, 49, 59, 31, 30, 5, 0, 8, 6, 0, 39, 37, 35, 31, 27, 25, 18,
    11, 9
]

# Earlier experiments with random gamma samples, kept for reference:
# print rs
# x = rs.gamma(12, size=60)
# y = 2 + rs.gamma(60,size=60)
# x = rs.gamma(2, size=1000)

# print 'y = '+ str(y)

# Hexbin joint plot of x against y, with kendalltau passed as stat_func.
# NOTE(review): `stat_func` and the `sns.plt` alias used below are not
# available in newer seaborn releases - confirm the pinned version.
graph = sns.jointplot(x, y, kind="hex", stat_func=kendalltau, color="#4CB391")

# x = np.random.normal(size=100)
# print 'x = '+ str(x)
# graph = sns.distplot(x);

# Save the figure next to the running script, as "<script path>.png".
sns.plt.savefig(__main__.__file__ + ".png")
# graph.pyplot.show()
sns.plt.show()
Exemplo n.º 45
0
print("Kurtosis:")
print(data_set['T_MAX'].kurtosis())

## Graph T MAX / CO & O3
# Rows are sorted by T_MAX (ties broken by the pollutant) so the line plot
# runs left to right.
df = data_set.sort_values(['T_MAX', 'CO'], ascending=True)
plt.plot(df['T_MAX'], df['CO'])
plt.title("La concentración de CO frente a la temperatura máxima")
plt.show()

df = data_set.sort_values(['T_MAX', 'O3'], ascending=True)
plt.plot(df['T_MAX'], df['O3'])
plt.title("La concentración de Ozono frente a la temperatura máxima")
plt.show()

## Joint regression plots of T_MAX against each pollutant
# x/y are passed by keyword: current seaborn takes `data` as the first
# positional parameter, so positional x/y no longer work; keywords are
# accepted by old and new releases alike.
sns.jointplot(x=data_set['T_MAX'], y=data_set['CO'], kind="reg")
plt.show()
plt.close()
sns.jointplot(x=data_set['T_MAX'], y=data_set['O3'], kind="reg")
plt.show()
plt.close()

## Correlation Matrix
data_set_corr = data_set
data_set_corr['Mes'] = data_set_corr['Mes'].map({
    'ENE': 1,
    'FEB': 2,
    'MAR': 3,
    'ABR': 4,
    'MAY': 5,
    'JUN': 6,
Exemplo n.º 46
0
# Marginal histogram axes; the tick labels facing the scatter axes are hidden.
ax_histx = plt.axes(rect_histx)
ax_histx.tick_params(direction='in', labelbottom=False)
ax_histy = plt.axes(rect_histy)
ax_histy.tick_params(direction='in', labelleft=False)

# the scatter plot:
ax_scatter.scatter(x, y)

# now determine nice limits by hand: the largest absolute value in x or y,
# rounded up to a whole number of bins, gives symmetric limits.
binwidth = 0.25
lim = np.ceil(np.abs([x, y]).max() / binwidth) * binwidth
ax_scatter.set_xlim((-lim, lim))
ax_scatter.set_ylim((-lim, lim))

# Both histograms share the same bin edges.
bins = np.arange(-lim, lim + binwidth, binwidth)
ax_histx.hist(x, bins=bins)
ax_histy.hist(y, bins=bins, orientation='horizontal')

# Align the histogram axes with the scatter axes.
ax_histx.set_xlim(ax_scatter.get_xlim())
ax_histy.set_ylim(ax_scatter.get_ylim())

plt.show()

# Seaborn version
import numpy as np
import seaborn as sns
#sns.set(style="ticks")

# x/y are passed by keyword: current seaborn takes `data` as the first
# positional parameter, so positional x/y no longer work; keywords are
# accepted by old and new releases alike.
sns.jointplot(x=x, y=y)
sns.jointplot(x=x, y=y, kind="hex", color="#4CB391")
# Visualization
# Reset matplotlib's rcParams to their built-in defaults.
matplotlib.rcdefaults()

# NOTE(review): plt.show() is handed the return value of the plotting call
# throughout this section; presumably only the side effect of showing the
# current figures is wanted - confirm.
plt.show(df.plot(kind = 'box'))

# NOTE(review): `display.mpl_style` is an option of old pandas releases;
# confirm it still exists in the pandas version in use.
pd.options.display.mpl_style = 'default' # intended to set the plotting display theme
df.plot(kind = 'box')

sns.boxplot(data=df,width=0.5)
sns.violinplot(df,width=3.5)

# Histogram with rug marks for the column at position 2.
# NOTE(review): `.ix` is an indexer of old pandas releases - confirm.
plt.show(sns.distplot(df.ix[:,2], rug = True, bins = 15))

# Joint KDE of the columns at positions 1 and 2, drawn with white axes style.
with sns.axes_style("white"):
    plt.show(sns.jointplot(df.ix[:,1],df.ix[:,2], kind = "kde"))

# Linear-model plot of the "Benguet" column against the "Ifugao" column.
plt.show(sns.lmplot("Benguet","Ifugao",df))

#Creating custom function
def add_2int(x, y):
    """Return the result of ``x + y``."""
    total = x + y
    return total
print(add_2int(2,2))

# an algorithm example
def case(n=10,mu=3,sigma=np.sqrt(5),p=0.025,rep=100):
    m=np.zeros((rep,4))

    for i in range(rep):
        norm = np.random.normal(loc = mu, scale = sigma, size = n)
        xbar = np.mean(norm)
Exemplo n.º 48
0
df.head()

import matplotlib.pyplot as plt
import seaborn as sns

# Titles with the highest mean rating, then titles with the most ratings.
df.groupby('title')['rating'].mean().sort_values(ascending=False).head()
df.groupby('title')['rating'].count().sort_values(ascending=False).head()

# One row per title: mean rating ...
ratings = pd.DataFrame(df.groupby('title')['rating'].mean())

# ... plus the number of ratings each title received.
ratings['numRatings'] = pd.DataFrame(df.groupby('title')['rating'].count())
ratings.head()

# Distributions of the rating counts and of the mean ratings.
ratings['numRatings'].hist(bins=100, figsize=(10, 6))
ratings['rating'].hist(bins=100, figsize=(10, 6))
sns.jointplot(x='rating', y='numRatings', data=ratings, alpha=0.6)
# as the number of ratings goes up, so does the average rating

# User x title matrix of ratings; each cell is the rating given by that user
# to that title.
moviemat = df.pivot_table(index='user_id', columns='title', values='rating')

moviemat.head()

ratings.sort_values('numRatings', ascending=False).head(10)

# Per-user rating columns for the two reference movies.
starwars_user_ratings = moviemat['Star Wars (1977)']
liarliar_user_ratings = moviemat['Liar Liar (1997)']

# Correlate every title's ratings with the reference movie's ratings; this
# shows how people who have seen the reference movie rate other movies.
similar_to_starwars = moviemat.corrwith(starwars_user_ratings)
similar_to_liarliar = moviemat.corrwith(liarliar_user_ratings)
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Leftover from an earlier residual-plot exercise, kept for reference:
#sns.residplot(x='age',y='fare',data=tips,color='indianred')

# Load the data set from auto.csv in the working directory.
auto = pd.read_csv('auto.csv')


# Generate a joint plot of 'hp' and 'mpg'
sns.jointplot(x = 'hp', y = 'mpg', data = auto)

# Display the plot
plt.show()
Exemplo n.º 50
0
# Figure 2: observed dw_solar and clear-sky GHI plotted against j_day.
axes1 = fig.add_axes([0.1, 0.1, 0.8, 0.8])
axes1.scatter(j_day, dw_solar_everyday, label='Observed dw_solar', color='red')
axes1.scatter(j_day, ghi_everyday, label='Clear Sky GHI', color='green')

axes1.set_xlabel('Days')
axes1.set_ylabel('Solar Irradiance (Watts /m^2)')
axes1.set_title('Solar Irradiance - Test Year 2009')
axes1.legend(loc='best')

# The output path is built as <results dir>/<test_location>/<test_year>Figure 2.jpg
fig.savefig('RNN Paper Results/Exp2_1/' + test_location + '/' + test_year +
            'Figure 2.jpg',
            bbox_inches='tight')

# In[525]:

# Figure 3: regression joint plot of observed dw_solar against clear-sky GHI.
# NOTE(review): the labels are set through pyplot's current axes; verify they
# land on the joint axes in the seaborn version in use.
sns.jointplot(x=dw_solar_everyday, y=ghi_everyday, kind='reg')
plt.xlabel('Observed global downwelling solar (Watts/m^2)')
plt.ylabel('Clear Sky GHI (Watts/m^2)')
# NOTE(review): unlike Figure 2 this file name has no extension, so the
# format falls back to matplotlib's default - confirm that is intended.
plt.savefig('RNN Paper Results/Exp2_1/' + test_location + '/' + test_year +
            'Figure 3',
            bbox_inches='tight')

# ### making the Kt (clear sky index at time t) column by first removing rows with ghi==0

# In[526]:

if run_train:
    # TRAIN dataset
    df_train = df_train[df_train['ghi'] != 0]
    df_train['Kt'] = df_train['dw_solar'] / df_train['ghi']
    df_train.reset_index(inplace=True)
Exemplo n.º 51
0
#mu = np.array([-0.5, -2.5])
size = 1000000 # at 10 million my RAM is overloaded

### If a vector X is normally distributed, then exp(X) is lognormally
### distributed; the mean and covariance of X are the parameters of that
### lognormal distribution (they are not the mean and variance of exp(X)).

log_data = np.random.multivariate_normal(mu,cov, size=size)
level_data = np.exp(log_data)
# Column 1 is capital (k), column 0 is productivity (z); the ln* names hold
# the same columns before exponentiation.
k = level_data[:,1]
z = level_data[:,0]
lnk = log_data[:,1]
lnz = log_data[:,0]


### Plotting the joint density functions for levels and for logs
## First levels
# x/y are passed by keyword: current seaborn takes `data` as the first
# positional parameter, so positional x/y no longer work; keywords are
# accepted by old and new releases alike.
sns.jointplot(x=k, y=z, kind="hex").set_axis_labels("Capital", "Productivity")
plt.show()

## Then logs
sns.jointplot(x=lnk, y=lnz, kind="hex").set_axis_labels("Log Capital", "Log Productivity")
plt.show()
'''
## Plotting the raw joint density of lognormal variables does not make much sense as in 10,000,000 observations there will be massive outliers
### I atempt to get rid of these outliers for plotting purposes

meank = np.mean(k)
sdk = np.std(k)
final_k = [x for x in k if (x > meank - 2 * sdk)]
final_k = [x for x in final_k if (x < meank + 2 * sdk)]


meanz = np.mean(z)
Exemplo n.º 52
0
def viz_cont_cont(df, features, target):
    """Draw one seaborn joint plot per feature, each against ``target``.

    :param df: data passed to ``sns.jointplot`` as ``data``
    :param features: iterable of names used as the x variable, one plot each
    :param target: name used as the y variable in every plot
    """
    for column_name in features:
        plot_args = {"x": column_name, "y": target, "data": df}
        sns.jointplot(**plot_args)
# explore popularity distribution
merged_df.popularity.plot.hist(bins=50, color='green')
# explore vote_average distribution
# appear to be almost normal distribution
merged_df.vote_average.plot.hist(bins=50, color='red')
# to fix popularity, we will remove vote_count under 10 to prevent bias
merged_df = merged_df[~(merged_df.vote_count < 10)]
# replot
merged_df.popularity.plot.hist(bins=50, color='blue',
                               alpha=0.5)  # appear to be better
# plot scatter and find r2 for popularity versus domestic_gross columns
# before plot, we want to convert the scale into log10 and need to remove 0s
# (log10 of 0 is not finite)
merged_df = merged_df[~(merged_df.domestic_gross == 0)]
merged_df = merged_df[~(merged_df.worldwide_gross == 0)]
# Persist the filtered frame before plotting.
merged_df.to_pickle('budget_popularity.pkl')
# Regression joint plots of each candidate metric against the log10 of each
# gross column, in the order popularity -> vote_average and, within each,
# domestic -> worldwide. hf.r2 is passed as the statistic function.
for metric_column in ('popularity', 'vote_average'):
    for gross_column in ('domestic_gross', 'worldwide_gross'):
        log_gross = np.log10(merged_df[gross_column])
        sns.jointplot(merged_df[metric_column],
                      log_gross,
                      kind="reg",
                      stat_func=hf.r2)
# The R2 for popularity is 0.3 while for vote_average it is 0.051, so
# popularity is used as the metric to estimate gross income.
# Popularity is then used to estimate how well genres perform on the tmdb
# data frame.
Exemplo n.º 54
0
# For every (feature, plottable) pair: object-typed features get a bar chart
# of their value counts; all others get a hexbin joint plot clipped to the
# 1st-99th percentile range, with a plain histogram as fallback.
for a, b in product(features, plottables):
    msg('Making %s %s' % (a, b))
    x = with_elo[a]
    y = with_elo[b]
    msg('type = %s' % x.dtype)
    if x.dtype == 'object':
        plt.figure()
        x.value_counts().plot(kind='bar')
        plt.savefig('/data/' + a + '_hist.png')
        plt.close('all')
    else:
        try:
            xlim = tuple(np.percentile(x, [1, 99]))
            ylim = tuple(np.percentile(y, [1, 99]))
            with sns.axes_style("white"):
                # Keyword x/y work on both old and new seaborn releases.
                sns.jointplot(x=x, y=y, kind="hex", xlim=xlim, ylim=ylim)
            plt.savefig('/data/scatter_' + a + '_' + b + '.png')
            plt.close('all')
        except Exception as exc:
            # Best-effort fallback, but no longer a bare `except:` - that
            # also trapped KeyboardInterrupt/SystemExit and hid the cause.
            msg('Joint plot failed for %s %s: %s' % (a, b, exc))
            plt.figure()
            x.plot(kind='hist')
            plt.savefig('/data/' + a + '_hist.png')
            plt.close('all')

do_indivs = True
if do_indivs:
    for a, b in product(features, plottables):
        msg('Making %s %s' % (a, b))
Exemplo n.º 55
0
# Output directory for the figures and one file-name prefix per limits entry.
file_out_figures = 'C:/Users/lalc/Documents/Old Documents folder/PhD/Meetings/July 2020/'
file = ['U','UN','N','SN']
# Each entry is an open interval [low, high] used to select rows below.
# The first five-interval list is immediately replaced by the four-interval
# one, so only four classes are plotted.
limits = [[-np.inf,-.1], [-.1,-.01], [-.01,.01], [.01,.21], [.21,np.inf]]
limits = [[-np.inf,-.1], [-.1,-.01], [-.01,.01], [.01,.21]]

# Keep only rows whose relscan value exceeds 0.25.
relind = L30min1.relscan>.25
# Column of Ri1 used for the interval test (second to last).
j = -2
for i,l in enumerate(limits):
    # Rows whose Ri1 value lies strictly inside the current interval.
    stabind = ((Ri1[:,j]>l[0]) & (Ri1[:,j]<l[1]))
    # Rename the first six columns to LaTeX labels; the rest keep their names.
    # This is repeated on every iteration with the same result.
    cols = np.r_[['$L_{u_1,x_1}$', '$L_{u_1,x_2}$','$L_{v_1,x_1}$', '$L_{v_1,x_2}$','$L_{h,x_1}$', '$L_{h,x_2}$'], L30min1.columns [6:]]
    L30min1.columns = cols
    # Upper axis limits shared by both axes.
    xlim = 5*200
    ylim = 5*200
    # Joint KDE plot of the two L_h columns for the selected rows.
    g = sns.jointplot(x ='$L_{h,x_1}$', y = '$L_{h,x_2}$', data=L30min1.loc[relind & stabind & ind1], 
                            height = 8, kind="kde", cmap="jet", xlim = (0,xlim), ylim = (0,ylim),
                            color='k')#,cbar=True, cbar_kws={"format": formatter, "label": '$Density$'})
    g.set_axis_labels('$L_{h,x_1}$', '$L_{h,x_2}$', fontsize = 24)
    # Dashed 1:1 reference line.
    g.ax_joint.plot([0,xlim],[0,ylim],'--k', linewidth = 2)
    # Overlay the individual observations as semi-transparent dots.
    g.ax_joint.plot(L30min1.loc[relind & stabind & ind1]['$L_{h,x_1}$'].values,L30min1.loc[relind & stabind & ind1]['$L_{h,x_2}$'].values,'o', color = 'k', alpha=.2)
    # Annotate the panel with the interval bounds of this class.
    g.ax_joint.text(100, 800,'$'+'%.2f' % l[0] +'<Ri_f<'+'%.2f' % l[1] +'$',fontsize=30,color='r')
    plt.tight_layout()
    plt.savefig(file_out_figures+file[i]+'_phase_1.png')


file = ['U','UN','N','SN','VS']       
relind = L30min2.relscan>.25
for i,l in enumerate(limits):
    stabind = ((Ri2[:,-2]>l[0]) & (Ri2[:,-2]<l[1]))
    cols = np.r_[['$L_{u_1,x_1}$', '$L_{u_1,x_2}$','$L_{v_1,x_1}$', '$L_{v_1,x_2}$','$L_{h,x_1}$', '$L_{h,x_2}$'], L30min2.columns [6:]]
    L30min2.columns = cols
Exemplo n.º 56
0
# ...that runs through the values; we turn it off like this

sns.distplot(tips['total_bill'],kde=False)
plt.show()

# we can change the number of bins (the bars) with the bins
# parameter, simply by passing an int; be careful with the
# bin size

sns.distplot(tips['total_bill'],kde=False,bins=40)
plt.show()

# there is a method that compares two columns of a
# dataset

sns.jointplot(x='total_bill',y='tip',data=tips)
plt.show()

# this can be drawn in several ways with the kind
# parameter, using: hex, reg, kde

# this other method shows a grid of plots comparing every
# column against every other one; where a column is compared with
# itself it shows a histogram, otherwise it is a jointplot()

sns.pairplot(tips)
plt.show()

# if we want to split the information of each plot by another
# column, for example by sex, we use the hue parameter; it is
# given a categorical column, not one that holds a value, so
Exemplo n.º 57
0
Next compare the distributions of the positive and negative examples over a few features. 
Good questions to ask yourself at this point are:

	* Do these distributions make sense?
		+ Yes. You've normalized the input and these are mostly concentrated in the +/- 2 range.
	* Can you see the difference between the ditributions?
		+ Yes the positive examples contain a much higher rate of extreme values.
-----------------------------------------------------------------------------------------
'''
# Split the training features into positive and negative examples using the
# boolean label mask, keeping the column names of train_df.
pos_df = pd.DataFrame(train_features[ bool_train_labels], columns = train_df.columns)
neg_df = pd.DataFrame(train_features[~bool_train_labels], columns = train_df.columns)

# Hexbin joint plot of V5 against V6 for the positive examples.
# x/y are passed by keyword: current seaborn takes `data` as the first
# positional parameter, so positional x/y no longer work; keywords are
# accepted by old and new releases alike.
sns.jointplot(
    x=pos_df['V5'],
    y=pos_df['V6'],
    kind='hex',
    xlim = (-5,5),
    ylim = (-5,5)
)

plt.suptitle("Positive distribution")

# Same plot and axis limits for the negative examples.
sns.jointplot(
    x=neg_df['V5'],
    y=neg_df['V6'],
    kind='hex',
    xlim = (-5,5),
    ylim = (-5,5)
)

_ = plt.suptitle("Negative distribution")

# Histogram of petal length (KDE curve switched off)
sns.distplot(a = iris_data['Petal Length (cm)'], kde=False)



# Kernel Density Estimate (kde)
# This is the smoothed histogram

# kde plot of petal length with the area under the curve shaded
sns.kdeplot(data=iris_data['Petal Length (cm)'], shade=True)


# We can create a two-dimensional kde plot
sns.jointplot(x=iris_data['Petal Length (cm)'],
              y=iris_data['Sepal Width (cm)'], kind='kde')








# Let's split the data to understand the difference between species:
# one CSV file per species, each indexed by its "Id" column.

iris_set_data = pd.read_csv('data/iris_setosa.csv', index_col="Id")
iris_ver_data = pd.read_csv('data/iris_versicolor.csv', index_col="Id")
iris_vir_data = pd.read_csv('data/iris_virginica.csv', index_col="Id")

Exemplo n.º 59
0
def explore_global_plot(data, label='label', n_feats=50, id=None, task='classification'):
    '''
    Show a sequence of exploratory plots for a labelled data set: a 2-D
    projection of the rows (classification only), a correlation heatmap,
    printed outlier rows, and missing-value pattern plots.

    :param data: DataFrame
    :param label: label column name in the data
    :param n_feats: number of leading feature columns used for the
        missing-value plots
    :param id: optional name of an identifier column; it is checked for
        duplicated values and then dropped from ``data`` in place
    :param task: regression or classification
    :return: None
    '''
    columns = data.columns.tolist()
    columns.remove(label)

    if id is not None:
        # The duplicate check has to look at the values of the id column;
        # `columns` is only a list of column names.
        if data[id].duplicated().sum():
            print('{} is duplicated !!!'.format(id))

        columns.remove(id)
        data.drop(id, axis=1, inplace=True)

    # Integer and float columns count as numeric, everything else as
    # categorical. Both lists follow the column order of `data` so the
    # plots below are laid out the same way on every run.
    numeric_names = [
        name for name, dtype in zip(columns, data[columns].dtypes)
        if any([ptypes.is_integer_dtype(dtype),
                ptypes.is_int64_dtype(dtype),
                ptypes.is_float_dtype(dtype)])
    ]
    numeric_set = set(numeric_names)
    category_names = [name for name in columns if name not in numeric_set]

    if task == 'classification':
        if category_names:
            # data distribution for each class (mixed numeric/categorical
            # features -> FAMD projection onto two components)
            new_data = data.dropna(axis=0)
            famd = prince.FAMD(
                n_components=2,
                n_iter=3,
                copy=True,
                check_input=True,
                engine='auto',
                random_state=42
            )
            famd = famd.fit(new_data[columns])
            ax = famd.plot_row_coordinates(
                new_data,
                ax=None,
                x_component=0,
                y_component=1,
                labels=new_data.index,
                color_labels=['{}'.format(t) for t in new_data[label]],
                ellipse_outline=False,
                ellipse_fill=True,
                show_points=True
            )
            plt.show()
        else:
            # Numeric features only -> plain PCA projection colored by label.
            new_data = data.dropna(axis=0)
            pca = PCA(n_components=2, random_state=seed)
            X_pca = pca.fit_transform(new_data[columns])
            sns.scatterplot(x=X_pca[:, 0], y=X_pca[:, 1], hue=label, data=new_data)
            plt.show()

    # sort features for correlation plot
    sorted_feat_name = numeric_names
    if len(numeric_names) > 6:
        n_clusters = 3
        new_data = data[[label] + numeric_names].dropna(axis=0)
        new_data_feat = new_data[numeric_names]
        new_data_stand = StandardScaler().fit_transform(new_data_feat)
        kmean_init = KMeans(n_clusters=n_clusters, random_state=seed)
        # Cluster the features, not the rows: KMeans needs one row per
        # feature, i.e. the transpose. A reshape to the same shape would mix
        # values of different features within each row.
        kmean_init.fit(new_data_stand.T)
        # Group features of the same cluster next to each other.
        sorted_feat = sorted(zip(numeric_names, kmean_init.labels_), key=lambda x: x[1])
        sorted_feat_name = [i[0] for i in sorted_feat]

    # correlation plot for all features
    sns.heatmap(data[[label] + sorted_feat_name + category_names].corr())
    plt.show()

    # outlier detection just for numeric features
    outlier = data[numeric_names].apply(mad_based_outlier)
    for i, column in enumerate(outlier.columns):
        print('outlier:\n {}'.format(data[[column]][outlier.iloc[:, i]]))

    # missing value pattern plot for all features
    msno.matrix(data[columns[:n_feats]])
    plt.show()

    msno.bar(data[columns[:n_feats]])
    plt.show()

    # Number of missing attributes per row, sorted ascending and plotted
    # against the row's rank in that ordering.
    miss_data = data[columns[:n_feats]].isnull().sum(axis=1)
    miss_data = miss_data.to_frame()
    miss_data.columns = ['number_of_missing_attributes']
    miss_data.sort_values('number_of_missing_attributes', inplace=True)
    miss_data['index'] = list(range(0, miss_data.shape[0]))
    sns.jointplot(x="index", y="number_of_missing_attributes", data=miss_data)
    plt.show()
Exemplo n.º 60
0
def analyze_zN(z, outdir, vg, skip_umap=False, num_pcs=2, num_ksamples=20):
    zdim = z.shape[1]

    # Principal component analysis
    log('Perfoming principal component analysis...')
    pc, pca = analysis.run_pca(z)  
    log('Generating volumes...')
    for i in range(num_pcs):
        start, end = np.percentile(pc[:,i],(5,95))
        z_pc = analysis.get_pc_traj(pca, z.shape[1], 10, i+1, start, end)
        vg.gen_volumes(f'{outdir}/pc{i+1}', z_pc)

    # kmeans clustering
    log('K-means clustering...')
    K = num_ksamples
    kmeans_labels, centers = analysis.cluster_kmeans(z, K)
    centers, centers_ind = analysis.get_nearest_point(z, centers)
    if not os.path.exists(f'{outdir}/kmeans{K}'): 
        os.mkdir(f'{outdir}/kmeans{K}')
    utils.save_pkl(kmeans_labels, f'{outdir}/kmeans{K}/labels.pkl')
    np.savetxt(f'{outdir}/kmeans{K}/centers.txt', centers)
    np.savetxt(f'{outdir}/kmeans{K}/centers_ind.txt', centers_ind, fmt='%d')
    log('Generating volumes...')
    vg.gen_volumes(f'{outdir}/kmeans{K}', centers)

    # UMAP -- slow step
    if zdim > 2 and not skip_umap:
        log('Running UMAP...')
        umap_emb = analysis.run_umap(z)
        utils.save_pkl(umap_emb, f'{outdir}/umap.pkl')

    # Make some plots
    log('Generating plots...')
    plt.figure(1)
    g = sns.jointplot(x=pc[:,0], y=pc[:,1], alpha=.1, s=2)
    g.set_axis_labels('PC1','PC2')
    plt.tight_layout()
    plt.savefig(f'{outdir}/z_pca.png')
    
    plt.figure(2)
    g = sns.jointplot(x=pc[:,0], y=pc[:,1], kind='hex')
    g.set_axis_labels('PC1','PC2')
    plt.tight_layout()
    plt.savefig(f'{outdir}/z_pca_hexbin.png')

    if zdim > 2 and not skip_umap:
        plt.figure(3)
        g = sns.jointplot(x=umap_emb[:,0], y=umap_emb[:,1], alpha=.1, s=2)
        g.set_axis_labels('UMAP1','UMAP2')
        plt.tight_layout()
        plt.savefig(f'{outdir}/umap.png')

        plt.figure(4)
        g = sns.jointplot(x=umap_emb[:,0], y=umap_emb[:,1], kind='hex')
        g.set_axis_labels('UMAP1','UMAP2')
        plt.tight_layout()
        plt.savefig(f'{outdir}/umap_hexbin.png')

    analysis.scatter_annotate(pc[:,0], pc[:,1], centers_ind=centers_ind, annotate=True)
    plt.xlabel('PC1')
    plt.ylabel('PC2')
    plt.savefig(f'{outdir}/kmeans{K}/z_pca.png')

    g = analysis.scatter_annotate_hex(pc[:,0], pc[:,1], centers_ind=centers_ind, annotate=True)
    g.set_axis_labels('PC1','PC2')
    plt.tight_layout()
    plt.savefig(f'{outdir}/kmeans{K}/z_pca_hex.png')

    if zdim > 2 and not skip_umap:
        analysis.scatter_annotate(umap_emb[:,0], umap_emb[:,1], centers_ind=centers_ind, annotate=True)
        plt.xlabel('UMAP1')
        plt.ylabel('UMAP2')
        plt.savefig(f'{outdir}/kmeans{K}/umap.png')

        g = analysis.scatter_annotate_hex(umap_emb[:,0], umap_emb[:,1], centers_ind=centers_ind, annotate=True)
        g.set_axis_labels('UMAP1','UMAP2')
        plt.tight_layout()
        plt.savefig(f'{outdir}/kmeans{K}/umap_hex.png')

    for i in range(num_pcs):
        if zdim > 2 and not skip_umap:
            analysis.scatter_color(umap_emb[:,0], umap_emb[:,1], pc[:,i], label=f'PC{i+1}')
            plt.xlabel('UMAP1')
            plt.ylabel('UMAP2')
            plt.tight_layout()
            plt.savefig(f'{outdir}/pc{i+1}/umap.png')