Example #1
def plot_bourgdata(N1,N2):
	A=TRICLAIRModele()
	Tb15 = A.get_data_triathlon(link='/triathlon-bourg-resultats-1996.htm',year=2015)
	Tb14 = A.get_data_triathlon(link='/triathlon-bourg-resultats-1715.htm',year=2014)	
	S15_ = map(lambda x: x.total_seconds()/60,Tb15['Scratch'].dropna())
	S14_ = map(lambda x: x.total_seconds()/60,Tb14['Scratch'].dropna())
	
	S15 = S15_[N1:N2]
	S14 = S14_[N1:N2]

	(mu14, sigma14) = norm.fit(S14)
	(mu15, sigma15) = norm.fit(S15)

	N_BINS = 50
	fig = plt.figure()
	ax = fig.add_subplot(1, 1, 1)
	n, bins, patches = ax.hist(S14, N_BINS,normed=1, facecolor='red', alpha=0.5,label=r'$\mathrm{2014:}\ \mu=%.3f,\ \sigma=%.3f$' %(mu14, sigma14))
	y = mlab.normpdf( bins, mu14, sigma14)
	l = ax.plot(bins, y, 'r-', linewidth=4)
	n, bins, patches = ax.hist(S15, N_BINS, normed=1, facecolor='green', alpha=0.5,label=r'$\mathrm{ 2015:}\ \mu=%.3f,\ \sigma=%.3f$' %(mu15, sigma15))
	y = mlab.normpdf( bins, mu15, sigma15)
	l = ax.plot(bins, y, 'g-', linewidth=4)

	fig.tight_layout()
	ax.set_xlabel('Scratch Time (minutes)')
	ax.set_ylabel('Number of athletes per scratch time (normalized)')
	ax.legend(loc='best', fancybox=True, framealpha=0.5)
	ax.set_title(r'$\mathrm{Athletes\ from\ rank\ } %d \mathrm{\ to\ } %d$' %(N1, N2))

	plt.show()
	
Example #2
def get_sent_similarity(user_data):
	
	scores=[]

	#=====[ Creates counts for each sentiment score in 21 buckets of width 0.1 from -1 to 1  ]=====
	for data in user_data:
		user_score = [0]*21
		for tweet in data:
			score = int(float("%.1f" % tweet['score'])*10+10)
			user_score[score] += 1
		scores.append(user_score)

	#=====[ Forms normalized probability distributions for each user's sentiments  ]=====
	x = np.linspace(-1, 1, 100)
	
	mu, std = norm.fit(scores[0])
	p = norm.pdf(x, mu, std)
	mu, std = norm.fit(scores[1])
	p2 = norm.pdf(x,mu,std)
	
	#=====[ Takes Kullback-Leibler Divergence between probability distributions  ]=====
	similarity = float("%.5f" % scipy.stats.entropy(p,p2))

	#=====[ Converts similarity score to a percentage from 10 - 90 to display on compatibility spectrum  ]=====
	if similarity < 0.003: 
		return 90
	elif similarity > 0.07:
		return 10
	else:
		return int(10 + ((similarity*100)-1)/6.7*80)

Example #3
def compare_dlospeak_fit_test_to_normfit(): 
    """ Kind of stupid test to compare the dlospeak_fit function to 
    scipy.stat.norm.fit. Obviously the values for sigma are not the
    same because sigma =/= standard deviation of dLOS peak values 
    because dLOS peak is not a Gaussian. It has heavier tails. 

    
    Also tests that normlizating the dLOS distribution does NOT 
    change the sigma or fpeak fit values! MPfit does a good job. 

    """

    cat_corr = { 
            'catalog': {'name': 'nseries', 'n_mock': 1}, 
            'correction': {'name': 'upweight'}
            }

    dlosclass = Dlos(cat_corr)
    dlosclass.read() 

    print dlospeak_fit_test(dlosclass.dlos, fit = 'gauss', peak_range = [-15.0, 15.0])
    print dlospeak_fit_test(dlosclass.dlos, fit = 'gauss', peak_range = [-15.0, 15.0], normed=True)
    
    inpeak = np.where(
            (dlosclass.dlos < 15.0) & 
            (dlosclass.dlos > -15.0)
            ) 
    print norm.fit(dlosclass.dlos[inpeak])
    print np.std(dlosclass.dlos[inpeak])

    return None 
Example #4
def plotter( fdict ):
    """ Go """
    pgconn = psycopg2.connect(database='coop', host='iemdb', user='******')
    cursor = pgconn.cursor(cursor_factory=psycopg2.extras.DictCursor)

    month = int(fdict.get('month', 10))
    day = int(fdict.get('day', 7))
    state = fdict.get('state', 'IA')
    table = "alldata_%s" % (state,)
    cursor.execute("""
    SELECT high, low from """+table+""" where sday = %s and
    high is not null and low is not null
     """, ("%02i%02i" % (month, day),))
    highs = []
    lows = []
    for row in cursor:
        highs.append(row[0])
        lows.append(row[1])
    highs = np.array(highs)
    lows = np.array(lows)
    
    (fig, ax) = plt.subplots(1,1)

    ax.hist(highs, bins=(np.max(highs)-np.min(highs)), 
                              histtype='step', normed=True,
            color='r',  zorder=1)
    mu, std = norm.fit(highs)
    xmin, xmax = plt.xlim()
    x = np.linspace(xmin, xmax, 100)
    p = norm.pdf(x, mu, std)
    ax.plot(x, p, 'r--', linewidth=2)

    ax.text(0.99, 0.99, "High Temp\n$\mu$ = %.1f$^\circ$F\n$\sigma$ = %.2f" % (
                                                    mu, std),
            va='top', ha='right', color='r',
            transform=ax.transAxes)

    ax.hist(lows, bins=(np.max(highs)-np.min(highs)), 
                              histtype='step', normed=True,
            color='b',  zorder=1)
    mu, std = norm.fit(lows)
    xmin, xmax = plt.xlim()
    x = np.linspace(xmin, xmax, 100)
    p = norm.pdf(x, mu, std)
    ax.plot(x, p, 'b--', linewidth=2)
    
    ts = datetime.datetime(2000, month, day)
    ax.set_title("%s %s Temperature Distribution" % (STATES[state],
                                                     ts.strftime("%d %B")))

    ax.text(0.01, 0.99, "Low Temp\n$\mu$ = %.1f$^\circ$F\n$\sigma$ = %.2f" % (
                                                    mu, std),
            va='top', ha='left', color='b',
            transform=ax.transAxes)
    ax.grid(True)
    ax.set_xlabel("Temperature $^\circ$F")
    ax.set_ylabel("Probability")
    
    return fig
Example #5
 def getVelStat(self):
     """
     this method calculates the data's ensamble mean and standard
     deviation values and assigns them to new atribtes
     of the instance vec.
     """
     u,v = self.u.flatten(), self.v.flatten()
     self.Umean, self.Ustd = norm.fit(u)
     self.Vmean, self.Vstd = norm.fit(v)
Example #6
def create_scatter_hist(trans_data,sigma): 
    nullfmt   = NullFormatter()         # no labels
    left, width = 0.1, 0.65
    bottom, height = 0.1, 0.65
    bottom_h = left_h = left+width+0.02
    rect_scatter = [left, bottom, width, height]
    rect_histx = [left, bottom_h, width, 0.2]
    rect_histy = [left_h, bottom, 0.2, height]
    fig = plt.figure(1,figsize=(8,8))
    axScatter = fig.add_subplot(223, position=rect_scatter)
    plt.xlabel(r'$log_{10}(\eta_{\nu})$', fontsize=16)
    plt.ylabel(r'$log_{10}(V_{\nu})$', fontsize=16)
    axHistx=fig.add_subplot(221, position=rect_histx)
    axHisty=fig.add_subplot(224, position=rect_histy)
    axHistx.xaxis.set_major_formatter(nullfmt)
    axHisty.yaxis.set_major_formatter(nullfmt)

    col=['r','b','g','y']
    for i in range(len(frequencies)):
        xdata=[np.log10(trans_data[n][1]) for n in range(len(trans_data)) if trans_data[n][6]==frequencies[i] if trans_data[n][1] > 0 if trans_data[n][3] > 0]
        ydata=[np.log10(trans_data[n][3]) for n in range(len(trans_data)) if trans_data[n][6]==frequencies[i] if trans_data[n][1] > 0 if trans_data[n][3] > 0]
        axScatter.scatter(xdata, ydata,color=col[i], s=5.)
    axScatter.legend(frequencies,loc=4)
    x=[np.log10(trans_data[n][1]) for n in range(len(trans_data)) if trans_data[n][1] > 0 if trans_data[n][3] > 0]
    y=[np.log10(trans_data[n][3]) for n in range(len(trans_data)) if trans_data[n][1] > 0 if trans_data[n][3] > 0]
    bins = 50
    param=norm.fit(x)
    range_x=np.linspace(min(x),max(x),1000)
    fit=norm.pdf(range_x,loc=param[0],scale=param[1])
    sigcutx = param[1]*sigma+param[0]
    axHistx.axvline(x=sigcutx, linewidth=2, color='k', linestyle='--')
    axHistx.plot(range_x,fit, 'k:', linewidth=2)
    param2=norm.fit(y)
    range_y=np.linspace(min(y),max(y),1000)
    fit2=norm.pdf(range_y,loc=param2[0],scale=param2[1])
    sigcuty = param2[1]*sigma+param2[0]
    axHisty.axhline(y=sigcuty, linewidth=2, color='k', linestyle='--')
    axScatter.axhline(y=sigcuty, linewidth=2, color='k', linestyle='--')
    axScatter.axvline(x=sigcutx, linewidth=2, color='k', linestyle='--')
    axHisty.plot(fit2, range_y, 'k:', linewidth=2)
    axHistx.hist(x, bins=bins, normed=1, histtype='stepfilled', color='b')
    axHisty.hist(y, bins=bins, normed=1, histtype='stepfilled', orientation='horizontal', color='b')
    axHistx.set_xlim( axScatter.get_xlim() )
    axHisty.set_ylim( axScatter.get_ylim() )
#    xvals=[-3., -2., -1., 0., 1., 2., 3.]
#    xtxts=[str(10.**a) for a in xvals]
#    yvals=[-2., -1., 0.]
#    ytxts=[str(10.**a) for a in xvals]
#    axScatter.set_xticks(xvals)
#    axScatter.set_xticklabels(xtxts)
#    axScatter.set_yticks(yvals)
#    axScatter.set_yticklabels(ytxts)
    plt.savefig('scatter_hist.png')
    plt.close()
    return
Example #7
    def fit(self, X, y, sample_weight=None):
        # the new coordinate system based on the training X
        self.dm=DiffusionMap(X,self.eps_par)
        mindist = self.dm.onePercentDistances()
        numpy.log(mindist,mindist)
        mu, std = norm.fit(mindist)
        wok=numpy.abs(mindist-mu)/std < 3
        mu, std = norm.fit(mindist[wok])
        self.dm.par = (numpy.exp(mu+self.eps_par*std))**2
        self.dm.make_map()

        return self.estimator.fit(self.dm.dmap.X, y, sample_weight=sample_weight)
Example #8
def sentiment_dist():
    data, dif = loadFeatures(), []
    pos = array(data['pos'], float)
    neg = array(data['neg'], float)
    pmean, pstd = norm.fit(pos)
    nmean, nstd = norm.fit(neg)
    print 'positive: mean, std: ', pmean, pstd
    print 'negative: mean, std: ', nmean, nstd    
    
    for i in range(len(pos)):
        dif.append(pos[i] - neg[i])
    dmean, dstd = norm.fit(dif)
    print 'delta: mean, std: ', dmean, dstd
Example #9
    def fit(self, x, y):
        del(self.catastrophe)
        del(self.max_distance)
        del(self.mask_scale)
        del(self.dm)

        self.catastrophe = numpy.logical_or(y[:,0] < self.zmin,
            numpy.logical_or(y[:,0] > self.zmax, y[:,1] < self.oiimin))


        if False: #os.path.isfile('estimator.pkl'):
            #print 'get pickle'
            pklfile=open('estimator.pkl','r')
            self.dm=pickle.load(pklfile)
        else:
            # the new coordinate system based on the training data
            data = Data(x,y,numpy.zeros(len(y)),xlabel=self.xlabel,ylabel=self.ylabel)
            self.dm=DiffusionMap(data,self.eps_par)
            mindist = self.dm.data_mindist()
            mindist[mindist < sys.float_info.min]=(mindist[mindist > sys.float_info.min]).min()
            mindist= numpy.log(mindist)
            mu, std = norm.fit(mindist)
            wok=numpy.abs(mindist-mu)/std < 3
            mu, std = norm.fit(mindist[wok])
            self.dm.par = numpy.exp(mu+self.eps_par*std)
            self.dm.make_map()

        #     pklfile=open('estimator.pkl','w')
        #     pickle.dump(self.dm,pklfile)
        # pklfile.close()
        # self.dm=DiffusionMap(x,self.eps_par)
        # self.dm.make_map()


        train_dist = sklearn.metrics.pairwise_distances(self.dm.data_dm().x,self.dm.data_dm().x)
        # catastrophe_distances = train_dist[numpy.outer(self.catastrophe,self.catastrophe)]
        # catastrophe_distances = catastrophe_distances[catastrophe_distances !=0]
        # catastrophe_distances = numpy.sort(catastrophe_distances)
        numpy.fill_diagonal(train_dist,train_dist.max()) #numpy.finfo('d').max)
        train_min_dist = numpy.min(train_dist,axis=0)
        train_min_dist = numpy.sort(train_min_dist)
        train_min_dist[train_min_dist < sys.float_info.min]=(train_min_dist[train_min_dist > sys.float_info.min]).min()
        catastrophe_min_dist = train_min_dist[self.catastrophe] 
        catastrophe_min_dist=numpy.log(catastrophe_min_dist)
        mu, std = norm.fit(catastrophe_min_dist)
        wok=numpy.abs(catastrophe_min_dist-mu)/std < 3
        mu, std = norm.fit(catastrophe_min_dist[wok])
        self.max_distance = train_min_dist[x.shape[0]*self.outlier_cut]
        self.mask_scale = numpy.exp(mu+std*self.mask_var)
Example #10
def getDistribution(filePath, refMeasurements_ds, refMeasurements_dth):
    #List containing all corner measurements
    measurements_ds = dE.getMeasurements(filePath,'ds =')
    measurements_dth = dE.getMeasurements(filePath,'dth =')
    
    # Measurements which are not due to random noise
    filteredMeasurements_ds = checkReference(refMeasurements_ds, measurements_ds, 0.2) 
    filteredMeasurements_dth = checkReference(refMeasurements_dth, measurements_dth, 0.2)  
    
    # Getting normal distribution parameters
    mu_ds, std_ds = norm.fit(filteredMeasurements_ds)
    mu_dth, std_dth = norm.fit(filteredMeasurements_dth)
    
#    return mu, std, allDifferences
    return mu_ds, std_ds, mu_dth, std_dth
Example #11
    def plot_histplot(self, files):
        # best fit of data
        (mu, sigma) = norm.fit(self.y)
        print "mu and sigma: " + str(mu) + ", " + str(sigma) + ""
        # Make the hist plot
        plt.figure(figsize=(12, 6))
        binwidth = 0.1
        color = "dodgerblue"

        # the histogram of the data
        n, bins, patches = plt.hist(
            self.y, normed=1, color=color, bins=np.arange(min(self.y), max(self.y) + binwidth, binwidth)
        )  #

        # add a 'best fit' line
        fit = mlab.normpdf(bins, mu, sigma)
        l = plt.plot(bins, fit, "b--", linewidth=2)

        plt.xlabel(u"Distance [Å]")
        plt.ylabel(u"Probability")
        title = "$\mathrm{Histogram\ of: \ " + files + "}$"
        title = title.replace("_", "\_")
        plt.title(r"" + title + "$\ \ \mu=%.3f,\ \sigma=%.3f$" % (mu, sigma))
        plt.savefig("plots/" + files + "_hist.png")
        plt.close()
Example #12
def plot_t_value_hist(
	img_path='~/ni_data/ofM.dr/l1/as_composite/sub-5703/ses-ofM/sub-5703_ses-ofM_task-EPI_CBV_chr_longSOA_tstat.nii.gz',
	roi_path='~/ni_data/templates/roi/DSURQEc_ctx.nii.gz',
	mask_path='/usr/share/mouse-brain-atlases/dsurqec_200micron_mask.nii',
	save_as='~/qc_tvalues.pdf',
	):
	"""Make t-value histogram plot"""

	f, axarr = plt.subplots(1, sharex=True)

	roi = nib.load(path.expanduser(roi_path))
	roi_data = roi.get_data()
	mask = nib.load(path.expanduser(mask_path))
	mask_data = mask.get_data()
	idx = np.nonzero(np.multiply(roi_data,mask_data))
	img = nib.load(path.expanduser(img_path))
	data = img.get_data()[idx]
	(mu, sigma) = norm.fit(data)
	n, bins, patches = axarr.hist(data,'auto',normed=1, facecolor='green', alpha=0.75)
	y = mlab.normpdf(bins, mu, sigma)

	axarr.plot(bins, y, 'r--', linewidth=2)
	axarr.set_title('Histogram of t-values $\mathrm{(\mu=%.3f,\ \sigma=%.3f}$)' %(mu, sigma))
	axarr.set_xlabel('t-values')
	plt.savefig(path.expanduser(save_as))
Example #13
def plot_logistic_parameter_ratio(plot_conditions, plot_colors, control_condition, condition_logistic_params,
                                  xlim=[-.1,.2], ylim=[0,35]):
    fig=plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    xx=np.arange(-.512,.512,.001)
    mean_a0=np.mean(condition_logistic_params['a0'][control_condition])
    mean_a1=np.mean(condition_logistic_params['a1'][control_condition])
    mean_a2=np.mean(condition_logistic_params['a2'][control_condition])
    yy_r=1/(1+np.exp(-(mean_a0+mean_a1*xx+mean_a2*1)))
    yy_l=1/(1+np.exp(-(mean_a0+mean_a1*xx+mean_a2*-1)))
    ax.plot(xx,yy_l,'--', color=plot_colors[control_condition], linewidth=2, label='Left*')
    ax.plot(xx,yy_r,plot_colors[control_condition], linewidth=2, label='Right*')
    ax.legend(loc='best')
    ax.set_xlabel('coherence')
    ax.set_ylabel('P(R)')

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    xx=np.arange(xlim[0],xlim[1],0.001)
    binwidth=.02
    for condition in plot_conditions:
        ratio=np.array(condition_logistic_params['a2'][condition]) / np.array(condition_logistic_params['a1'][condition])
        bins=np.arange(min(ratio), max(ratio) + binwidth, binwidth)
        hist,edges=np.histogram(ratio, bins=bins)
        center = (bins[:-1] + bins[1:]) / 2
        ax.bar(center, hist/float(len(ratio))*100.0, color=plot_colors[condition], alpha=0.75, label=condition, width=binwidth)
        (mu, sigma) = norm.fit(ratio)
        y = normpdf(xx, mu, sigma)*binwidth*100.0
        ax.plot(xx, y, '--', color=plot_colors[condition], linewidth=2)
    ax.legend(loc='best')
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    ax.set_xlabel('a2/a1')
    ax.set_ylabel('% subjects')
Example #14
def PlotHistNorm(data, log=False):
    # distribution fitting
    param = norm.fit(data) 
    mean = param[0]
    sd = param[1]

    #Set large limits
    xlims = [-6*sd+mean, 6*sd+mean]

    #Plot histogram
    histdata = hist(data,bins=12,alpha=.3,log=log)

    #Generate X points
    x = linspace(xlims[0],xlims[1],500)

    #Get Y points via Normal PDF with fitted parameters
    pdf_fitted = norm.pdf(x,loc=mean,scale=sd)

    #Get histogram data, in this case bin edges
    xh = [0.5 * (histdata[1][r] + histdata[1][r+1]) for r in xrange(len(histdata[1])-1)]

    #Get bin width from this
    binwidth = (max(xh) - min(xh)) / len(histdata[1])           

    #Scale the fitted PDF by area of the histogram
    pdf_fitted = pdf_fitted * (len(data) * binwidth)

    #Plot PDF
    plot(x,pdf_fitted,'r-')
Example #15
def plotDistribution(l,a,b,c=0,t="No title"):

    d = []
    if c==0:
        data1 = np.array(l[a:b])
        data2 = np.array(l[b:len(l)])

        d=[(data1,"c","Objective"), (data2,"r","Subjective")]
    else:
        data1 = np.array(l[a:b])
        data2 = np.array(l[b:c])
        data3 = np.array(l[c:len(l)])

        d=[(data1,"r","Negative"), (data2,"c","Objective"), (data3,"g","Positive")]

    for data in d:
        #fit a normal distribution to the data
        mu, std = norm.fit(data[0])
        lb = data[2]+" : mu = %.2f,  std = %.2f" % (mu, std)

        #plot histogram
        plt.hist(data[0], normed=True, alpha=0, color='g')

        # Plot the PDF.
        xmin, xmax = plt.xlim()
        x = np.linspace(xmin, xmax, 100)
        p = norm.pdf(x, mu, std)
        plt.plot(x, p, data[1], linewidth=2, label=lb)


    plt.title(t)
    plt.legend(loc="upper right")
    plt.xlabel("sentiment score")

    plt.show()
Example #16
 def AnalysePredictor(self, train, predictor_transformation='none'):
     if self.predictor_name is None:
         raise TypeError("Execute the SetUpTrainTest method to use this feature")
         
     #http://matplotlib.org/users/pyplot_tutorial.html
     if self.predictor_type == 'continuous':
         values = train[self.predictor_name]
         
         if predictor_transformation == 'log':
             values = np.log(values)
         else:
             predictor_transformation = 'none' #in case not supported transformation
         
         # fit the normal distribution on ln(loss)
         (mu, sigma) = norm.fit(values)
         
         # the histogram of the ln(loss)
         n, bins, patches = plt.hist(values, 60, normed=1, facecolor='green', alpha=0.75)
         
         # add the fitted line
         y = mlab.normpdf( bins, mu, sigma)
         l = plt.plot(bins, y, 'r--', linewidth=2)
         
         #plot
         plt.xlabel('Predictor: ' + self.predictor_name + ' - Transformation: ' +predictor_transformation)
         plt.ylabel('Probability')
         plt.title(r'$\mathrm{Histogram\ of\ Ln(Loss):}\ \mu=%.3f,\ \sigma=%.3f$' %(mu, sigma))
         plt.grid(True)
         
         plt.show()
     else:
         print 'predictor_type not implemented'
Example #17
def plot_bandwith(file,subplot):
    bandwith = []
    time = []
    for line in file:
        l = line.split(",")
        if len(l) == 9: # then it is a client report
            transfered=l[7]
            Bps=float(l[8])
            time_=l[6].split('-')[0]
            if (Bps > 0.0):
                bandwith.append(Bps/1000)
                time.append(time_)
        else:
            #the server's report
            total_trans=l[7]
            average_bandwith=l[8]
            jitter=l[9]
            loss=l[10]
            total_pack=l[11]
            loss_rate =l[12]
    # average_bandwith=0
    # for i in bandwith:
    #     average_bandwith+=i
    # average_bandwith = average_bandwith/len(bandwith)
    # average=[]
    # stdp=[] #standar deviation positive
    # stdn =[] #standard deviation negative
    # sd = np.std(bandwith)
    # for i in bandwith:
    #     average.append(average_bandwith)
    #     stdp.append(average_bandwith+sd)
    #     stdn.append(average_bandwith-sd)

    # print "Average ", average_bandwith, "SD ",sd
    #plt.xlabel("Time (s)")
    #plt.ylabel("Bandwith (Kb)")
    #plt.plot(time[:-1],bandwith[:-1],"r.",label="Bandwith")
    #plt.plot(time[:-1],average[:-1],"b",label="Average")
    #plt.plot(time[:-1],stdp[:-1],"g",label="Standard deviation")
    #plt.plot(time[:-1],stdn[:-1],"g")
    #plt.legend()
    (mu,sigma) = norm.fit(bandwith[:-1])
    #print mu, sigma

   # plt.subplot(340+current)
   # plt.subplots(nrows=4,ncols=3)
    n , bins , patches = subplot.hist(bandwith[:-1], 30,normed=True,facecolor='green',alpha=1)
    #print bins
    #y = mlab.normpdf(bins,average_bandwith,sd)
    y = mlab.normpdf(bins,mu,sigma)
   # plt.xlabel("Bandwidth (Mbps)",fontsize=15)
    subplot.set_xlabel("Bandwidth (Mbps)",fontsize=4.5,style='italic')

    #subplot.plot(bins,y,'b-')
    subplot.plot(bins, 1/(sigma * np.sqrt(2 * np.pi)) *np.exp( - (bins - mu)**2 / (2 * sigma**2) ),linewidth=2, color='r')
   # subplot.subplots_adjust(left=0.15)
    #print file.name.split(":")[0].split("/")[1]
    print file.name.split(":")[0].split("/")[1]
    subplot.set_title(r'$\mathrm{Histogram\ of\ GS\ %s:}\ \mu=%.3f,\ \sigma=%.3f$' %(file.name.split(":")[0].split("/")[1],mu, sigma),fontsize=6)
    subplot.grid(True)
Example #18
def log_histogram(code,r,bins=30):

    #loads the file containing the masses for code
    mass_file = cd.get_output_file_name(code=code,attribute='tm',redshift=r)
    #remove units from data with 'Msun' unit attached
    mass_data = cd.format_data(load_file=mass_file, file_unit_type='mass', tuple_data='no')
    x = [float(i) for i in mass_data]
    #Take the log base 10 of the masses
    logx = np.log10(x)

    #######################################
    # Plot with dashed bars and curve fit #
    #######################################
    
    plt.figure(figsize=(15,10))
    
    # best fit of data
    (mu, sigma) = norm.fit(logx)

    # the histogram of the data (n=logx bins=bins)
    n, bins, patches = plt.hist(logx, int(bins), normed=1, facecolor='green', alpha=0.75)

    # add a 'best fit' line
    y = mlab.normpdf( bins, mu, sigma)
    l = plt.plot(bins, y, 'r--', linewidth=2)

    #plot
    plt.xlabel('Msun')
    plt.ylabel('Frequency')
    plt.title('Histogram of Mass distribution for %s %s' %(code,r))
    plt.xscale('log')
    plt.yscale('log')
    plt.xlim(left=0, right=14)
    plt.grid(True)
    plt.savefig(str(code)+str(r)+str(bins)+'loghist.png')
Example #19
def tVertexErrorHist(diffs, nEvents, title=None, ranges=None, quiet=True):
    """
    Plots an error histogram for tVertex-genVertex z values.
    Usage: tVertexErrorHist(differences, number of counts, quiet=True)
    """
    absDiffs = np.absolute(diffs)
    fig, ax = plt.subplots()  # |Set up graph
    n, bins, patches = ax.hist(diffs, normed=False, range=ranges, bins=100)
    (mu, sigma) = norm.fit(diffs)  # |Fit curve
    muerr = sigma / np.sqrt(nEvents)
    dx = bins[1] - bins[0]  # |Get bin width
    scale = dx * len(absDiffs)  # |Scale histogram
    fitline = mlab.normpdf(bins, mu, sigma) * scale
    ax.plot(bins, fitline, "r--", linewidth=2)  # |Plot fit line
    ax.set_xlabel("Error (mm)")
    ax.set_ylabel("Counts ($\Sigma=%i$)" % nEvents)
    if title == None:
        ax.set_title("tVertexed $z$ - genVertex $z$ for 500GeV $\gamma$-gun")
    else:
        ax.set_title(title)
    # Build output and figure text string
    string = "$\mu$ = %.3f$\pm$%.3fmm, $\sigma_{fitted}$ = %.3fmm \n" % (mu, muerr, sigma)  # |Print out info box
    string += "68% error magnitude:      {0:>6.3f}mm \n".format(np.percentile(absDiffs, 68.27))
    string += "Median error magnitude:   {0:>6.3f}mm \n".format(np.median(absDiffs))
    string += "Mean error magnitude:     {0:>6.3f}mm \n".format(np.mean(absDiffs))
    # Changing font to look nicer and line up colons
    font = {"family": "monospace", "weight": "bold", "size": 9}
    matplotlib.rc("font", **font)  # |Changes fond to monospace
    # Annotate plot
    ax.text(0.02, 0.99, string, transform=ax.transAxes, verticalalignment="top")
    if not quiet:
        print string
    # Show it
    plt.show()
Example #20
def estimate_normal_params(x, outlier=True, value=3.):
    """
    Estimate the mean (as median) and veriance (as MAD)

    Paramters:
    ----------------------

    x: array,
       float
     outlier: bool,
              with or without outlier removal
    value: float,
           factor for outlier removal (value * sd +. median is removed)

    Returns:
    ---------------------
    (loca, scale): float, float
                  tuple of loc and scale estimation
    """
    if outlier:
        # robust location and scale: median and MAD
        mu = np.median(x)
        sd = mad(x)
        t = x
        t = t[(t >= mu - value * sd) & (t <= mu + value * sd)]
        loc = np.mean(t)
        scale = np.std(t)
    else:
        loc = np.median(x)
        scale = mad(x)

    return (loc, scale)
Example #21
def getDistribution(filePath, refMeasurements):
    #List containing all corner measurements
    measurements = dE.getMeasurements(filePath,'corners_world =') 
    
    # Measurements which are not due to random noise
    filteredMeasurements = checkReference(refMeasurements, measurements, 50) 
    
    #Split filtered corners in list of points that belong together
    measurements_point_A = dE.removeSublistLevel(filteredMeasurements,0)
    measurements_point_B = dE.removeSublistLevel(filteredMeasurements,1)
    measurements_point_C = dE.removeSublistLevel(filteredMeasurements,2)        
    
    # Extracting difference in x and y coordinates from split corners
    x_A = dE.removeSublistLevel(measurements_point_A, 0)
    y_A = dE.removeSublistLevel(measurements_point_A, 1)
    
    x_B = dE.removeSublistLevel(measurements_point_B, 0)
    y_B = dE.removeSublistLevel(measurements_point_B, 1)
    
    x_C = dE.removeSublistLevel(measurements_point_C, 0)
    y_C = dE.removeSublistLevel(measurements_point_C, 1)
    
    # Putting all the differences in one list
    allDifferences = []
    allDifferences.extend(x_A)
    allDifferences.extend(y_A)
    allDifferences.extend(x_B)
    allDifferences.extend(y_B)
    allDifferences.extend(x_C)
    allDifferences.extend(y_C)
    
    # Getting normal distribution parameters
    mu, std = norm.fit(allDifferences)
    
    return mu, std, allDifferences
Example #22
def plot_assumption_free(scores, data, bins=50):
    """
    Plots the scores from the analysis using the assumption free algorithm.
    """
    plt.figure()
    plt.subplot(2, 1, 1)
    (data.acc / data.acc.max()).plot()
    (data.hr / data.hr.max()).plot()
    data.ratio_log.plot()
    plt.legend(loc='best')
    plt.subplot(2, 1, 2)
    plt.plot(data.index[:len(scores)], scores)

    scores = [x for x in scores if abs(x) > 10 ** -10]
    s_mean, sigma = norm.fit(scores)
    plt.figure()
    _, bin_edges, _ = plt.hist(scores, bins=bins, normed=True)
    plt.plot(bin_edges, norm.pdf(bin_edges, loc=s_mean, scale=sigma))
    vlin = linspace(s_mean - 3 * sigma, s_mean + 3 * sigma, 13)
    step = int(256 / ((len(vlin) - 1) / 2))
    colors = linspace(0, 1, 256)[::step][:(len(vlin) - 1) / 2]
    colors = [(c, 0, 0) for c in colors]
    colors += [(1, 1, 1)]
    colors += [(0, c, 0) for c in reversed(colors)]
    plt.vlines(vlin.tolist()[1:], 0, 1, colors[1:])
Example #23
def plot(fileName, color, label):
    data = []
    f = open('%s%s' % (carpeta, fileName), 'r')
    lines = f.readlines()
    f.close()
    for line in lines:
        data.append(float(line))
    
    n, bins, patches = pyplot.hist(data, bins=15, range=(100, 500), normed=True, color="%c" % color, label=label, alpha=0.53, linewidth=0.3)
    
    # normal fitting
    (mu, sigma) = norm.fit(data)
    pdf_norm = mlab.normpdf( bins, mu, sigma)
    pyplot.plot(bins, pdf_norm, "%c--" % color, linewidth=1, label=None)
    
    # lognormal fitting
    shape, loc, scale = stats.lognorm.fit(data, floc=0)
    mu = np.log(scale) # Mean of log(X)
    sigma = shape # Standard deviation of log(X)
    M = np.exp(mu) # Geometric mean == median
    s = np.exp(sigma) # Geometric standard deviation
    x = np.linspace(100, 500)
    pdf_lognorm = stats.lognorm.pdf(x, shape, loc=0, scale=scale)
    pyplot.plot(x, pdf_lognorm, "%c" % color, linewidth=1, label=None) # Plot fitted curve
    
    pyplot.vlines(mu, 0, pdf_norm.max(), linestyle='-', label=None)
    pyplot.vlines(M, 0, pdf_lognorm.max(), linestyle=':', label=None)
    ax = pyplot.gca() # Get axis handle for text positioning
    ax.text(M, pdf_norm.max(), u"%s\nmedian=%.2f ms\n%i samples" % (fileName, M, len(data)), style='italic', color=color, size='small')
    pyplot.legend()
    pylab.savefig("%s%s" % (carpeta, 'img.png'))
Example #24
def clik1((al,origclust,dellen)):
	allen = al.shape[0]
	stats = []
	for i in xrange(BOOTREPS):
		boot = np.array(random.sample(al,dellen))
		stats.append(clust(boot))
	return norm(*norm.fit(stats)).pdf(origclust)
Example #25
def mcmc_tt(al=np.genfromtxt(ALIGNFILE,delimiter=',').astype(np.int), imps=IMPS):
	
	print 'Building likelihood distributions...'
	rdist = np.genfromtxt(RDIST, delimiter=',')
	ldist = norm(*norm.fit(rdist))
	pdist = cclass(al, imps)

	print 'Starting MCMC:'
	print 'Step#\t|New Lik\t|New PropLik\t|Old Lik\t|Old PropLik\t|Accept Prob'
	old = impute.impute(al,imps, orderfunc=ORDERFUNC)
	old_tt = tt.ttratio(old)
	old_lik = ldist.pdf(old_tt)
	old_plik = pdist.pdf(old_tt)

	states = [(clust(old),old_lik,old_plik,old_lik,old_plik,1)]

	for i in xrange(STEPS):
		prop = impute.impute(al,imps, orderfunc=ORDERFUNC)
		prop_tt = tt.ttratio(prop)
		prop_lik = ldist.pdf(prop_tt)
		prop_plik = pdist.pdf(prop_tt)

		a = (prop_lik/old_lik)*(old_plik/prop_plik)
		states.append((clust(old),prop_lik,prop_plik,old_lik,old_plik,a))
		print '%d\t|%2f\t|%2f\t|%2f\t|%2f\t|%e' % (i+1,prop_lik,prop_plik,old_lik,old_plik,a)
		if random.random()<a:
			old, old_tt, old_lik, old_plik = prop, prop_tt, prop_lik, prop_plik

	states.append((clust(old),prop_lik,prop_plik,old_lik,old_plik,a))
	np.savetxt(OUT_STATES, np.array(states), delimiter=',')
Example #26
def tlik((al,origtt,dellen)):
	allen = al.shape[0]
	stats = []
	for i in xrange(BOOTREPS):
		boot = np.array(random.sample(al,dellen))
		stats.append(tt.ttratio(boot))
	return norm(*norm.fit(stats)).pdf(origtt)
Example #27
def clik((al,origclust,dellen)):
	allen = al.shape[0]
	stats = []
	for i in xrange(BOOTREPS):
		boot = al[np.random.choice(xrange(allen),dellen,replace=0)]
		stats.append(clust(boot))
	return norm(*norm.fit(stats)).pdf(origclust)
Example #28
def construct_gs_hist(del_bl=8.,num_bl=10,beam_sig=0.09,fq=0.1):
    save_tag = 'grid_del_bl_{0:.2f}_num_bl_{1}_beam_sig_{2:.2f}_fq_{3:.3f}'.format(del_bl,num_bl,beam_sig,fq)
    save_tag_mc = 'grid_del_bl_{0:.2f}_num_bl_{1}_beam_sig_{2:.2f}_fq_{3}'.format(del_bl,num_bl,beam_sig,fq)
    ys = load_mc_data('{0}/monte_carlo/{1}'.format(data_loc,save_tag_mc))
    print 'ys ',ys.shape
    
    alms_fg = qgea.generate_sky_model_alms(gsm_fits_file,lmax=3)
    alms_fg = alms_fg[:,2]

    baselines,Q,lms = load_Q_file(gh='grid',del_bl=del_bl,num_bl=num_bl,beam_sig=beam_sig,fq=fq,lmax=3)
    N = total_noise_covar(0.1,baselines.shape[0],'{0}/gsm_matrices/gsm_{1}.npz'.format(data_loc,save_tag))
    MQN = return_MQdagNinv(Q,N,num_remov=None)
    print MQN
    ahat00s = n.array([])
    for ii in xrange(ys.shape[1]):
        #_,ahat,_ = qgea.test_recover_alms(ys[:,ii],Q,N,alms_fg,num_remov=None)
        ahat = uf.vdot(MQN,ys[:,ii])
        ahat00s = n.append(n.real(ahat[0]),ahat00s)
    #print ahat00s
    print ahat00s.shape
    _,bins,_ = p.hist(ahat00s,bins=36,normed=True)

    # plot best fit line
    mu,sigma = norm.fit(ahat00s)
    print "mu, sigma = ",mu,', ',sigma
    y_fit = mpl.mlab.normpdf(bins,mu,sigma)
    p.plot(bins, y_fit, 'r--', linewidth=2)

    p.xlabel('ahat_00')
    p.ylabel('Probability')
    p.title(save_tag)
    p.annotate('mu = {0:.2f}\nsigma = {1:.2f}'.format(mu,sigma), xy=(0.05, 0.5), xycoords='axes fraction')
    p.savefig('./figures/monte_carlo/{0}.pdf'.format(save_tag))
    p.clf()
Example #29
def plot_delay(file):

    delays=[]
    for line in file:
        if line.find("time") != -1:
            if line[line.find("time")+4]=='=':
                data =line.split("time")[1].split("=")[1].split(" ")[0]
                delays.append(float(data))
    (mu,sigma) = norm.fit(delays)
    print mu, sigma


    n , bins , patches = plt.hist(delays, 15,normed=True,facecolor='green',alpha=1)
    #print bins
    #y = mlab.normpdf(bins,average_bandwith,sd)
    y = mlab.normpdf(bins,mu,sigma)
   # plt.xlabel("Bandwidth (Mbps)",fontsize=15)
    plt.xlabel("Delay (ms)",fontsize=13,style='italic')
    plt.ylabel("Ocurrence probability",fontsize=13,style='italic')
    data1=frange(140,180,0.3)

    #subplot.plot(bins,y,'b-')
    plt.plot(data1, 1/(sigma * np.sqrt(2 * np.pi)) *np.exp( - (data1 - mu)**2 / (2 * sigma**2) ),linewidth=2, color='r')
    print file.name.split(":")[0].split("/")[1]
    plt.title(r'$\mathrm{Histogram\ of\ GS\ %s:}\ \mu=%.3f,\ \sigma=%.3f$' %(gs[file.name.split(":")[0].split("/")[1]],mu, sigma),fontsize=16)
    plt.grid(True)
    plt.tight_layout()
Example #30
def mcmc_sym_dist(alignment, num_imp, dem_ratios, directory, length, burnin):
	acceptances = 0
	d = transprobs(TRANSITIONS, MARGINAL)
	pd = pdn(alignment)
	mins = np.array([sorted(i) for i in pd])
	nloc, nscale = norm.fit(mins)
	dist = norm(nloc, nscale)
	# Build first state of Markov chain
	print 'Imputing first alignment...'
	current = impute.imp_align(num_imp, alignment, dem_ratios)
	current.loglik = loglik(current)+math.log(distlik(current, num_imp, nloc, 1000))
	print '\t Log likelihood %2f' % current.loglik
	if not burnin: AlignIO.write(current, '%s/%d.fasta' % (directory,0), 'fasta')
	# Run chain
	for i in xrange(1,length+1):
		proposal = propose(current,num_imp,max(norm(loc=2,scale=1).rvs(),1), d)
		l1 = loglik(proposal)
		l2 = math.log(distlik(proposal, num_imp, nloc, 1000))
		proposal.loglik = l1+l2
		p = proposal.loglik-current.loglik
		print 'Current LLH: %2f; Proposed LLH: %2f' % (current.loglik, proposal.loglik)
		print '\tPhylogeny component: %2f; Distance component: %2f' % (l1, l2)
		print '\tAcceptance probability %e' % math.exp(p)
		if random.random()<math.exp(p):
			current = proposal
			acceptances += 1
			print '\tAccepted'
		else: print '\tNot accepted'
		if i > burnin:
			AlignIO.write(current, '%s/%d.fasta' % (directory,i-burnin), 'fasta')
	return float(acceptances)/length
Example #31
def absolute_sdm(obs_cube, mod_cube, sce_cubes, *args, **kwargs):
    """
    apply absolute scaled distribution mapping to all scenario cubes
    assuming a normal distributed parameter

    Args:

    * obs_cube (:class:`iris.cube.Cube`):
        the observational data

    * mod_cube (:class:`iris.cube.Cube`):
        the model data at the reference period

    * sce_cubes (:class:`iris.cube.CubeList`):
        the scenario data that shall be corrected

    Kwargs:

    * cdf_threshold (float):
        limit of the cdf-values (default: .99999)
    """
    from scipy.stats import norm
    from scipy.signal import detrend

    cdf_threshold = kwargs.get('cdf_threshold', .99999)

    obs_cube_mask = np.ma.getmask(obs_cube.data)
    cell_iterator = np.nditer(obs_cube.data[0], flags=['multi_index'])
    while not cell_iterator.finished:
        index_list = list(cell_iterator.multi_index)
        cell_iterator.iternext()

        index_list.insert(0, 0)
        index = tuple(index_list)
        if obs_cube_mask and obs_cube_mask[index]:
            continue

        index_list[0] = slice(0, None, 1)
        index = tuple(index_list)

        # consider only cells with valid observational data
        obs_data = obs_cube.data[index]
        mod_data = mod_cube.data[index]

        obs_len = len(obs_data)
        mod_len = len(mod_data)

        obs_mean = obs_data.mean()
        mod_mean = mod_data.mean()

        # detrend the data
        obs_detrended = detrend(obs_data)
        mod_detrended = detrend(mod_data)

        obs_norm = norm.fit(obs_detrended)
        mod_norm = norm.fit(mod_detrended)

        obs_cdf = norm.cdf(np.sort(obs_detrended), *obs_norm)
        mod_cdf = norm.cdf(np.sort(mod_detrended), *mod_norm)
        obs_cdf = np.maximum(np.minimum(obs_cdf, cdf_threshold),
                             1 - cdf_threshold)
        mod_cdf = np.maximum(np.minimum(mod_cdf, cdf_threshold),
                             1 - cdf_threshold)

        for sce_cube in sce_cubes:
            sce_data = sce_cube[index].data

            sce_len = len(sce_data)
            sce_mean = sce_data.mean()

            sce_detrended = detrend(sce_data)
            sce_diff = sce_data - sce_detrended
            sce_argsort = np.argsort(sce_detrended)

            sce_norm = norm.fit(sce_detrended)
            sce_cdf = norm.cdf(np.sort(sce_detrended), *sce_norm)
            sce_cdf = np.maximum(np.minimum(sce_cdf, cdf_threshold),
                                 1 - cdf_threshold)

            # interpolate cdf-values for obs and mod to the length of the
            # scenario
            obs_cdf_intpol = np.interp(np.linspace(1, obs_len, sce_len),
                                       np.linspace(1, obs_len, obs_len),
                                       obs_cdf)
            mod_cdf_intpol = np.interp(np.linspace(1, mod_len, sce_len),
                                       np.linspace(1, mod_len, mod_len),
                                       mod_cdf)

            # adapt the observation cdfs
            # split the tails of the cdfs around the center
            obs_cdf_shift = obs_cdf_intpol - .5
            mod_cdf_shift = mod_cdf_intpol - .5
            sce_cdf_shift = sce_cdf - .5
            obs_inverse = 1. / (.5 - np.abs(obs_cdf_shift))
            mod_inverse = 1. / (.5 - np.abs(mod_cdf_shift))
            sce_inverse = 1. / (.5 - np.abs(sce_cdf_shift))
            adapted_cdf = np.sign(obs_cdf_shift) * (
                1. - 1. / (obs_inverse * sce_inverse / mod_inverse))
            adapted_cdf[adapted_cdf < 0] += 1.
            adapted_cdf = np.maximum(np.minimum(adapted_cdf, cdf_threshold),
                                     1 - cdf_threshold)

            xvals = norm.ppf(np.sort(adapted_cdf), *obs_norm) \
                + obs_norm[-1] / mod_norm[-1] \
                * (norm.ppf(sce_cdf, *sce_norm) - norm.ppf(sce_cdf, *mod_norm))
            xvals -= xvals.mean()
            xvals += obs_mean + (sce_mean - mod_mean)

            correction = np.zeros(sce_len)
            correction[sce_argsort] = xvals
            correction += sce_diff - sce_mean
            sce_cube.data[index] = correction
Example #32
# Compile and run our program
process = Popen(["gcc", "-o", "01_cachedemo", "01_cachedemo.c"])
process.wait()
process = Popen(
    ["./01_cachedemo", "10000", "10000", "01_flush.txt", "01_noflush.txt"])
process.wait()

# Generate histograms for measurements of NOT cached values
s = ""
with open('01_flush.txt') as f:
    s = f.read()
s = numpy.fromstring(s, dtype=int, sep=',')
srt_row = numpy.array(sorted(s, key=int, reverse=False)).astype(int)
# Sort and cut away 5% of the biggest measurements
cutSortedRow = srt_row[:int(len(srt_row) * 0.95)]
(mu, sigma) = norm.fit(cutSortedRow)
n, bins, patches = plt.hist(cutSortedRow,
                            weights=numpy.zeros_like(cutSortedRow) +
                            1. / cutSortedRow.size)
y = norm.pdf(bins, mu, sigma)
plt.plot(bins, y, 'r--', linewidth=2)
plt.xlabel('Cycles')
plt.ylabel('Frequency')
plt.title(
    f"""Main memory access speed - \u03BC: {round(mu, 2)}, \u03C3: {round(sigma, 2)}"""
)
plt.grid(True)
plt.locator_params(axis='x', nbins=5)
plt.tight_layout()
plt.savefig('01_flush.png')
plt.close()
Example #33
def hist(df, feature, bins=50):
    '''Plots bokeh histogram, PDF & CDF of a DF feature.
    
    Parameters
    ----------
    df : DataFrame
        DF of the data.
    feature :  str
        Column name of the df.
    bins : int
        Number of bins to plot.
        
    Returns
    -------
    None
    '''

    #not nan feature values
    x = df[feature][df[feature].notna()].values

    #Get the values for the histogram and bin edges (len(hist)+1).
    #Use density to plot pdf and cdf on the same plot.
    hist, edges = np.histogram(x, bins=bins, density=True)

    ### PDF & CDF ##

    #find normal distribution parameters
    mu, sigma = norm.fit(x)
    xs = np.linspace(min(x), max(x) + 1, len(x))  #x values to plot the line(s)

    pdf = norm.pdf(xs, loc=mu, scale=sigma)  #probability distribution function
    cdf = norm.cdf(xs, loc=mu, scale=sigma)  #cumulative distribution function

    #data sources for cdf
    source_cdf = ColumnDataSource({'cdf': cdf, 'xs': xs})

    #create the canvas
    p1 = figure(title='Histogram, PDF & CDF',
                plot_height=400,
                x_axis_label=feature,
                y_axis_label='Density')

    #add histogram
    p1.quad(bottom=0,
            top=hist,
            left=edges[:-1],
            right=edges[1:],
            fill_color='royalblue',
            line_color='black',
            alpha=0.7)

    #add pdf
    p1.line(xs,
            pdf,
            line_color='red',
            line_width=5,
            alpha=0.5,
            legend_label='PDF')

    #set left-hand y-axis range
    p1.y_range = Range1d(0, max(hist) + 0.05 * max(hist))

    #setting the second y axis range name and range
    p1.extra_y_ranges = {"cdf": Range1d(start=0, end=1.05)}

    #adding the second y axis to the plot and to the right.
    p1.add_layout(LinearAxis(y_range_name="cdf", axis_label='CDF'), 'right')

    #add cdf with y range on the right
    cdf_plot = p1.line('xs',
                       'cdf',
                       source=source_cdf,
                       alpha=0.8,
                       line_color='darkgoldenrod',
                       line_width=5,
                       legend_label='CDF',
                       y_range_name='cdf',
                       name='cdf',
                       hover_line_color='green')

    #hover tool
    p1.add_tools(
        HoverTool(renderers=[cdf_plot],
                  tooltips=[('Prob', '@cdf{0.00}')],
                  mode='hline'))

    #figure properties
    p1.xgrid.visible = False

    #hide entries when clicking on a legend
    p1.legend.click_policy = "hide"

    show(p1)
Example #34
if sys.argv[3] == 'perimeter':  # opening branch reconstructed; 'perimeter' inferred from vec = P
    vec = P
elif sys.argv[3] == 'eccentricity':
    vec = E
elif sys.argv[3] == 'solidity':
    vec = S
    
history = vec
nn = int(steps)
for counter in range(nn):
    vec = SS.dot(vec)
    history = np.hstack((history,vec)) 

##########################################                       
# Fit a normal distribution to the data:
attribute = np.log2(np.mean(history,axis=1))
mu, std = norm.fit(attribute) # you could also fit a lognormal to the original data
sns.set(style='white', rc={'figure.figsize':(5,5)})
plt.hist(attribute, bins=100, density=True, alpha=0.6, color='g')
#Plot the PDF.
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
p = norm.pdf(x, mu, std)
plt.plot(x, p, 'k', linewidth=2)
title = "Fit results: mu = %.2f,  std = %.2f" % (mu, std)
plt.title(title)
plt.savefig("./png/distro-"+str(sys.argv[3])+".png") # save as png
plt.close()
###########################################

# create empty list for node colors
pos = XY
Example #35
                vp.append(v) 
            for w in v:
               wp.append(w) 
        UN2s.append(size(np.unique(wp)))


print np.mean(OBS2s), np.mean(DET2s), np.mean(CHAR2s), np.mean(UN2s)

#D1 = UNs
#D2 = UN2s
#xmax = 11
D1 = DETs
D2 = CHARs
xmax = 18
bins = range(xmax+1)
(mu, sigma) = norm.fit(D1)
y1 = mlab.normpdf( bins, mu, sigma)
(mu, sigma) = norm.fit(D2)
y2 = mlab.normpdf( bins, mu, sigma)

close('all')
figure(2)
grid('on')
fsz = 18
plot(bins, y1, 'b-o', linewidth=2, markersize=5, label='KasdinBraems')
plot(bins, y2, 'r-o', linewidth=2, markersize=5, label='Nemati')
#xlabel('Unique planet detections', fontsize=fsz)
xlabel('Total planet detections', fontsize=fsz)
ylabel('Normalized frequency', fontsize=fsz)
xlim(0,xmax)
tick_params(axis='both', which='major', labelsize=fsz)
Example #36
 #
 zero_file = 'fms/agents/zerointelligencetrader.py'
 with open(zero_file, 'w') as f:
     f.write(zero_agent)
 #
 process = Popen(['python2', 'startfms.py', 'run', 'config.yml'], stdout=PIPE, stderr=PIPE)
 stdout, stderr = process.communicate()
 if len(stdout) != 0:
     print('STDOUT', stdout)
 if len(stderr) != 0:
     print('STDERR', stderr)
 #
 df = pd.read_csv('output.csv', skiprows=[0], sep=';')
 df['return'] = df['price'] / 100000 - 1
 #
 mu, sigma = norm.fit(df['return'])
 skew, kurtosis = st.skew(df['return']), st.kurtosis(df['return'])
 autocorr = f_autocorr(df['return'].dropna().abs())[0, 1]
 print('{},{},{},{},{}'.format(
     mu, sigma, skew, kurtosis, autocorr))
 result_df = result_df.append({
     'zero_pct': zero_pct,
     'herding_pct': herding_pct,
     'threshold_pct': threshold_pct,
     'mu': mu,
     'sigma': sigma,
     'skew': skew,
     'kurtosis': kurtosis,
     'autocorr': autocorr,
 }, ignore_index=True)
 result_df.to_csv('result.csv.10times.2.csv', index=False)
Example #37
extractor = AppearanceExtractor(0, 0, TEST_SEASONS, 1, 1)
train_input, train_output = extractor.get_train_data()
non_mol = [
    data[0] for data, label in zip(train_input, train_output) if label == 0.0
]
mol = [
    data[0] for data, label in zip(train_input, train_output) if label == 1.0
]

plt.figure(figsize=(12, 3))
plt.xlabel("Relative Appearance")
plt.ylabel("Is 'mol'")
plt.yticks(np.linspace(0.0, 1.0, 11))
plt.gcf().subplots_adjust(bottom=0.15)

mol_norm = norm.fit(mol)
X = np.linspace(-1.5, 1.0, 500)
mol_Y = [norm.pdf(x, loc=mol_norm[0], scale=mol_norm[1]) for x in X]
plt.plot(X, mol_Y, color='r')

non_mol_norm = norm.fit(non_mol)
non_mol_Y = [
    norm.pdf(x, loc=non_mol_norm[0], scale=non_mol_norm[1]) for x in X
]
plt.plot(X, non_mol_Y, color='g')

non_mol_multiplier = len(non_mol) / len(train_output)
mol_multiplier = len(mol) / len(train_output)
posterior = [
    my * mol_multiplier / (my * mol_multiplier + ny * non_mol_multiplier)
    for my, ny in zip(mol_Y, non_mol_Y)
]
Example #38
        Ea_ch3oh.append(b_ch3oh)
    dEa_all.append(dEa)
    print mat.cat, mat.cattype, mat.ets_ch4, mat.ets_ch3oh, dEa

labels = []
colors = []
for mclass in dEa_dict:
    labels.append(mclass)
    colors.append(dEa_dict[mclass]['clr'])
n, bins, patches = plt.hist(dEa_all,
                            nbins,
                            normed=1,
                            label=labels,
                            color=colors,
                            stacked=True)
mu, std = norm.fit(dEa_all)
plt.xlim(0, 2)
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
p = norm.pdf(x, mu, std)
plt.plot(x, p, 'k', linewidth=2)
title = r"Fit results: $\mu$ = %.2f,  $\sigma$ = %.2f" % (mu, std)
plt.ylabel(r'Counts')
plt.xlabel(r'$E^a_{CH_4} - E^a_{CH_3OH}$ (eV)')
plt.title(title)
plt.tight_layout()
plt.legend(fontsize=10)
plt.savefig('fig-S7c-beef-RuO2-ECH4-ECH3OH.pdf')

plt.cla()
n, bins, patches = plt.hist(Ea_ch4,
Example #39
# drop features
drop_feats = [
    'WW_GRS', 'PERCENT', 'NM_0.5W_T', 'NM_0.5W_M24', 'NM_0.5W_M26',
    'NM_0.5W_F24', 'NM_0.5W_F26', 'GENRE2'
]

df.drop(drop_feats, axis=1, inplace=True)

# check OBO
df['OBO'].describe()

# original data
sns.distplot(df['OBO'], fit=norm)
# Get the fitted parameters used by the function
(mu, sigma) = norm.fit(df['OBO'])
print('\n mu = {:.2f} and sigma = {:.2f}\n'.format(mu, sigma))
#Now plot the distribution
plt.legend(
    ['Normal dist. ($\mu=$ {:.2f} and $\sigma=$ {:.2f} )'.format(mu, sigma)],
    loc='best')
plt.ylabel('Frequency')
plt.title('distribution')

#Get also the QQ-plot
fig = plt.figure()
res = stats.probplot(df['OBO'], plot=plt)
plt.show()

# log transformation
sns.distplot(np.log(df['OBO']), fit=norm)
Example #40
def draw_distribution_of_contacts():
    import os
    import zipfile
    from bokeh.plotting import figure, output_file, show
    import numpy as np
    with zipfile.ZipFile('/Users/trman/OneDrive/Projects/PyTorch/trainingFiles/PDBBind/target_feature_vectors/aadistancematrix500.zip') as z:
        for dist_fl_name in z.namelist():
            if not os.path.isdir(dist_fl_name) and dist_fl_name.endswith("tsv"):
                print(dist_fl_name)
                dist_lst = []
                prot_id = dist_fl_name.split(".")[0]
                #dist_fl = open("{}/{}".format(dist_folder_path, dist_fl_name), "r")
                with z.open(dist_fl_name) as f:
                    row_ind = 0
                    for line in f:
                        col_values = str(line).split("\\t")
                        # print(col_values)

                        for col_ind in range(len(col_values)):
                            dist = 0
                            if col_ind > row_ind:
                                if col_ind==row_ind or (col_ind!=row_ind and col_values[col_ind]!="0.0"):
                                    try:
                                        dist = float(col_values[col_ind])
                                        dist_lst.append(dist)

                                    except:
                                        pass
                        row_ind += 1
                dist_lst = sorted(dist_lst)
                # print(dist_lst)
                output_file("line.html")

                p = figure(plot_width=400, plot_height=400)
                lst_indices = list(range(len(dist_lst)))

                # print(lst_indices)
                # add a circle renderer with a size, color, and alpha

                arr_hist, edges = np.histogram(dist_lst,
                                               bins=1000,
                                               range=[0.0, 1.0])

                # Put the information in a dataframe
                distances = pd.DataFrame({'arr_dist': arr_hist,
                                       'left': edges[:-1],
                                       'right': edges[1:]})
                # print(distances)

                # Create the blank plot
                p = figure(plot_height=600, plot_width=600,
                           title='Histogram distances of aminoacids on 3D',
                           x_axis_label='Aminoacid pairs',
                           y_axis_label='Distance')

                # Add a quad glyph
                p.quad(bottom=0, top=distances['arr_dist'],
                       left=distances['left'], right=distances['right'],
                       fill_color='red', line_color='black')
                print(pd.DataFrame(dist_lst).describe())
                # Show the plot
                show(p)

                import numpy as np
                from scipy.stats import norm
                import matplotlib.pyplot as plt

                # Generate some data for this demonstration.
                data = np.asarray(dist_lst)
                print(type(data))
                # Fit a normal distribution to the data:
                mu, std = norm.fit(np.asarray(data))

                # Plot the histogram.
                plt.hist(data, bins=100, density=True, alpha=0.6, color='g')

                # Plot the PDF.
                xmin, xmax = plt.xlim()
                x = np.linspace(xmin, xmax, 100)
                p = norm.pdf(x, mu, std)
                plt.plot(x, p, 'k', linewidth=2)
                title = "Fit results: mu = %.2f,  std = %.2f" % (mu, std)
                plt.title(title)

                plt.show()
Example #41
def visualization(df_train, df_test):
    print(df_train['SalePrice'].describe())  # We’re going to predict the SalePrice column ($ USD)

    sns.set(style='whitegrid', palette='muted', font_scale=1.5)
    rcParams['figure.figsize'] = 14, 8
    sns.distplot(df_train['SalePrice'], fit=norm)
    (mu, sigma) = norm.fit(df_train['SalePrice'])
    plt.legend(['Normal dist. ($\mu=$ {:.2f} and $\sigma=$ {:.2f} )'.format(mu, sigma)], loc='best')
    plt.title('Sale Prices')
    plt.xlabel('Sale Price')
    plt.ylabel('Probability')
    # plt.close()
    plt.show()
    # Most of the density lies between 100k and 250k, but there appears to be a lot of outliers on the pricier side.

    # -------------------------------- #

    # top 10 correlated features with the sale price:
    corr_matrix = df_train.corr()
    sns.heatmap(corr_matrix, vmax=.8, square=True)
    k = 10  # number of variables for heat map
    cols = corr_matrix.nlargest(k, 'SalePrice')['SalePrice'].index
    sns.heatmap(df_train[cols].corr().values.T, cbar=True, annot=True, square=True, yticklabels=cols.values,
                xticklabels=cols.values)
    # plt.close()
    plt.show()

    # Overall Quality vs Sale Price
    var = 'OverallQual'
    data = pd.concat([df_train['SalePrice'], df_train[var]], axis=1)
    data.plot.scatter(x=var, y='SalePrice', ylim=(0, 800000), s=32)
    plt.show()

    # Living Area vs Sale Price
    var = 'GrLivArea'
    data = pd.concat([df_train['SalePrice'], df_train[var]], axis=1)
    data.plot.scatter(x=var, y='SalePrice', ylim=(0, 800000), s=32)
    plt.show()

    # It makes sense that people would pay for the more living area.
    # What doesn't make sense is the two data points in the bottom-right of the plot.

    # Removing outliers manually (Two points in the bottom right)
    df_train = df_train.drop(df_train[(df_train['GrLivArea'] > 4000)
                                      & (df_train['SalePrice'] < 300000)].index).reset_index(drop=True)

    # After removing outliers, Living Area vs Sale Price
    var = 'GrLivArea'
    data = pd.concat([df_train['SalePrice'], df_train[var]], axis=1)
    data.plot.scatter(x=var, y='SalePrice', ylim=(0, 800000), s=32)
    plt.show()

    # GarageCars vs Sale Price
    var = 'GarageCars'
    data = pd.concat([df_train['SalePrice'], df_train[var]], axis=1)
    data.plot.scatter(x=var, y='SalePrice', ylim=(0, 800000), s=32)
    plt.show()

    # GarageArea vs Sale Price
    var = 'GarageArea'
    data = pd.concat([df_train['SalePrice'], df_train[var]], axis=1)
    data.plot.scatter(x=var, y='SalePrice', ylim=(0, 800000), s=32)
    plt.show()

    # Up to this point, we were exploring the data

    # Do we have missing data - train?

    total = df_train.isnull().sum().sort_values(ascending=False)
    percent = (df_train.isnull().sum() / df_train.isnull().count()).sort_values(ascending=False)
    missing_data = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
    missing_data = missing_data[missing_data.Total > 0]

    print(missing_data)

    total_test = df_test.isnull().sum().sort_values(ascending=False)
    percent_test = (df_test.isnull().sum() / df_test.isnull().count()).sort_values(ascending=False)
    missing_data_test = pd.concat([total_test, percent_test], axis=1, keys=['TotalTest', 'PercentTest'])
    missing_data_test = missing_data_test[missing_data_test.TotalTest > 0]

    print(missing_data_test)
Example #42
def plot_disp(data, true_hadroness=False):
    """Plot the performance of reconstructed position

    Parameters:
    -----------
    data: pandas DataFrame

    true_hadroness: boolean
    True: gammas and protons are separated using their true hadroness.
    False: gammas and protons are separated using reconstructed
    hadroness (hadro_rec).
    """
    hadro = "reco_type"
    if true_hadroness:
        hadro = "mc_type"

    gammas = data[data[hadro] == 0]

    plt.subplot(221)

    reco_disp_norm = np.sqrt(gammas['reco_disp_dx']**2 +
                             gammas['reco_disp_dy']**2)
    disp_res = ((gammas['disp_norm'] - reco_disp_norm) / gammas['disp_norm'])

    section = disp_res[abs(disp_res) < 0.5]
    mu, sigma = norm.fit(section)
    print("mu = {}\n sigma = {}".format(mu, sigma))

    n, bins, patches = plt.hist(
        disp_res,
        bins=100,
        density=1,
        alpha=0.75,
        range=[-2, 1.5],
    )

    y = norm.pdf(bins, mu, sigma)

    plt.plot(bins, y, 'r--', linewidth=2)

    plt.xlabel('$\\frac{disp\_norm_{gammas}-disp_{rec}}{disp\_norm_{gammas}}$',
               fontsize=15)

    plt.figtext(0.15, 0.7, 'Mean: ' + str(round(mu, 4)), fontsize=12)
    plt.figtext(0.15, 0.65, 'Std: ' + str(round(sigma, 4)), fontsize=12)

    plt.subplot(222)

    hD = plt.hist2d(
        gammas['disp_norm'],
        reco_disp_norm,
        bins=100,
        range=([0, 1.1], [0, 1.1]),
    )

    plt.colorbar(hD[3])
    plt.xlabel(r'$disp\_norm_{gammas}$', fontsize=15)

    plt.ylabel(r'$disp\_norm_{rec}$', fontsize=15)

    plt.plot(gammas['disp_norm'], gammas['disp_norm'], "-", color='red')

    plt.subplot(223)
    theta2 = (gammas['src_x'] - gammas['reco_src_x'])**2 + (gammas['src_y'] -
                                                            gammas['reco_src_y'])**2

    plt.hist(theta2, bins=100, range=[0, 0.1], histtype=u'step')
    plt.xlabel(r'$\theta^{2}(º)$', fontsize=15)
    plt.ylabel(r'# of events', fontsize=15)
Пример #43
0
mu = np.zeros(len(test))
std = np.zeros(len(test))

batman_good = []

#plt.plot(test)
#plt.show()

##Write data to file
out_file = open('sample_candidates.txt', 'w')
line1 = 'sector' + ',' + 'tessFile' + ',' + 'curveID' + ',' + 'correlation' + '\n'
out_file.write(line1)  # header row

good = []
for row in range(len(test)):

    mu, std = norm.fit(test[row])
    good.append(test[row][np.where(test[row] >= mu + 3 * std)])
    #print('values: ',test[row][0])
    #print('std: ', mu+3*std)
    #print('index: ',np.where(test[row] >= mu+1*std)[0])
    #plt.plot(test[row])
    #plt.show()

    batman_good.append(batman_indices[np.where(test[row] >= mu + 3 * std)])
good = np.asarray(good)
batman_good = np.asarray(batman_good)

try:
    for row in range(len(good)):
        for column in range(len(good[row]) - 1):
            line = str(sector[row]) + ',' + str(data[row]) + ',' + str(
Пример #44
0
residuals = gandalfs.zenith - primaries.zenith
cut = (gandalfs["lambda"] < l) & (np.abs(residuals) < 2 * np.pi)
residuals = residuals[cut]
event_info = event_info[cut]

# convert rad -> deg
residuals = residuals * 180 / np.pi

pi = 180
# x axis for plotting
x = np.linspace(-pi, pi, 1000)

c_loc, c_gamma = cauchy.fit(residuals)
fwhm = 2 * c_gamma

g_mu_bad, g_sigma_bad = norm.fit(residuals)
g_mu, g_sigma = norm.fit(residuals[np.abs(residuals) < 10])

plt.hist(residuals, bins="auto", label="Histogram", density=True, alpha=0.7)
plt.plot(
    x,
    cauchy(c_loc, c_gamma).pdf(x),
    label="Lorentz: FWHM $=${:.3f}".format(fwhm),
    linewidth=2,
)
plt.plot(
    x,
    norm(g_mu_bad, g_sigma_bad).pdf(x),
    label="Unrestricted Gauss: $\sigma =$ {:.3f}".format(g_sigma_bad),
    linewidth=2,
)
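
# A plausible continuation sketch (assumed; the snippet cuts off above):
# overlay the Gauss fitted only to the central |residual| < 10 deg region.
plt.plot(
    x,
    norm(g_mu, g_sigma).pdf(x),
    label=r"Restricted Gauss: $\sigma =$ {:.3f}".format(g_sigma),
    linewidth=2,
)
plt.xlabel("zenith residual [deg]")
plt.legend()
plt.show()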
Пример #45
0
plt.xlabel('index')
plt.ylabel('Tempo de Permanência')
plt.title("Tempo de Permanência - Distribution")
plt.show()

#Target Variable Analysis

from scipy import stats

from scipy.stats import norm, skew

import seaborn as sns

import matplotlib.pyplot as plt 

(mu, sigma) = norm.fit(df_maio_19_reg['TEMPO_PERM_INT_POSTERIOR'])

plt.figure(figsize = (14, 7))
sns.distplot(df_maio_19_reg['TEMPO_PERM_INT_POSTERIOR'], fit = norm)
plt.ylabel('Frequency')
plt.title('Tempo de Permanência - Distribution')
plt.legend(['Normal Dist. ($\mu=$ {:.2f} and $\sigma=$ {:.2f} )'.format(mu, sigma)], loc = 'best')

quantile_plot = stats.probplot(df_maio_19_reg['TEMPO_PERM_INT_POSTERIOR'], plot = plt)

import numpy as np

df_maio_19_reg['TEMPO_PERM_INT_POSTERIOR'] = np.log1p(df_maio_19_reg['TEMPO_PERM_INT_POSTERIOR'])

(mu, sigma) = norm.fit(df_maio_19_reg['TEMPO_PERM_INT_POSTERIOR'])
plt.figure(figsize = (14, 7))
Пример #46
0
import numpy as np
from scipy.stats import norm, multinomial
original_data = norm.rvs(loc=1.0, scale=0.5, size=1000, random_state=1386)
original_data[:20]

# In[ ]:

#Now replace every other element with the mean 1.0
missing_elements = np.asarray([0, 1] * 500)
updated_data = original_data * (1 - missing_elements) + missing_elements
updated_data[:20]

# In[ ]:

#Now, let's get mean and std of the new distribution:
mean, std = norm.fit(updated_data)
print(f'Mean: {mean}, std: {std}')

# As you can see, even though the mean is the same, the standard deviation is much smaller. While imputing data this way can increase the performance of the model, it also amplifies the bias that already exists in the data. To prevent amplifying the bias, we have to replace the missing values with samples from a normal distribution with the same mean and standard deviation. For categorical features it would be a multinomial distribution.
#
# For debiasing we can also try to increase the standard deviation of the distribution from which we sample data for numerical features, and apply a similar transformation for the multinomial distribution.
#
# In this notebook I suggest two classes for the numerical and categorical features respectively.
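
# In[ ]:

# A minimal sketch of the idea with assumed names (the notebook's own classes follow below):
# draw each missing value from norm(mean, std) fitted on the observed entries,
# which preserves the spread instead of collapsing it to the mean.
observed = original_data[missing_elements == 0]
fit_mean, fit_std = norm.fit(observed)
debiased_data = original_data.copy()
debiased_data[missing_elements == 1] = norm.rvs(
    loc=fit_mean, scale=fit_std, size=missing_elements.sum(), random_state=0)
print(f'Debiased mean: {debiased_data.mean():.3f}, std: {debiased_data.std():.3f}')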

# ## Proposed solution ##

# In[ ]:

from sklearn.base import BaseEstimator, TransformerMixin
import numpy.ma as ma
from sklearn.utils.validation import check_is_fitted
Пример #47
0
            dt = dateutil.parser.parse(row['time']).astimezone(
                timezone(timedelta(hours=9)))
            all_conferences[0].append(dt)
            conferences[0].append(dt)
            all_participants[0].append(dt)
            participants[0].append(dt)

        all_conferences[1][j].append(int(row['conferences']))
        all_participants[1][j].append(int(row['participants']))
        j += 1
print("Cleaning time: %d seconds" % (time.time() - curr_time))

curr_time = time.time()
for d in all_conferences[1]:
    conf_norm_dist_funcs.append(norm.fit(d))
print("First Normal Distribution Functions Fitting time: %d seconds" %
      (time.time() - curr_time))

curr_time = time.time()
for d in all_participants[1]:
    part_norm_dist_funcs.append(norm.fit(d))
print("Second Normal Distribution Functions Fitting time: %d seconds" %
      (time.time() - curr_time))

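# Resample each time bucket from its fitted normal, clip negative draws,
# smooth with a Savitzky-Golay filter, and round back to integer counts.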
curr_time = time.time()
for loc, scale in conf_norm_dist_funcs:
    conferences[1].append(norm.rvs(loc=loc, scale=scale, random_state=8192))
conferences[1] = np.clip(conferences[1], 0, None)
conferences[1] = savgol_filter(conferences[1], 91, 1)
conferences[1] = np.around(conferences[1])
Пример #48
0
import numpy as np
from scipy.signal import detrend
from scipy.stats import norm


def absSDM(obs, mod, sce, cdf_threshold=0.9999999):
    '''Absolute scaled distribution mapping, assuming a normally distributed parameter.
    Rewritten from pyCAT for 1D data.

    obs :: observed variable time series
    mod :: modelled variable for the same period as obs
    sce :: modelled time series to be unbiased
    cdf_threshold :: upper and lower threshold of the CDF

    Returns the corrected time series.
    Tested with pandas Series.
    '''

    obs_len = len(obs)
    mod_len = len(mod)
    sce_len = len(sce)
    obs_mean = np.mean(obs)
    mod_mean = np.mean(mod)
    smean = np.mean(sce)
    odetrend = detrend(obs)
    mdetrend = detrend(mod)
    sdetrend = detrend(sce)

    obs_norm = norm.fit(odetrend)
    mod_norm = norm.fit(mdetrend)
    sce_norm = norm.fit(sdetrend)

    sce_diff = sce - sdetrend
    sce_argsort = np.argsort(sdetrend)

    obs_cdf = norm.cdf(np.sort(odetrend), *obs_norm)
    mod_cdf = norm.cdf(np.sort(mdetrend), *mod_norm)
    sce_cdf = norm.cdf(np.sort(sdetrend), *sce_norm)
    obs_cdf = np.maximum(np.minimum(obs_cdf, cdf_threshold), 1 - cdf_threshold)
    mod_cdf = np.maximum(np.minimum(mod_cdf, cdf_threshold), 1 - cdf_threshold)
    sce_cdf = np.maximum(np.minimum(sce_cdf, cdf_threshold), 1 - cdf_threshold)

    # interpolate cdf-values for obs and mod to the length of the scenario
    obs_cdf_intpol = np.interp(np.linspace(1, obs_len, sce_len),
                               np.linspace(1, obs_len, obs_len), obs_cdf)
    mod_cdf_intpol = np.interp(np.linspace(1, mod_len, sce_len),
                               np.linspace(1, mod_len, mod_len), mod_cdf)

    # adapt the observation cdfs
    # split the tails of the cdfs around the center
    obs_cdf_shift = obs_cdf_intpol - .5
    mod_cdf_shift = mod_cdf_intpol - .5
    sce_cdf_shift = sce_cdf - .5
    obs_inverse = 1. / (.5 - np.abs(obs_cdf_shift))
    mod_inverse = 1. / (.5 - np.abs(mod_cdf_shift))
    sce_inverse = 1. / (.5 - np.abs(sce_cdf_shift))
    adapted_cdf = np.sign(obs_cdf_shift) * (
        1. - 1. / (obs_inverse * sce_inverse / mod_inverse))
    adapted_cdf[adapted_cdf < 0] += 1.
    adapted_cdf = np.maximum(np.minimum(adapted_cdf, cdf_threshold),
                             1 - cdf_threshold)

    xvals = norm.ppf(np.sort(adapted_cdf), *obs_norm) \
            + obs_norm[-1] / mod_norm[-1] \
            * (norm.ppf(sce_cdf, *sce_norm) - norm.ppf(sce_cdf, *mod_norm))
    xvals -= xvals.mean()
    xvals += obs_mean + (smean - mod_mean)

    correction = np.zeros(sce_len)
    correction[sce_argsort] = xvals
    correction += sce_diff - smean

    return correction
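
# Hypothetical usage sketch with synthetic series (all values are assumptions):
if __name__ == '__main__':
    rng = np.random.default_rng(0)
    obs = rng.normal(10.0, 2.0, 365)   # "observed" series
    mod = rng.normal(12.0, 3.0, 365)   # biased model run over the same period
    sce = rng.normal(12.5, 3.0, 365)   # scenario run to be corrected
    corrected = absSDM(obs, mod, sce)
    print('means:', obs.mean(), sce.mean(), corrected.mean())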
Пример #49
0
    def __init__(self,
                 N=None,
                 size=1,
                 mu0=0.1,
                 sigma_mean0=10,
                 sigma_std0=1.0,
                 sigma_min=0.1,
                 sigma_max=10,
                 data=None):

        self.N = N
        self.K = size

        # Parameter initialization
        #random init
        if data is None:

            # mu = random normal with std mu0,mean 0
            self.mu = mu0 * np.random.randn(self.N, self.K).astype(DTYPE)

            # Sigma = random normal with mean sigma_mean0, std sigma_std0, and min/max of sigma_min, sigma_max
            self.Sigma = np.random.randn(self.N, 1).astype(DTYPE)
            self.Sigma *= sigma_std0
            self.Sigma += sigma_mean0
            self.Sigma = np.maximum(sigma_min,
                                    np.minimum(self.Sigma, sigma_max))
            self.Gaussian = np.concatenate((self.mu, self.Sigma), axis=1)

            # TensorVariables for mi, mj, si, sj respectively.
            a, b = T.fvectors('a', 'b')
            c, d = T.fscalars('c', 'd')

            # Energy as a TensorVariable
            E = -0.5 * (self.K * d / c + T.sum(
                (a - b)**2 / c) - self.K - self.K * T.log(d / c))
            self.enrg = function([a, b, c, d], E)

            g1 = T.grad(E, a)  # dE/dmi
            self.f1 = function([a, b, c, d], g1)

            g2 = T.grad(E, b)  # dE/dmj
            self.f2 = function([a, b, c, d], g2)

            g3 = T.grad(E, c)  # dE/dsi
            self.f3 = function([a, b, c, d], g3)

            g4 = T.grad(E, d)  # dE/dsj
            self.f4 = function([a, b, c, d], g4)

        #non random init
        else:
            self.mu = []
            self.Sigma = []

            for i in range(len(data)):
                mu, std = norm.fit(data[i])
                var = np.power(std, 2)
                self.mu.append(mu)
                self.Sigma.append(var)

            self.Gaussian = np.concatenate(
                (np.asarray(self.mu), np.asarray(self.Sigma)), axis=1)
            self.Gaussian = np.reshape(self.Gaussian, (2, N)).T
Пример #50
0
def create_1d_hist(fig,
                   ax,
                   hist,
                   title=None,
                   x_axis_title=None,
                   y_axis_title=None,
                   bins=101,
                   x_min=None,
                   x_max=None):
    if x_min is None:
        x_min = 0.0
    if x_max is None:
        if hist.all() is np.ma.masked:  # check if masked array is fully masked
            x_max = 1.0
        else:
            x_max = hist.max()
    hist_bins = int(x_max - x_min) + 1 if bins is None else bins
    if hist_bins > 1:
        bin_width = (x_max - x_min) / (hist_bins - 1)
    else:
        bin_width = 1.0
    hist_range = (x_min - bin_width / 2, x_max + bin_width / 2)
    #     if masked_hist.dtype.kind in 'ui':
    #         masked_hist[masked_hist.mask] = np.iinfo(masked_hist.dtype).max
    #     elif masked_hist.dtype.kind in 'f':
    #         masked_hist[masked_hist.mask] = np.finfo(masked_hist.dtype).max
    #     else:
    #         raise TypeError('Inappropriate type %s' % masked_hist.dtype)
    masked_hist_compressed = np.ma.masked_invalid(
        np.ma.masked_array(hist)).compressed()
    if masked_hist_compressed.size == 0:
        ax.plot([])
    else:
        _, _, _ = ax.hist(
            x=masked_hist_compressed,
            bins=hist_bins,
            range=hist_range,
            align='mid')  # re-bin to 1d histogram, x argument needs to be 1D
    # BUG: np.ma.compressed(np.ma.masked_array(hist, copy=True)) (2D) is not equal to np.ma.masked_array(hist, copy=True).compressed() (1D) if hist is ndarray
    ax.set_xlim(hist_range)  # overwrite xlim
    if hist.all() is np.ma.masked:  # or np.allclose(hist, 0.0):
        ax.set_ylim((0, 1))
        ax.set_xlim((-0.5, +0.5))
    elif masked_hist_compressed.size == 0:  # or np.allclose(hist, 0.0):
        ax.set_ylim((0, 1))
    # create histogram without masked elements, higher precision when calculating gauss
#     h_1d, h_bins = np.histogram(np.ma.masked_array(hist, copy=True).compressed(), bins=hist_bins, range=hist_range)
    if title is not None:
        ax.set_title(title)
    if x_axis_title is not None:
        ax.set_xlabel(x_axis_title)
    if y_axis_title is not None:
        ax.set_ylabel(y_axis_title)
#     bin_centres = (h_bins[:-1] + h_bins[1:]) / 2
#     amplitude = np.amax(h_1d)

# defining gauss fit function

    def gauss(x, *p):
        amplitude, mu, sigma = p
        return amplitude * np.exp(-(x - mu)**2.0 / (2.0 * sigma**2.0))
#         mu, sigma = p
#         return 1.0 / (sigma * np.sqrt(2.0 * np.pi)) * np.exp(- (x - mu)**2.0 / (2.0 * sigma**2.0))

    def chi_square(observed_values, expected_values):
        return (chisquare(observed_values, f_exp=expected_values))[0]


#         chisquare = 0
#         for observed, expected in itertools.izip(list(observed_values), list(expected_values)):
#             chisquare += (float(observed) - float(expected))**2.0 / float(expected)
#         return chisquare

#     p0 = (amplitude, mean, rms)  # p0 is the initial guess for the fitting coefficients (A, mu and sigma above)
#     try:
#         coeff, _ = curve_fit(gauss, bin_centres, h_1d, p0=p0)
#     except (TypeError, RuntimeError), e:
#         logging.info('Normal distribution fit failed, %s', e)
#     else:

    xmin, xmax = ax.get_xlim()
    points = np.linspace(xmin, xmax, 500)
    #     hist_fit = gauss(points, *coeff)
    param = norm.fit(masked_hist_compressed)
    #     points = np.linspace(norm.ppf(0.01, loc=param[0], scale=param[1]), norm.ppf(0.99, loc=param[0], scale=param[1]), 100)
    pdf_fitted = norm.pdf(points, loc=param[0], scale=param[1]) * (
        len(masked_hist_compressed) * bin_width)
    ax.plot(points, pdf_fitted, "r--", label='Normal distribution')
    #     ax.plot(points, hist_fit, "g-", label='Normal distribution')
    try:
        median = np.median(masked_hist_compressed)
    except IndexError:
        logging.warning('Cannot create 1D histogram named %s', title)
        return
    ax.axvline(x=median, color="g")
    #     chi2, pval = chisquare(masked_hist_compressed)
    #     _, p_val = mstats.normaltest(masked_hist_compressed)
    #     textright = '$\mu=%.2f$\n$\sigma=%.2f$\n$\chi^{2}=%.2f$' % (coeff[1], coeff[2], chi2)
    #     props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    #     ax.text(0.85, 0.9, textright, transform=ax.transAxes, fontsize=8, verticalalignment='top', bbox=props)

    textleft = '$\\Sigma=%d$\n$\\mathrm{mean\\,\\mu=%.2f}$\n$\\mathrm{std\\,\\sigma=%.2f}$\n$\\mathrm{median=%.2f}$' % (
        len(masked_hist_compressed), param[0], param[1], median)
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    ax.text(0.05,
            0.9,
            textleft,
            transform=ax.transAxes,
            fontsize=8,
            verticalalignment='top',
            bbox=props)
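
# Hypothetical usage sketch (assumes this module's imports; names and values are illustrative):
if __name__ == '__main__':
    fig, ax = plt.subplots()
    values = np.random.normal(loc=50.0, scale=5.0, size=10000)
    create_1d_hist(fig, ax, values, title='Gaussian noise',
                   x_axis_title='value', y_axis_title='entries',
                   bins=101, x_min=20.0, x_max=80.0)
    plt.show()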
Пример #51
0
#plt.ylabel('SalePrice',fontsize=13)
#plt.xlabel('GrLivArea',fontsize=13)
#plt.show()

train = train.drop(train[(train['GrLivArea'] > 4000)
                         & (train['SalePrice'] < 300000)].index)

#fig,ax=plt.subplots()
#ax.scatter(train['GrLivArea'],train['SalePrice'])
#plt.ylabel('SalePrice',fontsize=13)
#plt.xlabel('GrLivArea',fontsize=13)
#plt.show()

sns.distplot(train['SalePrice'], fit=norm)

(mu, sigma) = norm.fit(train['SalePrice'])
print('\n mu = {:.2f} and sigma = {:.2f}\n'.format(mu, sigma))

#plt.legend(['Normal dist. ($\mu=$ {:.2f} and $\sigma=$ {:.2f} )'.format(mu,sigma)],loc='best')
#plt.ylabel('Frequency')
#plt.title('SalePrice distribution')

#fig=plt.figure()
#res=stats.probplot(train['SalePrice'],plot=plt)
#plt.show()

train["SalePrice"] = np.log1p(train["SalePrice"])

sns.distplot(train['SalePrice'], fit=norm)

(mu, sigma) = norm.fit(train['SalePrice'])
Пример #52
0
def bootstrap_context(Unit, eval_method='fr1', shuffle_num=10000, isfig=False):
    init_t = 0
    spkt_ob = []
    tstart = []
    tend = []
    dur = []
    ttemp = 0
    #create a long spiketrain containing only spikes from context
    for i, record_end in enumerate(Unit.marker.record[1]):
        spkctx = Unit.spktrain[(Unit.spktrain >= Unit.marker.door[0][i]) & (
            Unit.spktrain <= record_end)] - Unit.marker.door[0][i]
        spkt_ob.append(spkctx + init_t)
        dur.append((record_end - Unit.marker.door[0][i]))
        init_t += dur[i]
        tstart.append(ttemp)
        tend.append(ttemp + dur[i])
        ttemp += dur[i]
    spkt_observed = np.concatenate(spkt_ob)
    #keep the ISI the same but shuffled
    ISI = np.insert(np.diff(spkt_observed), 0, Unit.spktrain[0])
    #create pseudo spiketrains
    spk_shuffle = []
    for i in range(shuffle_num):
        spk_new = []
        currentspk = 0
        new_ISI = np.random.permutation(ISI)
        for isi in new_ISI:
            spk_new.append(currentspk + isi)
            currentspk += isi
        spk_shuffle.append(np.array(spk_new))

    #1. compare the cdi between observed and shuffled (not a good measure)
    if eval_method == 'cdi':
        thres = 2.17  # z-score for a ~97% two-sided confidence interval
        cdi_observed = cal_ctx_cdi(spkt_observed, Unit, tstart, tend, dur)[1]
        cdi_shuffle = []
        for spk_s in spk_shuffle:
            cdi_shuffle.append(cal_ctx_cdi(spk_s, Unit, tstart, tend, dur)[1])
        mu, sigma = norm.fit(cdi_shuffle)
        CI1 = thres * sigma + mu
        CI2 = mu - thres * sigma
        if cdi_observed > CI1:
            cell_identity = 'A'
        elif cdi_observed < CI2:
            cell_identity = 'B'
        else:
            cell_identity = 'others'
        if isfig:
            plt.figure()
            n, bins, patches = plt.hist(cdi_shuffle, bins=100)
            plt.axvline(cdi_observed, color='g')
            CI = thres * sigma + mu
            plt.axvline(CI, color='r')
            plt.show()
        return cell_identity

    #2. we compare the firing rate of each trial to its shuffled results

    if eval_method == 'fr1':
        thres = 1.96  # z-score for the 95% two-sided confidence interval
        ctx_ob = cal_ctx_cdi(spkt_observed, Unit, tstart, tend, dur)[0]
        ctx_num = len(np.unique(Unit.marker.protocol))
        trl_num = np.unique(Unit.marker.protocol, return_counts=True)[1][0]
        ctx_shuffle_fr = np.zeros((ctx_num, trl_num, shuffle_num))
        for s, spk_s in enumerate(spk_shuffle):
            ctx_temp = cal_ctx_cdi(spk_s, Unit, tstart, tend, dur)[0]
            for c in range(ctx_num):
                for t in range(trl_num):
                    ctx_shuffle_fr[c][t][s] = ctx_temp[c]['fr'][t]
        #plot the shuffled distribution
        if isfig:
            f, ax = plt.subplots(ctx_num, trl_num, sharey=True, sharex=True)
            for c in range(ctx_num):
                for t in range(trl_num):
                    n, bins, patches = ax[c, t].hist(ctx_shuffle_fr[c][t],
                                                     60,
                                                     density=True,
                                                     alpha=0.75)
                    ax[c, t].axvline(ctx_ob[c]['fr'][t], color='g')
                    #we use the 95% confidence interval as threshold
                    mu, sigma = norm.fit(ctx_shuffle_fr[c][t])
                    y = norm.pdf(bins, mu, sigma)
                    ax[c, t].plot(bins, y, 'y--', linewidth=2)
                    CI = thres * sigma + mu
                    ax[c, t].axvline(CI, color='r')
        #show the context preference of each trial
        ctx_pref = np.zeros((ctx_num, trl_num))
        for c in range(ctx_num):
            for t in range(trl_num):
                mu, sigma = norm.fit(ctx_shuffle_fr[c][t])
                zobserved = (ctx_ob[c]['fr'][t] - mu) / sigma
                if abs(zobserved) < thres:
                    ctx_pref[c][t] = 0
                elif zobserved > thres:
                    ctx_pref[c][t] = 1
                elif zobserved < -thres:
                    ctx_pref[c][t] = -1
        #decide which context this unit prefers
        if np.sum(ctx_pref[0] == 1) >= 2:
            cell_identity = ctx_ob[0]['name']
        elif np.sum(ctx_pref[1] == 1) >= 2:
            cell_identity = ctx_ob[1]['name']
        else:
            cell_identity = 'others'
        print('This unit prefers ' + cell_identity)
        return cell_identity, ctx_pref
Пример #53
0
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt

data = np.loadtxt("10000phidiffp01.txt",
                  dtype=float,
                  delimiter='\t',
                  usecols=range(2))  #Loading file with two columns of data

fig, axs = plt.subplots(1, 2)  # Configuring side-by-side plots

hrange = 0.02  #Range of plot
bins = np.linspace(-hrange, hrange,
                   100)  #Number of bins and interval where it's defined

mu0, std0 = norm.fit(data[:, 0])  #Normal gaussian distribution fit
axs[0].hist(data[:, 0], bins, density=True)  #Histogram plot
axs[0].set_title(
    "$\sigma_{\phi} = 0.01$ with fitting $\mu=%.5s$, $\sigma_{gaus}=%.5s$" %
    (mu0, std0))  #Title of the subplot
axs[0].set(xlabel="$(\phi_{Kalman}-\phi_{real})$",
           ylabel="Distribution of tracks")  #Axis label of plots
p = norm.pdf(bins, mu0, std0)  #Fitted Gaussian definition
axs[0].plot(bins, p, 'k', linewidth=2)  #Gaussian plot

mu1, std1 = norm.fit(data[:, 1])
axs[1].hist(data[:, 1], bins, density=True)
axs[1].set_title(
    "$\sigma_{\phi} = 0.01$ with fitting $\mu=%.5s$, $\sigma_{gaus}=%.5s$" %
    (mu1, std1))
axs[1].set(xlabel="$(\phi_{Kalman}-\phi_{real})/\phi_{real}$",
           ylabel="Distribution of tracks")  # ylabel assumed, mirroring axs[0]
Пример #54
0
from astropy.io import fits
import matplotlib.pyplot as plt
from scipy.stats import norm

# open file and get data
hdulist = fits.open("/Users/aliyah/Downloads/A1_mosaic.fits")
hdulist.info()
#hdulist[0].header()
image_data = hdulist[0].data
hdulist.close()
x_values = image_data
#print(image_data[:,1])



x_values = x_values[x_values <= 3600]  # discard bright pixels
xv = x_values[(x_values >= 3390) & (x_values <= 3450)]  # background-count window
n, bins, patches = plt.hist(x_values, bins=3600)
mu, sigma = norm.fit(xv)  # Gaussian fit to the background counts
y = norm.pdf(bins, mu, sigma)
plt.figure(1)
plt.plot(bins, 10500000 * y)  # pdf scaled by hand to match the raw-count histogram
plt.xlim([3300, 3600])
plt.show()
print(mu, sigma)


#mean background count


plt.figure(2)
plt.imshow(image_data, cmap='gray')
plt.colorbar()
Пример #55
0
            plt.xlabel('Steps')
            plt.ylabel('time(seconds)')
            print(score)
            print('Policy-Iteration converged at step %d.' % (i + 1))
            break
        policy = new_policy
    return s


if __name__ == '__main__':
    env_name = 'FrozenLake-v0'
    env = gym.make(env_name)
    #optimal_policy = policy_iteration(env, gamma = g)
    #print(optimal_policy)
    #env.render()
    #scores = evaluate_policy(env, optimal_policy, gamma = g)
    #print('Average scores = ', np.mean(scores))
    s = []
    for i in range(100):
        steps = policy_iteration(env, gamma=g)
        s.append(steps)
    s = np.array(s)
    (mu, sigma) = norm.fit(s)
    n, bins, patches = plt.hist(s, 60, density=True, facecolor='green', alpha=0.75)
    plt.xlabel('Steps')
    plt.ylabel('Probability')
    plt.title(r'$\mathrm{Histogram\ of\ steps:}\ \mu=%.3f,\ \sigma=%.3f$' %
              (mu, sigma))
    plt.grid(True)
    plt.show()
Пример #56
0
#check again the data size after dropping the 'Id' variable
print("\nThe train data size after dropping Id feature is : {} ".format(train.shape)) 
print("The test data size after dropping Id feature is : {} ".format(test.shape))

#Data_preprocessing
fig, ax = plt.subplots()
ax.scatter(x = train['growth_rate'], y = train['Attrition_rate'])
plt.ylabel('Attrition_rate', fontsize=13)
plt.xlabel('growth_rate', fontsize=13)
plt.show()

sns.distplot(train['Attrition_rate'], fit=norm)

# Get the fitted parameters used by the function
(mu, sigma) = norm.fit(train['Attrition_rate'])
print( '\n mu = {:.2f} and sigma = {:.2f}\n'.format(mu, sigma))

#Now plot the distribution
plt.legend(['Normal dist. ($\mu=$ {:.2f} and $\sigma=$ {:.2f} )'.format(mu, sigma)],
            loc='best')
plt.ylabel('Frequency')
plt.title('Attrition_rate distribution')

#Also get the QQ-plot
fig = plt.figure()
res = stats.probplot(train['Attrition_rate'], plot=plt)
plt.show()

print("The skewness of Attrition_rate is {}".format(train['Attrition_rate'].skew()))
Пример #57
0
Ntime = 100  # number of timesteps
h = 1  # steplength

part_pos_list = np.zeros(N, dtype=int)  # N particles in x=0.

# random walk in 1D
for t in range(Ntime):
    r_list = np.random.random(N)  # list with N random numbers between 0 and 1
    for i in range(N):
        if r_list[i] >= 0.5:
            part_pos_list[i] += h  # One step to the right
        else:
            part_pos_list[i] -= h  # One step to the left

# find the mean mu and standard deviation sigma of the normal distribution best suited to part_pos_list
mu, sigma = norm.fit(part_pos_list)
print("mu =", mu, "sigma =", sigma)

# pre-plotting
xMax = np.max(np.abs(part_pos_list))  # maximum absolute x position value
xRange = (-xMax * 1.1, xMax * 1.1)  # Range for the plot
xAx = np.linspace(*xRange, 1000)  # list of x values for normal distribution
p = norm.pdf(xAx, mu, sigma)  # normal distribution

# plotting
savename = "RandomWalkIn1D"
fig, ax = plt.subplots(1, 1, num=savename)
# new axis for p
ax2 = ax.twinx()
# Set ax's patch invisible
ax.patch.set_visible(False)
Пример #58
0
def make_qoi_plots(data_directory,
                   plot_directory,
                   config=None,
                   data_type='kde',
                   iterations='all'):

    data_types = ['kde', 'results']

    assert isinstance(config,str) \
            or isinstance(config,PyposmatConfigurationFile) \
            or config is None
    assert os.path.isdir(data_directory)
    assert isinstance(plot_directory, str)
    assert data_type in data_types

    if not os.path.exists(plot_directory):
        os.mkdir(plot_directory)

    # process config argument
    if isinstance(config, str):
        o_config = PyposmatConfigurationFile()
        o_config.read(filename=config)
    elif isinstance(config, PyposmatConfigurationFile):
        o_config = config
    elif config is None:
        o_config = PyposmatConfigurationFile()
        o_config.read(
            filename=os.path.join(data_directory, 'pyposmat.config.in'))
    else:
        m = 'config argument must be either a path string or a PyposmatConfigurationFile object'
        raise TypeError(m)

    if iterations == 'all':
        iterations = range(o_config.n_iterations)

    if data_type == 'kde':
        datas = [
            os.path.join(data_directory, 'pyposmat.kde.{}.out'.format(i + 1))
            for i in iterations
        ]
    elif data_type == 'results':
        datas = [
            os.path.join(data_directory, 'pyposmat.results.{}.out'.format(i))
            for i in iterations
        ]
    else:
        raise TypeError()

    plot_fns = []
    for qn in o_config.qoi_names:
        print('qoi_name:{}'.format(qn))
        plot_fn = os.path.join(plot_directory,
                               '{}.eps'.format(qn.replace('.', '_')))
        plot_fns.append(plot_fn)
        xlabel = qn
        ylabel = 'probability density'
        o_plot = PyposmatQoiPlot(config=o_config)

        print('\tdetermining x_lims')
        x_min = None
        x_max = None
        for data_fn in datas:
            x_pctl_min = 0.15
            x_pctl_max = 1. - x_pctl_min
            o_data = PyposmatDataFile()
            o_data.read(filename=data_fn)

            from scipy.stats import norm
            mu, std = norm.fit(o_data.df[qn])
            norm_rv = norm(loc=mu, scale=std)

            if x_min is None:
                x_min = norm_rv.ppf(x_pctl_min)
            else:
                x_min = min(norm_rv.ppf(x_pctl_min), x_min)

            if x_max is None:
                x_max = norm_rv.ppf(x_pctl_max)
            else:
                x_max = max(norm_rv.ppf(x_pctl_max), x_max)

        for i, data_fn in enumerate(datas):
            print('\t{}'.format(data_fn))
            o_data = PyposmatDataFile()
            o_data.read(filename=data_fn)

            label = 'i={}'.format(iterations[i] + 1)
            o_plot.initialize_data(data=o_data)
            o_plot.add_qoi_plot(qoi_name=qn,
                                x_limits=[x_min, x_max],
                                label=label,
                                color=plt.cm.cool(i / len(datas)))

        o_plot.add_qoitarget(qoi_name=qn)
        o_plot.ax.set_xlim(x_min, x_max)
        o_plot.legend()
        o_plot.ax.set_xlabel(xlabel)
        o_plot.ax.set_ylabel(ylabel)
        o_plot.ax.ticklabel_format(axis='both', style='sci', scilimits=(0, 4))
        o_plot.savefig(filename=plot_fn, dpi=1300)

    return plot_fns
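
# Hypothetical usage sketch (directory names are assumptions):
if __name__ == '__main__':
    plot_fns = make_qoi_plots(data_directory='data',
                              plot_directory='plots',
                              config=None,  # falls back to data/pyposmat.config.in
                              data_type='kde',
                              iterations='all')
    print('\n'.join(plot_fns))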
Пример #59
0
          'Price/Cash flow', 'Dividend Payout Ratio', 'Net Profit Margin', 'Gross Profit Margin', \
          'Cash Flow Margin', 'Return on Assets', 'Return on Equity', 'Return on Capital Employed', \
          'Gross Profit/Total Assets', 'Total Debt/Invested Capital', 'Inventory/Current Assets', \
          'Total Debt/Total Assets', 'Cash Ratio', 'Quick Ratio (Acid Test)', 'Current Ratio', 'Inventory Turnover', \
          'Asset Turnover', 'Price/Book', 'Dividend Yield', 'Volume Change (3mo)', \
          'Change in Shares Outstanding (3mo)', 'Total Volatility']
X = X[fields]
"""
yT = df.iloc[:, -1]
n = len(yT)
y = pd.Series(dtype='float64')
k = 523
inf = 100
for i in range(0, int(n / k)):
    vals = yT[i * k:(i + 1) * k]
    mu, std = norm.fit(vals)
    #bins = [-inf, mu - 2*std, mu + 2*std, inf]
    #bins = [-inf, mu - 2 * std, mu - std, mu, mu + std, mu + 2 * std, inf]
    #yCat = pd.cut(vals, bins=bins, labels=False)
    yCat = pd.qcut(vals, 4, labels=False)
    y = pd.concat([y, yCat])

X_trainDev, X_test, y_trainDev, y_test = train_test_split(X,
                                                          y,
                                                          test_size=0.2,
                                                          random_state=1)
X_train, X_dev, y_train, y_dev = train_test_split(X_trainDev,
                                                  y_trainDev,
                                                  test_size=0.25,
                                                  random_state=1)
Пример #60
0
#print(N_Sigma_i_med)
#print(np.amin(N_Sigma_i))
#print(np.amax(N_Sigma_i))

################

################Histogram 1
bins_wm = np.arange(np.amin(N_Sigma_i_wm) - 0.5, np.amax(N_Sigma_i_wm) + 0.5, 0.5)
bins2_wm = np.arange(np.amin(N_Sigma_i_wm) - 0.5, np.amax(N_Sigma_i_wm) + 0.5, 2)
#print(bins)
plt.hist(N_Sigma_i_wm, bins=bins_wm, alpha=0.4, histtype='stepfilled', density=True, edgecolor='black', linewidth=0.8)
plt.xlabel("$N_σ$")
plt.ylabel("probability density")

##############fitting a gaussian to the above histogram
parameters = norm.fit(N_Sigma_i_wm)
pdf_x_wm = np.linspace(np.amin(N_Sigma_i_wm) - 0.5,np.amax(N_Sigma_i_wm) + 0.5 ,500)
fitted_pdf_wm = norm.pdf(pdf_x_wm,loc = parameters[0],scale = parameters[1])

plt.plot(pdf_x_wm, fitted_pdf_wm, "black", linestyle="dashed", linewidth=1.5, label="Gaussian fit")
plt.legend()

plt.show()

###############histogram for median
bins_med = np.arange(np.amin(N_Sigma_i_med) - 0.5, np.amax(N_Sigma_i_med) + 0.5, 0.5)
bins2_med = np.arange(np.amin(N_Sigma_i_med) - 0.5, np.amax(N_Sigma_i_med) + 0.5, 2)
#print(bins)
binned_array, b, c = plt.hist(N_Sigma_i_med, bins=bins_med, alpha=0.4, histtype='stepfilled', density=True, edgecolor='black', linewidth=0.8)
plt.xlabel("$N_σ$")
plt.ylabel("probability density")