def sample(self, n=1):
    """Draw ``n`` values from a triangular distribution on [low, high].

    The peak of the triangle is placed at the geometric mean of
    ``self.low`` and ``self.high``.

    Args:
        n: Number of values to draw (passed to scipy as ``size``).

    Returns:
        The array returned by ``triang.rvs`` for ``size=n``.
    """
    lower = self.low
    upper = self.high
    width = upper - lower
    peak = sqrt(lower * upper)
    # scipy's shape parameter is the peak position as a fraction of the width.
    shape = (peak - lower) / width
    return triang.rvs(c=shape, loc=lower, scale=width, size=n)
def generateToy():
    """Draw toy samples from reference distributions and plot them.

    Seeds NumPy's global RNG with 12345, builds a DataFrame with four
    columns of 500 samples each ('powerlaw', 'triangle', 'uniform',
    'powerlaw_signal'), plots a histogram of each column on its own axis
    (with the analytic pdf overlaid where one is defined), shows the
    figure and saves it to 'plots/toy_plots.png'.

    The ``hist`` helper is defined outside this function; it is assumed to
    accept ``normed``, ``fitness`` and ``ax`` keywords -- confirm against
    its definition.
    """
    np.random.seed(12345)
    fig, ax = plt.subplots(4, sharex=True)
    powerlaw_arg = 2
    triang_arg = 0.7
    n_samples = 500
    # Floor division: ``size`` must be an integer and ``n_samples / 2`` is a
    # float on Python 3, which the samplers reject.
    half = n_samples // 2

    # powerlaw.pdf(x, a) = a * x**(a-1); with a = 2 the pdf is the line 2*x.
    frozen_powerlaw = powerlaw(powerlaw_arg)
    # Triangle with its peak at 0.7: rises from loc to (loc + c*scale), then
    # falls from (loc + c*scale) to (loc + scale).
    frozen_triangle = triang(triang_arg)
    # uniform(loc, scale) covers [loc, loc + scale].
    frozen_uniform = uniform(0.2, 0.5)
    frozen_uniform2 = uniform(0.3, 0.2)
    x = np.linspace(0, 1)

    # The draws below keep their original order so the seeded output is
    # reproducible.
    signal = np.random.normal(0.5, 0.1, half)
    data_frame = pd.DataFrame({
        'powerlaw': powerlaw.rvs(powerlaw_arg, size=n_samples),
        'triangle': triang.rvs(triang_arg, size=n_samples),
        'uniform': np.concatenate((uniform.rvs(0.2, 0.5, size=half),
                                   uniform.rvs(0.3, 0.2, size=half))),
        'powerlaw_signal': np.concatenate((powerlaw.rvs(powerlaw_arg, size=half),
                                           signal)),
    })

    ax[0].plot(x, frozen_powerlaw.pdf(x), 'k-', lw=2, label='powerlaw pdf')
    hist(data_frame['powerlaw'], bins=100, normed=True, histtype='stepfilled',
         alpha=0.2, label='100 bins', ax=ax[0])
    ax[0].legend(loc='best')

    ax[1].plot(x, frozen_triangle.pdf(x), 'k-', lw=2, label='triangle pdf')
    hist(data_frame['triangle'], bins=100, normed=True, histtype='stepfilled',
         alpha=0.2, label='100 bins', ax=ax[1])
    hist(data_frame['triangle'], bins='blocks', fitness='poly_events',
         normed=True, histtype='stepfilled', alpha=0.2, label='b blocks',
         ax=ax[1])
    ax[1].legend(loc='best')

    hist(data_frame['powerlaw_signal'], bins=100, normed=True,
         histtype='stepfilled', alpha=0.2, label='100 bins', ax=ax[2])
    ax[2].legend(loc='best')

    # The 'uniform' column is an equal mixture of the two uniforms, so its
    # density is the average of the two pdfs (their plain sum integrates to 2).
    mixture_pdf = 0.5 * (frozen_uniform.pdf(x) + frozen_uniform2.pdf(x))
    ax[3].plot(x, mixture_pdf, 'k-', lw=2, label='uniform pdf')
    hist(data_frame['uniform'], bins=100, normed=True, histtype='stepfilled',
         alpha=0.2, label='100 bins', ax=ax[3])
    ax[3].legend(loc='best')

    plt.show()
    fig.savefig('plots/toy_plots.png')
def sample(self):
    """Draw one value from a triangular distribution.

    The triangle spans ``self.low90`` to ``self.high90`` and peaks at
    ``self.geomean``.

    Returns:
        A single draw from ``triang.rvs``.
    """
    lower = self.low90
    upper = self.high90
    peak = self.geomean
    width = upper - lower
    # scipy's shape parameter is the peak position as a fraction of the width.
    shape = (peak - lower) / width
    return triang.rvs(c=shape, loc=lower, scale=width)
fontweight='bold') ax.set_ylabel("Count", fontweight='bold') ax.set_xlabel("h, m", fontweight='bold') # Skin will be uniformly distributed skin_dist = uniform.rvs(loc=0, scale=10, size=n_trials, random_state=seed) ax = sns.distplot(skin_dist, kde=False, ax=axs[1, 0]) axs[1, 0].set_title("Input distribution for skin damage \n", fontweight='bold') ax.set_ylabel("Count", fontweight='bold') ax.set_xlabel("S", fontweight='bold') # Reservoir pressure will be use the triangular distribution # See https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.triang.html pres_dist = triang.rvs(c=0.5, loc=2750, scale=500, size=n_trials, random_state=seed) ax = sns.distplot(pres_dist, kde=False, ax=axs[1, 1]) axs[1, 1].set_title("Input distribution for reservoir pressure \n", fontweight='bold') ax.set_ylabel("Count", fontweight='bold') ax.set_xlabel("P, psia", fontweight='bold') plt.tight_layout() plt.show() # Custom function for performing PROSPER calculation def calc_OPR(OpenServe, k, h, S, P): DoSet(OpenServe, "PROSPER.SIN.IPR.Single.ResPerm", k)
# Generate three Bernoulli random numbers (p is set earlier in the script)
print(bernoulli.rvs(p, size=3))  # e.g. [0 1 0]

# Poisson
lmda = 2
x = 5
print(poisson.pmf(x, lmda))
print(poisson.cdf(x, lmda))
print(poisson.rvs(lmda, size=3))

# Uniform on [a, b]: scipy parameterises it as loc=a, scale=b - a
a = 3
b = 10
x = 10
print(uniform.pdf(x, loc=a, scale=(b - a)))  # = 1 / 7
print(uniform.cdf(x, loc=a, scale=(b - a)))  # = 1.0
print(uniform.rvs(loc=a, scale=(b - a), size=3))

# Exponential: scale is 1 / mu
mu = 2
x = 2
print(expon.pdf(x, scale=(1 / mu)))
print(expon.cdf(x, scale=(1 / mu)))
print(expon.rvs(scale=(1 / mu), size=3))

# Triangular on [a, b] with mode c
a = 1
b = 10
c = 7
# scipy's shape parameter must lie in [0, 1]: it is the mode's position as a
# fraction of the width, not the mode itself. Passing c = 7 directly is a
# domain error.
shape = float(c - a) / (b - a)
print(triang.rvs(shape, loc=a, scale=(b - a), size=3))
def distribution_compared_individual(data, sample_size=10000):
    '''
    Compare each subject's degree distribution with a set of standard
    distributions: Uniform, Normal, Gamma, Exponential, Poisson, Triangular,
    LogNormal and Weibull.

    For every subject the degree distribution is centred on its mean, a
    random sample of ``sample_size`` values is drawn from each reference
    distribution, and ``scipy.stats.kstest`` is called with the centred data
    and that sample. For every reference distribution an overlaid histogram
    is saved under ``<path>/reports/``, and the list of test results is
    written to a text file in the same directory.

    params:
        data = mapping/table with a 'Subject' column and a 'Degree_dist'
               column of equal length. The first element of each 'Subject'
               entry is used as the output path (NOTE(review): assumed to be
               a directory string -- confirm against the caller).
        sample_size = number of random values to generate per distribution
                      test--default val = 10000

    Returns None; results are written to disk.
    '''
    from scipy import stats
    from scipy.stats import uniform, norm, gamma, expon, poisson, triang, lognorm
    import numpy as np

    # Read from the ``data`` argument rather than a module-level ``graphs``
    # global, so the function works on whatever the caller passes in.
    for jj, ll in zip(data['Subject'], data['Degree_dist']):
        path = jj[0]
        centered_deg_mean = np.asarray(ll) - np.mean(ll)
        lowest = np.min(centered_deg_mean)
        highest = np.max(centered_deg_mean)
        middle = np.median(centered_deg_mean)

        # Samples are drawn in a fixed order so seeded runs stay reproducible.
        # scipy's uniform covers [loc, loc + scale], so the scale is the
        # width of the data range, not its upper bound.
        data_uniform = uniform.rvs(size=sample_size, loc=lowest,
                                   scale=highest - lowest)
        data_normal = norm.rvs(size=sample_size, loc=middle, scale=1)
        data_gamma = gamma.rvs(a=5, size=sample_size)
        data_expon = expon.rvs(scale=1, loc=middle, size=sample_size)
        data_poisson = poisson.rvs(mu=3, size=sample_size)
        data_triang = triang.rvs(c=0.158, size=sample_size)
        data_lognorm = lognorm.rvs(s=0.954, size=sample_size)
        data_weibull = np.random.weibull(a=5.0, size=sample_size)

        # (name used in plot titles/files, label used in the report, sample)
        comparisons = [
            ('Uniform', 'Uniform:', data_uniform),
            ('Normal', 'Normal', data_normal),
            ('Gamma', 'Gamma', data_gamma),
            ('Exponential', 'Exponential', data_expon),
            ('Poisson', 'Poisson', data_poisson),
            ('Triangular', 'Triang', data_triang),
            ('LogNormal', 'Lognormal', data_lognorm),
            ('Weibull', 'Weibull', data_weibull),
        ]

        ks_test_results_individual = [
            [label, stats.kstest(centered_deg_mean, drawn)]
            for _, label, drawn in comparisons
        ]

        for plot_name, _, drawn in comparisons:
            sns.set_context('talk')
            ax = sns.histplot(drawn, bins=50, kde=True, color='skyblue')
            ax.set(title=plot_name + ' Sample Degree Distribution',
                   ylabel='Frequency')
            # Overlay the observed data on the same axes before saving.
            sns.histplot(centered_deg_mean, bins=50, kde=True,
                         color='forestgreen')
            plt.savefig(path + '/reports/' + plot_name
                        + ' Sample Degree Distribution.png')
            plt.clf()

        with open(path + '/reports/' + 'ks_test_results' + jj[0] + '.txt',
                  'w') as filehandle:
            for listitem in ks_test_results_individual:
                filehandle.write('%s\n' % listitem)
    return
def generateToy():
    """Draw toy samples from reference distributions and plot them.

    Seeds NumPy's global RNG with 12345, builds a DataFrame with four
    columns of 500 samples each ('powerlaw', 'triangle', 'uniform',
    'powerlaw_signal'), plots a histogram of each column on its own axis
    (with the analytic pdf overlaid where one is defined), shows the
    figure and saves it to 'plots/toy_plots.png'.

    The ``hist`` helper is defined outside this function; it is assumed to
    accept ``normed``, ``fitness`` and ``ax`` keywords -- confirm against
    its definition.
    """
    np.random.seed(12345)
    fig, ax = plt.subplots(4, sharex=True)
    powerlaw_arg = 2
    triang_arg = 0.7
    n_samples = 500
    # Floor division: ``size`` must be an integer and ``n_samples / 2`` is a
    # float on Python 3, which the samplers reject.
    half = n_samples // 2

    # powerlaw.pdf(x, a) = a * x**(a-1); with a = 2 the pdf is the line 2*x.
    frozen_powerlaw = powerlaw(powerlaw_arg)
    # Triangle with its peak at 0.7: rises from loc to (loc + c*scale), then
    # falls from (loc + c*scale) to (loc + scale).
    frozen_triangle = triang(triang_arg)
    # uniform(loc, scale) covers [loc, loc + scale].
    frozen_uniform = uniform(0.2, 0.5)
    frozen_uniform2 = uniform(0.3, 0.2)
    x = np.linspace(0, 1)

    # The draws below keep their original order so the seeded output is
    # reproducible.
    signal = np.random.normal(0.5, 0.1, half)
    data_frame = pd.DataFrame({
        'powerlaw': powerlaw.rvs(powerlaw_arg, size=n_samples),
        'triangle': triang.rvs(triang_arg, size=n_samples),
        'uniform': np.concatenate((uniform.rvs(0.2, 0.5, size=half),
                                   uniform.rvs(0.3, 0.2, size=half))),
        'powerlaw_signal': np.concatenate((powerlaw.rvs(powerlaw_arg, size=half),
                                           signal)),
    })

    ax[0].plot(x, frozen_powerlaw.pdf(x), 'k-', lw=2, label='powerlaw pdf')
    hist(data_frame['powerlaw'], bins=100, normed=True, histtype='stepfilled',
         alpha=0.2, label='100 bins', ax=ax[0])
    ax[0].legend(loc='best')

    ax[1].plot(x, frozen_triangle.pdf(x), 'k-', lw=2, label='triangle pdf')
    hist(data_frame['triangle'], bins=100, normed=True, histtype='stepfilled',
         alpha=0.2, label='100 bins', ax=ax[1])
    hist(data_frame['triangle'], bins='blocks', fitness='poly_events',
         normed=True, histtype='stepfilled', alpha=0.2, label='b blocks',
         ax=ax[1])
    ax[1].legend(loc='best')

    hist(data_frame['powerlaw_signal'], bins=100, normed=True,
         histtype='stepfilled', alpha=0.2, label='100 bins', ax=ax[2])
    ax[2].legend(loc='best')

    # The 'uniform' column is an equal mixture of the two uniforms, so its
    # density is the average of the two pdfs (their plain sum integrates to 2).
    mixture_pdf = 0.5 * (frozen_uniform.pdf(x) + frozen_uniform2.pdf(x))
    ax[3].plot(x, mixture_pdf, 'k-', lw=2, label='uniform pdf')
    hist(data_frame['uniform'], bins=100, normed=True, histtype='stepfilled',
         alpha=0.2, label='100 bins', ax=ax[3])
    ax[3].legend(loc='best')

    plt.show()
    fig.savefig('plots/toy_plots.png')
# Plot the pdf between the 1st and 99th percentiles of the distribution
# (c and ax are defined earlier in the script).
x = np.linspace(triang.ppf(0.01, c), triang.ppf(0.99, c), 100)
ax.plot(x, triang.pdf(x, c), 'r-', lw=5, alpha=0.6, label='triang pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:
rv = triang(c)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:
vals = triang.ppf([0.001, 0.5, 0.999], c)
np.allclose([0.001, 0.5, 0.999], triang.cdf(vals, c))  # True

# Generate random numbers:
r = triang.rvs(c, size=1000)

# And compare the histogram. ``density`` replaces the ``normed`` keyword,
# which matplotlib has removed.
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()
def distribution_compared_group(data, sample_size=10000, reports_path='.'):
    '''
    Compare the group-level degree distribution with a set of standard
    distributions: Uniform, Normal, Gamma, Exponential, Poisson, Triangular,
    LogNormal and Weibull.

    The mean of every entry in ``data['Degree_dist']`` is taken, the
    resulting vector is centred on its own mean, a random sample of
    ``sample_size`` values is drawn from each reference distribution, and
    ``scipy.stats.kstest`` is called with the centred means and that sample.
    For every reference distribution an overlaid histogram is saved in
    ``reports_path``, and the test results are written to
    ``ks_test_results_group.txt`` in the same directory.

    params:
        data = mapping/table with a 'Degree_dist' column of degree
               distributions
        sample_size = number of random values to generate per distribution
                      test--default val = 10000
        reports_path = directory the plots and the results file are written
                       to--default val = '.'

    returns:
        list of [label, kstest result] pairs, one per reference distribution
    '''
    from scipy import stats
    from scipy.stats import uniform, norm, gamma, expon, poisson, triang, lognorm
    import numpy as np
    import statistics

    group_deg_array = np.array(
        [statistics.mean(jj) for jj in data['Degree_dist']])
    group_centered_deg_mean = group_deg_array - np.mean(group_deg_array)
    lowest = np.min(group_centered_deg_mean)
    highest = np.max(group_centered_deg_mean)
    middle = np.median(group_centered_deg_mean)

    # Samples are drawn in a fixed order so seeded runs stay reproducible.
    # scipy's uniform covers [loc, loc + scale], so the scale is the width of
    # the data range, not its upper bound.
    data_uniform = uniform.rvs(size=sample_size, loc=lowest,
                               scale=highest - lowest)
    data_normal = norm.rvs(size=sample_size, loc=middle, scale=1)
    data_gamma = gamma.rvs(a=5, size=sample_size)
    data_expon = expon.rvs(scale=1, loc=middle, size=sample_size)
    data_poisson = poisson.rvs(mu=3, size=sample_size)
    data_triang = triang.rvs(c=0.158, size=sample_size)
    data_lognorm = lognorm.rvs(s=0.954, size=sample_size)
    data_weibull = np.random.weibull(a=5.0, size=sample_size)

    # (name used in plot titles/files, label used in the report, sample)
    comparisons = [
        ('Uniform', 'Uniform:', data_uniform),
        ('Normal', 'Normal', data_normal),
        ('Gamma', 'Gamma', data_gamma),
        ('Exponential', 'Exponential', data_expon),
        ('Poisson', 'Poisson', data_poisson),
        ('Triangular', 'Triang', data_triang),
        ('LogNormal', 'Lognormal', data_lognorm),
        ('Weibull', 'Weibull', data_weibull),
    ]

    ks_test_results_group = [
        [label, stats.kstest(group_centered_deg_mean, drawn)]
        for _, label, drawn in comparisons
    ]

    for plot_name, _, drawn in comparisons:
        sns.set_context('talk')
        ax = sns.histplot(drawn, bins=50, kde=True, color='skyblue')
        ax.set(title=plot_name + ' Sample Degree Distribution',
               ylabel='Frequency')
        # Overlay the observed data on the same axes before saving.
        sns.histplot(group_centered_deg_mean, bins=50, kde=True,
                     color='forestgreen')
        plt.savefig(reports_path + '/' + plot_name
                    + ' Sample Degree Distribution.png')
        plt.clf()

    with open(reports_path + '/' + 'ks_test_results_group.txt',
              'w') as filehandle:
        for listitem in ks_test_results_group:
            filehandle.write('%s\n' % listitem)
    return ks_test_results_group
for x in range(0, random.randint(0, 6) ): S[Z[x]]=0 FC, FA, r, sC, sA, z1, z2=S File2.write( "@NAME:SCAFFOLDS VERSION:0.4.1 FORMAT:TEXT CONCATENATED\n") File2.write( "@NAME LENGTH\n") File2.write( "scaffold_1 100000\n") File2.write( "@END_TABLE\n") File2.write( "@NAME:GENOTYPES VERSION:0.4.1 FORMAT:TEXT CONCATENATED INDEXED\n") File2.write( "@SCFNAME POS MN_FREQ Sample_1 Sample_2\n") for x in range(0, MAX): P=0 while(P<MINP or P>MAXP): P = round(triang.rvs(0.1), 2) Q=1-P td=random.random() mmmm, Mmmm, MMmm, mmMm, MmMm, MMMm, mmMM, MmMM, MMMM=getMs(P, S) if( min([mmmm, Mmmm, MMmm, mmMm, MmMm, MMMm, mmMM, MmMM, MMMM])<-0.0001): skipped+=1 continue Mmmm+=mmmm MMmm+=Mmmm mmMm+=MMmm MmMm+=mmMm
def _weighted_random_scale(sel, M, total, dist):
    """Return the scale parameter WeightedRandom passes to the ``sel`` sampler."""
    if dist != "no":
        if sel == "exp":
            # Scale at which 95% of the exponential's mass lies below M.
            return (-float(M)) / math.log(0.05)
        if sel == "filter":
            return M / 5
        return M + int(round(M / 10))

    # dist == "no": start wide and shrink one step per 100 units of ``total``
    # above 100.
    ctr = 100
    if sel == "exp":
        floor = (-float(M)) / math.log(0.05)
        scale = M
        while ctr < total and scale > floor:
            scale -= 500
            ctr += 100
        return floor if scale < floor else scale
    if sel == "filter":
        scale = M
        # Stop at 1000 as before, and never step to a non-positive scale:
        # scipy rejects negative scales, and a zero scale yields only zeros.
        while ctr < total and scale != 1000 and scale - 500 > 0:
            scale -= 500
            ctr += 100
        return scale
    scale = M * 2
    while ctr < total and scale > M:
        scale -= 1000
        ctr += 100
    if scale <= M:
        scale = M + int(round(M / 5))
    return scale


def _draw_unique(draw, M, N):
    """Call ``draw`` repeatedly until N distinct values below M are collected."""
    picked = []
    seen = set()
    for _ in range(0, N):
        value = draw()
        while value >= M or value in seen:
            value = draw()
        seen.add(value)
        picked.append(value)
    return picked


def WeightedRandom(sel, M, N, total, dist):
    """Return N distinct integers in [0, M), drawn by rejection sampling.

    Args:
        sel: Sampler to use: "exp" (``expon``), "filter" (``gennorm`` with
            shape 5) or "triang" (``triang`` with shape 0).
        M: Exclusive upper bound of the returned values.
        N: Number of distinct values to return.
        total: When negative, values are drawn uniformly with
            ``random.randint(0, M - 1)`` and ``sel`` only has to be valid.
            Otherwise, when ``dist == "no"``, a larger ``total`` shrinks the
            sampler's scale step by step.
        dist: "no" selects the ``total``-dependent scale; any other value
            selects a fixed scale derived from M.

    Returns:
        A list of N distinct ints, in the order they were accepted. Draws
        are truncated to int and negated when negative; values >= M or
        already picked are redrawn.

    Raises:
        ValueError: If ``sel`` is not a known sampler, or if N distinct
            values cannot exist below M (N > M), which would otherwise
            loop forever.
    """
    samplers = {
        "exp": lambda scale: expon.rvs(scale=scale),
        "filter": lambda scale: gennorm.rvs(5, scale=scale),
        "triang": lambda scale: triang.rvs(0, loc=0, scale=scale),
    }
    if sel not in samplers:
        raise ValueError("unknown sel %r; expected one of %s"
                         % (sel, sorted(samplers)))
    if N > 0 and N > M:
        raise ValueError("cannot draw %d distinct values below %d" % (N, M))

    if total < 0:
        return _draw_unique(lambda: random.randint(0, M - 1), M, N)

    scale = _weighted_random_scale(sel, M, total, dist)
    sampler = samplers[sel]
    return _draw_unique(lambda: abs(int(sampler(scale))), M, N)