def macina(parse, target, filelst, infl, supl):
    pointsdict = {}
    for path in filelst:
        data = parse(path)
        points = extract_point(data, target)
        points = filter(lambda x: x > infl, points)
        for i, p in enumerate(points):
            l = pointsdict.get(i, [])
            l.append(p)
            pointsdict[i] = l
    stats = []
    if parse == parse_netperf:
        starts = pointsdict[0]
        ends = pointsdict[1]
        length = list(e - s for e, s in zip(ends, starts))
        print "netperf hole lengths:", length
        avg = utils.average(length)
        var = utils.variance(length)
        q1, median, q3 = utils.quartiles(length)
        stats.append((length, (avg, var, min(length), q1, median, q3, max(length))))
    else:
        for points in pointsdict.itervalues():
            print "mesh points:", points
            avg = utils.average(points)
            var = utils.variance(points)
            q1, median, q3 = utils.quartiles(points)
            stats.append((points, (avg, var, min(points), q1, median, q3, max(points))))
    return stats
def total_variance(code, bd, i):
    """find the total variance of the ith position, conditional on the i-1th position."""
    if i == 0:
        aa = bd[0]
        return variance([code[aa, x, y] for x in nucs for y in nucs])
    else:
        aa = bd[i - 1]
        first_term = mean(variance([code[aa, x, y] for y in nucs]) for x in nucs)
        second_term = variance([mean(code[aa, x, y] for y in nucs) for x in nucs])
        print first_term, second_term
        return first_term + second_term
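# Hedged illustration (added; not from the original source): total_variance above applies the
# law of total variance, Var(E) = E[Var(E | x)] + Var(E[E | x]). The self-contained toy check
# below verifies that identity numerically on a random 4x4 table, using the standard-library
# statistics module and population variance (the identity is exact in that case).
def _total_variance_demo():
    import random
    from statistics import mean as _mean, pvariance
    nucs = "ACGT"
    table = {(x, y): random.gauss(0, 1) for x in nucs for y in nucs}
    overall = pvariance([table[x, y] for x in nucs for y in nucs])
    within = _mean([pvariance([table[x, y] for y in nucs]) for x in nucs])   # E[Var(E | x)]
    between = pvariance([_mean([table[x, y] for y in nucs]) for x in nucs])  # Var(E[E | x])
    assert abs(overall - (within + between)) < 1e-9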
def predicted_vs_actual_Zb(code, bd):
    L = len(bd) + 1
    kmer_scores = [score_site(code, bd, kmer) for kmer in make_kmers(L)]
    pred_mu = sum([mean([code[aa, n1, n2] for (n1, n2) in nuc_pairs]) for aa in bd])
    pred_sigma_sq = sum([variance([code[aa, n1, n2] for (n1, n2) in nuc_pairs]) for aa in bd])
    # lognormal mean is exp(mu + sigma^2/2); pred_sigma_sq is already a variance
    pred_mean = exp(pred_mu + pred_sigma_sq / 2.0)
    obs_mu = mean(kmer_scores)
    obs_sigma_sq = variance(kmer_scores)
    print "mu:", pred_mu, obs_mu, (obs_mu - pred_mu) / obs_mu  # should be very low
    print "sigma_sq:", pred_sigma_sq, obs_sigma_sq, (obs_sigma_sq - pred_sigma_sq) / obs_sigma_sq  # should be very low
    Zb_obs = sum(exp(-kmer_score) for kmer_score in kmer_scores)
    Zb_pred = (4**L) * exp(-pred_mu + pred_sigma_sq / 2.0)
    print Zb_pred, Zb_obs
    print (Zb_obs - Zb_pred) / Zb_obs
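# Hedged sketch (added; not from the original source): Zb_pred above treats the 4**L k-mer
# scores as roughly normal with mean pred_mu and variance pred_sigma_sq, so that
# sum(exp(-score)) ~= 4**L * E[exp(-score)] ~= 4**L * exp(-mu + sigma_sq / 2). The toy check
# below reproduces that with a random 4-column score matrix; all names are local stand-ins.
def _zb_lognormal_demo(L=8):
    import random
    from itertools import product
    from math import exp
    from statistics import mean as _mean, pvariance
    matrix = [[random.gauss(0, 0.5) for _ in range(4)] for _ in range(L)]
    mu = sum(_mean(row) for row in matrix)
    sigma_sq = sum(pvariance(row) for row in matrix)
    Zb_obs = sum(exp(-sum(row[b] for row, b in zip(matrix, kmer)))
                 for kmer in product(range(4), repeat=L))
    Zb_pred = 4**L * exp(-mu + sigma_sq / 2.0)
    # relative error of the lognormal (second-order cumulant) approximation;
    # it shrinks as the per-position spread shrinks
    print(Zb_pred, Zb_obs, (Zb_obs - Zb_pred) / Zb_obs)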
def print_statistics(data, label):
    avg = utils.average(data)
    var = utils.variance(data)
    minp = min(data)
    q1st, median, q3rd = utils.quartiles(data)
    maxp = max(data)
    print("%s: avg=%.3f, var=%.3f, min=%.3f, 1stq=%.3f, median=%.3f, 3rdq=%.3f, max=%.3f"
          % (label, avg, var, minp, q1st, median, q3rd, maxp))
def occs(code, bd, sites):
    site_energies = [score_site(code, bd, site) for site in sites]
    # background = [score_site(code, bd, random_site(L)) for i in range(G)]
    mu = sum([mean([code[aa, b] for b in "ACGT"]) for aa in bd])
    sigma = sqrt(sum([variance([code[aa, b] for b in "ACGT"]) for aa in bd]))
    fg = sum(exp(-ep) for ep in site_energies)
    # test_bg = np.sum(np.exp(-background))
    bg = ln_mean(-mu, sigma) * G
    # print "error: %1.2f" % ((bg - test_bg)/test_bg * 100)
    return fg / (fg + bg)
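# Hedged note (added): occs approximates the background partition function as G times the
# expected Boltzmann factor of a random site, i.e. bg ~= G * E[exp(-ep)] with ep ~ N(mu, sigma**2),
# assuming ln_mean(-mu, sigma) returns the lognormal mean exp(-mu + sigma**2 / 2).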
def __init__(self, groups_num, group_size, input_size, group_labels=None,
             activation_function=relu):
    self.groups_num = groups_num
    self.group_size = group_size
    self.input_size = input_size
    self.group_labels = group_labels if group_labels else 2 ** np.arange(groups_num)
    self.activation_function = activation_function
    self.W_in = theano.shared(
        np.random.normal(loc=0.0, scale=variance(input_size),
                         size=(input_size, groups_num, group_size)).astype(floatX))
    # name="{}_W_in".format(groups_num))
    # Weights for recurrent connection within the group
    self.W_self = np.random.normal(
        loc=0.0, scale=0.01,
        size=(groups_num * group_size, groups_num, group_size)).astype(floatX)
    self.W_self_nullifier = np.zeros(self.W_self.shape, dtype=floatX)
    for dx in xrange(groups_num * group_size):
        for g in xrange(groups_num):
            if g >= (dx // group_size):
                self.W_self[dx][g] = 0.
            else:
                self.W_self_nullifier[dx, g] = 1.
                spng = rng.permutation(group_size)
                self.W_self[dx][g][spng[15:]] = 0.
    self.W_self = theano.shared(self.W_self, name="{}_W_self".format(groups_num))
    # self.W_self = theano.shared(np.random.normal(loc=0.0, scale=0.01,
    #                             size=(groups_num * group_size, groups_num, group_size)).astype(floatX),
    #                             name="{}_W_self".format(groups_num))
    #
    self.biases = theano.shared(np.zeros((groups_num, group_size), dtype=floatX))
    self.initial_activation = theano.shared(
        np.random.normal(loc=0.0, scale=variance(groups_num * group_size),
                         size=groups_num * group_size).astype(floatX),
        name='init_activation')
    self.params = [self.W_self, self.W_in, self.biases, self.initial_activation]
    self.timestep = theano.shared(1)
def test_fw_method2(mu, sigma, N, trials=10000):
    xs = [sum(exp(random.gauss(mu, sigma)) for i in range(N)) for j in xrange(trials)]
    M, V = mean(xs), variance(xs)
    print "obs M,V,log(V/(M**2)):", M, V, log(V / (M**2))
    ys = map(log, xs)
    m_obs, s_obs = mean(ys), sd(ys)
    m, s = fw_method(mu, sigma, N)
    print "pred:", m, s
    print "obs:", m_obs, s_obs
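# Hedged sketch (an assumption, not the original fw_method): test_fw_method2 appears to compare
# the observed log-sum statistics against a Fenton-Wilkinson approximation, which models a sum
# of N i.i.d. lognormals exp(N(mu, sigma^2)) as a single lognormal by matching its first two
# moments.
def fw_method_sketch(mu, sigma, N):
    from math import exp, log, sqrt
    M = N * exp(mu + sigma**2 / 2.0)                      # mean of the sum
    V = N * (exp(sigma**2) - 1) * exp(2 * mu + sigma**2)  # variance of the sum
    s_sq = log(1 + V / M**2)   # matched log-scale variance
    m = log(M) - s_sq / 2.0    # matched log-scale mean
    return m, sqrt(s_sq)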
def exercicio1():
    utils.print_header(1)
    x, y, labels = load_iris(os.path.join(constants.DATA_DIR, constants.FILENAME_IRIS_DATABASE))
    a, d = x.shape  # N samples, d attributes
    print('a)')
    for i in range(d):
        print('\tAttribute {}: Mean={:.3f}, Variance={:.3f}'.format(
            i, utils.mean(x[:, i]), utils.variance(x[:, i])))
    print('b)')
    for i in range(labels.shape[0]):
        print('\tClass {}: {}'.format(i, labels[i]))
        for j in range(d):
            print('\t\tAttribute {}: Mean={:.3f}, Variance={:.3f}'.format(
                j, utils.mean(x[(y == i)[:, 0], j]), utils.variance(x[(y == i)[:, 0], j])))
    print('c)')
    print('\tThe histograms will be displayed')
    f, ax = plt.subplots(1, d, sharex=False, sharey=True)
    for j in range(d):
        # show title only in the top
        ax[j].set_title('Attribute {}'.format(j))
        hist_bins = np.linspace(x[:, j].min(), x[:, j].max(), num=16)
        ax[j].hist(np.vstack([x[(y == i)[:, 0], j] for i in range(labels.shape[0])]).T,
                   bins=hist_bins, linewidth=0, color=['r', 'b', 'g'])
    plot_fname = os.path.join(constants.OUTPUT_DIR, 'exercicio1-c.pdf')
    plt.legend(labels, loc='upper center', bbox_to_anchor=(0.5, 0.07), ncol=3,
               bbox_transform=plt.gcf().transFigure)
    plt.tight_layout()
    plt.subplots_adjust(bottom=0.15)
    f.set_figheight(3)
    f.set_figwidth(8)
    plt.savefig(plot_fname, bbox_inches='tight')
    plt.show()
    print('\tThis plot was saved: {}'.format(plot_fname))
    print('d)')
    print('\tA plot will be displayed...')
    x_pca = utils.pca(x, n_components=2)
    # format the plot to mimic Slide 21 of Aula 3
    x_pca[:, 1] *= -1
    a = plt.scatter(x_pca[np.where(y == 0)[0], 1], x_pca[np.where(y == 0)[0], 0],
                    c='r', marker='^', lw=0, s=100)
    b = plt.scatter(x_pca[np.where(y == 1)[0], 1], x_pca[np.where(y == 1)[0], 0],
                    c='b', marker='o', lw=0, s=100)
    c = plt.scatter(x_pca[np.where(y == 2)[0], 1], x_pca[np.where(y == 2)[0], 0],
                    c='g', marker='s', lw=0, s=100)
    plt.xlim([-1.5, 1.5])
    plt.ylim([-4, 4])
    plt.legend((a, b, c), tuple(labels), loc='upper left', fontsize=10)
    plot_fname = os.path.join(constants.OUTPUT_DIR, 'exercicio1-d.pdf')
    plt.savefig(plot_fname, bbox_inches='tight')
    plt.show()
    print('\tThis plot was saved: {}'.format(plot_fname))
def __init__(self, shape, input_shape, activation_function=softmax):
    self.shape = shape
    self.input_shape = input_shape
    self.W_in = theano.shared(
        np.random.normal(loc=0.0, scale=variance(input_shape),
                         size=(input_shape, shape)).astype(floatX),
        name="output_W_in")
    self.biases = theano.shared(np.zeros(shape, dtype=floatX))
    # np.random.normal(loc=0.0, scale=variance(input_shape),
    #                  size=shape).astype(floatX),
    # name="output_biases")
    self.params = [self.W_in, self.biases]
    self.activation_function = activation_function
def occs(code, bd, sites):
    site_energies = [score_site(code, bd, site) for site in sites]
    # print "test background"
    # background = np.matrix([score_site(code, bd, random_site(L)) for i in trange(G)])
    # print "finish test background"
    mu = sum([mean([code[aa, b1, b2] for (b1, b2) in nuc_pairs]) for aa in bd])
    sigma = sqrt(sum([variance([code[aa, b1, b2] for (b1, b2) in nuc_pairs])
                      for aa in bd]))  # XXX revisit w/ bd_variance
    fg = sum(exp(-ep) for ep in site_energies)
    # test_bg = np.sum(np.exp(-background))
    bg = ln_mean(-mu, sigma) * G
    # print "error: %1.2f" % ((bg - test_bg)/test_bg * 100)
    return fg / (fg + bg)
def test_Zb_approx(trials=10, G=5 * 10**6, L=10):
    predicted_Zb = exp(L * sigma**2 / 2.0 + log(G))  # a priori prediction
    matrix = [[random.gauss(0, sigma) for j in range(4)] for i in range(L)]
    score_mu = sum(mean(row) for row in matrix)
    score_sigma_sq = sum(variance(row, correct=False) for row in matrix)
    # prediction given matrix: G * E[exp(-ep)] = G * exp(-mu + sigma_sq/2)
    predicted_Zb2 = exp(-score_mu + score_sigma_sq / 2 + log(G))
    Zbs = []
    for trial in trange(trials):
        eps = [sum(random.choice(row) for row in matrix) for i in range(G)]
        Zb = sum(exp(-ep) for ep in eps)
        Zbs.append(Zb)
    print "Predicted: %1.3e +/- %1.3e" % (predicted_Zb, sqrt(var_Zb(sigma, L, G)))
    print "Predicted2: %1.3e" % (predicted_Zb2)
    print "Actual: %1.3e +/- %1.3e" % (mean(Zbs), sd(Zbs))
def mu_summary_stat_experiment():
    """Can we correlate copy number with a summary statistic?"""
    trials = 100
    ep_mu = -2
    ep_sigma = 5
    G = 100
    ts = []
    copies = []
    eps = [random.gauss(ep_mu, ep_sigma) for i in range(G)]
    mus = interpolate(-10, 10, 1000)
    eta = mean(eps)
    gamma = 1.0 / variance(eps)
    print gamma
    plt.plot(*pl(lambda mu: mean_occ(eps, mu), mus))
    plt.plot(*pl(lambda mu: G * fd(eta, mu, beta=gamma), mus))
    plt.plot(*pl(lambda x: G / 2.0, mus))
def sample_uniform_energy(matrix):
    # Rejection-sample a site whose energy is (approximately) uniformly distributed:
    # random sites have roughly normal energies, so accepting with probability
    # proportional to 1 / pdf(ep) flattens that distribution. M bounds 1 / pdf over
    # [ep_min, ep_max] so the acceptance ratio stays <= 1.
    mu = sum(map(mean, matrix))
    sigma = sqrt(sum(map(lambda x: variance(x, correct=False), matrix)))
    ep_min = sum(map(min, matrix))
    ep_max = sum(map(max, matrix))
    M_min = 1 / norm.pdf(ep_min, mu, sigma)
    M_max = 1 / norm.pdf(ep_max, mu, sigma)
    M = max(M_min, M_max)
    trials = 0
    while True:
        trials += 1
        if trials % 10000 == 0:
            print trials
        site = random_site(L)
        ep = score_seq(matrix, site)
        ar = 1 / (M * norm.pdf(ep, mu, sigma))
        if random.random() < ar:
            return site
def plot_results(vhdl_values, numpy_values, axes_data, name):
    error = []
    for index in range(len(vhdl_values)):
        error.append(relative_error(numpy_values[index], vhdl_values[index]))
    error_mean = mean(error)
    variance_ = variance(error, error_mean)
    print('Error mean {} Variance {}'.format(error_mean, variance_))
    fig, axes = plt.subplots(1, 2)
    axes[0].plot(axes_data[:len(vhdl_values)], vhdl_values)
    axes[0].set_title(name)
    axes[0].set_ylabel('Angle')
    axes[0].set_xlabel('Angle')
    axes[1].plot(axes_data[:len(vhdl_values)], error, '--*')
    axes[1].set_title('Relative Error')
    axes[1].set_ylabel('Error (%)')
    axes[1].set_xlabel('Angle')
    axes[1].set_ylim(-0.8, 1.5)
    plt.show()
def log_ZS_sophisticated((matrix, mu, Ne)):
    L = len(matrix)
    nu = Ne - 1
    mat_mu = sum(map(mean, matrix))
    mat_sigma = sqrt(sum(map(lambda xs: variance(xs, correct=False), matrix)))
    dfde = lambda ep: -nu * exp(ep - mu) / (1 + exp(ep - mu)) - (ep - mat_mu) / mat_sigma**2
    ep_min = sum(map(min, matrix))
    ep_max = sum(map(max, matrix))
    try:
        mode = secant_interval(dfde, ep_min - 20, ep_max + 20)
    except:
        print (matrix, mu, Ne)
        raise Exception
    kappa = -nu * (exp(mu - mode) / (1 + exp(mu - mode))**2) - 1 / mat_sigma**2
    sigma_approx = sqrt(-1 / kappa)
    integrand = lambda ep: dnorm(ep, mat_mu, mat_sigma) * (1 + exp(ep - mu))**-nu
    gauss_max = dnorm(mode, mode, sigma_approx)
    integrand_max = integrand(mode)
    mean_ZS = integrand_max / gauss_max
    return L * log(4) + log(mean_ZS)
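# Hedged note (added; not from the source): log_ZS_sophisticated is a Laplace approximation.
# It finds the mode of integrand(ep) = dnorm(ep, mat_mu, mat_sigma) * (1 + exp(ep - mu))**-nu
# as the zero of its log-derivative dfde, fits a Gaussian with curvature kappa at that mode,
# and approximates
#     integral of integrand(ep) dep ~= integrand(mode) * sqrt(2*pi) * sigma_approx
#                                    = integrand_max / gauss_max,
# since gauss_max = 1 / (sqrt(2*pi) * sigma_approx). That quotient is the mean_ZS used above.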
def anicam(parse, filelst, infl, supl):
    points = []
    gpoints = []
    for path in filelst:
        offset = None
        tmp = []
        data = parse(path)
        for t, v in data:
            tmp.append((t, v))
            if t >= infl and t <= supl:
                points.append(v)
            if offset == None and v == 0 and t >= supl:
                offset = 40 - t
        if offset == None:
            raise ValueError("No zero value found")
        for t, v in tmp:
            gpoints.append(((t + offset), v))
    avg = utils.average(points)
    var = utils.variance(points)
    q1, median, q3 = utils.quartiles(points)
    return gpoints, (avg, var, min(points), q1, median, q3, max(points))
def experimentCrossValidate(dataModule, times):
    PI = dataModule.protectedIndex
    PV = dataModule.protectedValue
    originalTrain, originalTest = dataModule.load()
    allData = originalTrain + originalTest
    variances = [[], [], []]  # error, bias, ubif
    mins = [float('inf'), float('inf'), float('inf')]
    maxes = [-float('inf'), -float('inf'), -float('inf')]
    avgs = [0, 0, 0]
    for time in range(times):
        random.shuffle(allData)
        train = allData[:len(originalTrain)]
        test = allData[len(originalTrain):]
        output = statistics(train, test, PI, PV)
        print("\tavg, min, max, variance")
        print("error: %r" % (output[0],))
        print("bias: %r" % (output[1],))
        print("ubif: %r" % (output[2],))
        for i in range(len(output)):
            avgs[i] += (output[i][0] - avgs[i]) / (time + 1)
            mins[i] = min(mins[i], output[i][1])
            maxes[i] = max(maxes[i], output[i][2])
            variances[i].append(output[i][0])  # was too lazy to implement online alg
    # warning: this doesn't take into account the variance of each split
    for i in range(len(variances)):
        variances[i] = variance(variances[i])
    print("AGGREGATE STATISTICS:")
    print("\tavg, min, max, variance")
    print("error: %r" % ((avgs[0], mins[0], maxes[0], variances[0]),))
    print("bias: %r" % ((avgs[1], mins[1], maxes[1], variances[1]),))
    print("ubif: %r" % ((avgs[2], mins[2], maxes[2], variances[2]),))
def test_variance():
    assert utils.variance(x) == x.var(ddof=1)
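# Hedged sketch (an assumption, not the project's utils module): an implementation of variance
# consistent with the test above, i.e. the unbiased sample variance that numpy's var(ddof=1)
# computes.
def sample_variance(xs):
    n = len(xs)
    m = sum(xs) / float(n)
    return sum((x - m) ** 2 for x in xs) / (n - 1)  # n - 1 denominator (ddof=1)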
    L = len(matrix)
    for i in xrange(trials):
        ep = score_seq(matrix, random_site(L))
        acc += (1 / (1 + exp(ep - mu)))**(Ne - 1)
    mean_Zs = acc / trials
    return L * log(4) + log(mean_Zs)


def log_ZM_naive((matrix, mu, Ne), N, trials=1000):
    return N * log_ZS_naive((matrix, mu, Ne), trials=1000)


def log_ZS_hack((matrix, mu, Ne), N):
    L = len(matrix)
    mat_mu = sum(map(mean, matrix))
    mat_sigma = sqrt(sum(map(lambda xs: variance(xs, correct=False), matrix)))
    log_perc_below_threshold = norm.logcdf(mu - log((Ne - 1)), mat_mu, mat_sigma)
    log_Zs = L * log(4) + log_perc_below_threshold
    return log_Zs


def log_ZM_hack((matrix, mu, Ne), N):
    log_ZS = log_ZS_hack((matrix, mu, Ne), N)
    return N * log_ZS


def log_Z_hack((matrix, mu, Ne), N):
    L = len(matrix)
    mat_mu = sum(map(mean, matrix))
    mat_sigma = sqrt(sum(map(lambda xs: variance(xs, correct=False), matrix)))
               for (aa, n) in zip(bd, site)) + bi_code[bd[-1], site[-2], site[-1]]


def occs((li_code, bi_code), bd, sites):
    site_energies = [score_site((li_code, bi_code), bd, site) for site in sites]
    # print "test background"
    # background = np.matrix([score_site(code, bd, random_site(L)) for i in trange(G)])
    # print "finish test background"
    mu = (sum([mean([li_code[aa, b] for b in nucs]) for aa in bd]) +
          mean(bi_code[bd[-1], b1, b2] for b1, b2 in nuc_pairs))
    sigma = sqrt(sum([variance([li_code[aa, b] for b in nucs]) for aa in bd]) +
                 variance([bi_code[bd[-1], b1, b2] for b1, b2 in nuc_pairs]))  # XXX revisit w/ bd_variance
    fg = sum(exp(-ep) for ep in site_energies)
    # test_bg = np.sum(np.exp(-background))
    bg = ln_mean(-mu, sigma) * G
    # print "error: %1.2f" % ((bg - test_bg)/test_bg * 100)
    return fg / (fg + bg)


def fitness(code, (bd, sites)):
    return occs(code, bd, sites)


def moran_process(code, mutation_rate,
def aa_sigma(aa):
    return sqrt(variance([code[aa, b] for b in "ACGT"]))
    acc = 0
    nu = Ne - 1
    L = len(matrix)
    for i in xrange(trials):
        ep = score_seq(matrix, random_site(L))
        acc += (1 / (1 + exp(ep - mu)))**(Ne - 1)
    mean_Zs = acc / trials
    return L * log(4) + log(mean_Zs)


def log_ZM_naive((matrix, mu, Ne), N, trials=1000):
    return N * log_ZS_naive((matrix, mu, Ne), trials=1000)


def log_ZS_hack((matrix, mu, Ne), N):
    L = len(matrix)
    mat_mu = sum(map(mean, matrix))
    mat_sigma = sqrt(sum(map(lambda xs: variance(xs, correct=False), matrix)))
    log_perc_below_threshold = norm.logcdf(mu - log((Ne - 1)), mat_mu, mat_sigma)
    log_Zs = L * log(4) + log_perc_below_threshold
    return log_Zs


def log_ZM_hack((matrix, mu, Ne), N):
    log_ZS = log_ZS_hack((matrix, mu, Ne), N)
    return N * log_ZS


def log_Z_hack((matrix, mu, Ne), N):
    L = len(matrix)
    mat_mu = sum(map(mean, matrix))
    mat_sigma = sqrt(sum(map(lambda xs: variance(xs, correct=False), matrix)))
    log_perc_below_threshold = norm.logcdf(mu - log((Ne - 1)), mat_mu, mat_sigma)
    log_Zs = L * log(4) + log_perc_below_threshold
    ans_ref = ((N * L * log(4)) + log_perc_below_threshold)
    bd = [li_aa] * (L - 2) + [aa1, aa2, aa12]
    site = "".join([li_b] * (L - 2) + [b1, b2])
    sites = [site for i in range(n)]
    return bd, sites


def score_site((li_code, bi_code), bd, site):
    return sum(li_code[aa, n] for (aa, n) in zip(bd, site)) + bi_code[bd[-1], site[-2], site[-1]]


def occs((li_code, bi_code), bd, sites):
    site_energies = [score_site((li_code, bi_code), bd, site) for site in sites]
    # print "test background"
    # background = np.matrix([score_site(code, bd, random_site(L)) for i in trange(G)])
    # print "finish test background"
    mu = (sum([mean([li_code[aa, b] for b in nucs]) for aa in bd]) +
          mean(bi_code[bd[-1], b1, b2] for b1, b2 in nuc_pairs))
    sigma = sqrt(sum([variance([li_code[aa, b] for b in nucs]) for aa in bd]) +
                 variance([bi_code[bd[-1], b1, b2] for b1, b2 in nuc_pairs]))  # XXX revisit w/ bd_variance
    fg = sum(exp(-ep) for ep in site_energies)
    # test_bg = np.sum(np.exp(-background))
    bg = ln_mean(-mu, sigma) * G
    # print "error: %1.2f" % ((bg - test_bg)/test_bg * 100)
    return fg / (fg + bg)


def fitness(code, (bd, sites)):
    return occs(code, bd, sites)


def moran_process(code, mutation_rate, N=1000, turns=10000,
                  init=sample_species, mutate=mutate, fitness=fitness, pop=None):
    mean_rec_muts, mean_site_muts = mutation_rate / 3.0, mutation_rate
    site_mu = mean_site_muts / float(n * L)
    bd_mu = mean_rec_muts / float(L)
def finish(self):
    f = open("pos_variances.txt", "w")
    for i in range(len(self.posnames)):
        mean = utils.mean(self.counts[i])
        f.write(self.posnames[i] + "\t" + str(mean) + "\t" +
                str(utils.median(self.counts[i])) + "\t" +
                str(utils.variance(self.counts[i])) + "\t" +
                str(utils.moment(self.counts[i], mean, 3)) + "\t" +
                str(utils.moment(self.counts[i], mean, 4)) + "\t" +
                str(len([x for x in self.counts[i] if x > 0])) + "\n")
    f.close()
def bd_variance_ref(code, bd):
    kmer_scores = [score_site(code, bd, kmer) for kmer in make_kmers(L)]
    return variance(kmer_scores)
import numpy as np
import math
from utils import variance

x1 = np.array([3, 13, 19, 24, 29])
mean_x1 = np.mean(x1)
variance_x1 = variance(x1)
print("estimated population variance variance_x1 : ", variance_x1)

x2 = np.array([12, 10, 29, 33, 38])
mean_x2 = np.mean(x2)
variance_x2 = variance(x2)
print("estimated population variance variance_x2 : ", variance_x2)

num_measurements = x1.size
covariance = np.dot(x1 - mean_x1, x2 - mean_x2) / (num_measurements - 1)
print("covariance : ", covariance)

correlation = covariance / (math.sqrt(variance_x1) * math.sqrt(variance_x2))
print("correlation (1 , -1) : ", correlation)
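# Hedged cross-check (added; not part of the original script): numpy computes the same unbiased
# covariance and Pearson correlation directly, assuming utils.variance uses an n - 1 denominator
# like np.var(ddof=1).
print("np.cov covariance : ", np.cov(x1, x2, ddof=1)[0, 1])
print("np.corrcoef correlation : ", np.corrcoef(x1, x2)[0, 1])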
def site_sigma_from_matrix(matrix, correct=False):
    """return sd of site energies from matrix"""
    # agrees with estimate_site_sigma
    return sqrt(sum(map(lambda xs: variance(xs, correct=correct), matrix)))
fix = np.array(f["fix"]) scaling = float(np.array(f["scaling"])) from utils import MDSampler, loadmd from utils import variance, smile2mass loadrange = ["arr_" + str(i) for i in range(args.loadrange)] dataset = loadmd(args.dataset, loadrange, scaling, fix).to(device) SMILE = smile2mass(args.smile) if not args.double: dataset = dataset.to(torch.float32) if args.double: pVariance = torch.tensor( [variance(torch.tensor(item).double(), K) for item in SMILE], dtype=torch.float64).reshape(1, -1).repeat(3, 1).permute(1, 0).reshape(-1) else: pVariance = torch.tensor( [variance(torch.tensor(item), K) for item in SMILE], dtype=torch.float32).reshape(1, -1).repeat(3, 1).permute(1, 0).reshape(-1) target = MDSampler(dataset, pVariance=pVariance) def innerBuilder(num): maskList = [] for i in range(nlayers): if i % 2 == 0: b = torch.zeros(num)
def site_sigma_from_matrix(matrix):
    """return sd of site energies from matrix"""
    return sqrt(sum(map(lambda xs: variance(xs, correct=False), matrix)))
def bi_aa_sigma(aa1, aa2, aa12):
    return sqrt(variance([bi_code[aa12, b1, b2] + li_code[aa1, b2] + li_code[aa2, b2]
                          for b1, b2 in nuc_pairs]))
def predict_median_Zb_from_matrix(matrix, G):
    score_mu = sum(mean(row) for row in matrix)
    score_sigma_sq = sum(variance(row, correct=False) for row in matrix)
    predicted_Zb = exp(-score_mu + log(G))  # prediction given matrix
    return predicted_Zb
def aa_sigma(aa):
    return sqrt(variance([code[aa, b1, b2] for b1, b2 in nuc_pairs]))
def Z_approx(matrix, n, Ne, G=5 * 10**6):
    """use log fitness approximation to compute partition function"""
    nu = Ne - 1
    sigma_sq = sum(map(lambda xs: variance(xs, correct=False), matrix))
    Zb = Zb_from_matrix(matrix, G)
def test_variance(self):
    var = variance([0, 4], [8, None, None, None, 7], 7.5)
    self.assertEqual(var, 0.5)
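# Hedged sketch (an assumption about the helper under test, not the project's actual code):
# a variance(indices, values, mean) consistent with the test above -- average the squared
# deviations of values[i] at the given indices around the supplied mean with an n - 1
# denominator: ((8 - 7.5)**2 + (7 - 7.5)**2) / (2 - 1) == 0.5.
def variance_sketch(indices, values, mean):
    deviations = [(values[i] - mean) ** 2 for i in indices]
    return sum(deviations) / (len(deviations) - 1)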