def test_rate_Nt_by_2_conductance(self):
    """Rate balance on a 1x6x1 chain: pores 0-2 air, pores 3-5 water."""
    pn = op.network.Cubic(shape=[1, 6, 1])
    geo = op.geometry.StickAndBall(network=pn)
    air = op.phases.Air(network=pn)
    water = op.phases.Water(network=pn)
    mixture = op.phases.MultiPhase(phases=[air, water], project=pn.project)
    mixture.set_occupancy(phase=air, pores=[0, 1, 2])
    mixture.set_occupancy(phase=water, pores=[3, 4, 5])
    # Constant water:air partition coefficient across the interface.
    partition_coef = 0.5
    mixture.set_binary_partition_coef(propname="throat.partition_coef",
                                      phases=[water, air],
                                      model=op.models.misc.constant,
                                      value=partition_coef)
    mixture._set_automatic_throat_occupancy()
    _ = op.physics.Standard(network=pn, phase=mixture, geometry=geo)
    transport = op.algorithms.GenericTransport(network=pn, phase=mixture)
    transport.settings['conductance'] = 'throat.diffusive_conductance'
    transport.settings['quantity'] = 'pore.mole_fraction'
    transport.set_rate_BC(pores=0, values=1.235)
    transport.set_value_BC(pores=5, values=0.0)
    transport.run()
    # The outlet must sink exactly what the inlet injects.
    assert sp.isclose(transport.rate(pores=5)[0], -1.235)
    # Rate at air-water interface throat (#2) must match imposed rate
    assert sp.isclose(transport.rate(throats=2)[0], 1.235)
    # Rate at interface pores (#2 @ air-side, #3 @ water-side) must be 0
    assert sp.isclose(transport.rate(pores=2)[0], 0.0)
    assert sp.isclose(transport.rate(pores=3)[0], 0.0)
    # Net rate must always be zero at steady state conditions
    assert sp.isclose(transport.rate(pores=pn.Ps), 0.0)
def test_rate_single(self):
    """A uniform inlet rate must be recovered (negated) at the outlet face."""
    rt = op.algorithms.ReactiveTransport(network=self.net, phase=self.phase)
    rt.settings['conductance'] = 'throat.diffusive_conductance'
    rt.settings['quantity'] = 'pore.mole_fraction'
    inlet = self.net.pores("left")
    outlet = self.net.pores("right")
    rt.set_rate_BC(pores=inlet, values=1.235)
    rt.set_value_BC(pores=outlet, values=0.0)
    rt.run()
    # Everything injected on the left leaves through the right face.
    outflow = rt.rate(pores=outlet)[0]
    assert sp.isclose(outflow, -1.235 * outlet.size)
    # Net rate must always be zero at steady state conditions
    assert sp.isclose(rt.rate(pores=self.net.Ps), 0.0)
def test_rate_multiple(self):
    """Overlapping rate BCs add up; the sinks absorb the total injection."""
    gt = op.algorithms.GenericTransport(network=self.net, phase=self.phase)
    gt.settings['conductance'] = 'throat.diffusive_conductance'
    gt.settings['quantity'] = 'pore.mole_fraction'
    gt.set_rate_BC(pores=[0, 1, 2, 3], values=1.235)
    # Note that pore = 0 is assigned two rate values (rate = sum(rates))
    gt.set_rate_BC(pores=[5, 6, 19, 35, 0], values=3.455)
    sinks = [50, 51, 52, 53]
    gt.set_value_BC(pores=sinks, values=0.0)
    gt.run()
    total_in = 1.235 * 4 + 3.455 * 5  # 4, 5 are number of pores
    assert sp.isclose(gt.rate(pores=sinks)[0], -total_in)
    # Net rate must always be zero at steady state conditions
    assert sp.isclose(gt.rate(pores=self.net.Ps), 0.0)
def test_mutliphase_partition_coef(self):
    """Check pairwise and global partition coefficients of a 3-phase mixture.

    NOTE: the "mutliphase" typo in the name is kept on purpose -- renaming
    would change test discovery.
    """
    mixture = op.phases.MultiPhase(network=self.net,
                                   phases=[self.water, self.air, self.oil])
    x, y, z = self.net["pore.coords"].T
    # Phase arrangement (y-axis): W | A | O | W
    ps_water = self.net.Ps[(y <= 3) + (y >= 8)]
    ps_air = self.net.Ps[(y > 3) * (y < 6)]
    ps_oil = self.net.Ps[(y >= 6) * (y < 8)]
    mixture.set_occupancy(phase=self.water, pores=ps_water)
    mixture.set_occupancy(phase=self.air, pores=ps_air)
    mixture.set_occupancy(phase=self.oil, pores=ps_oil)
    K_air_water = 2.0
    K_air_oil = 1.8
    K_water_oil = 0.73
    const = op.models.misc.constant
    for pair, coef in [([self.air, self.water], K_air_water),
                       ([self.air, self.oil], K_air_oil),
                       ([self.water, self.oil], K_water_oil)]:
        mixture.set_binary_partition_coef(propname="throat.partition_coef",
                                          phases=pair, model=const, value=coef)
    assert sp.isclose(mixture["throat.partition_coef.air:water"].mean(), K_air_water)
    assert sp.isclose(mixture["throat.partition_coef.air:oil"].mean(), K_air_oil)
    assert sp.isclose(mixture["throat.partition_coef.water:oil"].mean(), K_water_oil)
    K_global = mixture["throat.partition_coef.all"]

    def interface_throats(ps_a, ps_b):
        # Throats whose two ends sit in the two different phase regions.
        ta = self.net.find_neighbor_throats(ps_a, mode="xor")
        tb = self.net.find_neighbor_throats(ps_b, mode="xor")
        return sp.intersect1d(ta, tb)

    Ts_water_air = interface_throats(ps_water, ps_air)
    Ts_air_oil = interface_throats(ps_air, ps_oil)
    Ts_oil_water = interface_throats(ps_oil, ps_water)
    # K_global for water-air interface must be 1/K_air_water
    assert sp.isclose(K_global[Ts_water_air].mean(), 1 / K_air_water)
    # K_global for air-oil interface must be K_air_oil (not 1/K_air_oil)
    assert sp.isclose(K_global[Ts_air_oil].mean(), K_air_oil)
    # K_global for oil-water interface must be 1/K_water_oil
    assert sp.isclose(K_global[Ts_oil_water].mean(), 1 / K_water_oil)
    # K_global for single-phase regions must be 1.0
    interface_all = sp.hstack((Ts_water_air, Ts_air_oil, Ts_oil_water))
    Ts_single_phase = sp.setdiff1d(self.net.Ts, interface_all)
    assert sp.isclose(K_global[Ts_single_phase].mean(), 1.0)
def progress_one_step(self):
    """Advance the marching solution by one step: read state at index ``k``,
    write state at ``k + 1``.  All state arrays live on ``self``.

    The stoichiometric factors below (dcCs = -dcCH4, dcH2 = -2*dcCH4) are
    consistent with methane decomposition CH4 -> C(s) + 2 H2 -- TODO
    confirm against the enclosing class.
    """
    k = self.k
    kn = self.k + 1
    # Net reaction rate from forward/backward rate constants; the
    # 1e-6 / 1e6 factors look like concentration unit conversions
    # (mol/m3 <-> mol/cm3) -- verify.
    self.r[k] = -(self.kf[k] * (self.c_CH4[k] * 1e-6)**self.nf
                  - self.kb[k] * (self.c_H2[k] * 1e-6)**self.mb) * 1e6
    dcCH4 = self.r[k] * self.dt[k]
    # Stoichiometry: one solid carbon and two H2 per CH4 consumed.
    dcCs = -dcCH4
    dcH2 = -2 * dcCH4
    # Molar flows at the next station; N2 is inert (N_N2 is a constant).
    self.N_CH4[kn] = (self.c_CH4[k] + dcCH4) * self.Q[k]
    self.N_H2[kn] = (self.c_H2[k] + dcH2) * self.Q[k]
    self.N_Cs[kn] = (self.c_Cs[k] + dcCs) * self.Q[k]
    self.N_g[kn] = self.N_CH4[kn] + self.N_H2[kn] + self.N_N2
    # Gas-phase mole fractions; they must sum to one.
    self.x_CH4[kn] = self.N_CH4[kn] / self.N_g[kn]
    self.x_H2[kn] = self.N_H2[kn] / self.N_g[kn]
    self.x_N2[kn] = self.N_N2 / self.N_g[kn]
    assert sp.isclose(self.x_CH4[kn] + self.x_H2[kn] + self.x_N2[kn], 1)
    # Mixture molar mass.
    self.M[kn] = self.x_CH4[kn]*self.M_CH4 + self.x_N2[kn]*self.M_N2 \
        + self.x_H2[kn]*self.M_H2
    # Pressure drop over dx; the 150/1.75 coefficients match the Ergun
    # packed-bed correlation -- TODO confirm.
    self.p[kn] = self.p[k] - self.dx * \
        (150*self.mu[k]*self.Vc[k]/self.Lc**2
         + 1.75*self.rho[k]*self.Vc[k]**2/self.Lc)
    # Ideal-gas mixture density.
    self.rho[kn] = self.p[kn] * self.M[kn] / (self.R_u * self.T[kn])
    # Component mass flows.
    self.m_CH4[kn] = self.N_CH4[kn] * self.M_CH4
    self.m_H2[kn] = self.N_H2[kn] * self.M_H2
    self.m_Cs[kn] = self.N_Cs[kn] * self.M_Cs
    self.m_g[kn] = self.m_CH4[kn] + self.m_H2[kn] + self.m_N2
    # Component densities from helper functions defined elsewhere in the
    # module (rhoCH4/rhoH2/rhoN2).
    self.rhoCH4[kn] = rhoCH4(self.T[kn], self.p[kn])
    self.rhoH2[kn] = rhoH2(self.T[kn], self.p[kn])
    self.rhoN2[kn] = rhoN2(self.T[kn], self.p[kn])
    # Volumetric flow as the sum of the component volumes.
    self.Q[kn] = self.m_CH4[kn] / self.rhoCH4[kn] \
        + self.m_H2[kn] / self.rhoH2[kn] \
        + self.m_N2 / self.rhoN2[kn]
    # Superficial-to-interstitial velocity via porosity eps and area Ac.
    self.Vc[kn] = self.Q[kn] / (self.eps * self.Ac)  # / 0.34 #/ 0.37
    self.dt[kn] = self.dx / self.Vc[kn]
    # Concentrations for the next step.
    self.c_CH4[kn] = self.N_CH4[kn] / self.Q[kn]
    self.c_H2[kn] = self.N_H2[kn] / self.Q[kn]
    self.c_N2[kn] = self.N_N2 / self.Q[kn]
    self.c_Cs[kn] = self.N_Cs[kn] / self.Q[kn]
def gen_unrelated_eur_1k_data(
        input_file='/home/bjarni/TheHonestGene/faststorage/1Kgenomes/phase3/1k_genomes_hg.hdf5',
        out_file='/home/bjarni/PCMA/faststorage/1_DATA/1k_genomes/1K_genomes_phase3_EUR_unrelated.hdf5',
        maf_thres=0.01, max_relatedness=0.05, K_thinning_frac=0.1, debug=False):
    """Extract a set of mutually unrelated EUR individuals from a 1000
    Genomes HDF5 file and write their filtered genotypes to ``out_file``.

    Python 2 code (print statements, ``izip``).  Relies on module-level
    names ``sp`` (scipy), ``h5py``, ``ok_nts`` and ``izip``.

    Pass 1 estimates a kinship matrix K from a thinned set of common SNPs;
    individuals with pairwise K > ``max_relatedness`` are greedily dropped.
    Pass 2 re-reads each chromosome, keeps the retained individuals and
    the well-formed, polymorphic SNPs, and stores them in a new HDF5 file.
    """
    h5f = h5py.File(input_file)
    num_indivs = len(h5f['indivs']['continent'])
    eur_filter = h5f['indivs']['continent'][...] == 'EUR'
    num_eur_indivs = sp.sum(eur_filter)
    # NOTE(review): the comma prints the literal '%d' followed by the
    # number; '%' formatting was probably intended.
    print 'Number of European individuals: %d', num_eur_indivs
    K = sp.zeros((num_eur_indivs, num_eur_indivs), dtype='float64')
    num_snps = 0
    # Std-dev threshold equivalent to the MAF cutoff for 0/1/2 genotypes.
    std_thres = sp.sqrt(2.0 * (1 - maf_thres) * (maf_thres))
    print 'Calculating kinship'
    # --- Pass 1: kinship from a thinned SNP set ---
    for chrom in range(1, 23):
        print 'Working on Chromosome %d' % chrom
        chrom_str = 'chr%d' % chrom
        print 'Loading SNPs and data'
        snps = sp.array(h5f[chrom_str]['calldata']['snps'][...], dtype='int8')
        print 'Loading NTs'
        ref_nts = h5f[chrom_str]['variants']['REF'][...]
        alt_nts = h5f[chrom_str]['variants']['ALT'][...]
        print 'Filtering multi-allelic SNPs'
        multi_allelic_filter = sp.negative(
            h5f[chrom_str]['variants']['MULTI_ALLELIC'][...])
        snps = snps[multi_allelic_filter]
        ref_nts = ref_nts[multi_allelic_filter]
        alt_nts = alt_nts[multi_allelic_filter]
        if K_thinning_frac < 1:
            print 'Thinning SNPs for kinship calculation'
            # Random subsample of SNPs to speed up the kinship estimate.
            thinning_filter = sp.random.random(len(snps)) < K_thinning_frac
            snps = snps[thinning_filter]
            alt_nts = alt_nts[thinning_filter]
            ref_nts = ref_nts[thinning_filter]
        print 'Filter SNPs with missing NT information'
        nt_filter = sp.in1d(ref_nts, ok_nts)
        nt_filter = nt_filter * sp.in1d(alt_nts, ok_nts)
        if sp.sum(nt_filter) < len(nt_filter):
            snps = snps[nt_filter]
        print 'Filtering non-European individuals'
        snps = snps[:, eur_filter]
        print 'Filtering SNPs with MAF <', maf_thres
        snp_stds = sp.std(snps, 1)
        maf_filter = snp_stds.flatten() > std_thres
        snps = snps[maf_filter]
        snp_stds = snp_stds[maf_filter]
        print '%d SNPs remaining after all filtering steps.' % len(snps)
        print 'Normalizing SNPs'
        snp_means = sp.mean(snps, 1)
        norm_snps = (snps - snp_means[sp.newaxis].T) / snp_stds[sp.newaxis].T
        print 'Updating kinship'
        K += sp.dot(norm_snps.T, norm_snps)
        num_snps += len(norm_snps)
        # Normalised SNPs should give mean diagonal ~1.
        assert sp.isclose(
            sp.sum(sp.diag(K)) / (num_snps * num_eur_indivs), 1.0)
    K = K / float(num_snps)
    print 'Kinship calculation done using %d SNPs\n' % num_snps
    # Filter individuals: greedily drop the later individual of any pair
    # whose estimated relatedness exceeds the threshold.
    print 'Filtering individuals'
    keep_indiv_set = set(range(num_eur_indivs))
    for i in range(num_eur_indivs):
        if i in keep_indiv_set:
            for j in range(i + 1, num_eur_indivs):
                if K[i, j] > max_relatedness:
                    if j in keep_indiv_set:
                        keep_indiv_set.remove(j)
    keep_indivs = list(keep_indiv_set)
    keep_indivs.sort()
    print 'Retained %d individuals\n' % len(keep_indivs)
    # Checking that everything is ok!
    K_ok = K[keep_indivs]
    K_ok = K_ok[:, keep_indivs]
    assert (K_ok - sp.tril(K_ok)).max() < max_relatedness
    # Boolean mask over ALL individuals for the retained EUR subset.
    indiv_filter = sp.zeros(num_indivs, dtype='bool8')
    indiv_filter[(sp.arange(num_indivs)[eur_filter])[keep_indivs]] = 1
    assert sp.sum(indiv_filter) == len(keep_indivs)
    # --- Pass 2: store filtered genotypes in a new file ---
    print 'Now storing data.'
    oh5f = h5py.File(out_file, 'w')
    indiv_ids = h5f['indivs']['indiv_ids'][indiv_filter]
    oh5f.create_dataset('indiv_ids', data=indiv_ids)
    for chrom in range(1, 23):
        print 'Working on Chromosome %d' % chrom
        chrom_str = 'chr%d' % chrom
        print 'Loading SNPs and data'
        snps = sp.array(h5f[chrom_str]['calldata']['snps'][...], dtype='int8')
        snp_ids = h5f[chrom_str]['variants']['ID'][...]
        positions = h5f[chrom_str]['variants']['POS'][...]
        print 'Loading NTs'
        ref_nts = h5f[chrom_str]['variants']['REF'][...]
        alt_nts = h5f[chrom_str]['variants']['ALT'][...]
        print 'Filtering multi-allelic SNPs'
        multi_allelic_filter = sp.negative(
            h5f[chrom_str]['variants']['MULTI_ALLELIC'][...])
        snps = snps[multi_allelic_filter]
        ref_nts = ref_nts[multi_allelic_filter]
        alt_nts = alt_nts[multi_allelic_filter]
        positions = positions[multi_allelic_filter]
        snp_ids = snp_ids[multi_allelic_filter]
        print 'Filter individuals'
        snps = snps[:, indiv_filter]
        print 'Filter SNPs with missing NT information'
        nt_filter = sp.in1d(ref_nts, ok_nts)
        nt_filter = nt_filter * sp.in1d(alt_nts, ok_nts)
        if sp.sum(nt_filter) < len(nt_filter):
            snps = snps[nt_filter]
            ref_nts = ref_nts[nt_filter]
            alt_nts = alt_nts[nt_filter]
            positions = positions[nt_filter]
            snp_ids = snp_ids[nt_filter]
        print 'filter monomorphic SNPs'
        snp_stds = sp.std(snps, 1)
        mono_morph_filter = snp_stds > 0
        snps = snps[mono_morph_filter]
        ref_nts = ref_nts[mono_morph_filter]
        alt_nts = alt_nts[mono_morph_filter]
        positions = positions[mono_morph_filter]
        snp_ids = snp_ids[mono_morph_filter]
        snp_stds = snp_stds[mono_morph_filter]
        snp_means = sp.mean(snps, 1)
        if debug:
            # NOTE(review): this branch looks broken: (a) k_snps/k_snp_stds
            # are undefined when K_thinning_frac >= 1; (b) sp.mean(k_snps)
            # lacks axis=1, so k_snp_means is a scalar and the
            # [sp.newaxis].T indexing below would raise; (c) num_snps and
            # num_eur_indivs still hold pass-1 counts -- verify.
            if K_thinning_frac < 1:
                print 'Thinning SNPs for kinship calculation'
                thinning_filter = sp.random.random(len(snps)) < K_thinning_frac
                k_snps = snps[thinning_filter]
                k_snp_stds = snp_stds[thinning_filter]
            print 'Filtering SNPs with MAF <', maf_thres
            maf_filter = k_snp_stds.flatten() > std_thres
            k_snps = k_snps[maf_filter]
            k_snp_stds = k_snp_stds[maf_filter]
            k_snp_means = sp.mean(k_snps)
            print 'Verifying that the Kinship makes sense'
            norm_snps = (k_snps - k_snp_means[sp.newaxis].T) / k_snp_stds[sp.newaxis].T
            K = sp.dot(norm_snps.T, norm_snps)
            num_snps += len(norm_snps)
            if sp.isclose(
                    sp.sum(sp.diag(K)) / (num_snps * num_eur_indivs),
                    1.0) and (K - sp.tril(K)).max() < (max_relatedness * 1.5):
                print 'It looks OK!'
            else:
                raise Exception('Kinship looks wrong?')
        # Pair up reference/alternative nucleotides per SNP.
        nts = sp.array([[nt1, nt2] for nt1, nt2 in izip(ref_nts, alt_nts)])
        print 'Writing to disk'
        cg = oh5f.create_group(chrom_str)
        cg.create_dataset('snps', data=snps)
        cg.create_dataset('snp_means', data=snp_means[sp.newaxis].T)
        cg.create_dataset('snp_stds', data=snp_stds[sp.newaxis].T)
        cg.create_dataset('snp_ids', data=snp_ids)
        cg.create_dataset('positions', data=positions)
        cg.create_dataset('nts', data=nts)
        oh5f.flush()
        print 'Done writing to disk'
        # centimorgans = h5f[chrom_str]['centimorgans'][...]
        # cg.create_dataset('centimorgans',data=centimorgans)
        #
        # centimorgan_rates = h5f[chrom_str]['centimorgan_rates'][...]
        # cg.create_dataset('centimorgan_rates',data=centimorgan_rates)
    oh5f.close()
    h5f.close()
    print 'Done'
Reference: Least Angle Regression, Efron et al., 2004, The Annals of Statistics """ # Initial data signs = scipy.zeros(size_predictor) betas = scipy.zeros(size_predictor) indices_predictor = scipy.arange(size_predictor) vecy_fitted = scipy.zeros_like(vecy) beta_lars = [[0] * size_predictor] for k in range(size_predictor): vecc = (vecy - vecy_fitted) @ matx vecc_abs = scipy.absolute(vecc) maxc = vecc_abs.max() mask_maxc = scipy.isclose(vecc_abs, maxc) indices_predictor = np.reshape(indices_predictor, mask_maxc.shape, order='C') active = indices_predictor[mask_maxc] signs = scipy.where(vecc.item(0, active[0]) > 0, 1, -1) matx_active = signs * matx[:, active] u, s, vh = scipy.linalg.svd(matx_active, full_matrices=False) matg = vh.T @ scipy.diag(s**2) @ vh matg_inv = vh.T @ scipy.diag(scipy.reciprocal(s**2)) @ vh vec1 = scipy.ones(len(active)) scalara = (matg_inv.sum())**(-.5) vecw = scalara * matg_inv.sum(axis=1)
# Standalone demo script (Python 2): check that the eigenpairs returned by
# scipy.linalg.eig satisfy A v = lambda v for a random 5x5 matrix.
import scipy as sc
import numpy as np
import scipy.linalg as lin

# Random matrix with iid standard-normal entries.
matrix = sc.random.normal(0, 1, [5, 5])
print matrix
print
# NOTE(review): "eigevectors" is a typo for "eigenvectors" (kept here).
eigenvalues, eigevectors = lin.eig(matrix)
print eigenvalues,
print eigevectors
# NOTE(review): shadows the builtin `id` and is never used afterwards.
id = sc.eye(5, 5)
# eig returns eigenvectors as columns; transpose so iteration yields one
# eigenvector per row.
eigevectors = sc.transpose(eigevectors)
for eigenvalue, eigevector in zip(eigenvalues, eigevectors):
    # Each line should print True: A v is close to lambda v.
    print sc.isclose(sc.dot(matrix, eigevector), sc.dot(eigenvalue, eigevector)).all()
def get_data(label):
    """Load a material's sorption measurements and fit a cubic spline.

    Parameters
    ----------
    label : str
        Material code (e.g. "A4"); matched against the prefix of the
        directory names under ``measurementData/materialProperties/``.

    Returns
    -------
    tuple of arrays ``(RH, w, xi)``
        Relative-humidity grid (extended beyond the measured range when
        needed), the fitted moisture content ``w(RH)`` and its derivative
        ``xi = dw/dRH``.

    Bug fixed: ``xi0`` was used in the extrapolation formulas while its
    assignment was commented out, raising NameError whenever the measured
    range did not reach RH = 1; the endpoint slope is now restored.
    """
    material_root_path = "measurementData/materialProperties/"
    material_paths = glob.glob(material_root_path + "*")
    # Materials needing special spline knots / anchor points
    # ("hankalat" is Finnish for "the difficult ones").
    hankalat = [
        "measurementData/materialProperties/C5_Luja_A",
        "measurementData/materialProperties/A4_Isover_RKL-EJ",
        "measurementData/materialProperties/A7_Vital-levy",
        "measurementData/materialProperties/A3_Isover_RKL",
        "measurementData/materialProperties/D5_Pellavaeriste_T3",
        "measurementData/materialProperties/A12_Tuulensuojaluja",
        "measurementData/materialProperties/D3_Vital",
    ]
    # Pick the first directory whose prefix matches the label.
    for material_path in material_paths:
        if os.path.split(material_path)[-1].split("_")[0] == label:
            break
    # Read the measured (RH, w) pairs; the first two lines are headers.
    RHr = []
    wr = []
    with open(os.path.join(material_path, "wc.txt"), "r") as ifile:
        for line in ifile.readlines()[2:]:
            parts = [round(float(part), 4) for part in line.split(",")]
            RHr.append(parts[0])
            wr.append(parts[1])
    # Drop the last point if it jumps by 2x or more (treated as an outlier).
    if wr[-1] >= 2 * wr[-2]:
        w = wr[:-1]
        RH = RHr[:-1]
    else:
        w = wr[:]
        RH = RHr[:]
    # Prepend anchor points and build the clamped cubic-spline knot vector.
    if material_path in hankalat:
        w = [(w[0] + w[1]) / 2, w[2]] + w
        RH = [0.22, 0.51] + RH
        xpoints = [min(RH)] * 4 + [0.61] + [max(RH)] * 4
    else:
        w = [w[0]] + w
        RH = [0.1] + RH
        xpoints = [min(RH)] * 4 + [0.8] + [max(RH)] * 4

    def tomin(x):
        # Residuals of the candidate spline against the measurements.
        tcki = [xpoints, list(x) + [0] * 5, 3]
        return (w - interpolate.splev(RH, tcki, der=0))

    res = optimize.least_squares(tomin, [1] * 5)
    tck = [xpoints, list(res.x) + [0] * 5, 3]
    RHsp = sp.linspace(min(RH), max(RH), 1000)
    wsp = interpolate.splev(RHsp, tck, der=0)
    xisp = interpolate.splev(RHsp, tck, der=1)
    # Moisture content cannot be negative.
    wsp[wsp < 0] = 0
    if sp.isclose(RHsp[-1], 1):
        # Measurements already reach RH = 1: nothing to extrapolate.
        RHextra = []
        wextra = []
        xiextra = []
    else:
        # Extrapolate towards RH = 2 with a quadratic that matches the
        # spline's endpoint value and slope and passes through the last
        # raw measurement (x1, y1).
        RHextra = sp.linspace(RHsp[-1] + (RHsp[-1] - RHsp[-2]), 2, 200)
        xi0 = xisp[-1]  # endpoint slope (restored; was commented out -> NameError)
        x0 = RHsp[-1]
        x1 = RHr[-1]
        y0 = wsp[-1]
        y1 = wr[-1]
        a = (x0**2*x1*xi0 + x0**2*y1 - x0*x1**2*xi0 - 2*x0*x1*y0 + x1**2*y0) / (x0**2 - 2*x0*x1 + x1**2)
        b = (-x0**2*xi0 + 2*x0*y0 - 2*x0*y1 + x1**2*xi0) / (x0**2 - 2*x0*x1 + x1**2)
        # NOTE(review): this denominator differs from a's and b's
        # ((x0 - x1)**2) -- confirm the derivation.
        c = (-xi0*(x0 - x1) + y0 - y1) / (x0**2 - 2*x0*(x0 - x1) - x1**2)
        wextra = a + b*RHextra + c*RHextra**2
        xiextra = b + 2*c*RHextra
    return (sp.array(list(RHsp) + list(RHextra)),
            sp.array(list(wsp) + list(wextra)),
            sp.array(list(xisp) + list(xiextra)))
def isInSpace(self, p):
    """Return a column vector of booleans: True where point(s) ``p`` lie in
    the affine space (i.e. where projecting the point leaves it unchanged).

    ``p`` is an array-like of points (one per row); the result has shape
    ``(n_points, 1)``.
    """
    # scipy's NumPy aliases (scipy.array/isclose/all/expand_dims) were
    # removed from the scipy namespace in SciPy 1.12; use numpy directly.
    import numpy as np
    p = np.array(p)
    # A point is in the space iff it equals its own projection.
    close = np.isclose(self.getProjection(p), p)
    inSpace = np.all(close, axis=1)
    return np.expand_dims(inSpace, 1)
def score(self, X_train, T_train, X_test, T_test):
    """Return the fraction of predictions that (numerically) match T_test.

    Predictions come from ``self.predict(X_train, T_train, X_test)``;
    matches are counted with ``isclose`` and averaged, so the result is
    the accuracy in [0, 1].
    """
    # scipy's NumPy aliases (sp.mean/sp.isclose) were removed from the
    # scipy namespace in SciPy 1.12; use numpy directly.
    import numpy as np
    Y = self.predict(X_train, T_train, X_test)
    return np.mean(np.isclose(Y, T_test))
def conservation(a_i, a_f):
    """Return the string "True" if every element of ``a_f`` is numerically
    close to the corresponding element of ``a_i``, else "False".

    The string return type is kept for backward compatibility with callers.
    Improvements over the original: ``scipy.isclose`` was removed from the
    scipy namespace in SciPy 1.12 (numpy is used instead), and the fragile
    ``False not in array`` membership idiom is replaced by an explicit
    ``.all()`` — which also makes scalar inputs work.
    """
    import numpy as np
    # print("a_i: " +str(a_i[0]))
    # print("a_f: " +str(a_f[0]))
    return str(bool(np.isclose(a_i, a_f).all()))
def calc_kinship(input_file='Data/1Kgenomes/1K_genomes_v3.hdf5',
                 out_file='Data/1Kgenomes/kinship.hdf5',
                 maf_thres=0.01, figure_dir='', figure_fn='',
                 snp_filter_frac=1, indiv_filter_frac=1,
                 chrom_ok_snp_dict=None):
    """Compute genome-wide and leave-one-chromosome-out (LOCO) kinship and
    SNP-covariance matrices from a 1000-genomes style HDF5 file.

    Python 2 code (print statements).  Relies on module-level names
    ``sp`` (scipy), ``h5py``, ``linalg``, ``hu`` and ``get_genotype_data``.

    For each chromosome the LOCO matrices are assembled from all the other
    chromosomes and eigen-decomposed; the Cholesky factor of the
    pseudo-inverse SNP covariance is also stored.  Chromosomes whose
    decompositions fail stay in ``not_done`` and the whole outer ``while``
    pass is repeated for them.  Results (plus overall PCs) are written to
    ``out_file`` and returned as a dict.
    """
    import matplotlib
    matplotlib.use('Agg')  # select headless backend before importing pyplot
    import matplotlib.pyplot as plt
    # NOTE(review): the input filename is missing from this message.
    print 'Loading Genotype from '
    in_h5f = h5py.File(input_file)
    # eur_filter = in_h5f['indivs']['continent'][...] == 'EUR'
    # num_indivs = sp.sum(eur_filter)
    indiv_ids = in_h5f['indiv_ids'][...]
    # Optional random down-sampling of individuals.
    indiv_filter = None
    if indiv_filter_frac < 1:
        indiv_filter = sp.array(sp.random.random(len(indiv_ids)) < indiv_filter_frac,
                                dtype='bool8')
        indiv_ids = indiv_ids[indiv_filter]
    assert len(sp.unique(indiv_ids)) == len(indiv_ids)
    num_indivs = len(indiv_ids)
    ok_chromosome_dict = {}
    not_done = set(range(1, 23))
    # Outer retry loop: runs until every chromosome has been decomposed.
    while len(not_done) > 0:
        chromosome_dict = {}
        K_all_snps = sp.zeros((num_indivs, num_indivs), dtype='float32')
        num_all_snps = 0
        sum_indiv_genotypes_all_chrom = sp.zeros(num_indivs, dtype='float32')
        # snp_cov_all_snps = sp.zeros((num_indivs, num_indivs), dtype='float64')
        print 'Calculating kinship'
        # Pass 1: per-chromosome unscaled kinship and SNP covariance.
        for chrom in range(1, 23):
            print 'Working on Chromosome %d' % chrom
            chrom_str = 'chr%d' % chrom
            snp_filter = None
            if snp_filter_frac < 1:
                snp_filter = sp.random.random(len(in_h5f[chrom_str]['snps'])) < snp_filter_frac
            g_dict = get_genotype_data(in_h5f, chrom, maf_thres,
                                       indiv_filter=indiv_filter,
                                       snp_filter=snp_filter,
                                       randomize_sign=True,
                                       snps_signs=None,
                                       chrom_ok_snp_dict=chrom_ok_snp_dict)
            norm_snps = g_dict['norm_snps']
            sum_indiv_genotypes = sp.sum(g_dict['norm_snps'], 0)
            sum_indiv_genotypes_all_chrom += sum_indiv_genotypes
            print 'Calculating chromosome kinship'
            K_unscaled = sp.array(sp.dot(norm_snps.T, norm_snps), dtype='float32')
            # Normalised SNPs should give a mean diagonal of ~1.
            assert sp.isclose(sp.sum(sp.diag(K_unscaled)) / (len(norm_snps) * num_indivs), 1.0), '..bug'
            K_all_snps += K_unscaled
            num_all_snps += len(norm_snps)
            print 'SNP-cov normalisation'
            sum_indiv_genotypes = sp.sum(norm_snps, 0)
            # NOTE(review): this adds the same per-chromosome genotype sum
            # a second time (already added above) -- confirm intended.
            sum_indiv_genotypes_all_chrom += sum_indiv_genotypes
            mean_indiv_genotypes = sum_indiv_genotypes / len(norm_snps)
            # Centre per individual before the covariance product.
            norm_snps = norm_snps - mean_indiv_genotypes
            print 'Calculating SNP covariance unscaled'
            snp_cov_unscaled = sp.array(sp.dot(norm_snps.T, norm_snps), dtype='float32')
            # snp_cov_all_snps += snp_cov_unscaled
            print 'Storing and updating things'
            chromosome_dict[chrom_str] = {'K_unscaled': K_unscaled,
                                          'num_snps': len(norm_snps),
                                          'sum_indiv_genotypes': sum_indiv_genotypes,
                                          'snp_cov_unscaled': snp_cov_unscaled,
                                          'snps_signs': g_dict['snps_signs']}
            if snp_filter_frac < 1:
                chromosome_dict[chrom_str]['snp_filter'] = snp_filter
        # snp_cov_all_snps = snp_cov_all_snps / float(num_all_snps)
        # K_all_snps = K_all_snps / float(num_all_snps)
        # print 'K_all_snps.shape: %s' % str(K_all_snps.shape)
        # print 'snp_cov_all_snps.shape: %s' % str(snp_cov_all_snps.shape)
        # print 'sp.diag(snp_cov_all_snps): %s' % str(sp.diag(snp_cov_all_snps))
        # print 'sp.mean(sp.diag(snp_cov_all_snps)_: %s' % str(sp.mean(sp.diag(snp_cov_all_snps)))
        # print 'Full kinship and snp-covariance calculation done using %d SNPs\n' % num_all_snps
        mean_indiv_genotypes_all_chrom = sum_indiv_genotypes_all_chrom / num_all_snps
        print 'Individual gentoype mean found:'
        print mean_indiv_genotypes_all_chrom
        print 'Calculating chromosome-wise SNP-covariance and kinship matrices'
        # Pass 2: leave-one-chromosome-out matrices and decompositions.
        for chrom in range(1, 23):
            if chrom in not_done:
                print 'Working on Chromosome %d' % chrom
                chrom_str = 'chr%d' % chrom
                snp_cov_leave_one_out = sp.zeros((num_indivs, num_indivs), dtype='float32')
                K_leave_one_out = sp.zeros((num_indivs, num_indivs), dtype='float32')
                num_snps_used = 0
                sum_indiv_genotypes = sp.zeros(num_indivs, dtype='float32')
                # Sum the contributions of every other chromosome.
                for chrom2 in range(1, 23):
                    chrom2_str = 'chr%d' % chrom2
                    if chrom2 != chrom:
                        sum_indiv_genotypes += chromosome_dict[chrom2_str]['sum_indiv_genotypes']
                        K_leave_one_out += chromosome_dict[chrom2_str]['K_unscaled']
                        num_snps_used += chromosome_dict[chrom2_str]['num_snps']
                assert sp.isclose(sp.sum(sp.diag(K_leave_one_out)) / (num_snps_used * num_indivs), 1.0), '..bug'
                mean_indiv_genotypes = sum_indiv_genotypes / num_snps_used
                # Re-read the other chromosomes with the same random signs
                # and SNP filters as pass 1 to build the LOCO covariance.
                for chrom2 in range(1, 23):
                    chrom2_str = 'chr%d' % chrom2
                    if chrom2 != chrom:
                        print 'Loading SNPs'
                        snps_signs = chromosome_dict[chrom2_str]['snps_signs']
                        snp_filter = chromosome_dict[chrom2_str]['snp_filter']
                        g_dict = get_genotype_data(in_h5f, chrom2, maf_thres,
                                                   indiv_filter=indiv_filter,
                                                   snp_filter=snp_filter,
                                                   randomize_sign=True,
                                                   snps_signs=snps_signs,
                                                   chrom_ok_snp_dict=chrom_ok_snp_dict)
                        norm_snps = g_dict['norm_snps']
                        print 'SNP-cov normalisation'
                        norm_snps = norm_snps - mean_indiv_genotypes
                        print 'Calculating SNP covariance unscaled'
                        snp_cov_unscaled = sp.dot(norm_snps.T, norm_snps)
                        snp_cov_leave_one_out += snp_cov_unscaled
                snp_cov_leave_one_out = snp_cov_leave_one_out / num_snps_used
                K_leave_one_out = K_leave_one_out / num_snps_used
                # Off-diagonal relatedness should stay small.
                assert (K_leave_one_out - sp.diag(K_leave_one_out)).max() < 0.1, '..bug'
                # Try float64 first, fall back to float32; on failure the
                # chromosome stays in not_done for the next while-pass.
                # NOTE(review): bare except also swallows KeyboardInterrupt.
                try:
                    cholesky_decomp_inv_snp_cov = linalg.cholesky(
                        linalg.pinv(sp.array(snp_cov_leave_one_out, dtype='float64')))
                    evals, evecs = linalg.eig(sp.array(K_leave_one_out, dtype='float64'))
                except:
                    try:
                        cholesky_decomp_inv_snp_cov = linalg.cholesky(
                            linalg.pinv(sp.array(snp_cov_leave_one_out, dtype='float32')))
                        evals, evecs = linalg.eig(sp.array(K_leave_one_out, dtype='float32'))
                    except:
                        print 'Failed when obtaining the Cholesky decomposotion or eigen decomposition'
                        print 'Moving on, trying again later.'
                        continue
                # Order eigenpairs (ascending eigenvalues).
                sort_indices = sp.argsort(evals,)
                ordered_evals = evals[sort_indices]
                print ordered_evals[-10:] / sp.sum(ordered_evals)
                ordered_evecs = evecs[:, sort_indices]
                d = {}
                d['evecs_leave_one_out'] = ordered_evecs
                d['evals_leave_one_out'] = ordered_evals
                d['cholesky_decomp_inv_snp_cov'] = cholesky_decomp_inv_snp_cov
                d['K_leave_one_out'] = K_leave_one_out
                d['K_unscaled'] = chromosome_dict[chrom_str]['K_unscaled']
                d['num_snps'] = chromosome_dict[chrom_str]['num_snps']
                d['snp_cov_leave_one_out'] = snp_cov_leave_one_out
                ok_chromosome_dict[chrom_str] = d
                not_done.remove(chrom)
    # While loop ends here.
    K_all_snps = K_all_snps / float(num_all_snps)
    in_h5f.close()
    ok_chromosome_dict['K_all_snps'] = K_all_snps
    ok_chromosome_dict['num_all_snps'] = num_all_snps
    # Sanity check: LOCO kinships of different chromosomes must differ.
    assert sp.sum((ok_chromosome_dict['chr1']['K_leave_one_out'] -
                   ok_chromosome_dict['chr2']['K_leave_one_out']) ** 2) != 0, 'Kinships are probably too similar.'
    print 'Calculating PCAs'
    evals, evecs = linalg.eigh(sp.array(K_all_snps, dtype='float64'))  # PCA via eigen decomp
    evals[evals < 0] = 0  # clip small negative eigenvalues
    sort_indices = sp.argsort(evals,)[::-1]  # descending
    ordered_evals = evals[sort_indices]
    print ordered_evals[:10] / sp.sum(ordered_evals)
    pcs = evecs[:, sort_indices]
    tot = sum(evals)
    var_exp = [(i / tot) * 100 for i in sorted(evals, reverse=True)]
    print 'Total variance explained:', sp.sum(var_exp)
    ok_chromosome_dict['pcs'] = pcs
    ok_chromosome_dict['pcs_var_exp'] = var_exp
    # NOTE(review): the default figure_dir='' is an empty string, so only
    # an explicit None disables plotting.
    if figure_dir is not None:
        plt.clf()
        plt.plot(pcs[:, 0], pcs[:, 1], 'k.')
        plt.title("Overall PCA")
        plt.xlabel('PC1')
        plt.xlabel('PC2')  # NOTE(review): probably meant plt.ylabel('PC2')
        plt.tight_layout()
        plt.savefig(figure_dir + '/' + figure_fn, format='pdf')
        plt.clf()
    out_h5f = h5py.File(out_file)
    hu.dict_to_hdf5(ok_chromosome_dict, out_h5f)
    out_h5f.close()
    return ok_chromosome_dict
# --- Tail of the sparse 1-D heat-conduction solver; the enclosing function
# definition (and the construction of A, b, n, dx, ...) is above this chunk.
    # Right boundary: half-cell conductance ties the last node to T_B.
    A.add(n - 1, n - 1, kt / (dx / 2) * Ac)
    b[-1] += kt / (dx / 2) * Ac * T_B
    # Solution
    # T = sp.linalg.solve(A,b)
    T = A.solve(b)
    return T


# Driver script: compare the sparse solver against the dense reference.
print("START")
# Number of control volumes
n = 5
# Thermal conductivity (W/mK)
kt = 1000
# Cross-sectional area (m2)
Ac = 10e-3
# Length (m)
L = 0.5
# Boundary temperatures (C)
T_A = 100
T_B = 500
# Reference solution from the dense (full-matrix) implementation.
Tfull, dxFull = heatConduction1DConstantTemperatureBoundariesNoSources.main(
    n, kt, Ac, L, T_A, T_B)
Tsparse = solver_1d(n, kt, Ac, L, T_A, T_B)
# Both solvers must agree element-wise.
assert all(sp.isclose(Tfull, Tsparse))
print("OK")
print("END")
def calc_kinship(input_file='Data/1Kgenomes/1K_genomes_v3.hdf5',
                 out_file='Data/1Kgenomes/kinship.hdf5',
                 maf_thres=0.01, figure_dir='', figure_fn='',
                 snp_filter_frac=1, indiv_filter_frac=1,
                 chrom_ok_snp_dict=None):
    """Compute genome-wide and leave-one-chromosome-out (LOCO) kinship and
    SNP-covariance matrices from a 1000-genomes style HDF5 file.

    Duplicate (reformatted copy) of the earlier ``calc_kinship`` in this
    file.  Python 2 code (print statements).  Relies on module-level names
    ``sp`` (scipy), ``h5py``, ``linalg``, ``hu`` and ``get_genotype_data``.
    Chromosomes whose decompositions fail stay in ``not_done`` and the
    outer ``while`` pass is repeated; results (plus overall PCs) are
    written to ``out_file`` and returned as a dict.
    """
    import matplotlib
    matplotlib.use('Agg')  # select headless backend before importing pyplot
    import matplotlib.pyplot as plt
    # NOTE(review): the input filename is missing from this message.
    print 'Loading Genotype from '
    in_h5f = h5py.File(input_file)
    # eur_filter = in_h5f['indivs']['continent'][...] == 'EUR'
    # num_indivs = sp.sum(eur_filter)
    indiv_ids = in_h5f['indiv_ids'][...]
    # Optional random down-sampling of individuals.
    indiv_filter = None
    if indiv_filter_frac < 1:
        indiv_filter = sp.array(
            sp.random.random(len(indiv_ids)) < indiv_filter_frac,
            dtype='bool8')
        indiv_ids = indiv_ids[indiv_filter]
    assert len(sp.unique(indiv_ids)) == len(indiv_ids)
    num_indivs = len(indiv_ids)
    ok_chromosome_dict = {}
    not_done = set(range(1, 23))
    # Outer retry loop: runs until every chromosome has been decomposed.
    while len(not_done) > 0:
        chromosome_dict = {}
        K_all_snps = sp.zeros((num_indivs, num_indivs), dtype='float32')
        num_all_snps = 0
        sum_indiv_genotypes_all_chrom = sp.zeros(num_indivs, dtype='float32')
        # snp_cov_all_snps = sp.zeros((num_indivs, num_indivs), dtype='float64')
        print 'Calculating kinship'
        # Pass 1: per-chromosome unscaled kinship and SNP covariance.
        for chrom in range(1, 23):
            print 'Working on Chromosome %d' % chrom
            chrom_str = 'chr%d' % chrom
            snp_filter = None
            if snp_filter_frac < 1:
                snp_filter = sp.random.random(len(
                    in_h5f[chrom_str]['snps'])) < snp_filter_frac
            g_dict = get_genotype_data(in_h5f, chrom, maf_thres,
                                       indiv_filter=indiv_filter,
                                       snp_filter=snp_filter,
                                       randomize_sign=True,
                                       snps_signs=None,
                                       chrom_ok_snp_dict=chrom_ok_snp_dict)
            norm_snps = g_dict['norm_snps']
            sum_indiv_genotypes = sp.sum(g_dict['norm_snps'], 0)
            sum_indiv_genotypes_all_chrom += sum_indiv_genotypes
            print 'Calculating chromosome kinship'
            K_unscaled = sp.array(sp.dot(norm_snps.T, norm_snps), dtype='float32')
            # Normalised SNPs should give a mean diagonal of ~1.
            assert sp.isclose(
                sp.sum(sp.diag(K_unscaled)) / (len(norm_snps) * num_indivs),
                1.0), '..bug'
            K_all_snps += K_unscaled
            num_all_snps += len(norm_snps)
            print 'SNP-cov normalisation'
            sum_indiv_genotypes = sp.sum(norm_snps, 0)
            # NOTE(review): this adds the same per-chromosome genotype sum
            # a second time (already added above) -- confirm intended.
            sum_indiv_genotypes_all_chrom += sum_indiv_genotypes
            mean_indiv_genotypes = sum_indiv_genotypes / len(norm_snps)
            # Centre per individual before the covariance product.
            norm_snps = norm_snps - mean_indiv_genotypes
            print 'Calculating SNP covariance unscaled'
            snp_cov_unscaled = sp.array(sp.dot(norm_snps.T, norm_snps),
                                        dtype='float32')
            # snp_cov_all_snps += snp_cov_unscaled
            print 'Storing and updating things'
            chromosome_dict[chrom_str] = {
                'K_unscaled': K_unscaled,
                'num_snps': len(norm_snps),
                'sum_indiv_genotypes': sum_indiv_genotypes,
                'snp_cov_unscaled': snp_cov_unscaled,
                'snps_signs': g_dict['snps_signs']
            }
            if snp_filter_frac < 1:
                chromosome_dict[chrom_str]['snp_filter'] = snp_filter
        # snp_cov_all_snps = snp_cov_all_snps / float(num_all_snps)
        # K_all_snps = K_all_snps / float(num_all_snps)
        # print 'K_all_snps.shape: %s' % str(K_all_snps.shape)
        # print 'snp_cov_all_snps.shape: %s' % str(snp_cov_all_snps.shape)
        # print 'sp.diag(snp_cov_all_snps): %s' % str(sp.diag(snp_cov_all_snps))
        # print 'sp.mean(sp.diag(snp_cov_all_snps)_: %s' % str(sp.mean(sp.diag(snp_cov_all_snps)))
        # print 'Full kinship and snp-covariance calculation done using %d SNPs\n' % num_all_snps
        mean_indiv_genotypes_all_chrom = sum_indiv_genotypes_all_chrom / num_all_snps
        print 'Individual gentoype mean found:'
        print mean_indiv_genotypes_all_chrom
        print 'Calculating chromosome-wise SNP-covariance and kinship matrices'
        # Pass 2: leave-one-chromosome-out matrices and decompositions.
        for chrom in range(1, 23):
            if chrom in not_done:
                print 'Working on Chromosome %d' % chrom
                chrom_str = 'chr%d' % chrom
                snp_cov_leave_one_out = sp.zeros((num_indivs, num_indivs),
                                                 dtype='float32')
                K_leave_one_out = sp.zeros((num_indivs, num_indivs),
                                           dtype='float32')
                num_snps_used = 0
                sum_indiv_genotypes = sp.zeros(num_indivs, dtype='float32')
                # Sum the contributions of every other chromosome.
                for chrom2 in range(1, 23):
                    chrom2_str = 'chr%d' % chrom2
                    if chrom2 != chrom:
                        sum_indiv_genotypes += chromosome_dict[chrom2_str][
                            'sum_indiv_genotypes']
                        K_leave_one_out += chromosome_dict[chrom2_str][
                            'K_unscaled']
                        num_snps_used += chromosome_dict[chrom2_str][
                            'num_snps']
                assert sp.isclose(
                    sp.sum(sp.diag(K_leave_one_out)) / (num_snps_used * num_indivs),
                    1.0), '..bug'
                mean_indiv_genotypes = sum_indiv_genotypes / num_snps_used
                # Re-read the other chromosomes with the same random signs
                # and SNP filters as pass 1 to build the LOCO covariance.
                for chrom2 in range(1, 23):
                    chrom2_str = 'chr%d' % chrom2
                    if chrom2 != chrom:
                        print 'Loading SNPs'
                        snps_signs = chromosome_dict[chrom2_str]['snps_signs']
                        snp_filter = chromosome_dict[chrom2_str]['snp_filter']
                        g_dict = get_genotype_data(
                            in_h5f, chrom2, maf_thres,
                            indiv_filter=indiv_filter,
                            snp_filter=snp_filter,
                            randomize_sign=True,
                            snps_signs=snps_signs,
                            chrom_ok_snp_dict=chrom_ok_snp_dict)
                        norm_snps = g_dict['norm_snps']
                        print 'SNP-cov normalisation'
                        norm_snps = norm_snps - mean_indiv_genotypes
                        print 'Calculating SNP covariance unscaled'
                        snp_cov_unscaled = sp.dot(norm_snps.T, norm_snps)
                        snp_cov_leave_one_out += snp_cov_unscaled
                snp_cov_leave_one_out = snp_cov_leave_one_out / num_snps_used
                K_leave_one_out = K_leave_one_out / num_snps_used
                # Off-diagonal relatedness should stay small.
                assert (K_leave_one_out - sp.diag(K_leave_one_out)).max() < 0.1, '..bug'
                # Try float64 first, fall back to float32; on failure the
                # chromosome stays in not_done for the next while-pass.
                # NOTE(review): bare except also swallows KeyboardInterrupt.
                try:
                    cholesky_decomp_inv_snp_cov = linalg.cholesky(
                        linalg.pinv(
                            sp.array(snp_cov_leave_one_out, dtype='float64')))
                    evals, evecs = linalg.eig(
                        sp.array(K_leave_one_out, dtype='float64'))
                except:
                    try:
                        cholesky_decomp_inv_snp_cov = linalg.cholesky(
                            linalg.pinv(
                                sp.array(snp_cov_leave_one_out, dtype='float32')))
                        evals, evecs = linalg.eig(
                            sp.array(K_leave_one_out, dtype='float32'))
                    except:
                        print 'Failed when obtaining the Cholesky decomposotion or eigen decomposition'
                        print 'Moving on, trying again later.'
                        continue
                # Order eigenpairs (ascending eigenvalues).
                sort_indices = sp.argsort(evals, )
                ordered_evals = evals[sort_indices]
                print ordered_evals[-10:] / sp.sum(ordered_evals)
                ordered_evecs = evecs[:, sort_indices]
                d = {}
                d['evecs_leave_one_out'] = ordered_evecs
                d['evals_leave_one_out'] = ordered_evals
                d['cholesky_decomp_inv_snp_cov'] = cholesky_decomp_inv_snp_cov
                d['K_leave_one_out'] = K_leave_one_out
                d['K_unscaled'] = chromosome_dict[chrom_str]['K_unscaled']
                d['num_snps'] = chromosome_dict[chrom_str]['num_snps']
                d['snp_cov_leave_one_out'] = snp_cov_leave_one_out
                ok_chromosome_dict[chrom_str] = d
                not_done.remove(chrom)
    # While loop ends here.
    K_all_snps = K_all_snps / float(num_all_snps)
    in_h5f.close()
    ok_chromosome_dict['K_all_snps'] = K_all_snps
    ok_chromosome_dict['num_all_snps'] = num_all_snps
    # Sanity check: LOCO kinships of different chromosomes must differ.
    assert sp.sum((ok_chromosome_dict['chr1']['K_leave_one_out'] -
                   ok_chromosome_dict['chr2']['K_leave_one_out'])**
                  2) != 0, 'Kinships are probably too similar.'
    print 'Calculating PCAs'
    evals, evecs = linalg.eigh(sp.array(
        K_all_snps, dtype='float64'))  # PCA via eigen decomp
    evals[evals < 0] = 0  # clip small negative eigenvalues
    sort_indices = sp.argsort(evals, )[::-1]  # descending
    ordered_evals = evals[sort_indices]
    print ordered_evals[:10] / sp.sum(ordered_evals)
    pcs = evecs[:, sort_indices]
    tot = sum(evals)
    var_exp = [(i / tot) * 100 for i in sorted(evals, reverse=True)]
    print 'Total variance explained:', sp.sum(var_exp)
    ok_chromosome_dict['pcs'] = pcs
    ok_chromosome_dict['pcs_var_exp'] = var_exp
    # NOTE(review): the default figure_dir='' is an empty string, so only
    # an explicit None disables plotting.
    if figure_dir is not None:
        plt.clf()
        plt.plot(pcs[:, 0], pcs[:, 1], 'k.')
        plt.title("Overall PCA")
        plt.xlabel('PC1')
        plt.xlabel('PC2')  # NOTE(review): probably meant plt.ylabel('PC2')
        plt.tight_layout()
        plt.savefig(figure_dir + '/' + figure_fn, format='pdf')
        plt.clf()
    out_h5f = h5py.File(out_file)
    hu.dict_to_hdf5(ok_chromosome_dict, out_h5f)
    out_h5f.close()
    return ok_chromosome_dict
def solve_new(self):
    """Run the transient solver loop for the coupled heat (T) / moisture
    (phi) problem until ``self.t_end`` or ``self.max_steps`` is reached.

    The adaptive time step ``self.dt_fe`` grows geometrically by
    ``self.time_gamma`` each step (capped at ``self.max_dt``) and is
    temporarily clamped so the simulation lands exactly on each probe
    time in ``self.prope_times`` and on ``self.t_end``.
    """
    ###########################################################################
    # SOLVER LOOP
    ###########################################################################
    step = 0
    self.t = 0
    ts = [self.t]  # history of visited times (kept for inspection)
    prev_dt_before_prope = None  # dt to restore after a probe-time clamp
    while step < self.max_steps:
        hit_prope_time = False
        # Check last step: clamp dt so we land exactly on t_end.
        if self.t + float(self.dt_fe) > self.t_end:
            self.dt_fe = fe.Constant(self.t_end - self.t)
        # Check prope time: clamp dt so we land exactly on the next probe
        # time.  BUG FIX: bounds-check prope_times_k — the original indexed
        # past the end of prope_times once all probe times were consumed.
        elif (self.prope_times_k < len(self.prope_times)
                and self.t + float(self.dt_fe)
                > self.prope_times[self.prope_times_k]):
            prev_dt_before_prope = float(self.dt_fe)
            self.dt_fe = fe.Constant(self.prope_times[self.prope_times_k]
                                     - self.t)
            self.prope_times_k += 1
            hit_prope_time = True

        # Progress time
        step += 1
        self.t += float(self.dt_fe)
        ts.append(self.t)

        ##############################
        # Solver
        ##############################
        # Solve heat and moisture for the current step.
        fe.solve(self.a_T == self.L_T, self.T, self.bc_T)
        fe.solve(self.a_phi == self.L_phi, self.phi, self.bc_phi)

        # Shift solution history (second level only needed by the
        # 2nd-order time scheme).
        if self.order_2nd:
            self.T_old2.assign(self.T_old)
            self.phi_old2.assign(self.phi_old)
        # Update solved fields
        self.T_old.assign(self.T)
        self.phi_old.assign(self.phi)

        # Update moisture-dependent material properties from the new phi.
        phi_old_int = fe.interpolate(self.phi_old, self.v_materials)
        self.w.x_k = phi_old_int
        self.kT.x_k = phi_old_int
        self.delta_p.x_k = phi_old_int
        self.Dw.x_k = phi_old_int
        self.xi.x_k = phi_old_int

        ##############################
        # Post
        ##############################
        # BUG FIX: the original compared self.t against
        # self.prope_times[self.prope_times_k] AFTER the index had been
        # advanced, so it tested the NEXT probe time and self.prope()
        # never fired; use the flag set when dt was clamped instead.
        if hit_prope_time:
            self.prope()
            print("step=%i progress=%.2f t=%.2f (d) dt=%.2f (h)"
                  % (step, self.t / self.t_end, self.t / s2d,
                     float(self.dt_fe) / s2h))
            self.save_time()
        elif step % 100 == 0:
            print("step=%i progress=%.2f t=%.2f (d) dt=%.2f (h)"
                  % (step, self.t / self.t_end, self.t / s2d,
                     float(self.dt_fe) / s2h))
            self.save_time()

        ##############################
        # Next step
        ##############################
        # Check end
        if sp.isclose(self.t, self.t_end) or self.t > self.t_end:
            break
        # Restore the pre-clamp dt after a probe-time step.
        # BUG FIX: compare against None — the original truthiness test
        # would silently ignore a stored dt of exactly 0.0.
        if prev_dt_before_prope is not None:
            self.dt_fe = fe.Constant(prev_dt_before_prope)
            prev_dt_before_prope = None
        # Increase timestep geometrically, capped at max_dt.
        self.dt_fe *= self.time_gamma
        if float(self.dt_fe) > self.max_dt:
            self.dt_fe = fe.Constant(self.max_dt)
def gen_unrelated_eur_1k_data(input_file='/home/bjarni/TheHonestGene/faststorage/1Kgenomes/phase3/1k_genomes_hg.hdf5' , out_file='/home/bjarni/PCMA/faststorage/1_DATA/1k_genomes/1K_genomes_phase3_EUR_unrelated.hdf5', maf_thres=0.01, max_relatedness=0.05, K_thinning_frac=0.1, debug=False): h5f = h5py.File(input_file) num_indivs = len(h5f['indivs']['continent']) eur_filter = h5f['indivs']['continent'][...] == 'EUR' num_eur_indivs = sp.sum(eur_filter) print 'Number of European individuals: %d', num_eur_indivs K = sp.zeros((num_eur_indivs, num_eur_indivs), dtype='single') num_snps = 0 std_thres = sp.sqrt(2.0 * (1 - maf_thres) * (maf_thres)) print 'Calculating kinship' for chrom in range(1, 23): print 'Working on Chromosome %d' % chrom chrom_str = 'chr%d' % chrom print 'Loading SNPs and data' snps = sp.array(h5f[chrom_str]['calldata']['snps'][...], dtype='int8') print 'Loading NTs' ref_nts = h5f[chrom_str]['variants']['REF'][...] alt_nts = h5f[chrom_str]['variants']['ALT'][...] print 'Filtering multi-allelic SNPs' multi_allelic_filter = sp.negative(h5f[chrom_str]['variants']['MULTI_ALLELIC'][...]) snps = snps[multi_allelic_filter] ref_nts = ref_nts[multi_allelic_filter] alt_nts = alt_nts[multi_allelic_filter] if K_thinning_frac < 1: print 'Thinning SNPs for kinship calculation' thinning_filter = sp.random.random(len(snps)) < K_thinning_frac snps = snps[thinning_filter] alt_nts = alt_nts[thinning_filter] ref_nts = ref_nts[thinning_filter] print 'Filter SNPs with missing NT information' nt_filter = sp.in1d(ref_nts, ok_nts) nt_filter = nt_filter * sp.in1d(alt_nts, ok_nts) if sp.sum(nt_filter) < len(nt_filter): snps = snps[nt_filter] print 'Filtering non-European individuals' snps = snps[:, eur_filter] print 'Filtering SNPs with MAF <', maf_thres snp_stds = sp.std(snps, 1) maf_filter = snp_stds.flatten() > std_thres snps = snps[maf_filter] snp_stds = snp_stds[maf_filter] print '%d SNPs remaining after all filtering steps.' 
% len(snps) print 'Normalizing SNPs' snp_means = sp.mean(snps, 1) norm_snps = (snps - snp_means[sp.newaxis].T) / snp_stds[sp.newaxis].T print 'Updating kinship' K += sp.dot(norm_snps.T, norm_snps) num_snps += len(norm_snps) assert sp.isclose(sp.sum(sp.diag(K)) / (num_snps * num_eur_indivs), 1.0) K = K / float(num_snps) print 'Kinship calculation done using %d SNPs\n' % num_snps # Filter individuals print 'Filtering individuals' keep_indiv_set = set(range(num_eur_indivs)) for i in range(num_eur_indivs): if i in keep_indiv_set: for j in range(i + 1, num_eur_indivs): if K[i, j] > max_relatedness: if j in keep_indiv_set: keep_indiv_set.remove(j) keep_indivs = list(keep_indiv_set) keep_indivs.sort() print 'Retained %d individuals\n' % len(keep_indivs) # Checking that everything is ok! K_ok = K[keep_indivs] K_ok = K_ok[:, keep_indivs] assert (K_ok - sp.tril(K_ok)).max() < max_relatedness indiv_filter = sp.zeros(num_indivs, dtype='bool8') indiv_filter[(sp.arange(num_indivs)[eur_filter])[keep_indivs]] = 1 assert sp.sum(indiv_filter) == len(keep_indivs) # Store in new file print 'Now storing data.' oh5f = h5py.File(out_file, 'w') indiv_ids = h5f['indivs']['indiv_ids'][indiv_filter] oh5f.create_dataset('indiv_ids', data=indiv_ids) for chrom in range(1, 23): print 'Working on Chromosome %d' % chrom chrom_str = 'chr%d' % chrom print 'Loading SNPs and data' snps = sp.array(h5f[chrom_str]['calldata']['snps'][...], dtype='int8') snp_ids = h5f[chrom_str]['variants']['ID'][...] positions = h5f[chrom_str]['variants']['POS'][...] print 'Loading NTs' ref_nts = h5f[chrom_str]['variants']['REF'][...] alt_nts = h5f[chrom_str]['variants']['ALT'][...] 
print 'Filtering multi-allelic SNPs' multi_allelic_filter = sp.negative(h5f[chrom_str]['variants']['MULTI_ALLELIC'][...]) snps = snps[multi_allelic_filter] ref_nts = ref_nts[multi_allelic_filter] alt_nts = alt_nts[multi_allelic_filter] positions = positions[multi_allelic_filter] snp_ids = snp_ids[multi_allelic_filter] print 'Filter individuals' snps = snps[:, indiv_filter] print 'Filter SNPs with missing NT information' nt_filter = sp.in1d(ref_nts, ok_nts) nt_filter = nt_filter * sp.in1d(alt_nts, ok_nts) if sp.sum(nt_filter) < len(nt_filter): snps = snps[nt_filter] ref_nts = ref_nts[nt_filter] alt_nts = alt_nts[nt_filter] positions = positions[nt_filter] snp_ids = snp_ids[nt_filter] print 'filter monomorphic SNPs' snp_stds = sp.std(snps, 1) mono_morph_filter = snp_stds > 0 snps = snps[mono_morph_filter] ref_nts = ref_nts[mono_morph_filter] alt_nts = alt_nts[mono_morph_filter] positions = positions[mono_morph_filter] snp_ids = snp_ids[mono_morph_filter] snp_stds = snp_stds[mono_morph_filter] snp_means = sp.mean(snps, 1) if debug: if K_thinning_frac < 1: print 'Thinning SNPs for kinship calculation' thinning_filter = sp.random.random(len(snps)) < K_thinning_frac k_snps = snps[thinning_filter] k_snp_stds = snp_stds[thinning_filter] print 'Filtering SNPs with MAF <', maf_thres maf_filter = k_snp_stds.flatten() > std_thres k_snps = k_snps[maf_filter] k_snp_stds = k_snp_stds[maf_filter] k_snp_means = sp.mean(k_snps) print 'Verifying that the Kinship makes sense' norm_snps = (k_snps - k_snp_means[sp.newaxis].T) / k_snp_stds[sp.newaxis].T K = sp.dot(norm_snps.T, norm_snps) num_snps += len(norm_snps) if sp.isclose(sp.sum(sp.diag(K)) / (num_snps * num_eur_indivs), 1.0) and (K - sp.tril(K)).max() < (max_relatedness * 1.5): print 'It looks OK!' 
else: raise Exception('Kinship looks wrong?') nts = sp.array([[nt1, nt2] for nt1, nt2 in izip(ref_nts, alt_nts)]) print 'Writing to disk' cg = oh5f.create_group(chrom_str) cg.create_dataset('snps', data=snps) cg.create_dataset('snp_means', data=snp_means[sp.newaxis].T) cg.create_dataset('snp_stds', data=snp_stds[sp.newaxis].T) cg.create_dataset('snp_ids', data=snp_ids) cg.create_dataset('positions', data=positions) cg.create_dataset('nts', data=nts) oh5f.flush() print 'Done writing to disk' # centimorgans = h5f[chrom_str]['centimorgans'][...] # cg.create_dataset('centimorgans',data=centimorgans) # # centimorgan_rates = h5f[chrom_str]['centimorgan_rates'][...] # cg.create_dataset('centimorgan_rates',data=centimorgan_rates) oh5f.close() h5f.close() print 'Done'
step_estimate(state_seq, reward_seq, i), step_weight(lambda_val, i)) print ' weight sum:', sum([step_weight(lambda_val, j) for j in range(1, max_lookahead + 2)]) print return sum([step_weight(lambda_val, k) * step_estimate(state_seq, reward_seq, k) for k in range(1, max_lookahead + 2)]) def fn(lambda_val, problem): return TD(lambda_val, problem) - TD(1, problem) for idx, p in enumerate(problems): solved = False for guess in np.linspace(0.0, 0.9, 100): try: solution = optimize.newton(fn, guess, args=(p, )) if 0 <= solution < 0.99 and isclose(TD(1, p), TD(solution, p), tol): solved = True label = 'Unknown' if p.test: label = 'Correct' if isclose(solution, p.solution, tol) else 'Failed' print 'Problem {}: {} ({})'.format(idx + 1, solution, label) break except RuntimeError: # Failed to converge after n iterations pass if not solved: print 'Problem {}: Failed'.format(idx + 1)