Example #1
 def test_rate_Nt_by_2_conductance(self):
     net = op.network.Cubic(shape=[1, 6, 1])
     geom = op.geometry.StickAndBall(network=net)
     air = op.phases.Air(network=net)
     water = op.phases.Water(network=net)
     m = op.phases.MultiPhase(phases=[air, water], project=net.project)
     m.set_occupancy(phase=air, pores=[0, 1, 2])
     m.set_occupancy(phase=water, pores=[3, 4, 5])
     const = op.models.misc.constant
     K_water_air = 0.5
     m.set_binary_partition_coef(propname="throat.partition_coef",
                                 phases=[water, air],
                                 model=const,
                                 value=K_water_air)
     m._set_automatic_throat_occupancy()
     _ = op.physics.Standard(network=net, phase=m, geometry=geom)
     alg = op.algorithms.GenericTransport(network=net, phase=m)
     alg.settings['conductance'] = 'throat.diffusive_conductance'
     alg.settings['quantity'] = 'pore.mole_fraction'
     alg.set_rate_BC(pores=0, values=1.235)
     alg.set_value_BC(pores=5, values=0.0)
     alg.run()
     rate = alg.rate(pores=5)[0]
     assert sp.isclose(rate, -1.235)
     # Rate at air-water interface throat (#2) must match imposed rate
     rate = alg.rate(throats=2)[0]
     assert sp.isclose(rate, 1.235)
     # Rate at interface pores (#2 @ air-side, #3 @ water-side) must be 0
     rate_air_side = alg.rate(pores=2)[0]
     rate_water_side = alg.rate(pores=3)[0]
     assert sp.isclose(rate_air_side, 0.0)
     assert sp.isclose(rate_water_side, 0.0)
     # Net rate must always be zero at steady state conditions
     assert sp.isclose(alg.rate(pores=net.Ps), 0.0)
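A quick aside on the comparison helper itself: every snippet in this collection relies on sp.isclose, which is SciPy re-exporting numpy.isclose (newer SciPy releases deprecate these top-level NumPy aliases, so numpy.isclose is the safer spelling). A minimal sketch of the tolerance rule it applies, reusing the rate value from the test above purely as an illustration:

import numpy as np

# numpy.isclose treats a and b as equal when abs(a - b) <= atol + rtol * abs(b),
# with defaults rtol=1e-5 and atol=1e-8.
assert np.isclose(1.235, 1.2350000001)     # within tolerance
assert not np.isclose(1.235, 1.24)         # 0.005 apart, not close
assert np.isclose(1.235 + (-1.235), 0.0)   # the "net rate is zero at steady state" style of check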
Example #2
 def test_rate_single(self):
     alg = op.algorithms.ReactiveTransport(network=self.net,
                                           phase=self.phase)
     alg.settings['conductance'] = 'throat.diffusive_conductance'
     alg.settings['quantity'] = 'pore.mole_fraction'
     alg.set_rate_BC(pores=self.net.pores("left"), values=1.235)
     alg.set_value_BC(pores=self.net.pores("right"), values=0.0)
     alg.run()
     rate = alg.rate(pores=self.net.pores("right"))[0]
     assert sp.isclose(rate, -1.235 * self.net.pores("right").size)
     # Net rate must always be zero at steady state conditions
     assert sp.isclose(alg.rate(pores=self.net.Ps), 0.0)
Example #3
 def test_rate_multiple(self):
     alg = op.algorithms.GenericTransport(network=self.net,
                                          phase=self.phase)
     alg.settings['conductance'] = 'throat.diffusive_conductance'
     alg.settings['quantity'] = 'pore.mole_fraction'
     alg.set_rate_BC(pores=[0, 1, 2, 3], values=1.235)
     # Note that pore = 0 is assigned two rate values (rate = sum(rates))
     alg.set_rate_BC(pores=[5, 6, 19, 35, 0], values=3.455)
     alg.set_value_BC(pores=[50, 51, 52, 53], values=0.0)
     alg.run()
     rate = alg.rate(pores=[50, 51, 52, 53])[0]
     assert sp.isclose(rate,
                       -(1.235 * 4 + 3.455 * 5))  # 4, 5 are number of pores
     # Net rate must always be zero at steady state conditions
     assert sp.isclose(alg.rate(pores=self.net.Ps), 0.0)
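The expected value in the assert follows from the note above that a pore listed in two set_rate_BC calls receives the sum of the rates. A small arithmetic sketch of that bookkeeping (plain Python, no OpenPNM needed):

rates_a = {p: 1.235 for p in [0, 1, 2, 3]}
rates_b = {p: 3.455 for p in [5, 6, 19, 35, 0]}
# Pore 0 appears in both calls, so its boundary rate is 1.235 + 3.455.
total_in = sum(rates_a.values()) + sum(rates_b.values())
print(total_in)   # 22.215 == 1.235 * 4 + 3.455 * 5, leaving -22.215 at the outlet pores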
Example #4
 def test_mutliphase_partition_coef(self):
     m = op.phases.MultiPhase(network=self.net,
                              phases=[self.water, self.air, self.oil])
     x, y, z = self.net["pore.coords"].T
     ps_water = self.net.Ps[(y <= 3) + (y >= 8)]
     ps_air = self.net.Ps[(y > 3) * (y < 6)]
     ps_oil = self.net.Ps[(y >= 6) * (y < 8)]
     # Phase arrangement (y-axis): W | A | O | W
     m.set_occupancy(phase=self.water, pores=ps_water)
     m.set_occupancy(phase=self.air, pores=ps_air)
     m.set_occupancy(phase=self.oil, pores=ps_oil)
     const = op.models.misc.constant
     K_air_water = 2.0
     K_air_oil = 1.8
     K_water_oil = 0.73
     m.set_binary_partition_coef(propname="throat.partition_coef",
                                 phases=[self.air, self.water],
                                 model=const,
                                 value=K_air_water)
     m.set_binary_partition_coef(propname="throat.partition_coef",
                                 phases=[self.air, self.oil],
                                 model=const,
                                 value=K_air_oil)
     m.set_binary_partition_coef(propname="throat.partition_coef",
                                 phases=[self.water, self.oil],
                                 model=const,
                                 value=K_water_oil)
     K_aw = m["throat.partition_coef.air:water"]
     K_ao = m["throat.partition_coef.air:oil"]
     K_wo = m["throat.partition_coef.water:oil"]
     K_global = m["throat.partition_coef.all"]
     assert sp.isclose(K_aw.mean(), K_air_water)
     assert sp.isclose(K_ao.mean(), K_air_oil)
     assert sp.isclose(K_wo.mean(), K_water_oil)
     # Get water-air interface throats
     tmp1 = self.net.find_neighbor_throats(ps_water, mode="xor")
     tmp2 = self.net.find_neighbor_throats(ps_air, mode="xor")
     Ts_water_air_interface = sp.intersect1d(tmp1, tmp2)
     # Get air-oil interface throats
     tmp1 = self.net.find_neighbor_throats(ps_air, mode="xor")
     tmp2 = self.net.find_neighbor_throats(ps_oil, mode="xor")
     Ts_air_oil_interface = sp.intersect1d(tmp1, tmp2)
     # Get oil-water interface throats
     tmp1 = self.net.find_neighbor_throats(ps_oil, mode="xor")
     tmp2 = self.net.find_neighbor_throats(ps_water, mode="xor")
     Ts_oil_water_interface = sp.intersect1d(tmp1, tmp2)
     # K_global for water-air interface must be 1/K_air_water
     assert sp.isclose(K_global[Ts_water_air_interface].mean(),
                       1 / K_air_water)
     # K_global for air-oil interface must be K_air_oil (not 1/K_air_oil)
     assert sp.isclose(K_global[Ts_air_oil_interface].mean(), K_air_oil)
     # K_global for oil-water interface must be 1/K_water_oil
     assert sp.isclose(K_global[Ts_oil_water_interface].mean(),
                       1 / K_water_oil)
     # K_global for single-phase regions must be 1.0
     interface_throats = sp.hstack(
         (Ts_water_air_interface, Ts_air_oil_interface,
          Ts_oil_water_interface))
     Ts_single_phase = sp.setdiff1d(self.net.Ts, interface_throats)
     assert sp.isclose(K_global[Ts_single_phase].mean(), 1.0)
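The asserts above hinge on the partition coefficient being direction dependent: swapping the order of the phase pair inverts it, which is why the water-air and oil-water interfaces check against reciprocals while the air-oil interface keeps K_air_oil as-is. A short sketch of the relation the test assumes (not OpenPNM code):

K_air_water = 2.0
K_water_air = 1.0 / K_air_water   # 0.5: reversing the phase pair inverts the coefficient
K_water_oil = 0.73
K_oil_water = 1.0 / K_water_oil   # what the oil-water interface throats end up holding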
Example #5
    def progress_one_step(self):

        k = self.k
        kn = self.k + 1
        self.r[k] = -(self.kf[k] *
                      (self.c_CH4[k] * 1e-6)**self.nf - self.kb[k] *
                      (self.c_H2[k] * 1e-6)**self.mb) * 1e6

        dcCH4 = self.r[k] * self.dt[k]
        dcCs = -dcCH4
        dcH2 = -2 * dcCH4

        self.N_CH4[kn] = (self.c_CH4[k] + dcCH4) * self.Q[k]
        self.N_H2[kn] = (self.c_H2[k] + dcH2) * self.Q[k]
        self.N_Cs[kn] = (self.c_Cs[k] + dcCs) * self.Q[k]
        self.N_g[kn] = self.N_CH4[kn] + self.N_H2[kn] + self.N_N2

        self.x_CH4[kn] = self.N_CH4[kn] / self.N_g[kn]
        self.x_H2[kn] = self.N_H2[kn] / self.N_g[kn]
        self.x_N2[kn] = self.N_N2 / self.N_g[kn]

        assert sp.isclose(self.x_CH4[kn] + self.x_H2[kn] + self.x_N2[kn], 1)

        self.M[kn] = self.x_CH4[kn]*self.M_CH4 + self.x_N2[kn]*self.M_N2 \
                   + self.x_H2[kn]*self.M_H2

        self.p[kn] = self.p[k] - self.dx * \
                                 (150*self.mu[k]*self.Vc[k]/self.Lc**2
                                  + 1.75*self.rho[k]*self.Vc[k]**2/self.Lc
                                  )

        self.rho[kn] = self.p[kn] * self.M[kn] / (self.R_u * self.T[kn])

        self.m_CH4[kn] = self.N_CH4[kn] * self.M_CH4
        self.m_H2[kn] = self.N_H2[kn] * self.M_H2
        self.m_Cs[kn] = self.N_Cs[kn] * self.M_Cs
        self.m_g[kn] = self.m_CH4[kn] + self.m_H2[kn] + self.m_N2

        self.rhoCH4[kn] = rhoCH4(self.T[kn], self.p[kn])
        self.rhoH2[kn] = rhoH2(self.T[kn], self.p[kn])
        self.rhoN2[kn] = rhoN2(self.T[kn], self.p[kn])

        self.Q[kn] = self.m_CH4[kn] / self.rhoCH4[kn] \
                   + self.m_H2[kn]  / self.rhoH2[kn] \
                   + self.m_N2      / self.rhoN2[kn]

        self.Vc[kn] = self.Q[kn] / (self.eps * self.Ac)  # / 0.34 #/ 0.37
        self.dt[kn] = self.dx / self.Vc[kn]

        self.c_CH4[kn] = self.N_CH4[kn] / self.Q[kn]
        self.c_H2[kn] = self.N_H2[kn] / self.Q[kn]
        self.c_N2[kn] = self.N_N2 / self.Q[kn]
        self.c_Cs[kn] = self.N_Cs[kn] / self.Q[kn]
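The pressure update in progress_one_step resembles the Ergun packed-bed correlation: a viscous term 150*mu*Vc/Lc**2 plus an inertial term 1.75*rho*Vc**2/Lc. A standalone sketch of that single step with made-up values, purely to show the shape of the update (mu, rho, Vc, Lc, dx, p_k are illustrative assumptions, not values from the model):

# Illustrative SI values: gas viscosity, density, superficial velocity,
# characteristic length, cell size, upstream pressure.
mu, rho, Vc, Lc, dx, p_k = 1.8e-5, 1.2, 0.3, 1.0e-3, 0.01, 101325.0
dp_dx = 150 * mu * Vc / Lc**2 + 1.75 * rho * Vc**2 / Lc
p_kn = p_k - dx * dp_dx   # same form as self.p[kn] = self.p[k] - self.dx * (...)
print(p_kn)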
Example #6
def gen_unrelated_eur_1k_data(
        input_file='/home/bjarni/TheHonestGene/faststorage/1Kgenomes/phase3/1k_genomes_hg.hdf5',
        out_file='/home/bjarni/PCMA/faststorage/1_DATA/1k_genomes/1K_genomes_phase3_EUR_unrelated.hdf5',
        maf_thres=0.01,
        max_relatedness=0.05,
        K_thinning_frac=0.1,
        debug=False):
    h5f = h5py.File(input_file)
    num_indivs = len(h5f['indivs']['continent'])
    eur_filter = h5f['indivs']['continent'][...] == 'EUR'
    num_eur_indivs = sp.sum(eur_filter)
    print 'Number of European individuals: %d' % num_eur_indivs
    K = sp.zeros((num_eur_indivs, num_eur_indivs), dtype='float64')
    num_snps = 0
    std_thres = sp.sqrt(2.0 * (1 - maf_thres) * (maf_thres))

    print 'Calculating kinship'
    for chrom in range(1, 23):
        print 'Working on Chromosome %d' % chrom
        chrom_str = 'chr%d' % chrom

        print 'Loading SNPs and data'
        snps = sp.array(h5f[chrom_str]['calldata']['snps'][...], dtype='int8')

        print 'Loading NTs'
        ref_nts = h5f[chrom_str]['variants']['REF'][...]
        alt_nts = h5f[chrom_str]['variants']['ALT'][...]

        print 'Filtering multi-allelic SNPs'
        multi_allelic_filter = sp.negative(
            h5f[chrom_str]['variants']['MULTI_ALLELIC'][...])
        snps = snps[multi_allelic_filter]
        ref_nts = ref_nts[multi_allelic_filter]
        alt_nts = alt_nts[multi_allelic_filter]

        if K_thinning_frac < 1:
            print 'Thinning SNPs for kinship calculation'
            thinning_filter = sp.random.random(len(snps)) < K_thinning_frac
            snps = snps[thinning_filter]
            alt_nts = alt_nts[thinning_filter]
            ref_nts = ref_nts[thinning_filter]

        print 'Filter SNPs with missing NT information'
        nt_filter = sp.in1d(ref_nts, ok_nts)
        nt_filter = nt_filter * sp.in1d(alt_nts, ok_nts)
        if sp.sum(nt_filter) < len(nt_filter):
            snps = snps[nt_filter]

        print 'Filtering non-European individuals'
        snps = snps[:, eur_filter]

        print 'Filtering SNPs with MAF <', maf_thres
        snp_stds = sp.std(snps, 1)
        maf_filter = snp_stds.flatten() > std_thres
        snps = snps[maf_filter]
        snp_stds = snp_stds[maf_filter]

        print '%d SNPs remaining after all filtering steps.' % len(snps)

        print 'Normalizing SNPs'
        snp_means = sp.mean(snps, 1)
        norm_snps = (snps - snp_means[sp.newaxis].T) / snp_stds[sp.newaxis].T

        print 'Updating kinship'
        K += sp.dot(norm_snps.T, norm_snps)
        num_snps += len(norm_snps)
        assert sp.isclose(
            sp.sum(sp.diag(K)) / (num_snps * num_eur_indivs), 1.0)

    K = K / float(num_snps)
    print 'Kinship calculation done using %d SNPs\n' % num_snps

    # Filter individuals
    print 'Filtering individuals'
    keep_indiv_set = set(range(num_eur_indivs))
    for i in range(num_eur_indivs):
        if i in keep_indiv_set:
            for j in range(i + 1, num_eur_indivs):
                if K[i, j] > max_relatedness:
                    if j in keep_indiv_set:
                        keep_indiv_set.remove(j)
    keep_indivs = list(keep_indiv_set)
    keep_indivs.sort()
    print 'Retained %d individuals\n' % len(keep_indivs)

    # Checking that everything is ok!
    K_ok = K[keep_indivs]
    K_ok = K_ok[:, keep_indivs]
    assert (K_ok - sp.tril(K_ok)).max() < max_relatedness

    indiv_filter = sp.zeros(num_indivs, dtype='bool8')
    indiv_filter[(sp.arange(num_indivs)[eur_filter])[keep_indivs]] = 1

    assert sp.sum(indiv_filter) == len(keep_indivs)

    # Store in new file
    print 'Now storing data.'
    oh5f = h5py.File(out_file, 'w')
    indiv_ids = h5f['indivs']['indiv_ids'][indiv_filter]
    oh5f.create_dataset('indiv_ids', data=indiv_ids)
    for chrom in range(1, 23):
        print 'Working on Chromosome %d' % chrom
        chrom_str = 'chr%d' % chrom

        print 'Loading SNPs and data'
        snps = sp.array(h5f[chrom_str]['calldata']['snps'][...], dtype='int8')
        snp_ids = h5f[chrom_str]['variants']['ID'][...]
        positions = h5f[chrom_str]['variants']['POS'][...]

        print 'Loading NTs'
        ref_nts = h5f[chrom_str]['variants']['REF'][...]
        alt_nts = h5f[chrom_str]['variants']['ALT'][...]

        print 'Filtering multi-allelic SNPs'
        multi_allelic_filter = sp.negative(
            h5f[chrom_str]['variants']['MULTI_ALLELIC'][...])
        snps = snps[multi_allelic_filter]
        ref_nts = ref_nts[multi_allelic_filter]
        alt_nts = alt_nts[multi_allelic_filter]
        positions = positions[multi_allelic_filter]
        snp_ids = snp_ids[multi_allelic_filter]

        print 'Filter individuals'
        snps = snps[:, indiv_filter]

        print 'Filter SNPs with missing NT information'
        nt_filter = sp.in1d(ref_nts, ok_nts)
        nt_filter = nt_filter * sp.in1d(alt_nts, ok_nts)
        if sp.sum(nt_filter) < len(nt_filter):
            snps = snps[nt_filter]
            ref_nts = ref_nts[nt_filter]
            alt_nts = alt_nts[nt_filter]
            positions = positions[nt_filter]
            snp_ids = snp_ids[nt_filter]

        print 'filter monomorphic SNPs'
        snp_stds = sp.std(snps, 1)
        mono_morph_filter = snp_stds > 0
        snps = snps[mono_morph_filter]
        ref_nts = ref_nts[mono_morph_filter]
        alt_nts = alt_nts[mono_morph_filter]
        positions = positions[mono_morph_filter]
        snp_ids = snp_ids[mono_morph_filter]
        snp_stds = snp_stds[mono_morph_filter]

        snp_means = sp.mean(snps, 1)

        if debug:
            if K_thinning_frac < 1:
                print 'Thinning SNPs for kinship calculation'
                thinning_filter = sp.random.random(len(snps)) < K_thinning_frac
                k_snps = snps[thinning_filter]
                k_snp_stds = snp_stds[thinning_filter]

            print 'Filtering SNPs with MAF <', maf_thres
            maf_filter = k_snp_stds.flatten() > std_thres
            k_snps = k_snps[maf_filter]
            k_snp_stds = k_snp_stds[maf_filter]
            k_snp_means = sp.mean(k_snps, 1)  # per-SNP means, mirroring snp_means above

            print 'Verifying that the Kinship makes sense'
            norm_snps = (k_snps -
                         k_snp_means[sp.newaxis].T) / k_snp_stds[sp.newaxis].T
            K = sp.dot(norm_snps.T, norm_snps)
            num_snps += len(norm_snps)
            if sp.isclose(
                    sp.sum(sp.diag(K)) / (num_snps * num_eur_indivs),
                    1.0) and (K - sp.tril(K)).max() < (max_relatedness * 1.5):
                print 'It looks OK!'
            else:
                raise Exception('Kinship looks wrong?')

        nts = sp.array([[nt1, nt2] for nt1, nt2 in izip(ref_nts, alt_nts)])

        print 'Writing to disk'
        cg = oh5f.create_group(chrom_str)
        cg.create_dataset('snps', data=snps)
        cg.create_dataset('snp_means', data=snp_means[sp.newaxis].T)
        cg.create_dataset('snp_stds', data=snp_stds[sp.newaxis].T)
        cg.create_dataset('snp_ids', data=snp_ids)
        cg.create_dataset('positions', data=positions)
        cg.create_dataset('nts', data=nts)
        oh5f.flush()
        print 'Done writing to disk'


#         centimorgans = h5f[chrom_str]['centimorgans'][...]
#         cg.create_dataset('centimorgans',data=centimorgans)
#
#         centimorgan_rates = h5f[chrom_str]['centimorgan_rates'][...]
#         cg.create_dataset('centimorgan_rates',data=centimorgan_rates)

    oh5f.close()
    h5f.close()
    print 'Done'
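The assert inside the kinship loop checks that trace(K)/(num_snps * num_indivs) stays near 1. That holds because each SNP row is standardized to zero mean and unit variance, so its squared entries sum to the number of individuals. A self-contained toy check of that identity (numpy only; the shapes are arbitrary):

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(50, 20))                                    # 50 "SNPs" x 20 "individuals"
X = (X - X.mean(axis=1, keepdims=True)) / X.std(axis=1, keepdims=True)
K = X.T @ X                                                      # unscaled kinship, as in the loop
assert np.isclose(np.trace(K) / (50 * 20), 1.0)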
Example #7
Reference:
Least Angle Regression, Efron et al., 2004, The Annals of Statistics
"""
# Initial data
signs = scipy.zeros(size_predictor)
betas = scipy.zeros(size_predictor)
indices_predictor = scipy.arange(size_predictor)
vecy_fitted = scipy.zeros_like(vecy)
beta_lars = [[0] * size_predictor]

for k in range(size_predictor):
    vecc = (vecy - vecy_fitted) @ matx
    vecc_abs = scipy.absolute(vecc)

    maxc = vecc_abs.max()
    mask_maxc = scipy.isclose(vecc_abs, maxc)
    indices_predictor = np.reshape(indices_predictor,
                                   mask_maxc.shape,
                                   order='C')
    active = indices_predictor[mask_maxc]
    signs = scipy.where(vecc.item(0, active[0]) > 0, 1, -1)

    matx_active = signs * matx[:, active]

    u, s, vh = scipy.linalg.svd(matx_active, full_matrices=False)
    matg = vh.T @ scipy.diag(s**2) @ vh
    matg_inv = vh.T @ scipy.diag(scipy.reciprocal(s**2)) @ vh
    vec1 = scipy.ones(len(active))
    scalara = (matg_inv.sum())**(-.5)

    vecw = scalara * matg_inv.sum(axis=1)
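The mask_maxc step above is the standard LARS move of selecting every predictor whose absolute correlation ties the current maximum, with isclose absorbing floating-point noise. A tiny illustration of just that selection step (made-up correlations):

import numpy as np

c_abs = np.array([0.30, 0.90, 0.8999999999, 0.10])
active = np.flatnonzero(np.isclose(c_abs, c_abs.max()))
print(active)   # [1 2]: the near-tie is picked up despite the tiny float difference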
Example #8
import scipy as sc
import numpy as np
import scipy.linalg as lin

matrix = sc.random.normal(0, 1, [5, 5])
print matrix
print
eigenvalues, eigevectors = lin.eig(matrix)
print eigenvalues,
print eigevectors
id = sc.eye(5, 5)
eigevectors = sc.transpose(eigevectors)

for eigenvalue, eigevector in zip(eigenvalues, eigevectors):
    print sc.isclose(sc.dot(matrix, eigevector),
                     sc.dot(eigenvalue, eigevector)).all()
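The same eigenpair check, as a hedged Python 3 sketch using only numpy (eig returns eigenvectors as the columns of its second output, which is why the snippet above transposes before iterating):

import numpy as np

A = np.random.normal(0, 1, (5, 5))
evals, evecs = np.linalg.eig(A)       # eigenvectors are the columns of evecs
for lam, v in zip(evals, evecs.T):    # iterate one eigenvector at a time
    assert np.isclose(A @ v, lam * v).all()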
Example #9
def get_data(label):
    
    material_root_path = "measurementData/materialProperties/"
    #A4_Isover_RKL-EJ/wc.txt
    material_paths = glob.glob(material_root_path+"*")
    
#    print(label, material_paths)
    
#    for material_path in material_paths[-4:]:
#    for material_path in material_paths[:1]:
    
    
    # "hankalat" is Finnish for roughly "the difficult ones": materials that get
    # the alternative knot placement in the spline fit further below
    hankalat = [
            "measurementData/materialProperties/C5_Luja_A",
            "measurementData/materialProperties/A4_Isover_RKL-EJ",
            "measurementData/materialProperties/A7_Vital-levy",
            "measurementData/materialProperties/A3_Isover_RKL",
            "measurementData/materialProperties/D5_Pellavaeriste_T3",
            "measurementData/materialProperties/A12_Tuulensuojaluja",
            "measurementData/materialProperties/D3_Vital",
            ]
    
    for material_path in material_paths:
        if os.path.split(material_path)[-1].split("_")[0] == label:
            break
         
#    print(material_path)
    
#    name = os.path.split(material_path)[-1]
    
    RHr = []
    wr = []
    
    with open(os.path.join(material_path, "wc.txt"),"r") as ifile:
        lines = ifile.readlines()
    for line in lines[2:]:
#            print(line)
        parts = [round(float(part),4) for part in line.split(",")]
        RHr.append(parts[0])
        wr.append(parts[1])
        
#    if len(wr) == 1:
#        continue
#    if sp.isclose(wr[-1],0):
#        continue
    
    
    if wr[-1] >= 2*wr[-2]:
        w = wr[:-1]
        RH = RHr[:-1]
    else:
        w = wr[:]
        RH = RHr[:]
    if material_path in hankalat:
        w = [(w[0]+w[1])/2, w[2]] + w
        RH = [0.22, 0.51] + RH
        xpoints = [min(RH)]*4 + [0.61] + [max(RH)]*4
    else:
        w = [w[0]] + w
        RH = [0.1] + RH
        xpoints = [min(RH)]*4 + [0.8] + [max(RH)]*4
    def tomin(x):
        tcki = [xpoints,
            list(x) + [0]*5,
            3]
        
        
        return (w - interpolate.splev(RH, tcki, der=0))#*penalty


    x0 = [1]*5
#    print(tomin(x))
    res = optimize.least_squares(tomin, x0)
    tck = [xpoints,
            list(res.x) + [0]*5,
            3]
    
#        RHsp = sp.linspace(0,RH[-1],1000)
    RHsp = sp.linspace(min(RH),max(RH),1000)
    
    wsp = interpolate.splev(RHsp, tck, der=0)
    xisp = interpolate.splev(RHsp, tck, der=1)
    
    # Filter
    wsp[wsp<0] = 0
    if sp.isclose(RHsp[-1],1):
        RHextra = []    
        wextra = []
        xiextra = []
    else:
        RHextra = sp.linspace(RHsp[-1]+(RHsp[-1]-RHsp[-2]), 2, 200)
#            RHextra = sp.linspace(0.8, 1, 100000)
        
        #
        xi0 = xisp[-1]#(wsp[-1]-wsp[-2]) / (RHsp[-1]-RHsp[-2])

        x0 = RHsp[-1]
        x1 = RHr[-1]
        y0 = wsp[-1]
        y1 = wr[-1]

        # Coefficients of the quadratic extrapolation w = a + b*RH + c*RH**2,
        # chosen so that it matches the value y0 and slope xi0 at x0 and the
        # value y1 at x1
        a = (x0**2*x1*xi0 + x0**2*y1 - x0*x1**2*xi0 - 2*x0*x1*y0 + x1**2*y0)/(x0**2 - 2*x0*x1 + x1**2)
        b = (-x0**2*xi0 + 2*x0*y0 - 2*x0*y1 + x1**2*xi0)/(x0**2 - 2*x0*x1 + x1**2)
        c = (-xi0*(x0 - x1) + y0 - y1)/(x0**2 - 2*x0*(x0 - x1) - x1**2)
        
        # funcs
        wextra = a + b*RHextra + c*RHextra**2
        xiextra = b + 2*c*RHextra
            
            
    return sp.array(list(RHsp)+list(RHextra)), sp.array(list(wsp)+list(wextra)), sp.array(list(xisp)+list(xiextra))
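The a, b, c expressions near the end of get_data define that quadratic extrapolation. A standalone check, with made-up x0, x1, y0, y1, xi0, that the closed forms really satisfy the intended constraints w(x0) = y0, w'(x0) = xi0 and w(x1) = y1:

import numpy as np

x0, x1, y0, y1, xi0 = 0.9, 1.0, 0.12, 0.35, 0.5   # illustrative values only
a = (x0**2*x1*xi0 + x0**2*y1 - x0*x1**2*xi0 - 2*x0*x1*y0 + x1**2*y0)/(x0**2 - 2*x0*x1 + x1**2)
b = (-x0**2*xi0 + 2*x0*y0 - 2*x0*y1 + x1**2*xi0)/(x0**2 - 2*x0*x1 + x1**2)
c = (-xi0*(x0 - x1) + y0 - y1)/(x0**2 - 2*x0*(x0 - x1) - x1**2)
w = lambda x: a + b*x + c*x**2
assert np.isclose(w(x0), y0) and np.isclose(w(x1), y1) and np.isclose(b + 2*c*x0, xi0)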
Example #10
 def isInSpace(self, p):
     """Return true if the point p is in the affine space, false otherwise."""
     p = scipy.array(p)
     close = scipy.isclose(self.getProjection(p),  p)
     inSpace = scipy.all(close, axis=1)
     return scipy.expand_dims(inSpace, 1)
Example #11
    def score(self, X_train, T_train, X_test, T_test):
        Y = self.predict(X_train, T_train, X_test)

        return sp.mean(sp.isclose(Y, T_test))
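The score above is simply the fraction of predictions that land within isclose tolerance of the targets. A toy illustration with made-up numbers:

import numpy as np

Y      = np.array([1.0, 2.0, 2.9999999, 4.2])
T_test = np.array([1.0, 2.0, 3.0,       4.0])
print(np.mean(np.isclose(Y, T_test)))   # 0.75: three of the four predictions count as correct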
Example #12
def conservation(a_i, a_f):
    # print("a_i: " +str(a_i[0]))
    # print("a_f: " +str(a_f[0]))
    # print("error: " +str(sp.fabs(sp.divide((a_i - a_f),a_i))[0]))
    return str( False not in (sp.isclose(a_i,a_f)))
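The membership test works, but the same conservation check reads more directly with all(); a sketch of an equivalent (not the original author's code):

import numpy as np

def conservation(a_i, a_f):
    # True exactly when every element of a_f is close to the matching element of a_i
    return str(bool(np.all(np.isclose(a_i, a_f))))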
Example #13
def calc_kinship(input_file='Data/1Kgenomes/1K_genomes_v3.hdf5' , out_file='Data/1Kgenomes/kinship.hdf5',
                  maf_thres=0.01, figure_dir='', figure_fn='', snp_filter_frac=1, indiv_filter_frac=1,
                  chrom_ok_snp_dict=None):
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    print 'Loading Genotype from %s' % input_file
    in_h5f = h5py.File(input_file)
#     eur_filter = in_h5f['indivs']['continent'][...] == 'EUR'
#     num_indivs = sp.sum(eur_filter)
    indiv_ids = in_h5f['indiv_ids'][...] 
    indiv_filter = None
    if indiv_filter_frac < 1:
        indiv_filter = sp.array(sp.random.random(len(indiv_ids)) < indiv_filter_frac, dtype='bool8')
        indiv_ids = indiv_ids[indiv_filter]
    assert len(sp.unique(indiv_ids)) == len(indiv_ids)
    num_indivs = len(indiv_ids) 
    

    ok_chromosome_dict = {}

    not_done = set(range(1, 23))
    while len(not_done) > 0:
        chromosome_dict = {}
        
        K_all_snps = sp.zeros((num_indivs, num_indivs), dtype='float32')
        num_all_snps = 0
        
        sum_indiv_genotypes_all_chrom = sp.zeros(num_indivs, dtype='float32')
#         snp_cov_all_snps = sp.zeros((num_indivs, num_indivs), dtype='float64')
        
        print 'Calculating kinship'
        
        for chrom in range(1, 23):
            print 'Working on Chromosome %d' % chrom
            chrom_str = 'chr%d' % chrom
            
            snp_filter = None
            if snp_filter_frac < 1:
                snp_filter = sp.random.random(len(in_h5f[chrom_str]['snps'])) < snp_filter_frac
                            
            g_dict = get_genotype_data(in_h5f, chrom, maf_thres, indiv_filter=indiv_filter,
                        snp_filter=snp_filter, randomize_sign=True, snps_signs=None, chrom_ok_snp_dict=chrom_ok_snp_dict)
            
            norm_snps = g_dict['norm_snps']
            
            sum_indiv_genotypes = sp.sum(g_dict['norm_snps'], 0)
            sum_indiv_genotypes_all_chrom += sum_indiv_genotypes
            
            print 'Calculating chromosome kinship'
            K_unscaled = sp.array(sp.dot(norm_snps.T, norm_snps), dtype='float32')
            assert sp.isclose(sp.sum(sp.diag(K_unscaled)) / (len(norm_snps) * num_indivs), 1.0), '..bug' 
            K_all_snps += K_unscaled
            num_all_snps += len(norm_snps)
    
            print 'SNP-cov normalisation'
            sum_indiv_genotypes = sp.sum(norm_snps, 0)
            sum_indiv_genotypes_all_chrom += sum_indiv_genotypes
            mean_indiv_genotypes = sum_indiv_genotypes / len(norm_snps)
            norm_snps = norm_snps - mean_indiv_genotypes
            
            print 'Calculating SNP covariance unscaled'
            
            snp_cov_unscaled = sp.array(sp.dot(norm_snps.T, norm_snps), dtype='float32')
#             snp_cov_all_snps += snp_cov_unscaled
            
            print 'Storing and updating things'
            chromosome_dict[chrom_str] = {'K_unscaled':K_unscaled, 'num_snps':len(norm_snps),
                                          'sum_indiv_genotypes':sum_indiv_genotypes,
                                          'snp_cov_unscaled':snp_cov_unscaled,
                                          'snps_signs':g_dict['snps_signs']}
            
            if snp_filter_frac < 1:
                chromosome_dict[chrom_str]['snp_filter'] = snp_filter
    
#         snp_cov_all_snps = snp_cov_all_snps / float(num_all_snps)
#         K_all_snps = K_all_snps / float(num_all_snps)
#         print 'K_all_snps.shape: %s' % str(K_all_snps.shape)
#         print 'snp_cov_all_snps.shape: %s' % str(snp_cov_all_snps.shape)
#         print 'sp.diag(snp_cov_all_snps): %s' % str(sp.diag(snp_cov_all_snps))
#         print 'sp.mean(sp.diag(snp_cov_all_snps)_: %s' % str(sp.mean(sp.diag(snp_cov_all_snps)))
        
#         print 'Full kinship and snp-covariance calculation done using %d SNPs\n' % num_all_snps
        
        mean_indiv_genotypes_all_chrom = sum_indiv_genotypes_all_chrom / num_all_snps
        print 'Individual genotype mean found:'
        print mean_indiv_genotypes_all_chrom
        
        print 'Calculating chromosome-wise SNP-covariance and kinship matrices'
        for chrom in range(1, 23):
            if chrom in not_done:
                print 'Working on Chromosome %d' % chrom
                chrom_str = 'chr%d' % chrom
                
                snp_cov_leave_one_out = sp.zeros((num_indivs, num_indivs), dtype='float32')
                K_leave_one_out = sp.zeros((num_indivs, num_indivs), dtype='float32')
                num_snps_used = 0 
                
                sum_indiv_genotypes = sp.zeros(num_indivs, dtype='float32')
                
                for chrom2 in range(1, 23):
                    chrom2_str = 'chr%d' % chrom2
                    if chrom2 != chrom: 
                        sum_indiv_genotypes += chromosome_dict[chrom2_str]['sum_indiv_genotypes']
                        K_leave_one_out += chromosome_dict[chrom2_str]['K_unscaled']
                        num_snps_used += chromosome_dict[chrom2_str]['num_snps']
                        assert sp.isclose(sp.sum(sp.diag(K_leave_one_out)) / (num_snps_used * num_indivs), 1.0), '..bug' 
        
                mean_indiv_genotypes = sum_indiv_genotypes / num_snps_used
        
                for chrom2 in range(1, 23):
                    chrom2_str = 'chr%d' % chrom2
                    if chrom2 != chrom: 
                        print 'Loading SNPs'
                        snps_signs = chromosome_dict[chrom2_str]['snps_signs']
                        snp_filter = chromosome_dict[chrom2_str]['snp_filter']
                        g_dict = get_genotype_data(in_h5f, chrom2, maf_thres, indiv_filter=indiv_filter,
                                                   snp_filter=snp_filter, randomize_sign=True,
                                                   snps_signs=snps_signs, chrom_ok_snp_dict=chrom_ok_snp_dict)
                        norm_snps = g_dict['norm_snps']
                        print 'SNP-cov normalisation'
                        norm_snps = norm_snps - mean_indiv_genotypes
                        
                        print 'Calculating SNP covariance unscaled'
                        snp_cov_unscaled = sp.dot(norm_snps.T, norm_snps)
                        snp_cov_leave_one_out += snp_cov_unscaled
                  
                snp_cov_leave_one_out = snp_cov_leave_one_out / num_snps_used
                
                K_leave_one_out = K_leave_one_out / num_snps_used
                assert (K_leave_one_out - sp.diag(K_leave_one_out)).max() < 0.1, '..bug' 
                
                try:
                    cholesky_decomp_inv_snp_cov = linalg.cholesky(linalg.pinv(sp.array(snp_cov_leave_one_out, dtype='float64')))  
                    evals, evecs = linalg.eig(sp.array(K_leave_one_out, dtype='float64')) 
                except:
                    try: 
                        cholesky_decomp_inv_snp_cov = linalg.cholesky(linalg.pinv(sp.array(snp_cov_leave_one_out, dtype='float32')))
                        evals, evecs = linalg.eig(sp.array(K_leave_one_out, dtype='float32')) 
                    except:
                        print 'Failed when obtaining the Cholesky decomposition or eigen decomposition'
                        print 'Moving on, trying again later.'
                        continue
                
                sort_indices = sp.argsort(evals,)
                ordered_evals = evals[sort_indices]
                print ordered_evals[-10:] / sp.sum(ordered_evals)
                ordered_evecs = evecs[:, sort_indices]
                d = {}
                d['evecs_leave_one_out'] = ordered_evecs
                d['evals_leave_one_out'] = ordered_evals
                d['cholesky_decomp_inv_snp_cov'] = cholesky_decomp_inv_snp_cov
                d['K_leave_one_out'] = K_leave_one_out
                d['K_unscaled'] = chromosome_dict[chrom_str]['K_unscaled']
                d['num_snps'] = chromosome_dict[chrom_str]['num_snps']
                d['snp_cov_leave_one_out'] = snp_cov_leave_one_out
                ok_chromosome_dict[chrom_str] = d
                not_done.remove(chrom)

    # While loop ends here.
    K_all_snps = K_all_snps / float(num_all_snps)
    in_h5f.close()
    ok_chromosome_dict['K_all_snps'] = K_all_snps
    ok_chromosome_dict['num_all_snps'] = num_all_snps

    assert sp.sum((ok_chromosome_dict['chr1']['K_leave_one_out'] - ok_chromosome_dict['chr2']['K_leave_one_out']) ** 2) != 0 , 'Kinships are probably too similar.'
        
    print 'Calculating PCAs'
    evals, evecs = linalg.eigh(sp.array(K_all_snps, dtype='float64'))  # PCA via eigen decomp
    evals[evals < 0] = 0
    sort_indices = sp.argsort(evals,)[::-1]
    ordered_evals = evals[sort_indices]
    print ordered_evals[:10] / sp.sum(ordered_evals)
    pcs = evecs[:, sort_indices]


    tot = sum(evals)
    var_exp = [(i / tot) * 100 for i in sorted(evals, reverse=True)]
    print 'Total variance explained:', sp.sum(var_exp)

    ok_chromosome_dict['pcs'] = pcs
    ok_chromosome_dict['pcs_var_exp'] = var_exp

    if figure_dir is not None:
        plt.clf()    
        plt.plot(pcs[:, 0], pcs[:, 1], 'k.')
        plt.title("Overall PCA")
        plt.xlabel('PC1')
        plt.ylabel('PC2')
        plt.tight_layout()
        plt.savefig(figure_dir + '/' + figure_fn, format='pdf')
        plt.clf()
    
    out_h5f = h5py.File(out_file)
    hu.dict_to_hdf5(ok_chromosome_dict, out_h5f)
    out_h5f.close()
    
    return ok_chromosome_dict
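The leave-one-chromosome-out kinship above is accumulated by summing the unscaled per-chromosome matrices of every other chromosome and dividing by the SNPs used. A toy sketch of why that is the same as subtracting the held-out chromosome from the genome-wide sum (shapes and counts are arbitrary):

import numpy as np

rng = np.random.default_rng(1)
per_chrom = [rng.normal(size=(4, 4)) for _ in range(3)]   # unscaled per-chromosome kinships
n_snps = [100, 150, 200]
total = sum(per_chrom)
loo_0 = (total - per_chrom[0]) / (sum(n_snps) - n_snps[0])   # subtract chromosome 0 ...
alt = sum(per_chrom[1:]) / sum(n_snps[1:])                   # ... or sum everything except it
assert np.allclose(loo_0, alt)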
Example #14
        A.add(n - 1, n - 1, kt / (dx / 2) * Ac)
        b[-1] += kt / (dx / 2) * Ac * T_B

        # Solution
        #        T = sp.linalg.solve(A,b)
        T = A.solve(b)

        return T

    print("START")
    # Number of control volumes
    n = 5
    # Thermal conductivity (W/mK)
    kt = 1000
    # Cross-sectional area (m2)
    Ac = 10e-3
    # Length (m)
    L = 0.5
    # Boundary temperatures (C)
    T_A = 100
    T_B = 500

    Tfull, dxFull = heatConduction1DConstantTemperatureBoundariesNoSources.main(
        n, kt, Ac, L, T_A, T_B)

    Tsparse = solver_1d(n, kt, Ac, L, T_A, T_B)

    assert all(sp.isclose(Tfull, Tsparse))
    print("OK")
    print("END")
Example #15
def calc_kinship(input_file='Data/1Kgenomes/1K_genomes_v3.hdf5',
                 out_file='Data/1Kgenomes/kinship.hdf5',
                 maf_thres=0.01,
                 figure_dir='',
                 figure_fn='',
                 snp_filter_frac=1,
                 indiv_filter_frac=1,
                 chrom_ok_snp_dict=None):
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    print 'Loading Genotype from %s' % input_file
    in_h5f = h5py.File(input_file)
    #     eur_filter = in_h5f['indivs']['continent'][...] == 'EUR'
    #     num_indivs = sp.sum(eur_filter)
    indiv_ids = in_h5f['indiv_ids'][...]
    indiv_filter = None
    if indiv_filter_frac < 1:
        indiv_filter = sp.array(
            sp.random.random(len(indiv_ids)) < indiv_filter_frac,
            dtype='bool8')
        indiv_ids = indiv_ids[indiv_filter]
    assert len(sp.unique(indiv_ids)) == len(indiv_ids)
    num_indivs = len(indiv_ids)

    ok_chromosome_dict = {}

    not_done = set(range(1, 23))
    while len(not_done) > 0:
        chromosome_dict = {}

        K_all_snps = sp.zeros((num_indivs, num_indivs), dtype='float32')
        num_all_snps = 0

        sum_indiv_genotypes_all_chrom = sp.zeros(num_indivs, dtype='float32')
        #         snp_cov_all_snps = sp.zeros((num_indivs, num_indivs), dtype='float64')

        print 'Calculating kinship'

        for chrom in range(1, 23):
            print 'Working on Chromosome %d' % chrom
            chrom_str = 'chr%d' % chrom

            snp_filter = None
            if snp_filter_frac < 1:
                snp_filter = sp.random.random(len(
                    in_h5f[chrom_str]['snps'])) < snp_filter_frac

            g_dict = get_genotype_data(in_h5f,
                                       chrom,
                                       maf_thres,
                                       indiv_filter=indiv_filter,
                                       snp_filter=snp_filter,
                                       randomize_sign=True,
                                       snps_signs=None,
                                       chrom_ok_snp_dict=chrom_ok_snp_dict)

            norm_snps = g_dict['norm_snps']

            sum_indiv_genotypes = sp.sum(g_dict['norm_snps'], 0)
            sum_indiv_genotypes_all_chrom += sum_indiv_genotypes

            print 'Calculating chromosome kinship'
            K_unscaled = sp.array(sp.dot(norm_snps.T, norm_snps),
                                  dtype='float32')
            assert sp.isclose(
                sp.sum(sp.diag(K_unscaled)) / (len(norm_snps) * num_indivs),
                1.0), '..bug'
            K_all_snps += K_unscaled
            num_all_snps += len(norm_snps)

            print 'SNP-cov normalisation'
            sum_indiv_genotypes = sp.sum(norm_snps, 0)
            sum_indiv_genotypes_all_chrom += sum_indiv_genotypes
            mean_indiv_genotypes = sum_indiv_genotypes / len(norm_snps)
            norm_snps = norm_snps - mean_indiv_genotypes

            print 'Calculating SNP covariance unscaled'

            snp_cov_unscaled = sp.array(sp.dot(norm_snps.T, norm_snps),
                                        dtype='float32')
            #             snp_cov_all_snps += snp_cov_unscaled

            print 'Storing and updating things'
            chromosome_dict[chrom_str] = {
                'K_unscaled': K_unscaled,
                'num_snps': len(norm_snps),
                'sum_indiv_genotypes': sum_indiv_genotypes,
                'snp_cov_unscaled': snp_cov_unscaled,
                'snps_signs': g_dict['snps_signs']
            }

            if snp_filter_frac < 1:
                chromosome_dict[chrom_str]['snp_filter'] = snp_filter


#         snp_cov_all_snps = snp_cov_all_snps / float(num_all_snps)
#         K_all_snps = K_all_snps / float(num_all_snps)
#         print 'K_all_snps.shape: %s' % str(K_all_snps.shape)
#         print 'snp_cov_all_snps.shape: %s' % str(snp_cov_all_snps.shape)
#         print 'sp.diag(snp_cov_all_snps): %s' % str(sp.diag(snp_cov_all_snps))
#         print 'sp.mean(sp.diag(snp_cov_all_snps)_: %s' % str(sp.mean(sp.diag(snp_cov_all_snps)))

#         print 'Full kinship and snp-covariance calculation done using %d SNPs\n' % num_all_snps

        mean_indiv_genotypes_all_chrom = sum_indiv_genotypes_all_chrom / num_all_snps
        print 'Individual genotype mean found:'
        print mean_indiv_genotypes_all_chrom

        print 'Calculating chromosome-wise SNP-covariance and kinship matrices'
        for chrom in range(1, 23):
            if chrom in not_done:
                print 'Working on Chromosome %d' % chrom
                chrom_str = 'chr%d' % chrom

                snp_cov_leave_one_out = sp.zeros((num_indivs, num_indivs),
                                                 dtype='float32')
                K_leave_one_out = sp.zeros((num_indivs, num_indivs),
                                           dtype='float32')
                num_snps_used = 0

                sum_indiv_genotypes = sp.zeros(num_indivs, dtype='float32')

                for chrom2 in range(1, 23):
                    chrom2_str = 'chr%d' % chrom2
                    if chrom2 != chrom:
                        sum_indiv_genotypes += chromosome_dict[chrom2_str][
                            'sum_indiv_genotypes']
                        K_leave_one_out += chromosome_dict[chrom2_str][
                            'K_unscaled']
                        num_snps_used += chromosome_dict[chrom2_str][
                            'num_snps']
                        assert sp.isclose(
                            sp.sum(sp.diag(K_leave_one_out)) /
                            (num_snps_used * num_indivs), 1.0), '..bug'

                mean_indiv_genotypes = sum_indiv_genotypes / num_snps_used

                for chrom2 in range(1, 23):
                    chrom2_str = 'chr%d' % chrom2
                    if chrom2 != chrom:
                        print 'Loading SNPs'
                        snps_signs = chromosome_dict[chrom2_str]['snps_signs']
                        snp_filter = chromosome_dict[chrom2_str]['snp_filter']
                        g_dict = get_genotype_data(
                            in_h5f,
                            chrom2,
                            maf_thres,
                            indiv_filter=indiv_filter,
                            snp_filter=snp_filter,
                            randomize_sign=True,
                            snps_signs=snps_signs,
                            chrom_ok_snp_dict=chrom_ok_snp_dict)
                        norm_snps = g_dict['norm_snps']
                        print 'SNP-cov normalisation'
                        norm_snps = norm_snps - mean_indiv_genotypes

                        print 'Calculating SNP covariance unscaled'
                        snp_cov_unscaled = sp.dot(norm_snps.T, norm_snps)
                        snp_cov_leave_one_out += snp_cov_unscaled

                snp_cov_leave_one_out = snp_cov_leave_one_out / num_snps_used

                K_leave_one_out = K_leave_one_out / num_snps_used
                assert (K_leave_one_out -
                        sp.diag(K_leave_one_out)).max() < 0.1, '..bug'

                try:
                    cholesky_decomp_inv_snp_cov = linalg.cholesky(
                        linalg.pinv(
                            sp.array(snp_cov_leave_one_out, dtype='float64')))
                    evals, evecs = linalg.eig(
                        sp.array(K_leave_one_out, dtype='float64'))
                except:
                    try:
                        cholesky_decomp_inv_snp_cov = linalg.cholesky(
                            linalg.pinv(
                                sp.array(snp_cov_leave_one_out,
                                         dtype='float32')))
                        evals, evecs = linalg.eig(
                            sp.array(K_leave_one_out, dtype='float32'))
                    except:
                        print 'Failed when obtaining the Cholesky decomposition or eigen decomposition'
                        print 'Moving on, trying again later.'
                        continue

                sort_indices = sp.argsort(evals, )
                ordered_evals = evals[sort_indices]
                print ordered_evals[-10:] / sp.sum(ordered_evals)
                ordered_evecs = evecs[:, sort_indices]
                d = {}
                d['evecs_leave_one_out'] = ordered_evecs
                d['evals_leave_one_out'] = ordered_evals
                d['cholesky_decomp_inv_snp_cov'] = cholesky_decomp_inv_snp_cov
                d['K_leave_one_out'] = K_leave_one_out
                d['K_unscaled'] = chromosome_dict[chrom_str]['K_unscaled']
                d['num_snps'] = chromosome_dict[chrom_str]['num_snps']
                d['snp_cov_leave_one_out'] = snp_cov_leave_one_out
                ok_chromosome_dict[chrom_str] = d
                not_done.remove(chrom)

    # While loop ends here.
    K_all_snps = K_all_snps / float(num_all_snps)
    in_h5f.close()
    ok_chromosome_dict['K_all_snps'] = K_all_snps
    ok_chromosome_dict['num_all_snps'] = num_all_snps

    assert sp.sum((ok_chromosome_dict['chr1']['K_leave_one_out'] -
                   ok_chromosome_dict['chr2']['K_leave_one_out'])**
                  2) != 0, 'Kinships are probably too similar.'

    print 'Calculating PCAs'
    evals, evecs = linalg.eigh(sp.array(
        K_all_snps, dtype='float64'))  # PCA via eigen decomp
    evals[evals < 0] = 0
    sort_indices = sp.argsort(evals, )[::-1]
    ordered_evals = evals[sort_indices]
    print ordered_evals[:10] / sp.sum(ordered_evals)
    pcs = evecs[:, sort_indices]

    tot = sum(evals)
    var_exp = [(i / tot) * 100 for i in sorted(evals, reverse=True)]
    print 'Total variance explained:', sp.sum(var_exp)

    ok_chromosome_dict['pcs'] = pcs
    ok_chromosome_dict['pcs_var_exp'] = var_exp

    if figure_dir is not None:
        plt.clf()
        plt.plot(pcs[:, 0], pcs[:, 1], 'k.')
        plt.title("Overall PCA")
        plt.xlabel('PC1')
        plt.ylabel('PC2')
        plt.tight_layout()
        plt.savefig(figure_dir + '/' + figure_fn, format='pdf')
        plt.clf()

    out_h5f = h5py.File(out_file)
    hu.dict_to_hdf5(ok_chromosome_dict, out_h5f)
    out_h5f.close()

    return ok_chromosome_dict
Example #16
    def solve_new(self):
        ###########################################################################
        # SOLVER LOOP
        ###########################################################################

        step = 0
        self.t = 0
        ts = [self.t]

        prev_time_for_prope_time = None

        while step < self.max_steps:

            # Check last step
            if self.t + float(self.dt_fe) > self.t_end:
                self.dt_fe = fe.Constant(self.t_end - self.t)
            # Check prope time
            elif self.t + float(
                    self.dt_fe) > self.prope_times[self.prope_times_k]:
                prev_time_for_prope_time = float(self.dt_fe)
                self.dt_fe = fe.Constant(self.prope_times[self.prope_times_k] -
                                         self.t)
                self.prope_times_k += 1

            # Progress time
            step += 1
            self.t += float(self.dt_fe)
            ts.append(self.t)

            ##############################
            # Solver
            ##############################

            # Solve heat and moisture
            fe.solve(self.a_T == self.L_T, self.T, self.bc_T)
            fe.solve(self.a_phi == self.L_phi, self.phi, self.bc_phi)

            # Assign previous-step fields (kept for the 2nd-order scheme)
            if self.order_2nd:
                self.T_old2.assign(self.T_old)
                self.phi_old2.assign(self.phi_old)

            # Update solved fields
            self.T_old.assign(self.T)
            self.phi_old.assign(self.phi)

            # Update material properties
            phi_old_int = fe.interpolate(self.phi_old, self.v_materials)
            #            w_int       = fe.interpolate(self.w, self.v_materials)

            self.w.x_k = phi_old_int
            self.kT.x_k = phi_old_int
            self.delta_p.x_k = phi_old_int
            self.Dw.x_k = phi_old_int
            self.xi.x_k = phi_old_int

            ##############################
            # Post
            ##############################
            if sp.isclose(self.t, self.prope_times[self.prope_times_k]):
                self.prope()
                print("step=%i progress=%.2f t=%.2f (d) dt=%.2f (h)" %
                      (step, self.t / self.t_end, self.t / s2d,
                       float(self.dt_fe) / s2h))
                self.save_time()

            elif step % 100 == 0:
                print("step=%i progress=%.2f t=%.2f (d) dt=%.2f (h)" %
                      (step, self.t / self.t_end, self.t / s2d,
                       float(self.dt_fe) / s2h))
                self.save_time()

            ##############################
            # Next step
            ##############################

            # Check end
            if sp.isclose(self.t, self.t_end) or self.t > self.t_end:
                break

            # Increase timestep

            if prev_time_for_prope_time:
                self.dt_fe = fe.Constant(prev_time_for_prope_time)
                prev_time_for_prope_time = None

            self.dt_fe *= self.time_gamma
            if float(self.dt_fe) > self.max_dt:
                self.dt_fe = fe.Constant(self.max_dt)
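Both isclose calls in the loop exist because self.t is built up by repeated float additions, so it rarely hits a probe time or t_end exactly. A minimal demonstration of the problem being guarded against:

import numpy as np

t, dt, t_end = 0.0, 0.1, 1.0
for _ in range(10):
    t += dt
print(t == t_end)            # False: t is 0.9999999999999999
print(np.isclose(t, t_end))  # True: which is why the solver compares with isclose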
Example #17
def gen_unrelated_eur_1k_data(input_file='/home/bjarni/TheHonestGene/faststorage/1Kgenomes/phase3/1k_genomes_hg.hdf5' ,
                              out_file='/home/bjarni/PCMA/faststorage/1_DATA/1k_genomes/1K_genomes_phase3_EUR_unrelated.hdf5',
                              maf_thres=0.01, max_relatedness=0.05, K_thinning_frac=0.1, debug=False):
    h5f = h5py.File(input_file)
    num_indivs = len(h5f['indivs']['continent'])
    eur_filter = h5f['indivs']['continent'][...] == 'EUR'
    num_eur_indivs = sp.sum(eur_filter)
    print 'Number of European individuals: %d' % num_eur_indivs
    K = sp.zeros((num_eur_indivs, num_eur_indivs), dtype='single')
    num_snps = 0
    std_thres = sp.sqrt(2.0 * (1 - maf_thres) * (maf_thres))

    print 'Calculating kinship'
    for chrom in range(1, 23):
        print 'Working on Chromosome %d' % chrom
        chrom_str = 'chr%d' % chrom
        
        print 'Loading SNPs and data'
        snps = sp.array(h5f[chrom_str]['calldata']['snps'][...], dtype='int8')

        print 'Loading NTs'
        ref_nts = h5f[chrom_str]['variants']['REF'][...]
        alt_nts = h5f[chrom_str]['variants']['ALT'][...]
        
        print 'Filtering multi-allelic SNPs'
        multi_allelic_filter = sp.negative(h5f[chrom_str]['variants']['MULTI_ALLELIC'][...])
        snps = snps[multi_allelic_filter]
        ref_nts = ref_nts[multi_allelic_filter]
        alt_nts = alt_nts[multi_allelic_filter]


        if K_thinning_frac < 1:
            print 'Thinning SNPs for kinship calculation'
            thinning_filter = sp.random.random(len(snps)) < K_thinning_frac
            snps = snps[thinning_filter]
            alt_nts = alt_nts[thinning_filter]
            ref_nts = ref_nts[thinning_filter]

        print 'Filter SNPs with missing NT information'
        nt_filter = sp.in1d(ref_nts, ok_nts)
        nt_filter = nt_filter * sp.in1d(alt_nts, ok_nts)
        if sp.sum(nt_filter) < len(nt_filter):
            snps = snps[nt_filter]

        print 'Filtering non-European individuals'
        snps = snps[:, eur_filter]

        print 'Filtering SNPs with MAF <', maf_thres
        snp_stds = sp.std(snps, 1)
        maf_filter = snp_stds.flatten() > std_thres
        snps = snps[maf_filter]
        snp_stds = snp_stds[maf_filter]
        
        print '%d SNPs remaining after all filtering steps.' % len(snps)

        print 'Normalizing SNPs'
        snp_means = sp.mean(snps, 1)
        norm_snps = (snps - snp_means[sp.newaxis].T) / snp_stds[sp.newaxis].T
        
        print 'Updating kinship'        
        K += sp.dot(norm_snps.T, norm_snps)
        num_snps += len(norm_snps)
        assert sp.isclose(sp.sum(sp.diag(K)) / (num_snps * num_eur_indivs), 1.0)

    K = K / float(num_snps)
    print 'Kinship calculation done using %d SNPs\n' % num_snps
    
    # Filter individuals
    print 'Filtering individuals'
    keep_indiv_set = set(range(num_eur_indivs))
    for i in range(num_eur_indivs):
        if i in keep_indiv_set:
            for j in range(i + 1, num_eur_indivs):
                if K[i, j] > max_relatedness:
                    if j in keep_indiv_set:
                        keep_indiv_set.remove(j)
    keep_indivs = list(keep_indiv_set)
    keep_indivs.sort()
    print 'Retained %d individuals\n' % len(keep_indivs)
    
    # Checking that everything is ok!
    K_ok = K[keep_indivs]
    K_ok = K_ok[:, keep_indivs]
    assert (K_ok - sp.tril(K_ok)).max() < max_relatedness

    indiv_filter = sp.zeros(num_indivs, dtype='bool8')
    indiv_filter[(sp.arange(num_indivs)[eur_filter])[keep_indivs]] = 1
    
    assert sp.sum(indiv_filter) == len(keep_indivs)
    
    # Store in new file
    print 'Now storing data.'
    oh5f = h5py.File(out_file, 'w')
    indiv_ids = h5f['indivs']['indiv_ids'][indiv_filter]
    oh5f.create_dataset('indiv_ids', data=indiv_ids)    
    for chrom in range(1, 23):
        print 'Working on Chromosome %d' % chrom
        chrom_str = 'chr%d' % chrom
        
        print 'Loading SNPs and data'
        snps = sp.array(h5f[chrom_str]['calldata']['snps'][...], dtype='int8')
        snp_ids = h5f[chrom_str]['variants']['ID'][...]
        positions = h5f[chrom_str]['variants']['POS'][...]

        print 'Loading NTs'
        ref_nts = h5f[chrom_str]['variants']['REF'][...]
        alt_nts = h5f[chrom_str]['variants']['ALT'][...]
        
        print 'Filtering multi-allelic SNPs'
        multi_allelic_filter = sp.negative(h5f[chrom_str]['variants']['MULTI_ALLELIC'][...])
        snps = snps[multi_allelic_filter]
        ref_nts = ref_nts[multi_allelic_filter]
        alt_nts = alt_nts[multi_allelic_filter]
        positions = positions[multi_allelic_filter]
        snp_ids = snp_ids[multi_allelic_filter]

        print 'Filter individuals'
        snps = snps[:, indiv_filter]
        
        print 'Filter SNPs with missing NT information'
        nt_filter = sp.in1d(ref_nts, ok_nts)
        nt_filter = nt_filter * sp.in1d(alt_nts, ok_nts)
        if sp.sum(nt_filter) < len(nt_filter):
            snps = snps[nt_filter]
            ref_nts = ref_nts[nt_filter]
            alt_nts = alt_nts[nt_filter]
            positions = positions[nt_filter]
            snp_ids = snp_ids[nt_filter]
        
        print 'filter monomorphic SNPs'
        snp_stds = sp.std(snps, 1)
        mono_morph_filter = snp_stds > 0
        snps = snps[mono_morph_filter]
        ref_nts = ref_nts[mono_morph_filter]
        alt_nts = alt_nts[mono_morph_filter]
        positions = positions[mono_morph_filter]
        snp_ids = snp_ids[mono_morph_filter]
        snp_stds = snp_stds[mono_morph_filter]

        snp_means = sp.mean(snps, 1)

        if debug:
            if K_thinning_frac < 1:
                print 'Thinning SNPs for kinship calculation'
                thinning_filter = sp.random.random(len(snps)) < K_thinning_frac
                k_snps = snps[thinning_filter]
                k_snp_stds = snp_stds[thinning_filter]

    
            print 'Filtering SNPs with MAF <', maf_thres
            maf_filter = k_snp_stds.flatten() > std_thres
            k_snps = k_snps[maf_filter]
            k_snp_stds = k_snp_stds[maf_filter]
            k_snp_means = sp.mean(k_snps, 1)  # per-SNP means, mirroring snp_means above

            print 'Verifying that the Kinship makes sense'
            norm_snps = (k_snps - k_snp_means[sp.newaxis].T) / k_snp_stds[sp.newaxis].T
            K = sp.dot(norm_snps.T, norm_snps)
            num_snps += len(norm_snps)
            if sp.isclose(sp.sum(sp.diag(K)) / (num_snps * num_eur_indivs), 1.0) and (K - sp.tril(K)).max() < (max_relatedness * 1.5):
                print 'It looks OK!'
            else:
                raise Exception('Kinship looks wrong?')
        

        nts = sp.array([[nt1, nt2] for nt1, nt2 in izip(ref_nts, alt_nts)])

        print 'Writing to disk'
        cg = oh5f.create_group(chrom_str)
        cg.create_dataset('snps', data=snps)
        cg.create_dataset('snp_means', data=snp_means[sp.newaxis].T)
        cg.create_dataset('snp_stds', data=snp_stds[sp.newaxis].T)
        cg.create_dataset('snp_ids', data=snp_ids)
        cg.create_dataset('positions', data=positions)
        cg.create_dataset('nts', data=nts)
        oh5f.flush()
        print 'Done writing to disk'
        
#         centimorgans = h5f[chrom_str]['centimorgans'][...]
#         cg.create_dataset('centimorgans',data=centimorgans)
#         
#         centimorgan_rates = h5f[chrom_str]['centimorgan_rates'][...]
#         cg.create_dataset('centimorgan_rates',data=centimorgan_rates)
        
    oh5f.close()
    h5f.close()
    print 'Done'
Example #18
                                                   step_estimate(state_seq, reward_seq, i),
                                                   step_weight(lambda_val, i))

        print '  weight sum:', sum([step_weight(lambda_val, j) for j in range(1, max_lookahead + 2)])
        print

    return sum([step_weight(lambda_val, k) * step_estimate(state_seq, reward_seq, k) for k in range(1, max_lookahead + 2)])


def fn(lambda_val, problem):
    return TD(lambda_val, problem) - TD(1, problem)


for idx, p in enumerate(problems):
    solved = False
    for guess in np.linspace(0.0, 0.9, 100):
        try:
            solution = optimize.newton(fn, guess, args=(p, ))
            if 0 <= solution < 0.99 and isclose(TD(1, p), TD(solution, p), tol):
                solved = True
                label = 'Unknown'
                if p.test:
                    label = 'Correct' if isclose(solution, p.solution, tol) else 'Failed'
                print 'Problem {}: {} ({})'.format(idx + 1, solution, label)
                break
        except RuntimeError:
            # Failed to converge after n iterations
            pass
    if not solved:
        print 'Problem {}: Failed'.format(idx + 1)
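The truncated snippet above sums weighted n-step estimates. If step_weight follows the usual TD(lambda) weighting of (1 - lambda) * lambda**(k-1) for the intermediate returns plus lambda**n for the final one, the printed 'weight sum' is 1 by the geometric series. A small check of that identity (this is an assumption about step_weight, whose definition is not shown here):

import numpy as np

lam, n = 0.6, 5
weights = [(1 - lam) * lam ** (k - 1) for k in range(1, n + 1)] + [lam ** n]
print(np.isclose(sum(weights), 1.0))   # True: (1 - lam) * sum(lam**(k-1)) + lam**n == 1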