Example #1
def divideData(filename,direct,num=5,mph=3,delet=True):
	print "Estimating heritability using "+str(num)+" components"
	[yFil,sFil]=getData(filename,mph=mph);
	n=sFil.iid_count	
	reOrd=perm(n);
	yFil=yFil[reOrd,:];
	sFil=sFil[reOrd,:];

	div=[int(math.ceil( i*n/float(num) )) for i in range(0,num+1)];
		
	varEsts=[];

	for i in range(0,num):
		print "For component "+str(i);
		sFilTemp=sFil[div[i]:div[i+1],:];

		yFilTemp=yFil[div[i]:div[i+1],:];

		fileTemp=direct+"/tempFile_"+str(i);
		Bed.write(fileTemp,sFilTemp.read());
		Pheno.write(fileTemp+".phen",yFilTemp.read())
		
		varEsts.append(varRes(fileTemp,direct));
		
		

		if delet:
			os.system("rm "+direct+"/tempFile_"+str(i)+"*");
	
	return varEsts;
Example #2
def divideData(filename, direct, num=5, mph=3, delet=True):
    print "Estimating heritability using " + str(num) + " components"
    [yFil, sFil] = getData(filename, mph=mph)
    n = sFil.iid_count
    reOrd = perm(n)
    yFil = yFil[reOrd, :]
    sFil = sFil[reOrd, :]

    div = [int(math.ceil(i * n / float(num))) for i in range(0, num + 1)]

    varEsts = []

    for i in range(0, num):
        print "For component " + str(i)
        sFilTemp = sFil[div[i]:div[i + 1], :]

        yFilTemp = yFil[div[i]:div[i + 1], :]

        fileTemp = direct + "/tempFile_" + str(i)
        Bed.write(fileTemp, sFilTemp.read())
        Pheno.write(fileTemp + ".phen", yFilTemp.read())

        varEsts.append(varRes(fileTemp, direct))

        if delet:
            os.system("rm " + direct + "/tempFile_" + str(i) + "*")

    return varEsts
Example #3
 def test_write_x_x_cpp(self):
     snpreader = Bed(self.currentFolder + "/examples/toydata")
     for order in ['C','F']:
         for dtype in [np.float32,np.float64]:
             snpdata = snpreader.read(order=order,dtype=dtype)
             snpdata.val[-1,0] = float("NAN")
             output = "tempdir/toydata.{0}{1}.cpp".format(order,"32" if dtype==np.float32 else "64")
             create_directory_if_necessary(output)
             Bed.write(output, snpdata)
             snpdata2 = Bed(output).read()
             np.testing.assert_array_almost_equal(snpdata.val, snpdata2.val, decimal=10)
Example #4
 def test_write_x_x_cpp(self):
     snpreader = Bed(self.currentFolder + "/examples/toydata")
     for order in ['C','F']:
         for dtype in [np.float32,np.float64]:
             snpdata = snpreader.read(order=order,dtype=dtype)
             snpdata.val[-1,0] = float("NAN")
             output = "tempdir/toydata.{0}{1}.cpp".format(order,"32" if dtype==np.float32 else "64")
             create_directory_if_necessary(output)
             Bed.write(snpdata, output)
             snpdata2 = Bed(output).read()
             assert TestLoader.is_same(snpdata, snpdata2) #!!!define an equality method on snpdata?
Example #5
 def too_slow_test_write_bedbig(self):
     iid_count = 100000
     sid_count = 50000
     from pysnptools.snpreader.snpdata import SnpData #!!! promote one level up in namespace
     iid = np.array([[str(i),str(i)] for i in xrange(iid_count)])
     sid = np.array(["sid_{0}".format(i) for i in xrange(sid_count)])
     pos = np.array([[i,i,i] for i in xrange(sid_count)])
     np.random.seed(0)
     snpdata = SnpData(iid,sid,pos,np.zeros((iid_count,sid_count))) #random.choice((0.0,1.0,2.0,float("nan")),size=(iid_count,sid_count)))
     output = "tempdir/bedbig.{0}.{1}".format(iid_count,sid_count)
     create_directory_if_necessary(output)
     Bed.write(snpdata, output)
     snpdata2 = Bed(output).read()
     assert TestLoader.is_same(snpdata, snpdata2) #!!!define an equality method on snpdata?
Example #6
 def too_slow_test_write_bedbig(self):
     iid_count = 100000
     sid_count = 50000
     from pysnptools.snpreader import SnpData
     iid = np.array([[str(i),str(i)] for i in range(iid_count)])
     sid = np.array(["sid_{0}".format(i) for i in range(sid_count)])
     pos = np.array([[i,i,i] for i in range(sid_count)])
     np.random.seed(0)
     snpdata = SnpData(iid,sid,np.zeros((iid_count,sid_count)),pos=pos) #random.choice((0.0,1.0,2.0,float("nan")),size=(iid_count,sid_count)))
     output = "tempdir/bedbig.{0}.{1}".format(iid_count,sid_count)
     create_directory_if_necessary(output)
     Bed.write(output, snpdata, count_A1=False)
     snpdata2 = Bed(output,count_A1=False).read()
     np.testing.assert_array_almost_equal(snpdata.val, snpdata2.val, decimal=10)
Example #7
    def write(path, storage, snpdata, count_A1=True, updater=None):
        file_list = [
            SnpReader._name_of_other_file(path,
                                          remove_suffix="bed",
                                          add_suffix=new_suffix)
            for new_suffix in ["bim", "fam", "bed"]
        ]  #'bed' should be last
        with _multiopen(
                lambda file_name: storage.open_write(file_name,
                                                     updater=updater),
                file_list) as local_file_name_list:
            Bed.write(local_file_name_list[-1], snpdata, count_A1=count_A1)

        return _Distributed1Bed(path, storage)
Example #8
 def test_write_bed_f64cpp_5_python(self):
     snpreader = Bed(self.currentFolder + "/examples/toydata")
     iid_index = 5
     logging.info("iid={0}".format(iid_index))
     #if snpreader.iid_count % 4 == 0: # divisible by 4 isn't a good test
     #    snpreader = snpreader[0:-1,:]
     #assert snpreader.iid_count % 4 != 0
     snpdata = snpreader[0:iid_index,:].read(order='F',dtype=np.float64)
     if snpdata.iid_count > 0:
         snpdata.val[-1,0] = float("NAN")
     output = "tempdir/toydata.F64python.{0}".format(iid_index)
     create_directory_if_necessary(output)
     Bed.write(snpdata, output,force_python_only=True)
     snpdata2 = Bed(output).read()
     assert TestLoader.is_same(snpdata, snpdata2) #!!!define an equality method on snpdata?
Example #9
 def test_write_bed_f64cpp_5_python(self):
     snpreader = Bed(self.currentFolder + "/examples/toydata",count_A1=False)
     iid_index = 5
     logging.info("iid={0}".format(iid_index))
     #if snpreader.iid_count % 4 == 0: # divisible by 4 isn't a good test
     #    snpreader = snpreader[0:-1,:]
     #assert snpreader.iid_count % 4 != 0
     snpdata = snpreader[0:iid_index,:].read(order='F',dtype=np.float64)
     if snpdata.iid_count > 0:
         snpdata.val[-1,0] = float("NAN")
     output = "tempdir/toydata.F64python.{0}".format(iid_index)
     create_directory_if_necessary(output)
     Bed.write(output,snpdata, force_python_only=True)
     snpdata2 = Bed(output,count_A1=False).read()
     np.testing.assert_array_almost_equal(snpdata.val, snpdata2.val, decimal=10)
Example #10
 def test_write_x_x_cpp(self):
     for count_A1 in [False, True]:
         snpreader = Bed(self.currentFolder + "/examples/toydata",
                         count_A1=count_A1)
         for order in ['C', 'F']:
             for dtype in [np.float32, np.float64]:
                 snpdata = snpreader.read(order=order, dtype=dtype)
                 snpdata.val[-1, 0] = float("NAN")
                 output = "tempdir/toydata.{0}{1}.cpp".format(
                     order, "32" if dtype == np.float32 else "64")
                 create_directory_if_necessary(output)
                 Bed.write(output, snpdata, count_A1=count_A1)
                 snpdata2 = Bed(output, count_A1=count_A1).read()
                 np.testing.assert_array_almost_equal(snpdata.val,
                                                      snpdata2.val,
                                                      decimal=10)
Example #11
 def test_write_bed_f64cpp_5_python(self):
     snpreader = Bed(self.currentFolder + "/examples/toydata",
                     count_A1=False)
     iid_index = 5
     logging.info("iid={0}".format(iid_index))
     #if snpreader.iid_count % 4 == 0: # divisible by 4 isn't a good test
     #    snpreader = snpreader[0:-1,:]
     #assert snpreader.iid_count % 4 != 0
     snpdata = snpreader[0:iid_index, :].read(order='F', dtype=np.float64)
     if snpdata.iid_count > 0:
         snpdata.val[-1, 0] = float("NAN")
     output = "tempdir/toydata.F64python.{0}".format(iid_index)
     create_directory_if_necessary(output)
     Bed.write(output, snpdata, force_python_only=True)
     snpdata2 = Bed(output, count_A1=False).read()
     np.testing.assert_array_almost_equal(snpdata.val,
                                          snpdata2.val,
                                          decimal=10)
Example #12
 def too_slow_test_write_bedbig(self):
     iid_count = 100000
     sid_count = 50000
     from pysnptools.snpreader import SnpData
     iid = np.array([[str(i), str(i)] for i in range(iid_count)])
     sid = np.array(["sid_{0}".format(i) for i in range(sid_count)])
     pos = np.array([[i, i, i] for i in range(sid_count)])
     np.random.seed(0)
     snpdata = SnpData(
         iid, sid, np.zeros((iid_count, sid_count)), pos=pos
     )  #random.choice((0.0,1.0,2.0,float("nan")),size=(iid_count,sid_count)))
     output = "tempdir/bedbig.{0}.{1}".format(iid_count, sid_count)
     create_directory_if_necessary(output)
     Bed.write(output, snpdata, count_A1=False)
     snpdata2 = Bed(output, count_A1=False).read()
     np.testing.assert_array_almost_equal(snpdata.val,
                                          snpdata2.val,
                                          decimal=10)
Example #13
def shuffle_bed(bed_file):
    """
    shuffle the genotypes of individuals snp-by-snp
    :param bed_file: the prefix for plink binary file
    :return: the shuffled plink binary file
    """
    try:
        from pysnptools.snpreader import Bed
    except Exception as e:
        print(e)
        return 0
    logging.info('Read the plink file')
    data = Bed(bed_file, count_A1=False).read()
    num_snp = data.val.shape[1]
    logging.INFO("Start shuffle the genotypes snp-by-snp")
    for i in tqdm(range(num_snp)):
        np.random.shuffle(data.val[:, i])
    logging.info('Write the shuffled plink file')
    Bed.write(bed_file + '_shuffle', data, count_A1=False)
    return 1
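
A minimal usage sketch for shuffle_bed above, not taken from the original project: the prefix "mydata" is hypothetical, and the logging/numpy/tqdm imports are the module-level dependencies the function body relies on but the excerpt omits. On success the shuffled copy is written under the "mydata_shuffle" prefix, mirroring the Bed.write call in the function.

import logging
import numpy as np
from tqdm import tqdm

logging.basicConfig(level=logging.INFO)

# "mydata" is a hypothetical prefix; mydata.bed/.bim/.fam must already exist.
if shuffle_bed("mydata") == 1:
    print("Shuffled genotypes written to the mydata_shuffle fileset")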
Example #14
def gen_Test_Bed(filename, n0, n1, m):
    n = n0 + n1
    iid = [["fam_" + str(i), "iid_" + str(i)] for i in range(0, n)]
    sid = ["snp_" + str(i) for i in range(0, m)]
    X = [[2.0 for i in range(0, m)] for i in range(0, n1)]
    X.extend([[0.0 for i in range(0, m)] for i in range(0, n0)])
    dat = SnpData(iid=iid, sid=sid, val=X)
    Bed.write(filename, dat)
    fil = open(filename + ".fam")
    lines = fil.readlines()
    fil.close()
    fil = open(filename + ".fam", "w")
    for i in range(0, len(lines)):
        l = lines[i]
        s = l.strip().split()
        if i < n1:
            s[5] = "2"
        else:
            s[5] = "1"
        l = " ".join(s) + "\n"
        fil.write(l)
    fil.close()
Example #15
def snpsA(seed, iid_count, sid_count, use_distributed):
    import numpy as np
    from pysnptools.snpreader import Bed
    from pysnptools.snpreader import DistributedBed
    from pysnptools.snpreader import SnpGen

    chrom_count = 10
    global top_cache
    if use_distributed:
        test_snp_path = (
            cache_top /
            f"snpsA_{seed}_{chrom_count}_{iid_count}_{sid_count}_db")
    else:
        test_snp_path = (
            cache_top /
            f"snpsA_{seed}_{chrom_count}_{iid_count}_{sid_count}.bed")
    count_A1 = False
    if not test_snp_path.exists():
        snpgen = SnpGen(
            seed=seed,
            iid_count=iid_count,
            sid_count=sid_count,
            chrom_count=chrom_count,
            block_size=1000,
        )
        if use_distributed:
            test_snps = DistributedBed.write(str(test_snp_path), snpgen)
        else:
            test_snps = Bed.write(str(test_snp_path),
                                  snpgen.read(dtype="float32"),
                                  count_A1=count_A1)
    else:
        if use_distributed:
            test_snps = DistributedBed(str(test_snp_path))
        else:
            test_snps = Bed(str(test_snp_path), count_A1=count_A1)
    from pysnptools.snpreader import SnpData

    np.random.seed(seed)
    pheno = SnpData(
        iid=test_snps.iid,
        sid=["pheno"],
        val=np.random.randn(test_snps.iid_count, 1) * 3 + 2,
    )
    covar = SnpData(
        iid=test_snps.iid,
        sid=["covar1", "covar2"],
        val=np.random.randn(test_snps.iid_count, 2) * 2 - 3,
    )

    return test_snps, pheno, covar
Example #16
        sid_count_max = 5765294
        sid_batch_size = 50

        sid_batch_count = -(sid_count // -sid_batch_size)
        sid_batch_count_max = -(sid_count_max // -sid_batch_size)
        snpgen = SnpGen(seed=234, iid_count=iid_count, sid_count=sid_count_max)

        for batch_index in range(sid_batch_count):
            sid_index_start = batch_index * sid_batch_size
            sid_index_end = (batch_index + 1) * sid_batch_size  # what about rounding
            filename = r"d:\deldir\rand\fakeukC{0}x{1}-{2}.bed".format(
                iid_count, sid_index_start, sid_index_end
            )
            if not os.path.exists(filename):
                Bed.write(
                    filename + ".temp", snpgen[:, sid_index_start:sid_index_end].read()
                )
                os.rename(filename + ".temp", filename)

    if False:
        from pysnptools.snpreader import Pheno, Bed

        filename = r"m:\deldir\New folder (4)\all_chr.maf0.001.N300.bed"
        iid_count = 300
        iid = [["0", "iid_{0}".format(iid_index)] for iid_index in range(iid_count)]
        bed = Bed(filename, iid=iid, count_A1=False)
        print(bed.iid_count)

    if False:
        from pysnptools.util import example_file
Example #17
                val[0::4, SNPsIndex:SNPsIndex + 1][bytes >= 3] = byteThree
            val = val[iid_index, :]  #reorder or trim any extra allocation
            if not SnpReader._array_properties_are_ok(val, order, dtype):
                val = val.copy(order=order)
            self._close_bed()

        return val


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    if True:
        from pysnptools.util import example_file
        pheno_fn = example_file("pysnptools/examples/toydata.phe")

    if False:
        from pysnptools.snpreader import Pheno, Bed
        import pysnptools.util as pstutil
        import os
        print(os.getcwd())
        snpdata = Pheno(
            '../examples/toydata.phe').read()  # Read data from Pheno format
        pstutil.create_directory_if_necessary("tempdir/toydata.5chrom.bed")
        Bed.write("tempdir/toydata.5chrom.bed", snpdata,
                  count_A1=False)  # Write data in Bed format

    import doctest
    doctest.testmod(optionflags=doctest.ELLIPSIS)
    # There is also a unit test case in 'pysnptools\test.py' that calls this doc test
Example #18
#    print('Time for permutation GWAS:' + str(time.time() - time_permut_0) + 's')


# # Permutations

# In[8]:


# Shuffling ALLELES by VARIANT

for i in range(NUMBER_OF_PERMUTATIONS):
    time_permut_0 = time.time()
    
    # Python works a little differently than R: shuffle directly modifies the input array in place!
    np.random.shuffle(mysnpdata.val)
    Bed.write('VariantsPermuted', mysnpdata)
    copyfile(VARIANTS_TO_TEST + '.bim', 'VariantsPermuted.bim')

    tmp_shuffled_df = single_snp('VariantsPermuted',  PHENOTYPE_DATA,
#                                cache_file='Outputs/Fast-Lmm-Cache/Gwas-Permutations-Cache'+str(i)
                                 cache_file='Outputs/Fast-Lmm-Cache/Gwas-Permutations-Cache.npz',
                                 leave_out_one_chrom=False,
                                 )
    tmp_shuffled_df['Full ID'] = tmp_shuffled_df['Chr'].astype('str') + '_' + tmp_shuffled_df['ChrPos'].astype('str')
    
    # sorting the new df to match the original
    tmp_shuffled_df = tmp_shuffled_df[['Full ID', 'SNP', 'PValue']]
    tmp_shuffled_df = tmp_shuffled_df.rename(columns={'Full ID':'Full IDShuffled'+str(i+1),
                                                      'PValue':'PValueShuffled'+str(i+1)})
    
    snpdata = mysnpdata.val
Example #19
    header=0,
)

fam_df = pd.read_csv(args.bfile + ".fam", delim_whitespace=True, usecols=[0, 1], names=["FID", "IID"])

merged_df = pd.merge(fam_df, pcs_df, on="IID").set_index("IID")
pcs = merged_df.ix[fam_df.ix[:, 1], 2:]
Q, R = np.linalg.qr(pcs)

print("reading dataset")
dataset = Bed(args.bfile).read().standardize()
dataset.standardize()

import pdb

pdb.set_trace()
Bed.write("temp", dataset)

print("projecting data")
X_Q = Q.T.dot(dataset.val)

print("unprojecting")
X_rr = Q.dot(X_Q)

print("subtracting out population structure")
X = dataset.val - X_rr

print("writing")
newbed = SnpData(dataset.iid, dataset.sid, X, pos=dataset.pos)
Bed.write("temp.bed", newbed)
Example #20
    data_file = r'd:\OneDrive\programs\epiCornell\syndata.bed'
    if False:
        from pysnptools.snpreader import SnpData
        import numpy as np
        bed1 = Bed("../../tests/datasets/synth/all")
        print(bed1.iid_count, bed1.sid_count, bed1.iid_count * bed1.sid_count)
        #goal 1500 individuals x 27000 SNP
        snpdata1 = bed1.read()
        iid = bed1.iid
        sid = ['sid{0}'.format(i) for i in xrange(27000)]
        val = np.tile(snpdata1.val,(3,6))[:,:27000].copy()
        #snpdata = Pheno('pysnptools/examples/toydata.phe').read()         # Read data from Pheno format
        snpdata2 = SnpData(iid, sid, val)
        print(snpdata2.iid_count, snpdata2.sid_count, snpdata2.iid_count * snpdata2.sid_count)
        Bed.write(snpdata2,data_file,count_A1=False)

    synbed = Bed(data_file)
    print(synbed.iid_count, synbed.sid_count, synbed.iid_count * synbed.sid_count)

    part_count = 1000
    part_list = list(split_on_sids(synbed,part_count))

    pairs00 = _Pairs(part_list[0])
    from fastlmm.association import single_snp
    pheno_fn = r"d:\OneDrive\programs\epiCornell\pheno.txt"
    cov_fn = r"d:\OneDrive\programs\epiCornell\cov.txt"
    results_df = single_snp(pairs00, K0=synbed, pheno=pheno_fn, covar=cov_fn, leave_out_one_chrom=False, count_A1=True)

    if False:
        for i,synbed_part_i in enumerate(synbed_part_list):
Example #21
                val[1::4,SNPsIndex:SNPsIndex+1][bytes>=4]=np.nan
                val[1::4,SNPsIndex:SNPsIndex+1][bytes>=8]=1
                val[1::4,SNPsIndex:SNPsIndex+1][bytes>=12]=byteThree
                bytes=np.mod(bytes,4)
                val[0::4,SNPsIndex:SNPsIndex+1]=byteZero
                val[0::4,SNPsIndex:SNPsIndex+1][bytes>=1]=np.nan
                val[0::4,SNPsIndex:SNPsIndex+1][bytes>=2]=1
                val[0::4,SNPsIndex:SNPsIndex+1][bytes>=3]=byteThree
            val = val[iid_index_out,:] #reorder or trim any extra allocation


            #!!LATER this can fail because the trim statement above messes up the order
            #assert(SnpReader._array_properties_are_ok(val, order, dtype)) #!!
            self._close_bed()

        return val


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    from pysnptools.snpreader import Pheno, Bed
    import pysnptools.util as pstutil
    snpdata = Pheno('../examples/toydata.phe').read()         # Read data from Pheno format
    pstutil.create_directory_if_necessary("tempdir/toydata.bed")
    Bed.write("tempdir/toydata.bed",snpdata,count_A1=False)   # Write data in Bed format

    import doctest
    doctest.testmod()
    # There is also a unit test case in 'pysnptools\test.py' that calls this doc test
Example #22
fam_df = pd.read_csv(args.bfile + '.fam',
                     delim_whitespace=True,
                     usecols=[0, 1],
                     names=['FID', 'IID'])

merged_df = pd.merge(fam_df, pcs_df, on='IID').set_index('IID')
pcs = merged_df.ix[fam_df.ix[:, 1], 2:]
Q, R = np.linalg.qr(pcs)

print('reading dataset')
dataset = Bed(args.bfile).read().standardize()
dataset.standardize()

import pdb
pdb.set_trace()
Bed.write('temp', dataset)

print('projecting data')
X_Q = Q.T.dot(dataset.val)

print('unprojecting')
X_rr = Q.dot(X_Q)

print('subtracting out population structure')
X = dataset.val - X_rr

print('writing')
newbed = SnpData(dataset.iid, dataset.sid, X, pos=dataset.pos)
Bed.write('temp.bed', newbed)
Example #23
print phenoreader, phenoreader.iid_count, phenoreader.sid_count, phenoreader.sid, phenoreader.pos
#Pheno('pheno_10_causals.txt') 500 1 ['pheno0'] [[ nan  nan  nan]]
phenodata = phenoreader.read()
print phenodata.val
#[[  4.85339514e-01]
# [ -2.07698457e-01]
# [  1.49090841e+00]
# [ -1.21289967e+00]
# ...

# Write 1st 10 iids and sids of Bed data into Pheno format
snpdata1010 = Bed("all.bed")[:10, :10].read()
Pheno.write("deleteme1010.txt", snpdata1010)

#Write it to Bed format
Bed.write("deleteme1010.bed", snpdata1010)

# Create a snpdata on the fly and write to Bed
snpdata1 = SnpData(iid=[['f1', 'c1'], ['f1', 'c2'], ['f2', 'c1']],
                   sid=['snp1', 'snp2'],
                   val=[[0, 1], [2, 1], [1, np.nan]])
Bed.write("deleteme1.bed", snpdata1)

#Pheno is slow because it's txt. Bed format can only hold 0,1,2,missing.
# Use SnpNpz for fastest read/write times, smallest file size
from pysnptools.snpreader import SnpNpz

SnpNpz.write("deleteme1010.snp.npz", snpdata1010)

# Use SnpHdf5 for random-access reads, good speed and size, and compatibility outside Python
from pysnptools.snpreader import SnpHdf5
Example #24
print phenoreader, phenoreader.iid_count, phenoreader.sid_count, phenoreader.sid, phenoreader.pos
#Pheno('pheno_10_causals.txt') 500 1 ['pheno0'] [[ nan  nan  nan]]
phenodata = phenoreader.read()
print phenodata.val
#[[  4.85339514e-01]
# [ -2.07698457e-01]
# [  1.49090841e+00]
# [ -1.21289967e+00]
# ...

# Write 1st 10 iids and sids of Bed data into Pheno format
snpdata1010 = Bed("all.bed")[:10,:10].read()
Pheno.write("deleteme1010.txt",snpdata1010)

#Write it to Bed format
Bed.write("deleteme1010.bed",snpdata1010)

# Create a snpdata on the fly and write to Bed
snpdata1 = SnpData(iid=[['f1','c1'],['f1','c2'],['f2','c1']],sid=['snp1','snp2'],val=[[0,1],[2,1],[1,np.nan]])
Bed.write("deleteme1.bed",snpdata1)


#Pheno is slow because it's txt. Bed format can only hold 0,1,2,missing.
# Use SnpNpz for fastest read/write times, smallest file size
from pysnptools.snpreader import SnpNpz
SnpNpz.write("deleteme1010.snp.npz", snpdata1010)

# Use SnpHdf5 for random-access reads, good speed and size, and compatibility outside Python
from pysnptools.snpreader import SnpHdf5
SnpHdf5.write("deleteme1010.snp.hdf5", snpdata1010)