示例#1
0
    def _read_pstdata(self):
        """Assemble a SnpData object from the phenotype input in self.filename.

        self.filename may be:
          * a str -- handed to pstpheno.loadPhen together with self.missing,
          * None  -- a phenotype with zero columns is built over
                     self._iid_if_none (which must then be set),
          * anything else -- used directly; it is indexed with the keys
                     'header', 'vals' and 'iid'.

        A 1-D 'vals' is reshaped into a single column, and missing column
        names are replaced by "pheno0", "pheno1", ...

        Returns the SnpData built with the phenotype names as sid and an
        all-NaN (column count x 3) pos array.
        """
        #LATER switch it, so the main code is here rather than in loadPhen
        source = self.filename
        if source is None:
            assert self._iid_if_none is not None, "If input is None then iid_if_none be given"
            pheno_input = {
                'header': np.empty((0), dtype='str'),
                'vals': np.empty((len(self._iid_if_none), 0)),
                'iid': self._iid_if_none
            }
        elif isinstance(source, str):
            pheno_input = pstpheno.loadPhen(source, missing=self.missing)
        else:
            pheno_input = source

        # Promote a flat value vector to one column; this builds a new dict.
        if len(pheno_input['vals'].shape) == 1:
            pheno_input = {
                'header': pheno_input['header'],
                'vals': np.reshape(pheno_input['vals'], (-1, 1)),
                'iid': pheno_input['iid']
            }

        # Fill in default column names. Note that when the input was passed
        # in directly with 2-D values, this assignment writes into that
        # very object.
        names = pheno_input['header']
        if len(names) == 0:
            column_count = pheno_input['vals'].shape[1]
            pheno_input['header'] = ["pheno{0}".format(k) for k in range(column_count)]
        elif names[0] is None:
            pheno_input['header'] = ["pheno{0}".format(k) for k in range(len(names))] #LATER move to reader?

        sid = np.array(pheno_input['header'], dtype='str')
        # No position information is available for phenotypes.
        pos = np.full((len(sid), 3), np.nan)
        return SnpData(iid=pheno_input['iid'], sid=sid, pos=pos, val=pheno_input['vals'])
示例#2
0
    def _read_pstdata(self):
        """Assemble a SnpData object from the phenotype input in self.filename.

        self.filename may be a str (loaded through pstpheno.loadPhen with
        self.missing), None (a zero-column phenotype over self._iid_if_none,
        which must then be set), or an object indexed with the keys
        'header', 'vals' and 'iid' that is used as given.

        A 1-D 'vals' is reshaped into a single column, and missing column
        names are replaced by "pheno0", "pheno1", ...

        Returns the SnpData built with the phenotype names as sid and an
        all-NaN (column count x 3) pos array.
        """
        #LATER switch it, so the main code is here rather than in loadPhen
        if isinstance(self.filename, str):
            pheno_input = pstpheno.loadPhen(self.filename, missing=self.missing)
        elif self.filename is None:
            assert self._iid_if_none is not None, "If input is None then iid_if_none be given"
            pheno_input = {
                'header': np.empty((0), dtype='str'),
                'vals': np.empty((len(self._iid_if_none), 0)),
                'iid': self._iid_if_none
            }
        else:
            pheno_input = self.filename

        # Promote a flat value vector to one column; this builds a new dict.
        if len(pheno_input['vals'].shape) == 1:
            pheno_input = {
                'header': pheno_input['header'],
                'vals': np.reshape(pheno_input['vals'], (-1, 1)),
                'iid': pheno_input['iid']
            }

        # Fill in default column names. `range` is used instead of `xrange`
        # so the method also runs on Python 3, where `xrange` does not exist.
        # Note that when the input was passed in directly with 2-D values,
        # the assignments below write into that very object.
        header = pheno_input['header']
        if len(header) > 0 and header[0] is None:
            pheno_input['header'] = ["pheno{0}".format(i) for i in range(len(header))] #LATER move to reader?
        elif len(header) == 0:
            pheno_input['header'] = ["pheno{0}".format(i) for i in range(pheno_input['vals'].shape[1])]

        row = pheno_input['iid']
        col = np.array(pheno_input['header'], dtype='str')
        # No position information is available for phenotypes.
        col_property = np.empty((len(col), 3))
        col_property.fill(np.nan)
        val = pheno_input['vals']

        return SnpData(iid=row, sid=col, pos=col_property, val=val)
示例#3
0
def load_snp_data(snpreader,
                  pheno_fn,
                  cov_fn=None,
                  offset=True,
                  mpheno=0,
                  standardizer=Unit()):
    """Load SNP, phenotype and covariate data and align them.

    The three data sets are passed through ``pstutil.intersect_apply``
    (presumably restricting them to the individuals they share -- confirm
    against pstutil) before being returned.

    Parameters
    ----------
    snpreader : snpreader object
        object to read in binary SNP file; it is read in C order and then
        standardized with ``standardizer``

    pheno_fn : str
        File name of phenotype file

    cov_fn : str, optional
        File name of covariates file. When None, a single column of ones
        is used as the covariates.

    offset : bool, default=True
        Adds an offset (column of ones) to the covariates when none of
        the covariate columns is constant

    mpheno : int, default=0
        Forwarded to ``pstpheno.loadOnePhen`` (presumably the index of the
        phenotype column to load -- confirm against pstpheno)

    standardizer : standardizer object, default=Unit()
        Forwarded to ``standardize`` on the SNP data that was read

    Returns
    -------
    G : array, shape = [n_samples, n_features]
        SNP matrix

    X : array, shape = [n_samples, n_covariates]
        Matrix of covariates (e.g. age, gender)

    y : array, shape = [n_samples]
        Phenotype (target) vector

    """

    #TODO: completely remove this
    pheno = pstpheno.loadOnePhen(pheno_fn, mpheno, vectorize=True)
    geno = snpreader.read(order='C').standardize(standardizer)

    # load covariates or generate vector of ones (for bias)
    if cov_fn is None:
        cov = {'vals': np.ones((len(pheno['iid']), 1)), 'iid': pheno['iid']}
    else:
        cov = pstpheno.loadPhen(cov_fn)

    (y, yiid), G, (X, xiid) = pstutil.intersect_apply(
        [(pheno['vals'], pheno['iid']), geno, (cov['vals'], cov['iid'])],
        sort_by_dataset=False)
    G = G.read(order='C', view_ok=True)

    # add bias column if not present: a constant covariate column (zero
    # standard deviation) already plays that role.
    if offset and np.all(X.std(0) != 0):
        # The column must have one row per row of X. The previous code
        # sized it with `len(indarr)`, a name that is never defined here.
        bias = np.ones((X.shape[0], 1))
        X = np.hstack((X, bias))

    return G, X, y
def load_snp_data(snpreader, pheno_fn, cov_fn=None, offset=True, mpheno=0, standardizer=Unit()):
    """Load SNP, phenotype and covariate data and align them.

    The three data sets are passed through ``pstutil.intersect_apply``
    (presumably restricting them to the individuals they share -- confirm
    against pstutil) before being returned.

    Parameters
    ----------
    snpreader : snpreader object
        object to read in binary SNP file; it is read in C order and then
        standardized with ``standardizer``

    pheno_fn : str
        File name of phenotype file

    cov_fn : str, optional
        File name of covariates file. When None, a single column of ones
        is used as the covariates.

    offset : bool, default=True
        Adds an offset (column of ones) to the covariates when none of
        the covariate columns is constant

    mpheno : int, default=0
        Forwarded to ``pstpheno.loadOnePhen`` (presumably the index of the
        phenotype column to load -- confirm against pstpheno)

    standardizer : standardizer object, default=Unit()
        Forwarded to ``standardize`` on the SNP data that was read

    Returns
    -------
    G : array, shape = [n_samples, n_features]
        SNP matrix

    X : array, shape = [n_samples, n_covariates]
        Matrix of covariates (e.g. age, gender)

    y : array, shape = [n_samples]
        Phenotype (target) vector

    """

    #TODO: completely remove this
    pheno = pstpheno.loadOnePhen(pheno_fn, mpheno, vectorize=True)
    geno = snpreader.read(order='C').standardize(standardizer)

    # load covariates or generate vector of ones (for bias)
    if cov_fn is None:
        cov = {'vals': np.ones((len(pheno['iid']), 1)), 'iid': pheno['iid']}
    else:
        cov = pstpheno.loadPhen(cov_fn)

    datasets = [(pheno['vals'], pheno['iid']), geno, (cov['vals'], cov['iid'])]
    (y, yiid), G, (X, xiid) = pstutil.intersect_apply(datasets, sort_by_dataset=False)
    G = G.read(order='C', view_ok=True)

    # add bias column if not present: a constant covariate column (zero
    # standard deviation) already plays that role.
    if offset and np.all(X.std(0) != 0):
        # One row per row of X; `indarr`, used here before, is not defined
        # anywhere in this function.
        bias = np.ones((X.shape[0], 1))
        X = np.hstack((X, bias))

    return G, X, y
    def test_preload_files(self):
        # Calls single_snp with the phenotype and covariates loaded ahead of
        # time, a sliced Bed reader as the test SNPs and self.bedbase itself
        # as G0, then hands the result to self.compare_files as "one".
        logging.info("TestSingleSnp test_preload_files")
        bed_base = self.bedbase
        loaded_pheno = pstpheno.loadOnePhen(self.phen_fn, vectorize=True)
        loaded_covar = pstpheno.loadPhen(self.cov_fn)
        reader = Bed(bed_base, count_A1=False)

        result_path = self.file_name("preload_files")

        result = single_snp(
            test_snps=reader[:, :10],
            pheno=loaded_pheno,
            G0=bed_base,
            mixing=0,
            leave_out_one_chrom=False,
            covar=loaded_covar,
            output_file_name=result_path,
            count_A1=False)
        self.compare_files(result, "one")
示例#6
0
 def load_covariates(self, pheno):
     """Return the covariate matrix together with its iids.

     When self.cov_fn is None, the result is a single column of ones with
     one row per entry of pheno['iid'], paired with pheno['iid'].
     Otherwise the covariates are loaded with pstpheno.loadPhen and their
     'vals' and 'iid' entries are returned.
     """
     if self.cov_fn == None:
         ids = pheno['iid']
         return np.ones((len(ids), 1)), ids

     loaded = pstpheno.loadPhen(self.cov_fn)
     X = loaded['vals']
     cov_iid = loaded['iid']
     # add bias column if not present - #!! LATER -- Bug? should this test be done after intersection in case removing an iid makes it constant?
     no_constant_column = sp.all(X.std(0) != 0)
     if self.offset and no_constant_column:
         # NOTE(review): the matrix with the extra ones column is stored
         # on self.X only; the X returned below does not include it.
         # Confirm whether callers rely on that.
         ones_column = sp.ones((len(X), 1))
         self.X = sp.hstack((X, ones_column))
     return X, cov_iid
 def load_covariates(self, pheno):
     """Return (covariate matrix, covariate iids).

     With no covariate file (self.cov_fn is None) a column of ones, one row
     per entry of pheno['iid'], is returned along with pheno['iid'].
     Otherwise the 'vals' and 'iid' entries of pstpheno.loadPhen(self.cov_fn)
     are returned.
     """
     if self.cov_fn == None:
         # No file given: the only covariate is the constant column.
         cov_iid = pheno['iid']
         row_count = len(cov_iid)
         X = np.ones((row_count, 1))
         return X, cov_iid

     covariates = pstpheno.loadPhen(self.cov_fn)
     X, cov_iid = covariates['vals'], covariates['iid']
     # add bias column if not present - #!! LATER -- Bug? should this test be done after intersection in case removing an iid makes it constant?
     if self.offset and sp.all(X.std(0) != 0):
         # NOTE(review): only self.X receives the appended ones column;
         # the returned X is the matrix as loaded. Confirm this is intended.
         self.X = sp.hstack((X, sp.ones((len(X), 1))))
     return X, cov_iid
示例#8
0
    def test_preload_files(self):
        # Calls single_snp with the phenotype and covariates loaded ahead of
        # time, a sliced Bed reader as the test SNPs and self.bedbase itself
        # as G0, then hands the result to self.compare_files as "one".
        logging.info("TestSingleSnp test_preload_files")
        from pysnptools.snpreader import Bed
        bed_base = self.bedbase
        loaded_pheno = pstpheno.loadOnePhen(self.phen_fn, vectorize=True)
        loaded_covar = pstpheno.loadPhen(self.cov_fn)
        reader = Bed(bed_base)

        result_path = self.file_name("preload_files")

        result = single_snp(
            test_snps=reader[:, :10],
            pheno=loaded_pheno,
            G0=bed_base,
            mixing=0,
            covar=loaded_covar,
            output_file_name=result_path)
        self.compare_files(result, "one")
    def test_SNC(self):
        # Runs single_snp on in-memory SNP data in which one column has been
        # overwritten with a constant, then hands the result to
        # self.compare_files as "snc".
        logging.info("TestSNC")
        bed_base = self.bedbase
        loaded_pheno = pstpheno.loadOnePhen(self.phen_fn, vectorize=True)
        loaded_covar = pstpheno.loadPhen(self.cov_fn)
        snc = Bed(bed_base, count_A1=False).read()
        snc.val[:, 2] = 0 # make SNP #2 have constant values (aka a SNC)

        result_path = self.file_name("snc")

        result = single_snp(
            test_snps=snc[:, :10],
            pheno=loaded_pheno,
            G0=snc,
            mixing=0,
            leave_out_one_chrom=False,
            covar=loaded_covar,
            output_file_name=result_path,
            count_A1=False)
        self.compare_files(result, "snc")
    def test_no_cov_b(self):
        # Runs single_snp with a covariate dictionary whose value columns and
        # header have all been removed, then hands the result to
        # self.compare_files as "no_cov".
        logging.info("TestSingleSnp test_no_cov_b")
        reader = Bed(self.bedbase, count_A1=False)
        pheno_path = self.phen_fn

        result_path = self.file_name("no_cov_b")
        empty_covar = pstpheno.loadPhen(self.cov_fn)
        empty_covar['vals'] = np.delete(empty_covar['vals'], np.s_[:], axis=1) #Remove all the columns
        empty_covar['header'] = []

        result = single_snp(
            test_snps=reader[:, :10],
            pheno=pheno_path,
            G0=reader,
            leave_out_one_chrom=False,
            covar=empty_covar,
            mixing=0,
            output_file_name=result_path,
            count_A1=False)

        self.compare_files(result, "no_cov")
示例#11
0
    def test_no_cov_b(self):
        # Runs single_snp with a covariate dictionary whose value columns
        # have all been removed, then hands the result to
        # self.compare_files as "no_cov".
        logging.info("TestSingleSnp test_no_cov_b")
        from pysnptools.snpreader import Bed
        reader = Bed(self.bedbase)
        pheno_path = self.phen_fn

        result_path = self.file_name("no_cov_b")
        empty_covar = pstpheno.loadPhen(self.cov_fn)
        empty_covar['vals'] = np.delete(empty_covar['vals'], np.s_[:], axis=1) #Remove all the columns

        result = single_snp(
            test_snps=reader[:, :10],
            pheno=pheno_path,
            G0=reader,
            covar=empty_covar,
            mixing=0,
            output_file_name=result_path)

        self.compare_files(result, "no_cov")
示例#12
0
    def test_preload_files(self):
        # Calls single_snp with the phenotype and covariates loaded ahead of
        # time, a sliced Bed reader as the test SNPs and self.bedbase itself
        # as G0, then hands the result to self.compare_files as "one".
        logging.info("TestSingleSnp test_preload_files")
        bed_base = self.bedbase
        loaded_pheno = pstpheno.loadOnePhen(self.phen_fn, vectorize=True)
        loaded_covar = pstpheno.loadPhen(self.cov_fn)
        reader = Bed(bed_base)

        result_path = self.file_name("preload_files")

        result = single_snp(
            test_snps=reader[:, :10],
            pheno=loaded_pheno,
            G0=bed_base,
            mixing=0,
            leave_out_one_chrom=False,
            covar=loaded_covar,
            output_file_name=result_path)
        self.compare_files(result, "one")
示例#13
0
    def test_preload_files(self):
        # Calls epistasis with the phenotype and covariates loaded ahead of
        # time and two overlapping sid lists taken from a Bed reader, then
        # hands the SNP0 / SNP1 / PValue columns of the result to
        # self.compare_files as "one".
        logging.info("TestEpistasis test_preload_files")
        from pysnptools.snpreader import Bed
        bed_base = self.bedbase
        loaded_pheno = pstpheno.loadOnePhen(self.phen_fn, vectorize=True)
        loaded_covar = pstpheno.loadPhen(self.cov_fn)
        reader = Bed(bed_base)

        result_path = self.file_name("preload_files")

        result = epistasis(
            bed_base,
            loaded_pheno,
            G0=bed_base,
            covar=loaded_covar,
            sid_list_0=reader.sid[:10], #first 10 snps
            sid_list_1=reader.sid[5:15], #Skip 5 snps, use next 10
            output_file_name=result_path)
        first_sids = np.array(result['SNP0'])
        second_sids = np.array(result['SNP1'])
        pvalues = np.array(result['PValue'])
        self.compare_files(first_sids, second_sids, pvalues, "one")
示例#14
0
    def _run_once(self):
        # One-time setup. Inputs given as str are replaced by Bed readers or
        # loaded phenotype/covariate data, missing sid lists default to all
        # sids of the test SNPs, the covariates become a matrix with an
        # appended column of ones, and the working directory name is chosen.
        if self._ran_once:
            return
        # None is still falsy; the flag only becomes True once every step
        # below has completed.
        self._ran_once = None

        if isinstance(self.test_snps, str):
            self.test_snps = Bed(self.test_snps)

        if isinstance(self.G0, str):
            self.G0 = Bed(self.G0)

        if isinstance(self.pheno, str):
            self.pheno = pstpheno.loadOnePhen(
                self.pheno, vectorize=True, missing='NaN')

        # isinstance is False for None, so no separate None check is needed.
        if isinstance(self.covar, str):
            self.covar = pstpheno.loadPhen(self.covar, missing='NaN')

        if isinstance(self.G1_or_none, str):
            self.G1_or_none = Bed(self.G1_or_none)

        if self.sid_list_0 is None:
            self.sid_list_0 = self.test_snps.sid

        if self.sid_list_1 is None:
            self.sid_list_1 = self.test_snps.sid

        self.set_sid_sets()

        #!!Should fix up to add only of no constant columns - will need to add a test case for this
        if self.covar is not None:
            self.covar = np.hstack(
                (self.covar['vals'], np.ones((self.test_snps.iid_count, 1))))
        else:
            self.covar = np.ones((self.test_snps.iid_count, 1))
        self.n_cov = self.covar.shape[1]

        if self.output_file_or_none is not None:
            self.__tempdirectory = self.output_file_or_none + ".working"
        else:
            self.__tempdirectory = ".working"

        self._ran_once = True
示例#15
0
    def test_no_cov_b(self):
        # Calls epistasis with a covariate dictionary whose value columns
        # have all been removed, then hands the SNP0 / SNP1 / PValue columns
        # of the result to self.compare_files as "no_cov".
        logging.info("TestEpistasis test_no_cov_b")
        from pysnptools.snpreader import Bed
        reader = Bed(self.bedbase)
        pheno_path = self.phen_fn

        result_path = self.file_name("no_cov_b")
        empty_covar = pstpheno.loadPhen(self.cov_fn)
        empty_covar['vals'] = np.delete(empty_covar['vals'], np.s_[:], axis=1) #Remove all the columns

        result = epistasis(
            reader,
            pheno_path,
            G0=reader,
            covar=empty_covar,
            sid_list_0=reader.sid[:10], #first 10 snps
            sid_list_1=reader.sid[5:15], #Skip 5 snps, use next 10
            output_file_name=result_path)

        first_sids = np.array(result['SNP0'])
        second_sids = np.array(result['SNP1'])
        pvalues = np.array(result['PValue'])
        self.compare_files(first_sids, second_sids, pvalues, "no_cov")
示例#16
0
    def test_no_cov_b(self):
        # Runs single_snp with a covariate dictionary whose value columns
        # have all been removed, then hands the result to
        # self.compare_files as "no_cov".
        logging.info("TestSingleSnp test_no_cov_b")
        from pysnptools.snpreader import Bed
        reader = Bed(self.bedbase)
        pheno_path = self.phen_fn

        result_path = self.file_name("no_cov_b")
        empty_covar = pstpheno.loadPhen(self.cov_fn)
        #Remove all the columns
        empty_covar['vals'] = np.delete(empty_covar['vals'], np.s_[:], axis=1)

        result = single_snp(
            test_snps=reader[:, :10],
            pheno=pheno_path,
            G0=reader,
            covar=empty_covar,
            mixing=0,
            output_file_name=result_path)

        self.compare_files(result, "no_cov")
示例#17
0
    def _run_once(self):
        # One-time setup. Inputs given as str are replaced by Bed readers or
        # loaded phenotype/covariate data, missing sid lists default to all
        # sids of the test SNPs, the covariates become a matrix with an
        # appended column of ones, and the working directory name is chosen.
        if self._ran_once:
            return
        # None is still falsy; the flag only becomes True once every step
        # below has completed.
        self._ran_once = None

        if isinstance(self.test_snps, str):
            self.test_snps = Bed(self.test_snps)

        if isinstance(self.G0, str):
            self.G0 = Bed(self.G0)

        if isinstance(self.pheno, str):
            self.pheno = pstpheno.loadOnePhen(self.pheno, vectorize=True) #!! what about missing=-9?

        # isinstance is False for None, so no separate None check is needed.
        if isinstance(self.covar, str):
            self.covar = pstpheno.loadPhen(self.covar) #!! what about missing=-9?

        if isinstance(self.G1_or_none, str):
            self.G1_or_none = Bed(self.G1_or_none)

        if self.sid_list_0 is None:
            self.sid_list_0 = self.test_snps.sid

        if self.sid_list_1 is None:
            self.sid_list_1 = self.test_snps.sid

        self.set_sid_sets()

        #!!Should fix up to add only of no constant columns - will need to add a test case for this
        if self.covar is not None:
            self.covar = np.hstack((self.covar['vals'], np.ones((self.test_snps.iid_count, 1))))
        else:
            self.covar = np.ones((self.test_snps.iid_count, 1))
        self.n_cov = self.covar.shape[1]

        if self.output_file_or_none is not None:
            self.__tempdirectory = self.output_file_or_none + ".working"
        else:
            self.__tempdirectory = ".working"

        self._ran_once = True
示例#18
0
    def test_SNC(self):
        # Runs single_snp on in-memory SNP data in which one column has been
        # overwritten with a constant, then hands the result to
        # self.compare_files as "snc".
        logging.info("TestSNC")
        bed_base = self.bedbase
        loaded_pheno = pstpheno.loadOnePhen(self.phen_fn, vectorize=True)
        loaded_covar = pstpheno.loadPhen(self.cov_fn)
        snc = Bed(bed_base, count_A1=False).read()
        # make SNP #2 have constant values (aka a SNC)
        snc.val[:, 2] = 0

        result_path = self.file_name("snc")

        options = dict(
            test_snps=snc[:, :10],
            pheno=loaded_pheno,
            G0=snc,
            mixing=0,
            leave_out_one_chrom=False,
            covar=loaded_covar,
            output_file_name=result_path,
            count_A1=False)
        result = single_snp(**options)
        self.compare_files(result, "snc")
示例#19
0
    def test_no_cov_b(self):
        # Runs single_snp with a covariate dictionary whose value columns and
        # header have all been removed, then hands the result to
        # self.compare_files as "no_cov".
        logging.info("TestSingleSnp test_no_cov_b")
        reader = Bed(self.bedbase)
        pheno_path = self.phen_fn

        result_path = self.file_name("no_cov_b")
        empty_covar = pstpheno.loadPhen(self.cov_fn)
        #Remove all the columns
        empty_covar['vals'] = np.delete(empty_covar['vals'], np.s_[:], axis=1)
        empty_covar['header'] = []

        result = single_snp(
            test_snps=reader[:, :10],
            pheno=pheno_path,
            G0=reader,
            leave_out_one_chrom=False,
            covar=empty_covar,
            mixing=0,
            output_file_name=result_path)

        self.compare_files(result, "no_cov")
示例#20
0
def _pheno_fixup(pheno_input, iid_source_if_none=None):
    """Normalize a phenotype-style input into a dictionary with 2-D 'vals'.

    * str  -- the result of pstpheno.loadPhen(pheno_input) is returned.
    * None -- a new dictionary with an empty header, a 'vals' array of
      zero columns (one row per row of iid_source_if_none['vals']) and
      the 'iid' entry of iid_source_if_none is returned.
    * otherwise -- the input is indexed with 'header', 'vals' and 'iid';
      a 1-D 'vals' yields a new dictionary with 'vals' reshaped to a
      single column, anything else is returned unchanged.
    """
    if pheno_input is None:
        row_count = iid_source_if_none['vals'].shape[0]
        return {
            'header': [],
            'vals': np.empty((row_count, 0)),
            'iid': iid_source_if_none['iid']
        }

    if isinstance(pheno_input, str):
        return pstpheno.loadPhen(pheno_input)  #!!what about missing=-9?

    if len(pheno_input['vals'].shape) != 1:
        # Already two-dimensional (or higher): hand back the same object.
        return pheno_input

    return {
        'header': pheno_input['header'],
        'vals': np.reshape(pheno_input['vals'], (-1, 1)),
        'iid': pheno_input['iid']
    }
示例#21
0
def _pheno_fixup(pheno_input, iid_source_if_none=None):
    """Normalize a phenotype-style input into a dictionary with 2-D 'vals'.

    A str is loaded with pstpheno.loadPhen. None produces a new dictionary
    with an empty header, a zero-column 'vals' array (one row per row of
    iid_source_if_none['vals']) and the 'iid' of iid_source_if_none. Any
    other input is indexed with 'header', 'vals' and 'iid': when its 'vals'
    is 1-D a new dictionary with 'vals' reshaped to one column is returned,
    otherwise the input itself is returned.
    """
    if isinstance(pheno_input, str):
        result = pstpheno.loadPhen(pheno_input) #!!what about missing=-9?
    elif pheno_input is None:
        n_rows = iid_source_if_none['vals'].shape[0]
        result = {
            'header': [],
            'vals': np.empty((n_rows, 0)),
            'iid': iid_source_if_none['iid'],
        }
    elif len(pheno_input['vals'].shape) == 1:
        # Turn the flat vector into a single column in a fresh dictionary.
        result = {
            'header': pheno_input['header'],
            'vals': np.reshape(pheno_input['vals'], (-1, 1)),
            'iid': pheno_input['iid'],
        }
    else:
        result = pheno_input
    return result
示例#22
0
    def test_SNC(self):
        # Runs single_snp on in-memory SNP data in which one column has been
        # overwritten with a constant, then hands the result to
        # self.compare_files as "snc".
        logging.info("TestSNC")
        from pysnptools.snpreader import Bed
        bed_base = self.bedbase
        loaded_pheno = pstpheno.loadOnePhen(self.phen_fn, vectorize=True)
        loaded_covar = pstpheno.loadPhen(self.cov_fn)
        snc = Bed(bed_base).read()
        # make SNP #2 have constant values (aka a SNC)
        snc.val[:, 2] = 0

        result_path = self.file_name("snc")

        result = single_snp(
            test_snps=snc[:, :10],
            pheno=loaded_pheno,
            G0=snc,
            mixing=0,
            covar=loaded_covar,
            output_file_name=result_path)
        self.compare_files(result, "snc")
示例#23
0
def loadCovars(bed, covarFile):
    """Load covariates from covarFile and return their values.

    The loaded dictionary is first passed to checkIntersection together
    with bed (with checkSuperSet=True), then run through
    pstutil.intersect_apply with bed (presumably aligning the covariate
    rows with the individuals in bed -- confirm against pstutil). The
    'vals' entry of the resulting covariates is returned.
    """
    loaded = phenoUtils.loadPhen(covarFile)
    checkIntersection(bed, loaded, 'covariates', checkSuperSet=True)
    _bed_unused, aligned = pstutil.intersect_apply([bed, loaded])
    return aligned['vals']
示例#24
0
文件: gpUtils.py 项目: omerwe/MKLMM
def loadCovars(bed, covarFile):
	"""Load covariates from covarFile and return their values.

	The loaded dictionary is first passed to checkIntersection together
	with bed (with checkSuperSet=True), then run through
	pstutil.intersect_apply with bed (presumably aligning the covariate
	rows with the individuals in bed -- confirm against pstutil). The
	'vals' entry of the resulting covariates is returned.
	"""
	loaded = phenoUtils.loadPhen(covarFile)
	checkIntersection(bed, loaded, 'covariates', checkSuperSet=True)
	aligned_pair = pstutil.intersect_apply([bed, loaded])
	_bed_unused, aligned = aligned_pair
	return aligned['vals']