Exemplo n.º 1
0
 def col_property(self):
     """Return the cached column properties, loading them on first use.

     *same as* :attr:`pos`

     If ``_col_property`` has not been set yet, a single call to
     ``SnpReader._read_map_or_bim`` (``remove_suffix="bed"``,
     ``add_suffix="bim"``) fills in both ``_col`` and ``_col_property``.
     """
     if hasattr(self, "_col_property"):
         return self._col_property

     # The reader returns ids and properties together; cache both.
     ids, properties = SnpReader._read_map_or_bim(
         self.filename, remove_suffix="bed", add_suffix="bim")
     self._col = ids
     self._col_property = properties
     return self._col_property
Exemplo n.º 2
0
 def col(self):
     """Return the cached column ids, loading them on first use.

     *same as* :attr:`sid`

     If ``_col`` has not been set yet, a single call to
     ``SnpReader._read_map_or_bim`` (``remove_suffix="pgen"``,
     ``add_suffix="bim"``, ``max_filesize=1E9``) fills in both ``_col``
     and ``_col_property``.
     """
     if hasattr(self, "_col"):
         return self._col

     # The reader returns ids and properties together; cache both.
     ids, properties = SnpReader._read_map_or_bim(
         self.filename,
         remove_suffix="pgen",
         add_suffix="bim",
         max_filesize=1E9)
     self._col = ids
     self._col_property = properties
     return self._col
Exemplo n.º 3
0
    def run_once(self):
        """Load iid, sid and pos metadata the first time this is called.

        Returns ``None`` when the metadata was already loaded (``_ran_once``
        is true); otherwise loads it, validates it with
        ``_assert_iid_sid_pos`` and returns ``self``.
        """
        if self._ran_once:
            return
        # Flag is raised before reading, exactly like the original ordering.
        self._ran_once = True

        self._iid = SnpReader._read_fam(self.dat_filename, remove_suffix="dat")
        sid, pos = SnpReader._read_map_or_bim(
            self.dat_filename, remove_suffix="dat", add_suffix="map")
        self._sid = sid
        self._pos = pos

        self._assert_iid_sid_pos()
        return self
Exemplo n.º 4
0
 def _read_pstdata(self):
     """Read the whole PED file into memory and return it as a ``SnpData``.

     SNP ids and positions come from ``SnpReader._read_map_or_bim``
     (``remove_suffix="ped"``, ``add_suffix="map"``). The PED file itself
     supplies the individuals (columns 0-1) and the allele strings
     (columns 6 onward, two columns per SNP).

     For every SNP, rows whose first allele equals ``self.missing`` become
     NaN; every other row gets the number of its two alleles that equal the
     first allele of the first non-missing row.

     Returns:
         A ``SnpData`` built from the iids, sids, positions and values.
     """
     col, col_property = SnpReader._read_map_or_bim(self.filename, remove_suffix="ped", add_suffix="map")
     # ndmin=2 keeps the array two-dimensional even for a single-row file,
     # so the column slicing below still works.
     ped = np.loadtxt(self.filename, dtype='str', comments=None, ndmin=2)
     row = ped[:, 0:2]
     snpsstr = ped[:, 6:]
     inan = snpsstr == self.missing
     # Floor division: a float here is rejected by np.zeros and range().
     sid_count = snpsstr.shape[1] // 2
     snps = np.zeros((snpsstr.shape[0], sid_count))
     for i in range(sid_count):
         is_missing = inan[:, 2 * i]
         snps[is_missing, i] = np.nan
         vals = snpsstr[~is_missing, 2 * i:2 * (i + 1)]
         if vals.shape[0] == 0:
             # Every row is missing for this SNP: no reference allele
             # exists, so the column simply stays NaN.
             continue
         snps[~is_missing, i] += (vals == vals[0, 0]).sum(1)
     snpdata = SnpData(iid=row, sid=col, pos=col_property, val=snps)
     return snpdata
Exemplo n.º 5
0
 def _read_pstdata(self):
     """Read the whole DAT file into memory and return it as a ``SnpData``.

     Individuals come from ``SnpReader._read_fam`` and SNP ids/positions
     from ``SnpReader._read_map_or_bim`` (``add_suffix="map"``). The DAT
     file is parsed as tab-separated text with no header; its first column
     must equal the SNP list, and its first three columns are dropped
     before the remainder is transposed into the value matrix.

     Returns:
         A ``SnpData``; when there are no individuals or no SNPs its value
         array is an ``np.empty`` of the matching (zero-sized) shape.

     Raises:
         Exception: if the SNP ids in the DAT file differ from the map file.
         AssertionError: if the number of individuals does not match the
             number of remaining DAT columns.
     """
     row = SnpReader._read_fam(self.filename, remove_suffix="dat")
     col, col_property = SnpReader._read_map_or_bim(self.filename, remove_suffix="dat", add_suffix="map")
     if len(row) == 0 or len(col) == 0:
         return SnpData(iid=row, sid=col, pos=col_property, val=np.empty([len(row), len(col)]))
     datfields = pd.read_csv(self.filename, delimiter='\t', header=None, index_col=False)
     # "str" is the portable dtype spelling; "string" is not accepted by
     # every NumPy / Python combination.
     dat_sids = np.array(datfields[0], dtype="str")
     if not np.array_equal(dat_sids, col):
         raise Exception("Expect snp list in map file to exactly match snp list in dat file")
     # Drop the three leading non-genotype columns.
     del datfields[0]
     del datfields[1]
     del datfields[2]
     assert len(row) == datfields.shape[1], "Expect # iids in fam file to match dat file"
     # DataFrame.as_matrix() was removed in pandas 1.0; .values is the
     # equivalent that works across pandas versions.
     val = datfields.values.T
     snpdata = SnpData(iid=row, sid=col, pos=col_property, val=val)
     return snpdata
Exemplo n.º 6
0
 def _read_pstdata(self):
     """Read the whole DAT file into memory and return it as a ``SnpData``.

     Individuals come from ``SnpReader._read_fam`` and SNP ids/positions
     from ``SnpReader._read_map_or_bim`` (``add_suffix="map"``). The DAT
     file is parsed as tab-separated text with no header; its first column
     must equal the SNP list, and its first three columns are dropped
     before the remainder is transposed into the value matrix.

     Returns:
         A ``SnpData``; when there are no individuals or no SNPs its value
         array is an ``np.empty`` of the matching (zero-sized) shape.

     Raises:
         Exception: if the SNP ids in the DAT file differ from the map file.
         AssertionError: if the number of individuals does not match the
             number of remaining DAT columns.
     """
     row = SnpReader._read_fam(self.filename, remove_suffix="dat")
     col, col_property = SnpReader._read_map_or_bim(self.filename, remove_suffix="dat", add_suffix="map")
     if len(row) == 0 or len(col) == 0:
         return SnpData(iid=row, sid=col, pos=col_property, val=np.empty([len(row), len(col)]))
     datfields = pd.read_csv(self.filename, delimiter='\t', header=None, index_col=False)
     # "str" is the portable dtype spelling; "string" is not accepted by
     # every NumPy / Python combination.
     dat_sids = np.array(datfields[0], dtype="str")
     if not np.array_equal(dat_sids, col):
         raise Exception("Expect snp list in map file to exactly match snp list in dat file")
     # Drop the three leading non-genotype columns.
     del datfields[0]
     del datfields[1]
     del datfields[2]
     assert len(row) == datfields.shape[1], "Expect # iids in fam file to match dat file"
     # DataFrame.as_matrix() was removed in pandas 1.0; .values is the
     # equivalent that works across pandas versions.
     val = datfields.values.T
     snpdata = SnpData(iid=row, sid=col, pos=col_property, val=val)
     return snpdata
Exemplo n.º 7
0
 def _read_pstdata(self):
     """Read the whole PED file into memory and return it as a ``SnpData``.

     SNP ids and positions come from ``SnpReader._read_map_or_bim``
     (``remove_suffix="ped"``, ``add_suffix="map"``). The PED file itself
     supplies the individuals (columns 0-1) and the allele strings
     (columns 6 onward, two columns per SNP).

     For every SNP, rows whose first allele equals ``self.missing`` become
     NaN; every other row gets the number of its two alleles that equal the
     first allele of the first non-missing row.

     Returns:
         A ``SnpData`` built from the iids, sids, positions and values.
     """
     col, col_property = SnpReader._read_map_or_bim(self.filename,
                                                    remove_suffix="ped",
                                                    add_suffix="map")
     # ndmin=2 keeps the array two-dimensional even for a single-row file,
     # so the column slicing below still works.
     ped = np.loadtxt(self.filename, dtype='str', comments=None, ndmin=2)
     row = ped[:, 0:2]
     snpsstr = ped[:, 6:]
     inan = snpsstr == self.missing
     # Floor division: a float here is rejected by np.zeros and range().
     sid_count = snpsstr.shape[1] // 2
     snps = np.zeros((snpsstr.shape[0], sid_count))
     for i in range(sid_count):
         is_missing = inan[:, 2 * i]
         snps[is_missing, i] = np.nan
         vals = snpsstr[~is_missing, 2 * i:2 * (i + 1)]
         if vals.shape[0] == 0:
             # Every row is missing for this SNP: no reference allele
             # exists, so the column simply stays NaN.
             continue
         snps[~is_missing, i] += (vals == vals[0, 0]).sum(1)
     snpdata = SnpData(iid=row, sid=col, pos=col_property, val=snps)
     return snpdata
Exemplo n.º 8
0
    def _run_once(self):
        """Load any metadata not already set, the first time this is called.

        Does nothing when ``_ran_once`` is already true. Otherwise ``_row``
        is read via ``SnpReader._read_fam`` if absent, and ``_col`` /
        ``_col_property`` are read together via
        ``SnpReader._read_map_or_bim`` if either is absent. The result is
        validated with ``_assert_iid_sid_pos``. Unless
        ``skip_format_check`` is set, the bed file is opened and closed
        again (presumably to validate its format -- confirm in
        ``_open_bed``).
        """
        if self._ran_once:
            return
        self._ran_once = True

        row_is_loaded = hasattr(self, "_row")
        if not row_is_loaded:
            self._row = SnpReader._read_fam(self.filename, remove_suffix="bed")

        # Both column attributes come from the same read, so reload the
        # pair unless both are already present.
        if not (hasattr(self, "_col") and hasattr(self, "_col_property")):
            ids, properties = SnpReader._read_map_or_bim(
                self.filename, remove_suffix="bed", add_suffix="bim")
            self._col = ids
            self._col_property = properties
        self._assert_iid_sid_pos()

        if self.skip_format_check:
            return
        self._open_bed()
        self._close_bed()
Exemplo n.º 9
0
    def _run_once(self):
        """Load metadata and open the bed file, the first time this is called.

        Does nothing when ``_ran_once`` is already true. Otherwise reads
        ``_row`` via ``SnpReader._read_fam`` and ``_col`` / ``_col_property``
        via ``SnpReader._read_map_or_bim``, validates them with
        ``_assert_iid_sid_pos``, then opens the bed file in binary mode and
        checks its three header bytes. On success the open handle is left in
        ``self._filepointer``.

        Raises:
            Exception: if the first two bytes are not the expected magic
                number, or the third byte does not indicate SNP-major
                layout. The file handle is closed before re-raising.
        """
        if self._ran_once:
            return
        self._ran_once = True

        self._row = SnpReader._read_fam(self.filename, remove_suffix="bed")
        self._col, self._col_property = SnpReader._read_map_or_bim(self.filename, remove_suffix="bed", add_suffix="bim")
        self._assert_iid_sid_pos()

        bedfile = SnpReader._name_of_other_file(self.filename, "bed", "bed")
        self._filepointer = open(bedfile, "rb")
        try:
            # The file is opened in binary mode, so read() yields bytes;
            # compare against bytes literals (a str literal never matches
            # on Python 3).
            magic = self._filepointer.read(2)
            if magic != b'l\x1b':
                raise Exception('No valid binary BED file')
            mode = self._filepointer.read(1)  # \x01 = SNP major \x00 = individual major
            if mode != b'\x01':
                raise Exception('only SNP-major is implemented')
        except Exception:
            # Do not leak the handle when the header is rejected.
            self._filepointer.close()
            raise
        logging.info("bed file is open {0}".format(bedfile))
Exemplo n.º 10
0
    def run_once(self):
        """Load sid/pos, iid and the genotype matrix, the first time called.

        Does nothing when ``_ran_once`` is already true. Otherwise reads
        ``_sid`` / ``_pos`` via ``SnpReader._read_map_or_bim``, loads the
        PED file as text, takes columns 0-1 as ``_iid``, validates with
        ``_assert_iid_sid_pos`` and fills ``_snps``.

        For every SNP (two allele columns each, starting at column 6), rows
        whose first allele equals ``self.missing`` become NaN; every other
        row gets the number of its two alleles that equal the first allele
        of the first non-missing row.
        """
        if self._ran_once:
            return
        self._ran_once = True

        # NOTE(review): remove_suffix="bed" looks odd for a PED reader
        # (the next call uses "ped"); kept as-is -- confirm intent.
        self._sid, self._pos = SnpReader._read_map_or_bim(self.basefilename, remove_suffix="bed", add_suffix="map")

        pedfile = SnpReader._name_of_other_file(self.basefilename, remove_suffix="ped", add_suffix="ped")
        # ndmin=2 keeps the array two-dimensional even for a single-row
        # file, so the column slicing below still works.
        ped = np.loadtxt(pedfile, dtype='str', comments=None, ndmin=2)
        self._iid = ped[:, 0:2]

        self._assert_iid_sid_pos()

        snpsstr = ped[:, 6:]
        inan = snpsstr == self.missing
        # Floor division: a float here is rejected by np.zeros and range().
        sid_count = snpsstr.shape[1] // 2
        self._snps = np.zeros((snpsstr.shape[0], sid_count))
        for i in range(sid_count):
            is_missing = inan[:, 2 * i]
            self._snps[is_missing, i] = np.nan
            vals = snpsstr[~is_missing, 2 * i:2 * (i + 1)]
            if vals.shape[0] == 0:
                # Every row is missing for this SNP: no reference allele
                # exists, so the column simply stays NaN.
                continue
            self._snps[~is_missing, i] += (vals == vals[0, 0]).sum(1)