Пример #1
0
    def getRegion(self,
                  size=3e4,
                  min_nSNPs=1,
                  chrom_i=None,
                  pos_min=None,
                  pos_max=None):
        """
        Sample a random genomic region and load the genotypes it contains.

        SNP coordinates are read from the BIM file of ``self.bfile``,
        restricted to one chromosome and, optionally, to the open position
        interval (pos_min, pos_max). A region is then drawn at random and
        the corresponding SNPs are read with ``plink_reader.readBED``.

        Args:
            size: region length in position units. ``size == 1`` picks one
                random SNP; any other value draws a window (p, p + size)
                anchored at the position p of a random SNP.
            min_nSNPs: a window is accepted only when strictly more than
                ``min_nSNPs`` SNPs lie strictly inside it (the anchoring
                SNP itself is not counted).
            chrom_i: chromosome to sample from. If None, an integer in
                1..chrom.max() is drawn at random.
            pos_min: if given, only SNPs with position > pos_min are kept.
            pos_max: if given, only SNPs with position < pos_max are kept.

        Returns:
            (Xr, region): ``Xr`` is the ``'snps'`` entry returned by
            readBED; ``region`` is the array [chrom_i, start, end].

        Raises:
            IndexError: if no SNP is left after the chromosome/position
                filters.

        Note:
            Window sampling retries until a window qualifies, so it does
            not terminate when no window can satisfy the constraints.
        """
        bim = plink_reader.readBIM(self.bfile, usecols=(0, 1, 2, 3))

        chrom = SP.array(bim[:, 0], dtype=int)
        pos = SP.array(bim[:, 3], dtype=int)

        if chrom_i is None:
            n_chroms = chrom.max()
            chrom_i = int(SP.ceil(SP.rand() * n_chroms))

        # Build one mask over the full BIM so that the row index of every
        # surviving SNP in the unfiltered file is still known afterwards.
        keep = chrom == chrom_i
        if pos_min is not None:
            keep = SP.logical_and(keep, pos_min < pos)
        if pos_max is not None:
            keep = SP.logical_and(keep, pos < pos_max)

        bim_idx = SP.nonzero(keep)[0]
        pos = pos[keep]
        n_kept = pos.shape[0]

        if n_kept == 0:
            # Same exception type the original hit at pos[idx], but with a
            # message that explains what went wrong.
            raise IndexError('no SNPs left on chromosome %d after position '
                             'filtering' % chrom_i)

        if size == 1:
            # Select a single SNP. floor() keeps the index in 0..n_kept-1;
            # the previous ceil() produced 1..n_kept and could overrun.
            idx = int(SP.floor(n_kept * SP.rand()))
            cis = SP.arange(n_kept) == idx
            region = SP.array([chrom_i, pos[idx], pos[idx]])
        else:
            pos_last = pos.max()
            while True:
                idx = int(SP.floor(n_kept * SP.rand()))
                posT1 = pos[idx]
                posT2 = posT1 + size
                if posT2 > pos_last:
                    # window would run past the last available SNP
                    continue
                cis = SP.logical_and(pos > posT1, pos < posT2)
                if cis.sum() > min_nSNPs:
                    break
            region = SP.array([chrom_i, posT1, posT2])

        # readBED is handed the unfiltered bim, so `start` has to be a row
        # index into the whole file rather than into the filtered subset.
        # NOTE(review): confirm against plink_reader.readBED; like the
        # original, this assumes the selected SNPs are contiguous rows.
        start = bim_idx[cis].min()
        nSNPs = cis.sum()
        rv = plink_reader.readBED(self.bfile,
                                  useMAFencoding=True,
                                  start=start,
                                  nSNPs=nSNPs,
                                  bim=bim)
        Xr = rv['snps']
        return Xr, region
Пример #2
0
def scan(bfile, Y, cov, null, wnds, minSnps, i0, i1, perm_i, resfile, F):
    """
    Run the multi-trait set test over a range of windows and write results.

    For every window index in [i0, i1) the genotypes of the window are read
    from the PLINK files, optionally row-permuted, and passed to
    ``MultiTraitSetTest.optimize``. One tab-delimited row per processed
    window is written to ``resfile``: the window's row of ``wnds`` followed
    by the ``'LLR'`` entry of the optimisation result.

    Args:
        bfile: base name handed to the plink_reader functions.
        Y: phenotype array; ``Y.shape[0]`` is the length of the permutation.
        cov: mapping with keys ``'eval'`` and ``'evec'`` (passed as S_XX and
            U_XX to the test).
        null: null-model information passed to ``mtSet.setNull``.
        wnds: 2-D array of windows. Column 4 is used as the index of the
            first SNP and column 5 as the number of SNPs to read; the last
            column is compared against ``minSnps``.
        minSnps: windows whose last column is below this value are skipped.
        i0, i1: first (inclusive) and last (exclusive) window index.
        perm_i: if not None, seeds NumPy's RNG and the genotype rows are
            permuted with the resulting permutation.
        resfile: path of the output file.
        F: passed through to ``MultiTraitSetTest`` as ``F``.
    """
    if perm_i is not None:
        print('Generating permutation (permutation %d)' % perm_i)
        NP.random.seed(perm_i)
        perm = NP.random.permutation(Y.shape[0])

    mtSet = MTST.MultiTraitSetTest(Y, S_XX=cov['eval'], U_XX=cov['evec'], F=F)
    mtSet.setNull(null)
    bim = plink_reader.readBIM(bfile, usecols=(0, 1, 2, 3))
    fam = plink_reader.readFAM(bfile, usecols=(0, 1))

    print('fitting model')
    # The context manager guarantees the result file is flushed and closed
    # even if a window fails; 'wb' follows the Python 2 csv convention.
    with open(resfile, 'wb') as out:
        wnd_file = csv.writer(out, delimiter='\t')
        for wnd_i in range(i0, i1):
            wnd = wnds[wnd_i, :]
            n_wnd_snps = int(wnd[-1])
            print('.. window %d - (%d, %d-%d) - %d snps' % (
                wnd_i, int(wnd[1]), int(wnd[2]), int(wnd[3]), n_wnd_snps))
            if n_wnd_snps < minSnps:
                print('SKIPPED: number of snps lower than minSnps')
                continue
            RV = plink_reader.readBED(bfile,
                                      useMAFencoding=True,
                                      blocksize=1,
                                      start=int(wnd[4]),
                                      nSNPs=int(wnd[5]),
                                      order='F',
                                      standardizeSNPs=False,
                                      ipos=2,
                                      bim=bim,
                                      fam=fam)

            Xr = RV['snps']
            if perm_i is not None:
                # shuffle individuals (rows) with the seeded permutation
                Xr = Xr[perm, :]
            rv = mtSet.optimize(Xr)
            wnd_file.writerow(NP.concatenate([wnd, rv['LLR']]))
Пример #3
0
    def getRegion(self, size=3e4, min_nSNPs=1, chrom_i=None, pos_min=None, pos_max=None):
        """
        Sample a random genomic region and load the genotypes it contains.

        SNP coordinates are read from the BIM file of ``self.bfile``,
        restricted to one chromosome and, optionally, to the open position
        interval (pos_min, pos_max). A region is then drawn at random and
        the corresponding SNPs are read with ``plink_reader.readBED``.

        Args:
            size: region length in position units. ``size == 1`` picks one
                random SNP; any other value draws a window (p, p + size)
                anchored at the position p of a random SNP.
            min_nSNPs: a window is accepted only when strictly more than
                ``min_nSNPs`` SNPs lie strictly inside it (the anchoring
                SNP itself is not counted).
            chrom_i: chromosome to sample from. If None, an integer in
                1..chrom.max() is drawn at random.
            pos_min: if given, only SNPs with position > pos_min are kept.
            pos_max: if given, only SNPs with position < pos_max are kept.

        Returns:
            (Xr, region): ``Xr`` is the ``"snps"`` entry returned by
            readBED; ``region`` is the array [chrom_i, start, end].

        Raises:
            IndexError: if no SNP is left after the chromosome/position
                filters.

        Note:
            Window sampling retries until a window qualifies, so it does
            not terminate when no window can satisfy the constraints.
        """
        bim = plink_reader.readBIM(self.bfile, usecols=(0, 1, 2, 3))

        chrom = SP.array(bim[:, 0], dtype=int)
        pos = SP.array(bim[:, 3], dtype=int)

        if chrom_i is None:
            n_chroms = chrom.max()
            chrom_i = int(SP.ceil(SP.rand() * n_chroms))

        # Build one mask over the full BIM so that the row index of every
        # surviving SNP in the unfiltered file is still known afterwards.
        selected = chrom == chrom_i
        if pos_min is not None:
            selected = SP.logical_and(selected, pos_min < pos)
        if pos_max is not None:
            selected = SP.logical_and(selected, pos < pos_max)

        rows_in_bim = SP.nonzero(selected)[0]
        pos = pos[selected]
        n_selected = pos.shape[0]

        if n_selected == 0:
            # Same exception type the original hit at pos[idx], but with a
            # message that explains what went wrong.
            raise IndexError("no SNPs left on chromosome %d after position filtering" % chrom_i)

        if size == 1:
            # Select a single SNP. floor() keeps the index in
            # 0..n_selected-1; the previous ceil() produced 1..n_selected
            # and could overrun the array.
            idx = int(SP.floor(n_selected * SP.rand()))
            cis = SP.arange(n_selected) == idx
            region = SP.array([chrom_i, pos[idx], pos[idx]])
        else:
            last_pos = pos.max()
            while True:
                idx = int(SP.floor(n_selected * SP.rand()))
                posT1 = pos[idx]
                posT2 = posT1 + size
                if posT2 > last_pos:
                    # window would run past the last available SNP
                    continue
                cis = SP.logical_and(pos > posT1, pos < posT2)
                if cis.sum() > min_nSNPs:
                    break
            region = SP.array([chrom_i, posT1, posT2])

        # readBED is handed the unfiltered bim, so `start` has to be a row
        # index into the whole file rather than into the filtered subset.
        # NOTE(review): confirm against plink_reader.readBED; like the
        # original, this assumes the selected SNPs are contiguous rows.
        start = rows_in_bim[cis].min()
        nSNPs = cis.sum()
        rv = plink_reader.readBED(self.bfile, useMAFencoding=True, start=start, nSNPs=nSNPs, bim=bim)
        Xr = rv["snps"]
        return Xr, region
Пример #4
0
def scan(bfile, Y, cov, null, wnds, minSnps, i0, i1, perm_i, resfile, F):
    """
    Run the multi-trait set test over a range of windows and write results.

    For every window index in [i0, i1) the genotypes of the window are read
    from the PLINK files, optionally row-permuted, and passed to
    ``MultiTraitSetTest.optimize``. One tab-delimited row per processed
    window is written to ``resfile``: the window's row of ``wnds`` followed
    by the ``'LLR'`` entry of the optimisation result.

    Args:
        bfile: base name handed to the plink_reader functions.
        Y: phenotype array; ``Y.shape[0]`` is the length of the permutation.
        cov: mapping with keys ``'eval'`` and ``'evec'`` (passed as S_XX and
            U_XX to the test).
        null: null-model information passed to ``mtSet.setNull``.
        wnds: 2-D array of windows. Column 4 is used as the index of the
            first SNP and column 5 as the number of SNPs to read; the last
            column is compared against ``minSnps``.
        minSnps: windows whose last column is below this value are skipped.
        i0, i1: first (inclusive) and last (exclusive) window index.
        perm_i: if not None, seeds NumPy's RNG and the genotype rows are
            permuted with the resulting permutation.
        resfile: path of the output file.
        F: passed through to ``MultiTraitSetTest`` as ``F``.
    """
    if perm_i is not None:
        print('Generating permutation (permutation %d)' % perm_i)
        NP.random.seed(perm_i)
        perm = NP.random.permutation(Y.shape[0])

    mtSet = MTST.MultiTraitSetTest(Y, S_XX=cov['eval'], U_XX=cov['evec'], F=F)
    mtSet.setNull(null)
    bim = plink_reader.readBIM(bfile, usecols=(0, 1, 2, 3))
    fam = plink_reader.readFAM(bfile, usecols=(0, 1))

    print('fitting model')
    # The context manager guarantees the result file is flushed and closed
    # even if a window fails; 'wb' follows the Python 2 csv convention.
    with open(resfile, 'wb') as res_fh:
        wnd_file = csv.writer(res_fh, delimiter='\t')
        for wnd_i in range(i0, i1):
            row = wnds[wnd_i, :]
            snp_count = int(row[-1])
            print('.. window %d - (%d, %d-%d) - %d snps' % (
                wnd_i, int(row[1]), int(row[2]), int(row[3]), snp_count))
            if snp_count < minSnps:
                print('SKIPPED: number of snps lower than minSnps')
                continue
            RV = plink_reader.readBED(bfile,
                                      useMAFencoding=True,
                                      blocksize=1,
                                      start=int(row[4]),
                                      nSNPs=int(row[5]),
                                      order='F',
                                      standardizeSNPs=False,
                                      ipos=2,
                                      bim=bim,
                                      fam=fam)

            Xr = RV['snps']
            if perm_i is not None:
                # shuffle individuals (rows) with the seeded permutation
                Xr = Xr[perm, :]
            rv = mtSet.optimize(Xr)
            wnd_file.writerow(NP.concatenate([row, rv['LLR']]))