Пример #1
0
    def __iter__(self):
        """Iterate over a tabix-indexed file as a sequence of row tuples.

        The first item yielded is the header row: ``self.header`` when it is
        not ``None``, otherwise the last header line of the file split on
        tabs.  If ``self.header`` is ``None`` and the file has no header
        lines, no header row is yielded.  Every record returned by ``fetch``
        for ``self.reference``, ``self.start``, ``self.stop`` and
        ``self.region`` is then yielded as a tuple.

        The file is closed when iteration completes, when an error
        propagates, or when the generator is closed early.
        """
        from pysam import Tabixfile, asTuple
        f = Tabixfile(self.filename, mode='r')
        try:
            # header row
            if self.header is not None:
                yield self.header
            else:
                # assume last header line has fields
                h = list(f.header)
                if h:
                    header_line = h[-1]
                    # The header line may arrive as bytes or as already
                    # decoded text; only bytes can be decoded (asking to
                    # decode text raises TypeError), so decode conditionally.
                    if isinstance(header_line, bytes):
                        header_line = header_line.decode('ascii')
                    yield tuple(header_line.split('\t'))

            # data rows
            for row in f.fetch(reference=self.reference,
                               start=self.start,
                               end=self.stop,
                               region=self.region,
                               parser=asTuple()):
                yield tuple(row)

        finally:
            f.close()
Пример #2
0
 def __init__(self, chromosome, position, annotation_table_file):
     """Load the annotation record for a single site from a tabix file.

     Opens ``annotation_table_file``, fetches the interval
     ``[position - 1, position)`` on ``chromosome`` and keeps the first
     returned line in ``self.line``.  The line is split on tabs and each
     field is stored, in file column order, on the attributes assigned
     below; ``self.position`` is then converted to ``int``.  All other
     fields are left as strings.

     Raises:
         StopIteration: if the fetch returns no line for the interval.
         ValueError: if the line does not contain exactly one field per
             attribute, or the position field is not an integer.
     """
     annotation_table = Tabixfile(annotation_table_file)
     try:
         rows = annotation_table.fetch(reference=chromosome,
                                       start=position - 1,
                                       end=position)
         # Builtin next() works on both Python 2 and 3 iterators, unlike
         # the Python-2-only ``.next()`` method.
         self.line = next(rows)
     finally:
         # Always release the file handle, even when the lookup fails.
         annotation_table.close()

     (self.chromosome,
      self.position,
      self.reference_base,
      self.genic,
      self.exonic,
      self.intronic,
      self.intergenic,
      self.utr5,
      self.utr3,
      self.fold0,
      self.fold4,
      self.fold2,
      self.fold3,
      self.CDS,
      self.mRNA,
      self.rRNA,
      self.tRNA,
      self.feature_names,
      self.feature_types,
      self.feature_ID,
      self.cds_position,
      self.strand,
      self.frame,
      self.codon,
      self.aa,
      self.degen,
      self.FPKM,
      self.rho,
      self.FAIRE,
      self.recombination,
      self.mutability,
      self.quebec_alleles) = self.line.split('\t')
     self.position = int(self.position)
Пример #3
0
 def __iter__(self):
     """Iterate over a tabix-indexed file as a sequence of row tuples.

     Yields the header row first: ``self.header`` when it is not ``None``,
     otherwise the last header line of the file split on tabs (no header
     row is yielded if the file has no header lines).  Then yields every
     record returned by ``fetch`` for ``self.reference``, ``self.start``,
     ``self.end`` and ``self.region`` as a tuple.

     Raises:
         UnsatisfiedDependency: if pysam cannot be imported.
     """
     try:
         from pysam import Tabixfile, asTuple
     except ImportError as e:
         raise UnsatisfiedDependency(e, dep_message)
     f = Tabixfile(self.filename, mode='r')
     # try/finally alone guarantees the file is closed; errors propagate
     # unchanged without needing a catch-and-reraise clause.
     try:
         # header row
         if self.header is not None:
             yield self.header
         else:
             # assume last header line has fields
             h = list(f.header)
             if h:
                 yield tuple(h[-1].split('\t'))
         # data rows
         for row in f.fetch(reference=self.reference, start=self.start,
                            end=self.end, region=self.region,
                            parser=asTuple()):
             yield tuple(row)
     finally:
         f.close()
Пример #4
0
    def __iter__(self):
        """Yield a header row followed by the fetched records as tuples.

        Header: ``self.header`` if it is not ``None``; otherwise the last
        header line of the tabix file, split on tabs.  When the file has no
        header lines and ``self.header`` is ``None``, iteration starts
        directly with the data rows.

        Data: each record returned by ``fetch`` using ``self.reference``,
        ``self.start``, ``self.stop`` and ``self.region``, converted to a
        tuple.

        The underlying file is always closed, including on error or when
        the generator is abandoned and closed.
        """
        from pysam import Tabixfile, asTuple
        f = Tabixfile(self.filename, mode='r')
        try:
            # header row
            if self.header is not None:
                yield self.header
            else:
                # assume last header line has fields
                h = list(f.header)
                if h:
                    header_line = h[-1]
                    # Decode only when the line is bytes: requesting a
                    # decode of text that is already decoded raises
                    # TypeError.
                    if isinstance(header_line, bytes):
                        header_line = header_line.decode('ascii')
                    yield tuple(header_line.split('\t'))

            # data rows
            for row in f.fetch(reference=self.reference, start=self.start,
                               end=self.stop, region=self.region,
                               parser=asTuple()):
                yield tuple(row)

        finally:
            f.close()
Пример #5
0
 def __iter__(self):
     """Yield a header row followed by the fetched records as tuples.

     The header row is ``self.header`` when that is not ``None``; otherwise
     it is the last header line of the tabix file split on tabs, and it is
     omitted entirely if the file has no header lines.  Data rows come from
     ``fetch`` with ``self.reference``, ``self.start``, ``self.end`` and
     ``self.region``, each converted to a tuple.

     Raises:
         UnsatisfiedDependency: if pysam cannot be imported.
     """
     try:
         from pysam import Tabixfile, asTuple
     except ImportError as e:
         raise UnsatisfiedDependency(e, dep_message)
     f = Tabixfile(self.filename, mode="r")
     # The finally clause is sufficient for cleanup; exceptions propagate
     # on their own, so no bare except/re-raise is needed.
     try:
         # header row
         if self.header is not None:
             yield self.header
         else:
             # assume last header line has fields
             h = list(f.header)
             if h:
                 yield tuple(h[-1].split("\t"))
         # data rows
         for row in f.fetch(
             reference=self.reference, start=self.start, end=self.end, region=self.region, parser=asTuple()
         ):
             yield tuple(row)
     finally:
         f.close()
genofinfile.close()

# Column positions of the requested individuals in each genotype row; the
# +6 offset skips the six leading columns, which are always kept.
genoinds = [genofins.index(x) + 6 for x in officialfindivs]
y = {}

# Write one comma-separated line per SNP to <currfiles>.bimbam.
with open(currfiles + '.bimbam', 'w') as currbimbam:
	for snp in masterdic:
		chrm = masterdic[snp][0]
		# Skip the entry whose chromosome field is the literal 'chrm'
		# (presumably the header row of the input table -- confirm).
		if chrm == 'chrm':
			continue
		tabixer = Tabixfile('/mnt/lustre/home/cusanovich/500HT/Imputed1415/ByChr/hutt.all.imputed.' + chrm + '.txt.gz')
		try:
			# Only the first record overlapping the interval is used; an
			# empty result raises IndexError.
			tempgenos = [x.split('\t') for x in tabixer.fetch(chrm, int(masterdic[snp][1]) - 1, int(masterdic[snp][2]))][0]
		finally:
			# Release the handle even if the lookup fails.
			tabixer.close()
		# list(range(...)) keeps the concatenation valid on Python 3 too.
		genos = [tempgenos[x] for x in list(range(0, 6)) + genoinds]
		y[snp] = [genos[3], 'A', 'G'] + genos[6:]
		# Emit this SNP's row. Joining the dict `y` itself would write the
		# accumulated SNP keys instead of the genotype values.
		currbimbam.write(", ".join(y[snp]) + "\n")

print("Running GEMMA...")
gemmer = (hmdir + 'Programs/gemma0.94 -g ' + currfiles + '.bimbam -p ' + currfiles + '.pheno -k ' + currfiles + '.square.txt -c ' + currfiles + '.covariates -lmm 4 -maf 0.05 -o curr_' + pheno)
# Time the external GEMMA invocation and report the elapsed seconds.
t0 = time.time()
ifier(gemmer)
t1 = time.time()
print(t1 - t0)
Пример #7
0
	# When PCs are not regressed out, build a `cut` command that extracts
	# column (exprcoldic[gene] + 1) of the space-delimited expression file
	# into <currfiles>.pheno, and hand it to ifier().
	# NOTE(review): ifier() is defined elsewhere; presumably it runs the
	# string as a shell command -- confirm.
	if not regressPCs:
		phener = ('cut -f' + str(int(exprcoldic[gene]) + 1) + ' -d" " ' +
			hmdir + '500HT/Exprs/qqnorm.500ht' + gccor + covcor +
			'.ordered.' + chrm + '.bimbam > ' + currfiles + '.pheno')
		ifier(phener)
	# One comma-joined genotype line per SNP listed for this gene.
	currgenos = []
	####Pull genotypes for the SNPs in cis, if genotypes not already in dictionary: go to geno file and pull in appropriate data
	for snp in masterdic[gene]:
		try:
			# genodic acts as a cache of already-built genotype rows.
			currgenos.append(", ".join(genodic[snp]))
		except KeyError:
			#tabixer = pysam.Tabixfile('/mnt/lustre/home/cusanovich/500HT/Imputed1415/ByChr/hutt.imputed.' + chrm + '.txt.gz')
			#tabixer = pysam.Tabixfile('/mnt/lustre/home/cusanovich/500HT/' + mapper + '/ByChr/hutt.' + mapper + '.' + distance + '.' + chrm + '.txt.gz')
			tabixer = Tabixfile('/mnt/lustre/home/cusanovich/500HT/Imputed1415/ByChr/hutt' + mapper + '.' + chrm + '.txt.gz')
			# Only the first record returned for the interval is used.
			# NOTE(review): the start coordinate is passed as-is (no -1);
			# confirm snpdic already stores 0-based start positions.
			genos = [x.split('\t') for x in tabixer.fetch(chrm,int(snpdic[snp][1]),int(snpdic[snp][2]))][0]
			tabixer.close()
			# Row layout: column 3, the literals 'A' and 'G', then every
			# column from index 6 onward.
			y = [genos[3], 'A', 'G'] + genos[6:len(genos)]
			genodic[snp] = y
			currgenos.append(", ".join(genodic[snp]))
	# Write all rows for this gene to <currfiles>.bimbam, one per line.
	currbimbam = open(currfiles + '.bimbam','w')
	print >> currbimbam, "\n".join(currgenos)
	currbimbam.close()
	#print "Running GEMMA..."
	# The two GEMMA command lines differ only in the extra
	# `-c <currfiles>.pcs.txt` argument used when PCs are not regressed out.
	if regressPCs:
		gemmer = (hmdir + 'Programs/gemma0.94 -g ' + currfiles + '.bimbam -p ' + currfiles + '.pheno -k ' + currfiles + '.square.txt -lmm 4 -maf 0.05 -o curr_' + chrm + '_pc' + str(pcs) + '_' + correction)
		ifier(gemmer)
	if not regressPCs:
		gemmer = (hmdir + 'Programs/gemma0.94 -g ' + currfiles + '.bimbam -p ' + currfiles + '.pheno -k ' + currfiles + '.square.txt -c ' + currfiles + '.pcs.txt -lmm 4 -maf 0.05 -o curr_' + chrm + '_pc' + str(pcs) + '_' + correction)
		ifier(gemmer)
	currresults = open(genodir + '/output/curr_' + chrm + '_pc' + str(pcs) + '_' + correction + '.assoc.txt','r')
	pmin = 1.1