Пример #1
0
 def analysis(self, chrom, start, stop, win_len=200, sift_len=20):
     """Record GC measurements for ``chrom:start-stop`` in the global tables.

     Two module-level mappings are updated:

     * ``pos_gc[chrom][pos]`` gets, for every ``pos`` in ``start..stop``
       (inclusive), the ``count_gc`` value of the reference sequence
       fetched from ``pos - win_len // 2`` to ``win_len + 1`` further on,
       clipped to the contig bounds.
     * ``win_gc[(chrom, win, win + win_len - 1)]`` gets the ``count_gc``
       value of each ``win_len``-long window, stepped by ``sift_len``,
       starting inside a range that no ``self.rmtk`` record covers.
       Only values strictly between 0.1 and 0.9 are stored.

     ``chrom`` is also added to ``self.contigs``.

     :param chrom: contig name (converted with ``str``).
     :param start: first position of the region (converted with ``int``).
     :param stop: last position of the region (converted with ``int``).
     :param win_len: window length used for both measurements.
     :param sift_len: step between window starts; joined ranges whose
         ``end - start`` is below this value are skipped.
     :raises ValueError: if fetching from ``self.rmtk`` or
         ``self.reference`` fails (wraps the original error).
     """
     global pos_gc, win_gc
     chrom = str(chrom)
     tmp_start = start = int(start)
     stop = int(stop)
     # Loop-invariant: look the contig length up once instead of per position.
     contig_len = self.reference.get_reference_length(chrom)
     flank_stop = min(stop + win_len + 1, contig_len)
     try:
         rmtk = list(self.rmtk.fetch(chrom, start, stop))
         bases = self.reference.fetch(chrom, start, flank_stop)
     except Exception as err:
         raise ValueError(err)
     self.contigs.add(chrom)

     # Explicit floor division keeps the result an int on any interpreter.
     half_win = win_len // 2
     for pos in xrange(start, stop + 1):
         flank_b = max(pos - half_win, 0)
         flank_e = min(flank_b + win_len + 1, contig_len)
         pos_gc[chrom][pos] = count_gc(
             self.reference.fetch(chrom, flank_b, flank_e).upper())

     # Collect the sub-ranges of [start, stop] not covered by rmtk records.
     # Each record is a tab-separated line with begin/end in columns 1 and 2.
     # NOTE(review): assumes records arrive sorted by begin -- confirm.
     feback = []
     if rmtk:
         for line in rmtk:
             rows = line.strip().split("\t")
             begin = int(rows[1])
             end = int(rows[2])
             if begin > start:
                 feback.append([start, begin - 1])
             else:
                 begin = start
             end = min(stop, end)
             if begin > end:
                 continue
             start = end + 1
         # The stretch after the last record is uncovered as well; without
         # this the tail of the region would never be windowed.
         if start <= stop:
             feback.append([start, stop])
     else:
         feback.append([start, stop])

     # NOTE(review): join_ranges presumably merges ranges separated by at
     # most ``offset`` positions -- confirm against its definition.
     for s, e in join_ranges(feback, offset=60):
         if e - s < sift_len:
             continue
         for win in xrange(s, e, sift_len):
             # ``bases`` starts at the original region start, not at ``s``.
             offset = win - tmp_start
             gc_ratio = count_gc(bases[offset:offset + win_len].upper())
             if 0.1 < gc_ratio < 0.9:
                 win_gc[(chrom, win, win + win_len - 1)] = gc_ratio
Пример #2
0
 def analysis(self, chrom, start, stop, win_len=200, sift_len=20):
     """Fill the global ``pos_gc`` and ``win_gc`` tables for one region.

     For every position ``pos`` in ``start..stop`` (inclusive) the
     ``count_gc`` value of the reference sequence around ``pos`` (from
     ``pos - win_len // 2`` to ``win_len + 1`` further on, clipped to the
     contig bounds) is stored in ``pos_gc[chrom][pos]``.

     The parts of the region not covered by any ``self.rmtk`` record are
     then cut into ``win_len``-long windows stepped by ``sift_len``; each
     window whose ``count_gc`` value lies strictly between 0.1 and 0.9 is
     stored as ``win_gc[(chrom, win, win + win_len - 1)]``.

     ``chrom`` is also added to ``self.contigs``.

     :param chrom: contig name (converted with ``str``).
     :param start: first position of the region (converted with ``int``).
     :param stop: last position of the region (converted with ``int``).
     :param win_len: window length used for both measurements.
     :param sift_len: step between window starts; joined ranges whose
         ``end - start`` is below this value are skipped.
     :raises ValueError: if fetching from ``self.rmtk`` or
         ``self.reference`` fails (wraps the original error).
     """
     global pos_gc, win_gc
     chrom = str(chrom)
     tmp_start = start = int(start)
     stop = int(stop)
     # The contig length does not change; query it once and reuse it below.
     ref_len = self.reference.get_reference_length(chrom)
     flank_stop = min(stop + win_len + 1, ref_len)
     try:
         rmtk = list(self.rmtk.fetch(chrom, start, stop))
         bases = self.reference.fetch(chrom, start, flank_stop)
     except Exception as err:
         raise ValueError(err)
     self.contigs.add(chrom)

     # ``//`` makes the intended integer division explicit.
     left_flank = win_len // 2
     for pos in xrange(start, stop + 1):
         flank_b = max(pos - left_flank, 0)
         flank_e = min(flank_b + win_len + 1, ref_len)
         pos_gc[chrom][pos] = count_gc(self.reference.fetch(chrom, flank_b, flank_e).upper())

     # Gather the gaps between rmtk records (tab-separated lines carrying
     # begin/end in columns 1 and 2), advancing ``start`` past each record.
     # NOTE(review): assumes records arrive sorted by begin -- confirm.
     feback = []
     if rmtk:
         for record in rmtk:
             rows = record.strip().split("\t")
             begin = int(rows[1])
             end = int(rows[2])
             if begin > start:
                 feback.append([start, begin - 1])
             else:
                 begin = start
             end = min(stop, end)
             if begin > end:
                 continue
             start = end + 1
         # Previously the gap after the final record was dropped, so the
         # tail of the region never produced any window.
         if start <= stop:
             feback.append([start, stop])
     else:
         feback.append([start, stop])

     # NOTE(review): join_ranges presumably merges ranges separated by at
     # most ``offset`` positions -- confirm against its definition.
     for s, e in join_ranges(feback, offset=60):
         if e - s < sift_len:
             continue
         for win in xrange(s, e, sift_len):
             # ``bases`` is indexed relative to the original region start.
             offset = win - tmp_start
             gc_ratio = count_gc(bases[offset : offset + win_len].upper())
             if 0.1 < gc_ratio < 0.9:
                 win_gc[(chrom, win, win + win_len - 1)] = gc_ratio
Пример #3
0
	def annotation(self, sample, debug=False):
		"""Write the annotated CNV table for ``sample`` and return its path.

		For every contig in ``self.contigs`` the file
		``<outdir>/<sample>/<contig>.cnv`` is read if it exists. Each
		tab-separated row is extended with the value of ``self.z_score``,
		the ``count_gc`` value of the reference sequence over the row's
		interval, the names built from ``self.HGVS.annobed`` and one column
		per title in ``self.DBAnno.dbtitle``, then written to
		``<outdir>/<sample>/<sample>.cnv.anno.tsv``.

		Rows are skipped when they have too few columns, when the start or
		stop column is not an integer, or when the ``count_gc`` value lies
		outside ``[0.3, 0.7]``.

		:param sample: sample name; used for the sub-directory and file name.
		:param debug: when false, each per-contig ``.cnv`` file is deleted
			after it has been processed.
		:return: path of the annotation file that was written.
		"""
		output = os.path.join(self.outdir, sample, "%s.cnv.anno.tsv" % sample)
		f_out = smart_open(output, 'w')
		# try/finally so the handles are released even if annotation fails.
		try:
			titles = [
				"#Chrom", "Ploid", "Start", "Stop", "length", "copyNumber",
				"Mtype", "meanP", "Z-score", "GCpr", "MutationName",
			]
			dbtitle = self.DBAnno.dbtitle.split("\t")
			titles.extend(dbtitle)
			f_out.write("\t".join(titles) + '\n')
			for chrom in self.contigs:
				cnvs = os.path.join(self.outdir, sample, "%s.cnv" % chrom)
				if not os.path.exists(cnvs):
					continue
				f_in = smart_open(cnvs)
				try:
					for line in f_in:
						rows = line.strip().split("\t")
						try:
							# Separate name: do not shadow the contig loop variable.
							cnv_chrom = str(rows[0])
							start = int(rows[2])
							stop = int(rows[3])
							mtype = str(rows[6])
						except (ValueError, IndexError):
							# Header, blank or truncated row: nothing to annotate.
							continue
						z_s = self.z_score(cnv_chrom, start, stop, sample)
						gcr = count_gc(self.DBAnno.refer.fetch(cnv_chrom, start, stop))
						if not (0.3 <= gcr <= 0.7):
							continue
						variation = {
							"Chrom": cnv_chrom,
							"Start": start,
							"Stop": stop,
							"Mtype": mtype,
						}
						m_name = set()
						for hgvs in self.HGVS.annobed(cnv_chrom, start, stop):
							fields = [
								str(hgvs.Transcript),
								str(hgvs.Protein),
								str(hgvs.geneSym),
								str(hgvs.cHgvs),
								str(hgvs.ExonRegions),
							]
							# "." marks an absent field; leave it out of the name.
							m_name.add(":".join(part for part in fields if part != "."))
						dbinfo = self.DBAnno.dbanno(variation)
						anno_message = [str(i) for i in rows]
						anno_message.append(str(z_s))
						anno_message.append(str(gcr))
						# Sorted so repeated runs produce identical output.
						anno_message.append("|".join(sorted(m_name)))
						for title in dbtitle:
							if title in dbinfo:
								anno_message.append("|".join(dbinfo[title]))
							else:
								anno_message.append(".")
						f_out.write("\t".join(anno_message) + '\n')
				finally:
					f_in.close()
				if not debug:
					os.remove(cnvs)
		finally:
			f_out.close()
		return output