def analysis(self, chrom, start, stop, win_len=200, sift_len=20):
    """Record GC values for one region, per position and per sliding window.

    Two module-level containers are filled in place:

    * ``pos_gc[chrom][pos]`` receives ``count_gc`` of the reference bases
      in a flank of about ``win_len`` bases around every position in
      ``[start, stop]``.
    * ``win_gc[(chrom, win, win + win_len - 1)]`` receives ``count_gc`` of
      each ``win_len``-long window, stepped by ``sift_len``, taken from the
      intervals that are not covered by records of ``self.rmtk``. Only
      values strictly between 0.1 and 0.9 are stored.

    NOTE(review): a second definition of ``analysis`` with the same body
    follows this one in the file; if both live in the same scope the later
    one silently replaces this one, so one of them should be removed.

    :param chrom: contig name (converted with ``str``).
    :param start: first position of the region (converted with ``int``).
    :param stop: last position of the region (converted with ``int``).
    :param win_len: window / flank length in bases.
    :param sift_len: step between consecutive windows; intervals shorter
        than this are skipped.
    :raises ValueError: if fetching from ``self.rmtk`` or
        ``self.reference`` fails.
    """
    chrom = str(chrom)
    region_start = cursor = int(start)
    stop = int(stop)

    # The contig length cannot change during this call: look it up once
    # instead of once per position.
    ref_len = self.reference.get_reference_length(chrom)
    flank_stop = min(stop + win_len + 1, ref_len)

    try:
        rmtk = list(self.rmtk.fetch(chrom, region_start, stop))
        bases = self.reference.fetch(chrom, region_start, flank_stop)
    except Exception as err:
        raise ValueError(err)

    self.contigs.add(chrom)

    # Explicit floor division keeps the coordinates integral even when true
    # division is in effect.
    half_win = win_len // 2
    for pos in xrange(region_start, stop + 1):
        flank_b = max(pos - half_win, 0)
        flank_e = min(flank_b + win_len + 1, ref_len)
        flank_seq = self.reference.fetch(chrom, flank_b, flank_e).upper()
        pos_gc[chrom][pos] = count_gc(flank_seq)

    # Collect the stretches of the region lying before each rmtk record
    # (presumably masked/repeat intervals -- confirm against the track).
    feback = []
    if rmtk:
        for record in rmtk:
            fields = record.strip().split("\t")
            begin = int(fields[1])
            end = int(fields[2])
            if begin > cursor:
                feback.append([cursor, begin - 1])
            else:
                begin = cursor
            end = min(stop, end)
            if begin > end:
                continue
            cursor = end + 1
        # NOTE(review): the stretch between the last record and ``stop`` is
        # never appended on this path. The nesting here was reconstructed
        # from a source whose layout had been flattened -- confirm against
        # the upstream file before changing it.
    else:
        feback.append([cursor, stop])

    for s, e in join_ranges(feback, offset=60):
        if e - s < sift_len:
            continue
        for win in xrange(s, e, sift_len):
            # ``bases`` starts at the original region start, so translate
            # the genomic coordinate into a string offset.
            offset = win - region_start
            gc_ratio = count_gc(bases[offset:offset + win_len].upper())
            if 0.1 < gc_ratio < 0.9:
                win_gc[(chrom, win, win + win_len - 1)] = gc_ratio
def analysis(self, chrom, start, stop, win_len=200, sift_len=20):
    """Record GC values for one region, per position and per sliding window.

    Two module-level containers are filled in place:

    * ``pos_gc[chrom][pos]`` receives ``count_gc`` of the reference bases
      in a flank of about ``win_len`` bases around every position in
      ``[start, stop]``.
    * ``win_gc[(chrom, win, win + win_len - 1)]`` receives ``count_gc`` of
      each ``win_len``-long window, stepped by ``sift_len``, taken from the
      intervals that are not covered by records of ``self.rmtk``. Only
      values strictly between 0.1 and 0.9 are stored.

    NOTE(review): an earlier definition of ``analysis`` with the same body
    precedes this one in the file; if both live in the same scope this
    later one is the one that takes effect, and the earlier copy should be
    removed.

    :param chrom: contig name (converted with ``str``).
    :param start: first position of the region (converted with ``int``).
    :param stop: last position of the region (converted with ``int``).
    :param win_len: window / flank length in bases.
    :param sift_len: step between consecutive windows; intervals shorter
        than this are skipped.
    :raises ValueError: if fetching from ``self.rmtk`` or
        ``self.reference`` fails.
    """
    chrom = str(chrom)
    region_start = cursor = int(start)
    stop = int(stop)

    # The contig length cannot change during this call: look it up once
    # instead of once per position.
    ref_len = self.reference.get_reference_length(chrom)
    flank_stop = min(stop + win_len + 1, ref_len)

    try:
        rmtk = list(self.rmtk.fetch(chrom, region_start, stop))
        bases = self.reference.fetch(chrom, region_start, flank_stop)
    except Exception as err:
        raise ValueError(err)

    self.contigs.add(chrom)

    # Explicit floor division keeps the coordinates integral even when true
    # division is in effect.
    half_win = win_len // 2
    for pos in xrange(region_start, stop + 1):
        flank_b = max(pos - half_win, 0)
        flank_e = min(flank_b + win_len + 1, ref_len)
        flank_seq = self.reference.fetch(chrom, flank_b, flank_e).upper()
        pos_gc[chrom][pos] = count_gc(flank_seq)

    # Collect the stretches of the region lying before each rmtk record
    # (presumably masked/repeat intervals -- confirm against the track).
    feback = []
    if rmtk:
        for record in rmtk:
            fields = record.strip().split("\t")
            begin = int(fields[1])
            end = int(fields[2])
            if begin > cursor:
                feback.append([cursor, begin - 1])
            else:
                begin = cursor
            end = min(stop, end)
            if begin > end:
                continue
            cursor = end + 1
        # NOTE(review): the stretch between the last record and ``stop`` is
        # never appended on this path. The nesting here was reconstructed
        # from a source whose layout had been flattened -- confirm against
        # the upstream file before changing it.
    else:
        feback.append([cursor, stop])

    for s, e in join_ranges(feback, offset=60):
        if e - s < sift_len:
            continue
        for win in xrange(s, e, sift_len):
            # ``bases`` starts at the original region start, so translate
            # the genomic coordinate into a string offset.
            offset = win - region_start
            gc_ratio = count_gc(bases[offset:offset + win_len].upper())
            if 0.1 < gc_ratio < 0.9:
                win_gc[(chrom, win, win + win_len - 1)] = gc_ratio
def annotation(self, sample, debug=False):
    """Merge the per-contig CNV files of ``sample`` into one annotated TSV.

    For every contig in ``self.contigs`` the file
    ``<outdir>/<sample>/<contig>.cnv`` is read if it exists. Each row whose
    start/stop columns parse as integers is extended with its z-score
    (``self.z_score``), the ``count_gc`` value of the reference sequence,
    the HGVS names from ``self.HGVS.annobed`` and one column per title in
    ``self.DBAnno.dbtitle``. Rows whose GC value falls outside
    ``[0.3, 0.7]`` are dropped.

    :param sample: sample name; used for the sub-directory and file name.
    :param debug: when false, each per-contig ``.cnv`` file is deleted
        after it has been processed.
    :returns: path of the written ``<sample>.cnv.anno.tsv`` file.
    """
    output = os.path.join(self.outdir, sample, "%s.cnv.anno.tsv" % sample)
    titles = ["#Chrom", "Ploid", "Start", "Stop", "length", "copyNumber",
              "Mtype", "meanP", "Z-score", "GCpr", "MutationName"]
    dbtitle = self.DBAnno.dbtitle.split("\t")
    titles.extend(dbtitle)

    f_out = smart_open(output, 'w')
    # try/finally rather than ``with``: nothing visible here guarantees
    # that smart_open returns a context manager.
    try:
        f_out.write("\t".join(titles) + '\n')
        # ``contig`` is kept distinct from the per-row ``chrom`` so the
        # loop variable is no longer overwritten by the row parser.
        for contig in self.contigs:
            cnvs = os.path.join(self.outdir, sample, "%s.cnv" % contig)
            if not os.path.exists(cnvs):
                continue
            f_in = smart_open(cnvs)
            try:
                for line in f_in:
                    rows = line.strip().split("\t")
                    try:
                        chrom = str(rows[0])
                        start = int(rows[2])
                        stop = int(rows[3])
                        mtype = str(rows[6])
                    except (ValueError, IndexError):
                        # Header, blank or truncated line: nothing to
                        # annotate.
                        continue
                    z_s = self.z_score(chrom, start, stop, sample)
                    gcr = count_gc(
                        self.DBAnno.refer.fetch(chrom, start, stop))
                    if not (0.3 <= gcr <= 0.7):
                        continue
                    variation = {"Chrom": chrom, "Start": start,
                                 "Stop": stop, "Mtype": mtype}

                    m_name = set()
                    for hgvs in self.HGVS.annobed(chrom, start, stop):
                        parts = [str(hgvs.Transcript), str(hgvs.Protein),
                                 str(hgvs.geneSym), str(hgvs.cHgvs),
                                 str(hgvs.ExonRegions)]
                        # "." marks a missing field and is left out.
                        m_name.add(":".join(p for p in parts if p != "."))

                    dbinfo = self.DBAnno.dbanno(variation)
                    anno_message = [str(field) for field in rows]
                    anno_message.append(str(z_s))
                    anno_message.append(str(gcr))
                    # Sorted so the column is reproducible between runs
                    # (set iteration order is arbitrary).
                    anno_message.append("|".join(sorted(m_name)))
                    for key in dbtitle:
                        if key in dbinfo:
                            anno_message.append("|".join(dbinfo[key]))
                        else:
                            anno_message.append(".")
                    f_out.write("\t".join(anno_message) + '\n')
            finally:
                f_in.close()
            # Only reached when the file was processed without error, so a
            # failed run keeps its intermediate data.
            if not debug:
                os.remove(cnvs)
    finally:
        f_out.close()
    return output