progress = ProgressText() base = os.path.split(os.path.abspath(opt.counts))[0] TRNA = {} NRNA = {} GDNA = {} SDNA = {} from chromreg import ChromLabelRegistry chrreg = ChromLabelRegistry() labels = list(map(str, list(range(1, 100)))) + ["X", "Y", "MT"] chrreg.add_labels(opt.counts, labels) chrreg.default_chrom_order() chrorder = lambda l: chrreg.chrom_order(chrreg.label2chrom(opt.counts, l)) progress.stage("Parsing read-counts") f = open(opt.counts, mode='rt', encoding='utf8') reader = csv.DictReader(f, delimiter='\t') types2files = defaultdict(set) files2types = defaultdict(set) for row in reader: key = (row['CHROM'], row['POS']) filename = row['AlignedReads'] for k in row: if k.endswith('Count') and row[k] != "": row[k] = int(row[k]) if k.endswith('Sc') and row[k] != "": row[k] = float(row[k]) if re.search(regex["GDNA"], filename) and key not in GDNA:
chrreg.add_labels(snvfile,snvchroms[snvfile]) snvdata1 = {} for (sf, chr, locus, ref, alt), r in snvdata.iteritems(): chrom = chrreg.label2chrom(sf,chr) assert(chrom) snvkey = (chrom,locus,ref,alt) if snvkey not in snvdata1: snvdata1[snvkey] = (chrom,locus,ref,alt,r) for bamfile in opt.alignments: chrreg.add_bamlabels(bamfile) chrreg.determine_chrom_order() snvdata = sorted(snvdata1.values(),key=lambda s: (chrreg.chrom_order(s[0]),s[1],s[2],s[3])) # extrasnvheaders = filter(lambda h: h in usedsnvheaders, extrasnvheaders) progress.message("SNVs: %d\n" % len(snvdata)) outheaders = snvheaders + filter(None, """ SNVCountForward SNVCountReverse RefCountForward RefCountReverse SNVCount RefCount GoodReads %BadRead R HomoVarSc HetSc
snvdata1 = {} for (sf, chr, locus, ref, alt), r in snvdata.iteritems(): chrom = chrreg.label2chrom(sf, chr) assert (chrom) snvkey = (chrom, locus, ref, alt) if snvkey not in snvdata1: snvdata1[snvkey] = (chrom, locus, ref, alt, r) for bamfile in opt.alignments: chrreg.add_bamlabels(bamfile) chrreg.determine_chrom_order() snvdata = sorted(snvdata1.values(), key=lambda s: (chrreg.chrom_order(s[0]), s[1], s[2], s[3])) # extrasnvheaders = filter(lambda h: h in usedsnvheaders, extrasnvheaders) progress.message("SNVs: %d\n" % len(snvdata)) outheaders = snvheaders + filter( None, """ SNVCountForward SNVCountReverse RefCountForward RefCountReverse SNVCount RefCount GoodReads %BadRead R HomoVarSc
regex["NRNA"] = opt.normaltransre regex["SDNA"] = opt.tumordnare regex["TRNA"] = opt.tumortransre progress = ProgressText() base = os.path.split(os.path.abspath(opt.counts))[0] TRNA = {}; NRNA = {}; GDNA = {}; SDNA = {} from chromreg import ChromLabelRegistry chrreg = ChromLabelRegistry() labels = map(str,range(1,100)) + ["X","Y","MT"] chrreg.add_labels(opt.counts,labels) chrreg.default_chrom_order() chrorder = lambda l: chrreg.chrom_order(chrreg.label2chrom(opt.counts,l)) progress.stage("Parsing read-counts") f = open(opt.counts, 'r') reader = csv.DictReader(f, delimiter='\t') types2files = defaultdict(set) files2types = defaultdict(set) for row in reader: key = (row['CHROM'],row['POS']) filename = row['AlignedReads'] for k in row: if k.endswith('Count') and row[k] != "": row[k] = int(row[k]) if k.endswith('Sc') and row[k] != "": row[k] = float(row[k]) if re.search(regex["GDNA"],filename) and key not in GDNA: