def count_variants(self, pos2var):
    """Tally, per variant, how many aligned reads show the mutation.

    Reads ``self.rnaseq_fn`` as tab-separated text (presumably a SAM file:
    rows whose first field starts with "@" are treated as headers and
    skipped). For every remaining row, field 2 is taken as the chromosome,
    field 3 as the integer start position and field 9 as the read sequence
    (0-based field indices).

    Side effects:
      * ``self.stats["n_alignments"]`` is incremented for every data row.
      * ``self.stats["n_variant_hits"]`` and ``variant.n_alignments`` are
        incremented for every row that ``pos2var.variant_for`` maps to a
        variant.
      * ``variant.n_mut`` or ``variant.n_wt`` is incremented depending on
        the result of ``variant.is_expressed_in_seq(seq, pos)``.

    :param pos2var: object exposing ``variant_for(chrom, pos, length)``,
        which returns a variant or a falsy value when nothing matches.
    """
    if self.args.v:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("processing %s..." % self.rnaseq_fn)

    dots = ProgressDots(self.args.dot_counter)
    with open(self.rnaseq_fn) as rf:
        # Quote handling must be off: the input is plain tab-separated text,
        # and with the csv defaults a field that begins with '"' would make
        # the reader glue the following fields (and even lines) together.
        reader = csv.reader(rf, delimiter="\t", quoting=csv.QUOTE_NONE)
        for line in reader:
            # csv yields [] for a blank line; there is nothing to parse.
            if not line:
                continue
            if line[0].startswith("@"):
                continue

            chrom = line[2]
            pos = int(line[3])
            seq = line[9]

            self.stats["n_alignments"] += 1
            if self.args.progress:
                dots.ping()

            # Look for a variant covered by this aligned read.
            # NOTE (from the original author): this per-read lookup was
            # described as O(m*n) in the number and length of the reads and
            # was kept that way for simplicity.
            variant = pos2var.variant_for(chrom, pos, len(seq))
            if not variant:
                continue

            variant.n_alignments += 1
            self.stats["n_variant_hits"] += 1
            if variant.is_expressed_in_seq(seq, pos):
                variant.n_mut += 1
            else:
                variant.n_wt += 1
def main(args):
    """Split the reads in ``args.rnaseq_fn`` into one file per variant.

    Steps:
      1. Load the variants from ``args.variant_fn`` via ``VariantPositions``.
      2. Make sure ``args.output_dir`` exists and delete every entry in it.
      3. Scan ``args.rnaseq_fn`` line by line (lines starting with "@" are
         skipped), look up the variant covering each read and remember the
         raw line on that variant's ``reads`` list.
      4. For every variant, write its collected lines, unchanged, to
         ``<output_dir>/<symbol>.fastq``. Variants without any reads still
         get an (empty) file. Note that the lines are written exactly as
         read from the input, despite the ``.fastq`` extension.

    :param args: parsed options; the attributes ``v``, ``variant_fn``,
        ``output_dir``, ``dot_counter`` and ``rnaseq_fn`` are read.
    :returns: 0 (presumably used as the process exit status).
    :raises OSError: if the output directory cannot be created or cleaned.
    """
    if args.v:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(args)

    print('reading %s...' % args.variant_fn)
    pos2var = VariantPositions(args.variant_fn)

    # An already-existing directory is fine; any other mkdir failure
    # (permissions, path is a regular file, missing parent, ...) is a real
    # error and must not be swallowed.
    try:
        os.mkdir(args.output_dir)
    except OSError:
        if not os.path.isdir(args.output_dir):
            raise

    # Start from an empty directory so stale output from earlier runs cannot
    # be mistaken for current results. os.unlink only removes files, so a
    # sub-directory in here will raise.
    for name in os.listdir(args.output_dir):
        os.unlink(os.path.join(args.output_dir, name))

    dots = ProgressDots(args.dot_counter)
    stats = {'n_reads': 0, 'n_variants': 0}

    print('reading %s...' % args.rnaseq_fn)
    with open(args.rnaseq_fn) as rnaseq_f:
        for line in rnaseq_f:
            dots.ping()
            # Skip header lines and blank lines (the latter have no fields
            # to index and would otherwise raise IndexError below).
            if line.startswith('@') or not line.strip():
                continue
            stats['n_reads'] += 1

            row = line.split('\t')
            var = pos2var.variant_for(row[2], int(row[3]), len(row[9]))
            if not var:
                continue
            # Despite its name, this counts reads that hit a variant.
            stats['n_variants'] += 1

            # The reads list is attached lazily, on a variant's first hit.
            reads = getattr(var, 'reads', None)
            if reads is None:
                reads = var.reads = []
            reads.append(line)

    # Emit a newline (presumably to end the line of progress dots).
    print("")

    # Write out all reads for each variant. Only a missing ``reads``
    # attribute is tolerated here; other errors propagate.
    for var in pos2var.values():
        fn = os.path.join(args.output_dir, '%s.fastq' % var.symbol)
        with open(fn, 'w') as var_f:
            var_f.writelines(getattr(var, 'reads', ()))

    print(stats)
    return 0