Exemplo n.º 1
0
    def count_variants(self, pos2var):
        """Tally variant support from the alignments in ``self.rnaseq_fn``.

        The file is read as tab-separated text.  Rows whose first field
        starts with "@" are skipped, as are blank rows.  For every other
        row the third field is taken as the chromosome, the fourth as the
        integer position and the tenth as the read sequence.
        NOTE(review): this column layout matches SAM -- confirm.

        Side effects, per data row:
          * ``self.stats["n_alignments"]`` is incremented.
          * When ``pos2var.variant_for(chrom, pos, len(seq))`` returns a
            truthy variant, ``self.stats["n_variant_hits"]`` and the
            variant's ``n_alignments`` are incremented, and then either
            its ``n_mut`` or its ``n_wt`` counter is incremented depending
            on ``variant.is_expressed_in_seq(seq, pos)``.

        Args:
            pos2var: object exposing ``variant_for(chrom, pos, length)``,
                which returns a variant or a falsy value.

        Returns:
            None.  Results are accumulated on ``self.stats`` and on the
            variant objects.

        Raises:
            IndexError: if a non-blank data row has fewer than ten fields.
            ValueError: if the position field is not an integer.
        """
        if self.args.v:
            # Single-argument parenthesised form prints the same text under
            # both the Python 2 print statement and the Python 3 function.
            print("processing %s..." % self.rnaseq_fn)
        dots = ProgressDots(self.args.dot_counter)

        with open(self.rnaseq_fn) as rf:
            # QUOTE_NONE: fields are raw text, not CSV-quoted.  With the
            # default quoting a field that begins with '"' would be parsed
            # as a quoted field and could swallow following tabs/newlines,
            # silently merging columns or whole records.
            reader = csv.reader(rf, delimiter="\t", quoting=csv.QUOTE_NONE)
            for line in reader:
                # A blank line is returned as an empty list; skip it rather
                # than fail on line[0].
                if not line or line[0].startswith("@"):
                    continue
                chrom = line[2]
                pos = int(line[3])
                seq = line[9]

                self.stats["n_alignments"] += 1
                if self.args.progress:
                    dots.ping()

                # crawl the aligned read, looking for a variant:
                # This is O(m*n) on the number and length of the reads,
                # but it's O(1) for programmer laziness.
                variant = pos2var.variant_for(chrom, pos, len(seq))
                if not variant:
                    continue
                variant.n_alignments += 1
                self.stats["n_variant_hits"] += 1

                if variant.is_expressed_in_seq(seq, pos):
                    variant.n_mut += 1
                else:
                    variant.n_wt += 1
Exemplo n.º 2
0
def main(args):
    """Split the reads in ``args.rnaseq_fn`` into one file per variant.

    Steps:
      1. Load variant positions from ``args.variant_fn``.
      2. Make sure ``args.output_dir`` exists, then delete every entry
         already inside it.  WARNING: this removes whatever files the
         directory currently holds.
      3. Scan ``args.rnaseq_fn`` line by line.  Lines starting with "@"
         and blank lines are skipped.  Every other line is split on tabs;
         the third field is used as the chromosome, the fourth as the
         integer position and the length of the tenth as the read length
         when asking ``pos2var.variant_for`` for a variant.  Matching
         lines are collected on the variant's ``reads`` attribute.
         NOTE(review): this column layout matches SAM -- confirm.
      4. For every variant that collected at least one line, write those
         lines verbatim to ``<output_dir>/<var.symbol>.fastq``.  Variants
         with no collected lines produce no file.
      5. Print the ``n_reads`` / ``n_variants`` counters.

    Args:
        args: parsed options; the attributes read here are ``v``,
            ``variant_fn``, ``output_dir``, ``dot_counter`` and
            ``rnaseq_fn``.

    Returns:
        0 on completion.

    Raises:
        OSError: if the output directory cannot be created, listed or
            emptied.
        IndexError: if a non-blank data line has fewer than ten fields.
        ValueError: if the position field is not an integer.
    """
    # Single-argument parenthesised prints behave the same under the
    # Python 2 print statement and the Python 3 print function.
    if args.v:
        print(args)
    print('reading %s...' % args.variant_fn)
    pos2var = VariantPositions(args.variant_fn)

    # Create the directory only when it is missing, so that genuine
    # failures (permissions, a plain file in the way, ...) are reported
    # instead of being silently swallowed.
    if not os.path.isdir(args.output_dir):
        os.mkdir(args.output_dir)

    # Start from an empty output directory.
    for entry in os.listdir(args.output_dir):
        os.unlink(os.path.join(args.output_dir, entry))

    dots = ProgressDots(args.dot_counter)

    stats = {'n_reads': 0,
             'n_variants': 0}
    print('reading %s...' % args.rnaseq_fn)
    with open(args.rnaseq_fn) as f:
        for line in f:
            dots.ping()
            if line.startswith('@'):
                continue
            # A blank line has no fields to index; skip it.
            if not line.strip():
                continue
            stats['n_reads'] += 1

            row = line.split('\t')
            var = pos2var.variant_for(row[2], int(row[3]), len(row[9]))
            if not var:
                continue
            stats['n_variants'] += 1

            # The 'reads' list is attached to a variant on its first hit.
            reads = getattr(var, 'reads', None)
            if reads is None:
                reads = var.reads = []
            reads.append(line)
    # End the progress output with a newline.
    print('')

    # write out all reads for each variant:
    for var in pos2var.values():
        # Decide before opening anything: a variant that never received a
        # read must not leave an empty file behind, and an AttributeError
        # from var.symbol must not be mistaken for "no reads".
        reads = getattr(var, 'reads', None)
        if not reads:
            continue
        fn = os.path.join(args.output_dir, '%s.fastq' % var.symbol)
        with open(fn, 'w') as var_f:
            var_f.writelines(reads)

    print(stats)
    return 0