def test_Feature():
    """Check the ``ID`` and ``parents`` that ``Feature.from_string`` yields.

    Three tab-separated records are parsed; they share their first eight
    columns and differ only in the ninth (attributes) column.
    """
    columns = ['Chr1', 'TAIR10', 'gene', '2', '20', '.', '+', '.']

    # One Parent value: expect a one-element parents list.
    single = Feature.from_string('\t'.join(columns + ['ID=Gene1;Parent=p']))
    eq_('Gene1', single.ID)
    eq_(['p'], single.parents)

    # Comma-separated Parent value: expect one entry per parent.
    multi = Feature.from_string('\t'.join(columns + ['ID=Gene1;Parent=p,q']))
    eq_(['p', 'q'], multi.parents)

    # Empty attributes column: expect an empty ID and no parents.
    bare = Feature.from_string('\t'.join(columns + ['']))
    eq_('', bare.ID)
    eq_([], bare.parents)
def test_transcript_splice_junctions():
    """Compare ``transcript_splice_junctions`` output for the ``g.gff`` fixture.

    The features are loaded from the file returned by ``dummy('g.gff')`` and
    the resulting mapping must equal the expected one below.
    """
    expected = {
        'Chr1_40.1': [220, 302],
        'Chr1_364.1': [83],
        'Chr1_366.1': [90, 196, 326, 535],
    }
    gff = dummy('g.gff')
    eq_(expected, transcript_splice_junctions(Feature.from_file(gff.name)))
def test_Feature():
    """Verify ``ID`` and ``parents`` on features built by ``Feature.from_string``."""

    def gff_line(attributes):
        # Every record shares the first eight columns; only the ninth varies.
        return '\t'.join(
            ['Chr1', 'TAIR10', 'gene', '2', '20', '.', '+', '.', attributes])

    # Record with a single parent.
    one_parent = Feature.from_string(gff_line('ID=Gene1;Parent=p'))
    eq_('Gene1', one_parent.ID)
    eq_(['p'], one_parent.parents)

    # Record whose Parent value lists two comma-separated parents.
    two_parents = Feature.from_string(gff_line('ID=Gene1;Parent=p,q'))
    eq_(['p', 'q'], two_parents.parents)

    # Record with an empty attributes column.
    no_attributes = Feature.from_string(gff_line(''))
    eq_('', no_attributes.ID)
    eq_([], no_attributes.parents)
def test_transcript_splice_junctions():
    """Assert the junction mapping computed from the ``g.gff`` fixture."""
    fixture = dummy('g.gff')
    parsed = Feature.from_file(fixture.name)

    # Expected result, keyed by the IDs asserted for this fixture.
    wanted = {}
    wanted['Chr1_40.1'] = [220, 302]
    wanted['Chr1_364.1'] = [83]
    wanted['Chr1_366.1'] = [90, 196, 326, 535]

    eq_(wanted, transcript_splice_junctions(parsed))
# NOTE(review): the statement below is the tail of a definition whose header
# lies outside this view (presumably `calc_overlap`, which `overlap` calls --
# confirm). This file uses the Python 2 `print` statement, so if the operands
# are ints and `from __future__ import division` is not in effect, `/` here is
# floor division -- verify that is intended.
return (end - start + 1) / b.length


def overlap(db, feature):
    """Record on `feature` the IDs of the entries in `db` that it overlaps.

    `db.overlaps(feature)` supplies the candidates. Nothing is done unless the
    number of candidates lies within
    [`args.min_overlap_count`, `args.max_overlap_count`]. Each candidate is
    then kept only if `calc_overlap(candidate, feature)` lies within
    [`args.min_overlap`, `args.max_overlap`].

    When at least one candidate is kept, their IDs are joined with commas and
    stored in `feature.attributes['overlaps']`; otherwise `feature` is left
    untouched. There is no explicit return value.

    Relies on the module-level `args` assigned in the `__main__` section.
    """
    overlaps = db.overlaps(feature)
    # First filter: the number of candidates must be within the configured range.
    if len(overlaps) >= args.min_overlap_count and \
            len(overlaps) <= args.max_overlap_count:
        valid = []
        for o in overlaps:
            # Second filter: the per-candidate overlap amount must be in range.
            amt = calc_overlap(o, feature)
            if amt >= args.min_overlap and amt <= args.max_overlap:
                valid.append(o.ID)
        # Only write the attribute when something qualified.
        if len(valid) > 0:
            feature.attributes['overlaps'] = ','.join(valid)


if __name__ == '__main__':
    # `args` is deliberately module-level: `overlap` reads its thresholds from it.
    args = parser.parse_args()
    # Position database built from the features in the reference file.
    db = PositionDatabase(Feature.from_file(args.reference))
    chromosomes, genes, transcripts = build_tree(Feature.from_file(args.gff))
    # Annotate every transcript in place.
    for t in transcripts.values():
        overlap(db, t)
    # Flatten the tree and print one feature per line.
    flat = flatten_tree(chromosomes)
    print '\n'.join([str(f) for f in flat])
def mapper_init(self):
    """Load the reference features and cache their splice junctions.

    Reads features from the file named by ``self.options.reference`` and
    stores the result of ``transcript_splice_junctions`` on
    ``self.junctions``.

    NOTE(review): the name suggests a per-mapper setup hook (e.g. mrjob) --
    confirm against the enclosing class.
    """
    self.junctions = transcript_splice_junctions(
        Feature.from_file(self.options.reference))
hits = sum(counts[feature.ID].values()) except KeyError: hits = 0 # coverage is RPKM, reads per kilobase of reference per million mapped reads # http://www.clcbio.com/manual/genomics/Definition_RPKM.html try: return (math.pow(10, 9) * hits) / (self.total * feature.length) except ZeroDivisionError: return 0 if __name__ == '__main__': args = parser.parse_args() chromosomes, genes, transcripts = build_tree(Feature.from_file(args.gff)) # TODO would be nice to split filters out into predicate functions for transcript in transcripts.values(): exons = len([x for x in transcript.children if x.type == 'exon']) if args.counts: counts = Counts.from_file(args.counts) coverage = counts.coverage(transcript) else: coverage = 0 if transcript.length < args.min_length or transcript.length > args.max_length \ or exons < args.min_exons or exons > args.max_exons \ or coverage < args.min_coverage or coverage > args.max_coverage: