Python Feature示例，odetta.gff.feature.Feature Python示例

示例#1

0

显示文件

文件： test_feature.py 项目： abuchanan/odetta

def test_Feature():
    """Check the ID and parents that Feature.from_string exposes.

    Three tab-separated records are parsed that differ only in their last
    field: one parent, two comma-separated parents, and an empty field.
    """
    # The first eight fields are identical for every record in this test.
    shared_fields = ['Chr1', 'TAIR10', 'gene', '2', '20', '.', '+', '.']

    def parse(last_field):
        # Append the varying last field and parse the tab-joined record.
        return Feature.from_string('\t'.join(shared_fields + [last_field]))

    one_parent = parse('ID=Gene1;Parent=p')
    eq_('Gene1', one_parent.ID)
    eq_(['p'], one_parent.parents)

    two_parents = parse('ID=Gene1;Parent=p,q')
    eq_(['p', 'q'], two_parents.parents)

    # An empty last field is expected to give an empty ID and no parents.
    nothing_set = parse('')
    eq_('', nothing_set.ID)
    eq_([], nothing_set.parents)

示例#2

0

显示文件

文件： test_gff.py 项目： abuchanan/odetta

def test_transcript_splice_junctions():
    """Compare transcript_splice_junctions output for the 'g.gff' fixture.

    The result is expected to map each transcript ID to a list of integer
    junction values.
    """
    expected = {
        'Chr1_40.1': [220, 302],
        'Chr1_364.1': [83],
        'Chr1_366.1': [90, 196, 326, 535],
    }

    fixture = dummy('g.gff')
    observed = transcript_splice_junctions(Feature.from_file(fixture.name))

    eq_(expected, observed)

示例#3

0

显示文件

文件： test_feature.py 项目： buchanae/odetta

def test_Feature():
    """Verify Feature.from_string for three variants of the last field.

    Covers a single parent, a comma-separated pair of parents, and an empty
    last field (which should give an empty ID and an empty parents list).
    """
    # Fields one to eight never change; only the ninth varies per case.
    fixed = ('Chr1', 'TAIR10', 'gene', '2', '20', '.', '+', '.')

    def record(ninth):
        # Build the full tab-separated line for one test case.
        return '\t'.join(fixed + (ninth,))

    feature = Feature.from_string(record('ID=Gene1;Parent=p'))
    eq_('Gene1', feature.ID)
    eq_(['p'], feature.parents)

    feature = Feature.from_string(record('ID=Gene1;Parent=p,q'))
    eq_(['p', 'q'], feature.parents)

    feature = Feature.from_string(record(''))
    eq_('', feature.ID)
    eq_([], feature.parents)

示例#4

0

显示文件

def test_transcript_splice_junctions():
    """Check the junction mapping computed from the 'g.gff' fixture file."""
    gff = dummy('g.gff')
    parsed = Feature.from_file(gff.name)

    # Expected result: transcript ID -> list of integer junction values.
    wanted = {}
    wanted['Chr1_40.1'] = [220, 302]
    wanted['Chr1_364.1'] = [83]
    wanted['Chr1_366.1'] = [90, 196, 326, 535]

    eq_(wanted, transcript_splice_junctions(parsed))

示例#5

0

显示文件

文件： overlap.py 项目： abuchanan/odetta

    return (end - start + 1) / b.length


def overlap(db, feature):
    """Record on `feature` the IDs of database entries that overlap it.

    Reads the module-level `args` for all thresholds. Nothing is recorded
    unless the number of entries returned by `db.overlaps(feature)` lies
    within [args.min_overlap_count, args.max_overlap_count]. Entries whose
    `calc_overlap` amount lies within [args.min_overlap, args.max_overlap]
    are kept; if any remain, their IDs are stored comma-joined under
    `feature.attributes['overlaps']`.

    Returns None; the only effect is the possible attribute assignment.
    """
    candidates = db.overlaps(feature)

    # Guard clause: bail out when the hit count is outside the allowed range.
    count = len(candidates)
    if count < args.min_overlap_count or count > args.max_overlap_count:
        return

    accepted_ids = [
        hit.ID
        for hit in candidates
        if args.min_overlap <= calc_overlap(hit, feature) <= args.max_overlap
    ]

    # Leave the attributes untouched when no hit passed the amount filter.
    if accepted_ids:
        feature.attributes['overlaps'] = ','.join(accepted_ids)


if __name__ == '__main__':
    # NOTE: `args` must stay a module-level name; overlap() reads it as a
    # global when applying its thresholds.
    args = parser.parse_args()

    # Index the reference features so each transcript can be queried for
    # overlapping entries.
    db = PositionDatabase(Feature.from_file(args.reference))
    chromosomes, genes, transcripts = build_tree(Feature.from_file(args.gff))

    # Annotate every transcript in place with its accepted overlaps.
    for t in transcripts.values():
        overlap(db, t)

    flat = flatten_tree(chromosomes)
    # Parenthesised single-argument form: prints the same text under
    # Python 2 and is valid syntax under Python 3, unlike the bare print
    # statement.
    print('\n'.join([str(f) for f in flat]))

示例#6

0

显示文件

文件： reference_counts_base.py 项目： abuchanan/odetta

 def mapper_init(self):
     """Load the reference features and cache their splice junctions.

     Reads the file named by `self.options.reference` via
     `Feature.from_file` and stores the result of
     `transcript_splice_junctions` on `self.junctions`.
     NOTE(review): presumably a framework hook run before mapping starts;
     confirm against the enclosing job class.
     """
     self.junctions = transcript_splice_junctions(
         Feature.from_file(self.options.reference))

示例#7

0

显示文件

    return (end - start + 1) / b.length


def overlap(db, feature):
    """Annotate `feature` with the IDs of acceptable overlapping entries.

    Thresholds come from the module-level `args`:
      * the number of entries returned by `db.overlaps(feature)` must be
        between args.min_overlap_count and args.max_overlap_count
        (inclusive), otherwise nothing is done;
      * each entry's `calc_overlap` amount must be between args.min_overlap
        and args.max_overlap (inclusive) for its ID to be kept.

    When at least one ID is kept, the IDs are joined with commas and stored
    in `feature.attributes['overlaps']`. Returns None.
    """
    found = db.overlaps(feature)
    total = len(found)

    # Skip the feature when it has too few or too many overlaps.
    if not (args.min_overlap_count <= total <= args.max_overlap_count):
        return

    kept = []
    for other in found:
        amount = calc_overlap(other, feature)
        if args.min_overlap <= amount <= args.max_overlap:
            kept.append(other.ID)

    # Only write the attribute when something survived the filter.
    if kept:
        feature.attributes['overlaps'] = ','.join(kept)


if __name__ == '__main__':
    # `args` is deliberately left at module scope: overlap() looks it up as
    # a global for its count and amount thresholds.
    args = parser.parse_args()

    # Build the position index from the reference file and the feature tree
    # from the input GFF file.
    db = PositionDatabase(Feature.from_file(args.reference))
    chromosomes, genes, transcripts = build_tree(Feature.from_file(args.gff))

    # overlap() mutates each transcript's attributes in place.
    for t in transcripts.values():
        overlap(db, t)

    flat = flatten_tree(chromosomes)
    # Use the call form of print so the script parses on Python 3 as well;
    # with a single argument the output is unchanged on Python 2.
    print('\n'.join([str(f) for f in flat]))

示例#8

0

显示文件

 def mapper_init(self):
     """Prepare `self.junctions` from the configured reference file.

     Parses `self.options.reference` with `Feature.from_file` and keeps
     the output of `transcript_splice_junctions` on the instance.
     NOTE(review): looks like a per-mapper setup hook; verify how and when
     the surrounding class invokes it.
     """
     reference_path = self.options.reference
     self.junctions = transcript_splice_junctions(
         Feature.from_file(reference_path))

示例#9

0

显示文件

            hits = sum(counts[feature.ID].values())
        except KeyError:
            hits = 0

        # coverage is RPKM, reads per kilobase of reference per million mapped reads
        # http://www.clcbio.com/manual/genomics/Definition_RPKM.html
        try:
            return (math.pow(10, 9) * hits) / (self.total * feature.length)
        except ZeroDivisionError:
            return 0


if __name__ == '__main__':
    args = parser.parse_args()

    chromosomes, genes, transcripts = build_tree(Feature.from_file(args.gff))

    # TODO would be nice to split filters out into predicate functions
    for transcript in transcripts.values():

        exons = len([x for x in transcript.children if x.type == 'exon'])

        if args.counts:
            counts = Counts.from_file(args.counts)
            coverage = counts.coverage(transcript)
        else:
            coverage = 0

        if transcript.length < args.min_length or transcript.length > args.max_length \
        or exons < args.min_exons or exons > args.max_exons \
        or coverage < args.min_coverage or coverage > args.max_coverage:

示例#10

0

显示文件

文件： filter.py 项目： abuchanan/odetta

            hits = sum(counts[feature.ID].values())
        except KeyError:
            hits = 0
        
        # coverage is RPKM, reads per kilobase of reference per million mapped reads
        # http://www.clcbio.com/manual/genomics/Definition_RPKM.html
        try:
            return (math.pow(10, 9) * hits) / (self.total * feature.length)
        except ZeroDivisionError:
            return 0
                    
    
if __name__ == '__main__':
    args = parser.parse_args()

    chromosomes, genes, transcripts = build_tree(Feature.from_file(args.gff))

    # TODO would be nice to split filters out into predicate functions
    for transcript in transcripts.values():

        exons = len([x for x in transcript.children if x.type == 'exon'])

        if args.counts:
            counts = Counts.from_file(args.counts)
            coverage = counts.coverage(transcript)
        else:
            coverage = 0

        if transcript.length < args.min_length or transcript.length > args.max_length \
        or exons < args.min_exons or exons > args.max_exons \
        or coverage < args.min_coverage or coverage > args.max_coverage: