示例#1
0
 def from_gtf(f):
     '''
     Build a Transfrag from a parsed GTF.Feature object.

     The transcript id attribute is required (a missing key raises).
     Sample id, expression and the reference flag are optional and
     fall back to None, 0.0 and '0' (i.e. False) respectively. The
     Transfrag is constructed with exons=None.
     '''
     chrom = f.seqid
     strand = Strand.from_gtf(f.strand)
     attrs = f.attrs
     transcript_id = attrs[GTF.Attr.TRANSCRIPT_ID]
     sample_id = attrs.get(GTF.Attr.SAMPLE_ID, None)
     expr = float(attrs.get(GTF.Attr.EXPR, 0.0))
     # the reference flag is parsed as an integer before becoming a bool
     ref_flag = int(attrs.get(GTF.Attr.REF, '0'))
     return Transfrag(chrom=chrom,
                      strand=strand,
                      _id=transcript_id,
                      sample_id=sample_id,
                      expr=expr,
                      is_ref=bool(ref_flag),
                      exons=None)
示例#2
0
 def from_gtf(f):
     '''
     Convert a GTF.Feature object into a Transfrag.

     Reads the chromosome and strand from the feature itself and the
     transcript id, sample id, expression and reference flag from its
     attributes. Only the transcript id is mandatory; the others
     default to None, 0.0 and '0' (False). No exons are supplied to
     the constructor (exons=None).
     '''
     chrom = f.seqid
     strand = Strand.from_gtf(f.strand)
     attrs = f.attrs
     kwargs = {
         'chrom': chrom,
         'strand': strand,
         '_id': attrs[GTF.Attr.TRANSCRIPT_ID],
         'sample_id': attrs.get(GTF.Attr.SAMPLE_ID, None),
         'expr': float(attrs.get(GTF.Attr.EXPR, 0.0)),
         # flag value goes through int() first, then bool()
         'is_ref': bool(int(attrs.get(GTF.Attr.REF, '0'))),
         'exons': None,
     }
     return Transfrag(**kwargs)
示例#3
0
文件: aggregate.py 项目: tacorna/taco
def parse_gtf(gtf_iter, sample_id, gtf_expr_attr, is_ref):
    '''
    Parse GTF lines into Transfrag objects.

    Empty or whitespace-only lines and lines starting with "#" are
    skipped. Each 'transcript' feature creates a Transfrag whose id is
    rewritten to "<sample_id>.<n>", where n counts transcripts from 1 in
    input order. Each 'exon' feature is appended to the Transfrag of the
    transcript it names. Other feature types are ignored.

    gtf_iter: iterable of GTF-formatted text lines
    sample_id: prefix used when renaming transcript ids
    gtf_expr_attr: name of the GTF attribute holding the expression
        value (not consulted when is_ref is true)
    is_ref: passed through to each Transfrag; when true the expression
        is fixed at 0.0 and nothing is added to the total

    returns a tuple (transfrags, total_expr): transfrags is a list of
    Transfrag objects in order of first appearance, total_expr is the
    sum of the parsed expression values

    raises GTFError on a duplicated transcript id, on a missing
    expression attribute (only when is_ref is false), or on an exon
    feature that appears before its transcript feature
    '''
    # keyed by the ORIGINAL transcript id so exon lines can find their
    # parent; insertion order is preserved for the returned list
    t_dict = collections.OrderedDict()
    total_expr = 0.0
    cur_t_id = 1
    for gtf_line in gtf_iter:
        # skip empty / whitespace-only lines and comment lines
        if not gtf_line or not gtf_line.strip() or gtf_line.startswith("#"):
            continue
        f = GTF.Feature.from_str(gtf_line)
        if f.feature == 'transcript':
            t_id = f.attrs[GTF.Attr.TRANSCRIPT_ID]
            if t_id in t_dict:
                raise GTFError("Transcript '%s' duplicate detected" % t_id)
            # rename transcript id
            new_t_id = "%s.%d" % (sample_id, cur_t_id)
            cur_t_id += 1
            # parse expression
            if is_ref:
                expr = 0.0
            else:
                if gtf_expr_attr not in f.attrs:
                    raise GTFError("GTF expression attribute '%s' not found" %
                                   (gtf_expr_attr))
                expr = float(f.attrs[gtf_expr_attr])
                total_expr += expr
            # create transfrag (expr is already a float on both branches)
            t_dict[t_id] = Transfrag(chrom=f.seqid,
                                     strand=Strand.from_gtf(f.strand),
                                     _id=new_t_id,
                                     expr=expr,
                                     is_ref=is_ref,
                                     exons=None)
        elif f.feature == 'exon':
            t_id = f.attrs[GTF.Attr.TRANSCRIPT_ID]
            if t_id not in t_dict:
                logging.error('Feature: "%s"', f)
                raise GTFError("Transcript '%s' exon feature appeared in "
                               "gtf file prior to transcript feature" %
                               t_id)
            # assumes Transfrag turns exons=None into a fresh list --
            # TODO confirm against the Transfrag constructor
            t_dict[t_id].exons.append(Exon(f.start, f.end))
    # materialize so callers get a real list on Python 3 as well
    return list(t_dict.values()), total_expr
示例#4
0
文件: aggregate.py 项目: tacorna/taco
def parse_gtf(gtf_iter, sample_id, gtf_expr_attr, is_ref):
    '''
    Parse GTF lines into Transfrag objects.

    Empty or whitespace-only lines and lines starting with "#" are
    skipped. Each 'transcript' feature creates a Transfrag whose id is
    rewritten to "<sample_id>.<n>", where n counts transcripts from 1 in
    input order. Each 'exon' feature is appended to the Transfrag of the
    transcript it names. Other feature types are ignored.

    gtf_iter: iterable of GTF-formatted text lines
    sample_id: prefix used when renaming transcript ids
    gtf_expr_attr: name of the GTF attribute holding the expression
        value (not consulted when is_ref is true)
    is_ref: passed through to each Transfrag; when true the expression
        is fixed at 0.0 and nothing is added to the total

    returns a tuple (transfrags, total_expr): transfrags is a list of
    Transfrag objects in order of first appearance, total_expr is the
    sum of the parsed expression values

    raises GTFError on a duplicated transcript id, on a missing
    expression attribute (only when is_ref is false), or on an exon
    feature that appears before its transcript feature
    '''
    # keyed by the ORIGINAL transcript id so exon lines can find their
    # parent; insertion order is preserved for the returned list
    t_dict = collections.OrderedDict()
    total_expr = 0.0
    cur_t_id = 1
    for gtf_line in gtf_iter:
        # skip empty / whitespace-only lines and comment lines
        if not gtf_line or not gtf_line.strip() or gtf_line.startswith("#"):
            continue
        f = GTF.Feature.from_str(gtf_line)
        if f.feature == 'transcript':
            t_id = f.attrs[GTF.Attr.TRANSCRIPT_ID]
            if t_id in t_dict:
                raise GTFError("Transcript '%s' duplicate detected" % t_id)
            # rename transcript id
            new_t_id = "%s.%d" % (sample_id, cur_t_id)
            cur_t_id += 1
            # parse expression
            if is_ref:
                expr = 0.0
            else:
                if gtf_expr_attr not in f.attrs:
                    raise GTFError("GTF expression attribute '%s' not found" %
                                   (gtf_expr_attr))
                expr = float(f.attrs[gtf_expr_attr])
                total_expr += expr
            # create transfrag (expr is already a float on both branches)
            t_dict[t_id] = Transfrag(chrom=f.seqid,
                                     strand=Strand.from_gtf(f.strand),
                                     _id=new_t_id,
                                     expr=expr,
                                     is_ref=is_ref,
                                     exons=None)
        elif f.feature == 'exon':
            t_id = f.attrs[GTF.Attr.TRANSCRIPT_ID]
            if t_id not in t_dict:
                logging.error('Feature: "%s"', f)
                raise GTFError("Transcript '%s' exon feature appeared in "
                               "gtf file prior to transcript feature" % t_id)
            # assumes Transfrag turns exons=None into a fresh list --
            # TODO confirm against the Transfrag constructor
            t_dict[t_id].exons.append(Exon(f.start, f.end))
    # materialize so callers get a real list on Python 3 as well
    return list(t_dict.values()), total_expr