Python GTFFeature.from_string примеры использования

Язык программирования: Python

Пространство имен/Пакет: gtf

Класс/Тип: GTFFeature

Метод/Функция: from_string

Примеров на hotexamples.com: 3

Python GTFFeature.from_string — найдено 3 примера. Это лучшие примеры кода на Python для gtf.GTFFeature.from_string, полученные из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

from_string(2)

GTFFeature(1)

attrs(1)

end(1)

feature_type(1)

parse(1)

phase(1)

score(1)

seqid(1)

source(1)

start(1)

strand(1)

Пример #1

Показать файл

Файл: transcript.py Проект: BioXiao/assemblyline

def transcripts_from_gtf_lines(lines, attr_defs=None):
    """Group GTF feature lines into Transcript objects.

    Each line is parsed with ``GTFFeature.from_string(line, attr_defs)`` and
    assigned to a transcript by its "transcript_id" attribute.  The first
    record seen for a given id must have feature type "transcript"; it
    supplies the transcript's chrom, start, end, strand and attrs.  Every
    "exon" record adds an ``Exon(start, end)`` to its transcript.

    Args:
        lines: iterable of GTF-formatted strings.
        attr_defs: passed through unchanged to ``GTFFeature.from_string``.

    Returns:
        A list of Transcript objects in order of first appearance, each with
        its exon list sorted.

    Raises:
        GTFError: if the first record seen for a transcript id is not a
            "transcript" record.

    Note:
        "transcript_id" is read with plain indexing, so every record is
        expected to carry that attribute.
    """
    transcripts = collections.OrderedDict()
    for line in lines:
        feature = GTFFeature.from_string(line, attr_defs)
        t_id = feature.attrs["transcript_id"]
        if t_id not in transcripts:
            if feature.feature_type != "transcript":
                raise GTFError("Feature type '%s' found before 'transcript' record: %s" %
                               (feature.feature_type, str(feature)))
            t = Transcript()
            t.chrom = feature.seqid
            t.start = feature.start
            t.end = feature.end
            # convert the string strand notation to its integer form
            t.strand = strand_str_to_int(feature.strand)
            t.exons = []
            t.attrs = feature.attrs
            transcripts[t_id] = t
        else:
            t = transcripts[t_id]
        if feature.feature_type == "exon":
            t.exons.append(Exon(feature.start, feature.end))
    # sort transcript exons (ordering is whatever Exon defines; presumably
    # genomic position).  ``values()`` is used instead of the Python 2-only
    # ``itervalues()`` so the function runs on both Python 2 and Python 3.
    for t in transcripts.values():
        t.exons.sort()
    # materialize a real list so callers get the same type on Python 2 and 3
    return list(transcripts.values())

Пример #2

Показать файл

Файл: transcript.py Проект: bioShaun/omsTools

def to_formatted_gtf(lines, gtf_file, attr_defs=None):
    """Rebuild transcripts from exon records and write them out as GTF.

    Each line is parsed with ``GTFFeature.from_string(line, attr_defs)``.
    Records without a "transcript_id" attribute (e.g. gene records) and
    records whose feature type is not "exon" are ignored.  The remaining
    exon records are grouped by transcript id: the first exon creates the
    Transcript (chrom, start, end, strand, and a copy of the attributes
    whose name does not contain "exon", case-insensitively), and every later
    exon widens the transcript's start/end and is appended to its exon list.

    The features returned by each transcript's ``to_gtf_features()`` are
    then written to ``gtf_file``, one per line, in order of first appearance.

    Args:
        lines: iterable of GTF-formatted strings.
        gtf_file: path of the output file; it is opened in 'w' mode, so any
            existing content is replaced.
        attr_defs: passed through unchanged to ``GTFFeature.from_string``.

    Returns:
        None.
    """
    transcripts = collections.OrderedDict()
    for line in lines:
        feature = GTFFeature.from_string(line, attr_defs)
        # skip gene annotation in gtf files
        if "transcript_id" not in feature.attrs:
            continue
        # only exon records carry the information extracted here; without
        # this guard, CDS/start_codon/etc. records of an already-known
        # transcript would be appended to its exon list
        if feature.feature_type != "exon":
            continue
        t_id = feature.attrs["transcript_id"]
        if t_id not in transcripts:
            t = Transcript()
            t.chrom = feature.seqid
            t.start = feature.start
            t.end = feature.end
            t.strand = strand_str_to_int(feature.strand)
            t.exons = [Exon(feature.start, feature.end)]
            # keep transcript-level attributes only: drop exon-specific ones
            t.attrs = {
                name: feature.attrs[name]
                for name in feature.attrs
                if 'exon' not in name.lower()
            }
            transcripts[t_id] = t
        else:
            # look the transcript up by id; reusing the variable left over
            # from a previous iteration would update the wrong transcript
            # when records of different transcripts are interleaved
            t = transcripts[t_id]
            t.start = min(t.start, feature.start)
            t.end = max(t.end, feature.end)
            t.exons.append(Exon(feature.start, feature.end))
    with open(gtf_file, 'w') as gtf_output:
        for each_tr_obj in transcripts.values():
            for each_feature in each_tr_obj.to_gtf_features():
                gtf_output.write(
                    '{gtf_line}\n'.format(gtf_line=str(each_feature)))

Пример #3

Показать файл

def transcripts_from_gtf_lines(lines, attr_defs=None):
    """Build Transcript objects from an iterable of GTF feature lines.

    Every line is parsed with ``GTFFeature.from_string(line, attr_defs)`` and
    routed to a transcript via its "transcript_id" attribute.  A transcript
    is created from the first record carrying its id, which must be of
    feature type "transcript" and provides chrom, start, end, strand and
    attrs.  Each "exon" record appends an ``Exon(start, end)`` to the
    transcript it belongs to.

    Args:
        lines: iterable of GTF-formatted strings.
        attr_defs: forwarded as-is to ``GTFFeature.from_string``.

    Returns:
        A list of Transcript objects, ordered by first appearance in
        ``lines``, each with its exon list sorted.

    Raises:
        GTFError: if a record of any type other than "transcript" is the
            first one seen for its transcript id.

    Note:
        "transcript_id" is accessed by plain indexing, so records lacking
        that attribute are not tolerated.
    """
    transcripts = collections.OrderedDict()
    for line in lines:
        feature = GTFFeature.from_string(line, attr_defs)
        t_id = feature.attrs["transcript_id"]
        if t_id not in transcripts:
            if feature.feature_type != "transcript":
                raise GTFError(
                    "Feature type '%s' found before 'transcript' record: %s" %
                    (feature.feature_type, str(feature)))
            t = Transcript()
            t.chrom = feature.seqid
            t.start = feature.start
            t.end = feature.end
            # convert the string strand notation to its integer form
            t.strand = strand_str_to_int(feature.strand)
            t.exons = []
            t.attrs = feature.attrs
            transcripts[t_id] = t
        else:
            t = transcripts[t_id]
        if feature.feature_type == "exon":
            t.exons.append(Exon(feature.start, feature.end))
    # sort transcript exons (ordering is whatever Exon defines; presumably
    # genomic position).  ``itervalues()`` exists only on Python 2, so
    # ``values()`` is used to keep the function working on Python 3 as well.
    for t in transcripts.values():
        t.exons.sort()
    # return a concrete list on both Python 2 and Python 3
    return list(transcripts.values())