def gff_to_bed(gff_file, bed_fh=sys.stdout, cds=True):
    """Write GFF features as tab-separated BED-like lines.

    With cds=True emit only gene features that carry mRNA or CDS
    sub-features; with cds=False emit the remaining features.
    Chromosome and protein features are always skipped.
    """
    parser = GFFParser()
    for rec in parser.parse(gff_file, None):
        for feature in rec.features:
            children = feature.sub_features
            if feature.type in ("chromosome", "protein"):
                continue
            # A feature counts as coding when it is a gene with at least
            # one mRNA or CDS child.
            coding = (feature.type == "gene" and
                      any(child.type in ("mRNA", "CDS") for child in children))
            if cds == coding:
                fields = (rec.id, feature.location.start,
                          feature.location.end, feature.id, feature.type)
                print >>bed_fh, "\t".join(str(x) for x in fields)
def t_flat_features(self):
    """Check addition of flat non-nested features to multiple records.
    """
    seq_dict = self._get_seq_dict()
    # Restrict parsing to the three PCR_product sources.
    limit = dict(gff_source_type=[
        ('Orfeome', 'PCR_product'),
        ('GenePair_STS', 'PCR_product'),
        ('Promoterome', 'PCR_product'),
    ])
    parser = GFFParser()
    recs = SeqIO.to_dict(parser.parse(self._test_gff_file, seq_dict,
                                      limit_info=limit))
    assert len(recs['I'].features) == 4
    assert len(recs['X'].features) == 5
def t_gff3_iterator_limit(self):
    """Iterated interface using a limit query on GFF3 files.
    """
    # Only Coding_transcript gene/mRNA/CDS lines on chromosome I.
    limit = dict(
        gff_source_type=[('Coding_transcript', 'gene'),
                         ('Coding_transcript', 'mRNA'),
                         ('Coding_transcript', 'CDS')],
        gff_id=['I'])
    parser = GFFParser()
    recs = SeqIO.to_dict(parser.parse(self._test_gff_file, limit_info=limit))
    assert len(recs) == 1
    transcript = recs["I"].features[0].sub_features[0]
    for child in transcript.sub_features:
        assert child.type == "CDS", child
def t_basic_solid_parse(self):
    """Basic parsing of SOLiD GFF results files.
    """
    recs = SeqIO.to_dict(GFFParser().parse(self._test_gff_file))
    feature = recs['3_341_424_F3'].features[0]
    quals = feature.qualifiers
    assert feature.location.nofuzzy_start == 102716
    assert feature.location.nofuzzy_end == 102736
    assert len(quals) == 7
    assert quals['score'] == ['10.6']
    assert quals['source'] == ['solid']
    assert feature.strand == -1
    assert feature.type == 'read'
    assert quals['g'] == ['T2203031313223113212']
    assert len(quals['q']) == 20
def main(seq_file, gff_file): # -- To be customized # You need to update these parameters to point to your local database # XXX demo example could be swapped to use SQLite when that is integrated db_name = "orphan.db" biodb_name = 'metagenomic_database' print "Parsing FASTA sequence file..." with open(seq_file) as seq_handle: seq_dict = SeqIO.to_dict(SeqIO.parse(seq_handle, "fasta")) print "Parsing GFF data file..." parser = GFFParser() recs = parser.parse(gff_file, seq_dict )#, limit_info=limit_info) for r in recs: print r.features[0]
def t_line_adjust(self):
    """Adjust lines during parsing to fix potential GFF problems.
    """
    def adjust_fn(results):
        # Promote the 'i' qualifier to record id, preserving the old
        # record id as a read_name qualifier.
        index = results['quals']['i'][0]
        results['quals']['read_name'] = [results['rec_id']]
        results['rec_id'] = index
        return results
    parser = GFFParser(line_adjust_fn=adjust_fn)
    recs = list(parser.parse(self._test_gff_file))
    assert len(recs) == 1
    rec = recs[0]
    assert rec.id == '1'
    assert len(rec.features) == 112
    assert rec.features[0].qualifiers['read_name'] == ['3_336_815_F3']
def main(seq_file, gff_file):
    """Parse FASTA sequences plus GFF annotations and load them into a
    BioSQL (MySQL) database, recreating the target sub-database.
    """
    # -- To be customized
    # You need to update these parameters to point to your local database
    # XXX demo example could be swapped to use SQLite when that is integrated
    user = "******"
    passwd = "cdev"
    host = "localhost"
    db_name = "wb199_gff"
    biodb_name = "wb199_gff_cds_pcr"
    # These need to be updated to reflect what you would like to parse
    # out of the GFF file. Set limit_info=None to parse everything, but
    # be sure the file is small or you may deal with memory issues.
    rnai_types = [('Orfeome', 'PCR_product'),
                  ('GenePair_STS', 'PCR_product'),
                  ('Promoterome', 'PCR_product')]
    gene_types = [('Non_coding_transcript', 'gene'),
                  ('Coding_transcript', 'gene'),
                  ('Coding_transcript', 'mRNA'),
                  ('Coding_transcript', 'CDS')]
    limit_info = dict(gff_source_type=rnai_types + gene_types)
    # --
    print "Parsing FASTA sequence file..."
    with open(seq_file) as seq_handle:
        seq_dict = SeqIO.to_dict(SeqIO.parse(seq_handle, "fasta"))
    print "Parsing GFF data file..."
    parser = GFFParser()
    # Annotations are attached to the sequences from the FASTA file.
    recs = parser.parse(gff_file, seq_dict, limit_info=limit_info)
    print "Writing to BioSQL database..."
    server = BioSeqDatabase.open_database(driver="MySQLdb", user=user,
            passwd=passwd, host=host, db=db_name)
    try:
        # Start from a clean sub-database: create it if missing,
        # otherwise drop and recreate it before loading.
        if biodb_name not in server.keys():
            server.new_database(biodb_name)
        else:
            server.remove_database(biodb_name)
            server.adaptor.commit()
            server.new_database(biodb_name)
        db = server[biodb_name]
        db.load(recs)
        server.adaptor.commit()
    except:
        # Undo any partial load, then propagate the original error.
        server.adaptor.rollback()
        raise
def t_nested_features(self):
    """Check three-deep nesting of features with gene, mRNA and CDS.
    """
    seq_dict = self._get_seq_dict()
    limit = dict(
        gff_source_type=[('Coding_transcript', 'gene'),
                         ('Coding_transcript', 'mRNA'),
                         ('Coding_transcript', 'CDS')],
        gff_id=['I'])
    recs = SeqIO.to_dict(GFFParser().parse(self._test_gff_file, seq_dict,
                                           limit_info=limit))
    rec = recs['I']
    # first gene feature is plain
    assert len(rec.features) == 2  # two gene features
    assert len(rec.features[0].sub_features) == 1  # one transcript
    # 15 final CDS regions
    assert len(rec.features[0].sub_features[0].sub_features) == 15
def main(gff_file, th_fasta):
    """Write FASTA entries for condensed transcripts with no coding children.

    Features whose sub-features include a CDS, gene or protein entry are
    skipped; the remaining features are sliced out of th_fasta by their
    location and written to the output file.
    """
    parser = GFFParser()
    seqids = parser.parse(gff_file, None)
    fasta = Fasta(th_fasta, flatten_inplace=True)
    out_fasta = open('this_is_a_test', 'w')
    try:
        for seqid in seqids:
            for feat in condens_transcript(seqid.features):
                # BUG FIX: the original tested feat.type inside this loop,
                # a loop-invariant that never inspected the sub-features;
                # test each child's type as clearly intended (the sibling
                # script that filters on subf.type confirms the intent).
                has_cds = any(str(subf.type) == 'CDS' or
                              subf.type in ('gene', 'protein')
                              for subf in feat.sub_features)
                if has_cds:
                    continue
                print >>out_fasta, '>%s' % feat.id
                seq = fasta[seqid.id.lower()]
                print >>out_fasta, seq[int(feat.location.start):int(feat.location.end)]
    finally:
        # FIX: close the output handle instead of leaking it.
        out_fasta.close()
def t_nested_multiparent_features(self):
    """Verify correct nesting of features with multiple parents.
    """
    seq_dict = self._get_seq_dict()
    limit = dict(
        gff_source_type=[('Coding_transcript', 'gene'),
                         ('Coding_transcript', 'mRNA'),
                         ('Coding_transcript', 'CDS')],
        gff_id=['I'])
    parser = GFFParser()
    recs = SeqIO.to_dict(parser.parse(self._test_gff_file, seq_dict,
                                      limit_info=limit))
    rec = recs['I']
    # second gene feature is multi-parent
    assert len(rec.features) == 2  # two gene features
    transcripts = rec.features[1].sub_features
    assert len(transcripts) == 3  # three transcripts
    # the first and second transcript have the same CDSs
    assert len(transcripts[0].sub_features) == 6
    assert len(transcripts[1].sub_features) == 6
    assert transcripts[0].sub_features[0] is transcripts[1].sub_features[0]
def main(gff_file, outdir, th_fasta):
    """Extract condensed transcripts that contain non-coding RNA children.

    Features with at least one tRNA/rRNA/miRNA/snoRNA/ncRNA/snRNA
    sub-feature are sliced out of th_fasta by location and written to
    <outdir>/at_no_cds.fasta; all other features are skipped.
    """
    # Hoisted out of the loop: the set of non-coding RNA feature types.
    non_coding = set(['tRNA', 'rRNA', 'miRNA', 'snoRNA', 'ncRNA', 'snRNA'])
    parser = GFFParser()
    seqids = parser.parse(gff_file, None)
    fasta = Fasta(th_fasta, flatten_inplace=True)
    # FIX: the original leaked the output handle; close it deterministically.
    with open(outdir + "/at_no_cds.fasta", "w") as out_fasta:
        for seqid in seqids:
            for feat in conden_transcripts(seqid.features):
                # Coding unless some child is a non-coding RNA type.
                has_cds = not any(subf.type in non_coding
                                  for subf in feat.sub_features)
                if has_cds:
                    continue
                print >> out_fasta, ">%s" % feat.id
                seq = fasta[seqid.id.lower()]
                print >> out_fasta, seq[int(feat.location.start):int(feat.location.end)]