def t_gff2_iteration(self):
    """Iterate over a GFF2 file in chunks, breaking without parent features.

    Requests features from ``self._wormbase_file`` with ``target_lines=15``
    and expects the iterator to yield exactly three record dictionaries.
    """
    parser = GFFAddingIterator()
    # Drain the iterator up front; only the number of chunks is checked.
    chunks = list(parser.get_features(self._wormbase_file, target_lines=15))
    assert len(chunks) == 3
def t_solid_iterator(self):
    """Chunked parsing of a flat file that has no nested features.

    Reads ``self._test_gff_file`` with ``target_lines=5`` and records, for
    every yielded record dictionary, the number of features on each record.
    The largest per-chunk feature total must be 5 and there must be 26
    chunks overall.
    """
    parser = GFFAddingIterator()
    # One inner list per yielded chunk: feature counts of each record in it.
    per_chunk_counts = [
        [len(rec.features) for rec in chunk.values()]
        for chunk in parser.get_features(self._test_gff_file, target_lines=5)
    ]
    largest_chunk = max(sum(counts) for counts in per_chunk_counts)
    assert largest_chunk == 5
    n_chunks = len(per_chunk_counts)
    assert n_chunks == 26, n_chunks
def t_gff3_iterator(self):
    """Chunked parsing of a GFF3 file that contains nested features.

    Reads ``self._test_gff_file`` with ``target_lines=70`` and records the
    per-record feature counts of every yielded record dictionary. Exactly
    one chunk is expected, and the first count in it must be 59.
    """
    parser = GFFAddingIterator()
    per_chunk_counts = [
        [len(rec.features) for rec in chunk.values()]
        for chunk in parser.get_features(self._test_gff_file, target_lines=70)
    ]
    # should be one big set because we don't have a good place to split
    assert len(per_chunk_counts) == 1
    first_chunk = per_chunk_counts[0]
    assert first_chunk[0] == 59
def t_gff3_iterator_limit(self):
    """Iterated interface on a GFF3 file combined with a limit query.

    Passes a ``limit_info`` restricting ``gff_source_type`` to the
    Coding_transcript gene/mRNA/CDS pairs and ``gff_id`` to ``'I'``.
    Expects a single record dictionary, and checks that every sub-feature
    under the first sub-feature of record "I"'s first feature has type
    "CDS".
    """
    parser = GFFAddingIterator()
    limit_query = {
        "gff_source_type": [
            ('Coding_transcript', 'gene'),
            ('Coding_transcript', 'mRNA'),
            ('Coding_transcript', 'CDS'),
        ],
        "gff_id": ['I'],
    }
    chunks = list(parser.get_features(self._test_gff_file,
                                      limit_info=limit_query))
    assert len(chunks) == 1
    # Walk down two levels: top-level feature -> its first sub-feature.
    top_feature = chunks[0]["I"].features[0]
    nested_feature = top_feature.sub_features[0]
    for child in nested_feature.sub_features:
        assert child.type == "CDS", child