def test_valid_line_count():
    """Check how many items ``_FileIterator`` yields for known example files."""
    # (example file name, number of items the iterator is expected to yield)
    expected_counts = (
        ('ncbi_gff3.txt', 17),
        ('hybrid1.gff3', 6),
        ('FBgn0031208.gff', 27),
    )
    for name, count in expected_counts:
        items = list(iterators._FileIterator(example_filename(name)))
        assert len(items) == count
def parser_smoke_test():
    """
    Smoke test: exhaust a ``_FileIterator`` for every entry in TEST_FILENAMES.

    Nothing is asserted about the yielded items; the check is simply that
    iterating each file to the end does not raise.

    NOTE(review): the name does not start with ``test_``, so a runner using
    default pytest discovery would likely skip it -- confirm the collection
    rules used by this project.
    """
    import logging

    # Raise the parser logger threshold so warnings are not shown during tests.
    parser.logger.setLevel(logging.CRITICAL)

    for path in TEST_FILENAMES:
        for _item in iterators._FileIterator(path):
            pass
def test_parser_from_string():
    """
    Make sure parsing from a string and from a file return identical results.

    The same single GFF line is fed to ``_StringIterator`` directly and to
    ``_FileIterator`` through a temporary file; both must yield exactly one
    item and the two items must compare equal.
    """
    # Single source of truth for the line so the string and the file contents
    # cannot drift apart.
    text = "chr2L FlyBase exon 7529 8116 . + . Name=CG11023:1;Parent=FBtr0300689,FBtr0300690"

    # The context manager guarantees the temporary file is closed (and
    # removed) even when an assertion below fails.
    with tempfile.NamedTemporaryFile() as tmp:
        tmp.write(text.encode("ascii"))
        # Flush explicitly so the bytes are on disk before the file is
        # re-opened by name.
        tmp.flush()

        p1 = iterators._StringIterator(text)
        p2 = iterators._FileIterator(tmp.name)
        lines = list(zip(p1, p2))

        assert len(lines) == 1
        assert p1.current_item_number == p2.current_item_number == 0
        assert lines[0][0] == lines[0][1]
def test_inspect():
    """
    Check ``inspect.inspect`` summaries of the FBgn0031208.gff example.

    First the whole file is inspected, both directly by filename and through
    an in-memory database built from it.  Then the inspection is repeated
    with ``limit=10`` and an explicit ``look_for`` list, additionally feeding
    a ``_FileIterator``; all sources must produce the same summary.

    NOTE(review): this file defines ``test_inspect`` more than once; a later
    definition shadows this one -- confirm which should be kept.
    """
    gff_path = gffutils.example_filename('FBgn0031208.gff')

    # --- Full inspection: file vs. in-memory database -----------------------
    file_results = inspect.inspect(gff_path, verbose=False)
    db_results = inspect.inspect(
        gffutils.create_db(gff_path, ':memory:'),
        verbose=False,
    )

    expected = {
        'feature_count': 27,
        'chrom': {'chr2L': 27},
        'featuretype': {
            'gene': 3,
            'mRNA': 4,
            'exon': 6,
            'intron': 3,
            'CDS': 5,
            'five_prime_UTR': 1,
            'three_prime_UTR': 2,
            'protein': 2,
            'pcr_product': 1,
        },
        'attribute_keys': {
            '': 3,
            'ID': 25,
            'Name': 19,
            'Parent': 20,
            ' Parent': 1,
            'Dbxref': 6,
            'Derives_from': 2,
            'Ontology_term': 1,
            'gbunit': 1,
            'score': 2,
            'score_text': 2,
            'derived_computed_cyto': 1,
            'derived_molecular_weight': 2,
            'derived_isoelectric_point': 2,
        },
    }
    assert file_results == db_results
    assert db_results == expected

    # --- Limited inspection: file vs. iterator vs. database -----------------
    kwargs = {
        'look_for': ['chrom', 'strand', 'attribute_keys', 'featuretype'],
        'verbose': False,
        'limit': 10,
    }
    file_results = inspect.inspect(gff_path, **kwargs)
    iter_results = inspect.inspect(
        iter(iterators._FileIterator(gff_path)),
        **kwargs
    )
    db_results = inspect.inspect(
        gffutils.create_db(gff_path, ':memory:'),
        **kwargs
    )

    expected = {
        'feature_count': 10,
        'chrom': {'chr2L': 10},
        'strand': {'+': 10},
        'featuretype': {
            'gene': 1,
            'mRNA': 2,
            'exon': 3,
            'intron': 2,
            'CDS': 1,
            'five_prime_UTR': 1,
        },
        'attribute_keys': {
            'ID': 8,
            'Name': 9,
            'Parent': 9,
            'Dbxref': 3,
            'Ontology_term': 1,
            'gbunit': 1,
            'score': 2,
            'score_text': 2,
            'derived_computed_cyto': 1,
        },
    }
    assert file_results == db_results
    assert db_results == iter_results
    assert iter_results == expected
def test_inspect():
    """
    Verify that ``inspect.inspect`` reports the same summary for every source.

    The FBgn0031208.gff example is inspected by filename and via an in-memory
    database, once in full and once with ``limit=10`` plus an explicit
    ``look_for`` list; the limited pass also inspects a ``_FileIterator``.
    Each group of results must equal the hard-coded expected summary.

    NOTE(review): ``test_inspect`` is defined more than once in this file and
    this definition shadows an earlier one -- confirm the duplication is
    intended.
    """
    example = 'FBgn0031208.gff'

    def in_memory_db():
        # Build a new in-memory database from the example file on every call.
        return gffutils.create_db(gffutils.example_filename(example), ':memory:')

    # Pass 1: inspect everything.
    from_file = inspect.inspect(gffutils.example_filename(example), verbose=False)
    from_db = inspect.inspect(in_memory_db(), verbose=False)

    featuretype_counts = {
        'intron': 3,
        'five_prime_UTR': 1,
        'exon': 6,
        'mRNA': 4,
        'CDS': 5,
        'pcr_product': 1,
        'three_prime_UTR': 2,
        'protein': 2,
        'gene': 3,
    }
    attribute_key_counts = {
        u'': 3,
        'Dbxref': 6,
        'Name': 19,
        'Parent': 20,
        ' Parent': 1,
        'score_text': 2,
        'gbunit': 1,
        'derived_computed_cyto': 1,
        'Derives_from': 2,
        'derived_molecular_weight': 2,
        'score': 2,
        'ID': 25,
        'derived_isoelectric_point': 2,
        'Ontology_term': 1,
    }
    expected_full = {
        'featuretype': featuretype_counts,
        'feature_count': 27,
        'chrom': {'chr2L': 27},
        'attribute_keys': attribute_key_counts,
    }
    assert from_file == from_db == expected_full

    # Pass 2: restrict the fields looked at and cap the run with limit=10.
    options = dict(
        look_for=['chrom', 'strand', 'attribute_keys', 'featuretype'],
        verbose=False,
        limit=10,
    )
    from_file = inspect.inspect(gffutils.example_filename(example), **options)
    from_iter = inspect.inspect(
        iter(iterators._FileIterator(gffutils.example_filename(example))),
        **options
    )
    from_db = inspect.inspect(in_memory_db(), **options)

    expected_limited = {
        'attribute_keys': {
            u'Name': 9,
            u'Parent': 9,
            u'score_text': 2,
            u'gbunit': 1,
            u'derived_computed_cyto': 1,
            u'score': 2,
            u'Dbxref': 3,
            u'ID': 8,
            u'Ontology_term': 1,
        },
        'feature_count': 10,
        'chrom': {u'chr2L': 10},
        'strand': {u'+': 10},
        'featuretype': {
            u'five_prime_UTR': 1,
            u'exon': 3,
            u'mRNA': 2,
            u'CDS': 1,
            'intron': 2,
            u'gene': 1,
        },
    }
    assert from_file == from_db == from_iter == expected_limited