Пример #1
0
def test_valid_line_count():
    """Check how many items _FileIterator yields for each example file."""
    # (example file name, number of items the iterator must produce)
    expected_counts = (
        ('ncbi_gff3.txt', 17),
        ('hybrid1.gff3', 6),
        ('FBgn0031208.gff', 27),
    )
    for name, count in expected_counts:
        items = list(iterators._FileIterator(example_filename(name)))
        assert len(items) == count
Пример #2
0
def test_valid_line_count():
    """Iterate three example files and verify the item count of each."""
    first, second, third = 'ncbi_gff3.txt', 'hybrid1.gff3', 'FBgn0031208.gff'
    wanted = {first: 17, second: 6, third: 27}

    # Visit the files in a fixed order so a failure always points at the
    # same file first.
    for basename in (first, second, third):
        reader = iterators._FileIterator(example_filename(basename))
        assert len(list(reader)) == wanted[basename]
Пример #3
0
def parser_smoke_test():
    """
    Smoke test: every file in TEST_FILENAMES can be iterated to the end.
    """
    import logging

    # Raise the parser logger's threshold so warnings stay out of test output.
    parser.logger.setLevel(logging.CRITICAL)
    for path in TEST_FILENAMES:
        # The yielded items are irrelevant; only completing the loop matters.
        for _ in iterators._FileIterator(path):
            pass
Пример #4
0
def parser_smoke_test():
    """
    Walk each test file from start to finish; nothing is checked beyond
    the iteration itself completing.
    """
    # Keep warnings out of the test output.
    import logging
    quiet_level = logging.CRITICAL
    parser.logger.setLevel(quiet_level)

    for test_file in TEST_FILENAMES:
        file_iter = iterators._FileIterator(test_file)
        for _item in file_iter:
            # Items are deliberately discarded.
            pass
Пример #5
0
def test_parser_from_string():
    """
    make sure from string and from file return identical results

    The same single GFF line is given to ``_StringIterator`` directly and to
    ``_FileIterator`` through a temporary file; each must yield exactly one
    item and the two items must compare equal.
    """
    # Build the record once from its nine tab-separated columns so the
    # separators are explicit and both iterators see identical text.
    record = "\t".join([
        "chr2L", "FlyBase", "exon", "7529", "8116", ".", "+", ".",
        "Name=CG11023:1;Parent=FBtr0300689,FBtr0300690",
    ])

    # The context manager closes the temporary file even when an assertion
    # below fails, so the handle is never leaked.
    with tempfile.NamedTemporaryFile() as tmp:
        tmp.write(record.encode("ascii"))
        # Flush buffered bytes to disk before the file is reopened by name.
        tmp.flush()

        p1 = iterators._StringIterator(record)
        p2 = iterators._FileIterator(tmp.name)
        lines = list(zip(p1, p2))
        assert len(lines) == 1
        assert p1.current_item_number == p2.current_item_number == 0
        assert lines[0][0] == lines[0][1]
Пример #6
0
def test_parser_from_string():
    """
    make sure from string and from file return identical results

    One GFF line is parsed twice, once from an in-memory string and once
    from a temporary file holding the same bytes; both iterators must
    produce a single, equal item.
    """
    # Single source of truth for the line: nine columns joined by tabs.
    columns = [
        "chr2L", "FlyBase", "exon", "7529", "8116", ".", "+", ".",
        "Name=CG11023:1;Parent=FBtr0300689,FBtr0300690",
    ]
    text = "\t".join(columns)

    # ``with`` ensures the temporary file is closed on every exit path
    # instead of lingering until garbage collection.
    with tempfile.NamedTemporaryFile() as tmp:
        tmp.write(text.encode("ascii"))
        # The file is read back through its name, so the write buffer must
        # reach disk first.
        tmp.flush()

        p1 = iterators._StringIterator(text)
        p2 = iterators._FileIterator(tmp.name)
        lines = list(zip(p1, p2))
        assert len(lines) == 1
        assert p1.current_item_number == p2.current_item_number == 0
        assert lines[0][0] == lines[0][1]
Пример #7
0
def test_inspect():
    """
    Check that inspect.inspect reports the same summary for a GFF file and
    for a database created from it, first over the whole file and then with
    an explicit ``look_for`` list and ``limit=10`` (where a plain iterator
    over the file is checked as well).
    """
    gff_path = gffutils.example_filename('FBgn0031208.gff')

    # --- whole file, default settings --------------------------------------
    from_file = inspect.inspect(gff_path, verbose=False)
    from_db = inspect.inspect(
        gffutils.create_db(gff_path, ':memory:'),
        verbose=False,
    )

    full_featuretypes = {
        'intron': 3,
        'five_prime_UTR': 1,
        'exon': 6,
        'mRNA': 4,
        'CDS': 5,
        'pcr_product': 1,
        'three_prime_UTR': 2,
        'protein': 2,
        'gene': 3,
    }
    full_attribute_keys = {
        u'': 3,
        'Dbxref': 6,
        'Name': 19,
        'Parent': 20,
        ' Parent': 1,
        'score_text': 2,
        'gbunit': 1,
        'derived_computed_cyto': 1,
        'Derives_from': 2,
        'derived_molecular_weight': 2,
        'score': 2,
        'ID': 25,
        'derived_isoelectric_point': 2,
        'Ontology_term': 1,
    }
    full_expected = {
        'featuretype': full_featuretypes,
        'feature_count': 27,
        'chrom': {'chr2L': 27},
        'attribute_keys': full_attribute_keys,
    }
    assert from_file == from_db == full_expected

    # --- first 10 features, selected fields only ---------------------------
    # The same options are passed to all three input kinds: file name,
    # iterator over the file, and database.
    options = {
        'look_for': ['chrom', 'strand', 'attribute_keys', 'featuretype'],
        'verbose': False,
        'limit': 10,
    }

    from_file = inspect.inspect(gff_path, **options)
    from_iter = inspect.inspect(
        iter(iterators._FileIterator(gff_path)),
        **options
    )
    from_db = inspect.inspect(
        gffutils.create_db(gff_path, ':memory:'),
        **options
    )

    limited_expected = {
        'feature_count': 10,
        'chrom': {u'chr2L': 10},
        'strand': {u'+': 10},
        'featuretype': {
            u'five_prime_UTR': 1,
            u'exon': 3,
            u'mRNA': 2,
            u'CDS': 1,
            'intron': 2,
            u'gene': 1,
        },
        'attribute_keys': {
            u'Name': 9,
            u'Parent': 9,
            u'score_text': 2,
            u'gbunit': 1,
            u'derived_computed_cyto': 1,
            u'score': 2,
            u'Dbxref': 3,
            u'ID': 8,
            u'Ontology_term': 1,
        },
    }
    assert from_file == from_db == from_iter == limited_expected
Пример #8
0
def test_inspect():
    """
    Compare inspect.inspect summaries across input kinds for the
    FBgn0031208.gff example.

    Pass 1 uses the file name and a database created from it.
    Pass 2 adds an iterator over the file and restricts the run with
    ``look_for`` and ``limit=10``.
    """
    example = gffutils.example_filename('FBgn0031208.gff')

    # Pass 1: everything in the file.
    summary_file = inspect.inspect(example, verbose=False)
    database = gffutils.create_db(example, ':memory:')
    summary_db = inspect.inspect(database, verbose=False)

    want_all = dict()
    want_all['feature_count'] = 27
    want_all['chrom'] = {'chr2L': 27}
    want_all['featuretype'] = {
        'gene': 3,
        'mRNA': 4,
        'exon': 6,
        'intron': 3,
        'CDS': 5,
        'five_prime_UTR': 1,
        'three_prime_UTR': 2,
        'protein': 2,
        'pcr_product': 1,
    }
    want_all['attribute_keys'] = {
        'ID': 25,
        'Name': 19,
        'Parent': 20,
        ' Parent': 1,
        u'': 3,
        'Dbxref': 6,
        'Derives_from': 2,
        'Ontology_term': 1,
        'gbunit': 1,
        'score': 2,
        'score_text': 2,
        'derived_computed_cyto': 1,
        'derived_molecular_weight': 2,
        'derived_isoelectric_point': 2,
    }
    assert summary_file == summary_db == want_all

    # Pass 2: identical keyword arguments for file, iterator and database.
    fields = ['chrom', 'strand', 'attribute_keys', 'featuretype']
    kwargs = dict(look_for=fields, verbose=False, limit=10)

    summary_file = inspect.inspect(example, **kwargs)

    feature_iter = iter(iterators._FileIterator(example))
    summary_iter = inspect.inspect(feature_iter, **kwargs)

    database = gffutils.create_db(example, ':memory:')
    summary_db = inspect.inspect(database, **kwargs)

    want_limited = {
        'feature_count': 10,
        'strand': {u'+': 10},
        'chrom': {u'chr2L': 10},
        'attribute_keys': {
            u'ID': 8,
            u'Name': 9,
            u'Parent': 9,
            u'Dbxref': 3,
            u'Ontology_term': 1,
            u'gbunit': 1,
            u'score': 2,
            u'score_text': 2,
            u'derived_computed_cyto': 1,
        },
        'featuretype': {
            u'gene': 1,
            u'mRNA': 2,
            u'exon': 3,
            'intron': 2,
            u'CDS': 1,
            u'five_prime_UTR': 1,
        },
    }
    assert summary_file == summary_db == summary_iter == want_limited