Exemplo n.º 1
0
def test_ucsc_gencode_gtf():
    """
    Build a GTF from UCSC_GENCODE_PATH and check that its dataframe holds
    12 exon rows, and that dataframe() returns the same number of rows and
    the same columns with save_to_disk=True as with save_to_disk=False.
    """
    with TemporaryDirectory() as cache_dir:
        gencode_gtf = GTF(UCSC_GENCODE_PATH, cache_directory_path=cache_dir)
        unsaved_df = gencode_gtf.dataframe(save_to_disk=False)
        is_exon = unsaved_df["feature"] == "exon"
        exon_rows = unsaved_df[is_exon]
        # the dataframe should contain exactly 12 exon rows
        assert len(exon_rows) == 12, "Expected 12 exons, got %d: %s" % (
            len(exon_rows),
            exon_rows,
        )
        saved_df = gencode_gtf.dataframe(save_to_disk=True)
        # both calls must agree on row count and column names
        assert len(unsaved_df) == len(saved_df), "Got different length DataFrame"
        assert list(unsaved_df.columns) == list(saved_df.columns)
Exemplo n.º 2
0
def test_ucsc_gencode_gtf():
    """
    Parse the GTF at UCSC_GENCODE_PATH, expect 12 rows whose "feature" is
    "exon", then confirm that requesting the dataframe with
    save_to_disk=True gives the same length and columns as the first call.
    """
    with TemporaryDirectory() as scratch_dir:
        parsed = GTF(UCSC_GENCODE_PATH, cache_directory_path=scratch_dir)
        first_df = parsed.dataframe(save_to_disk=False)
        exon_df = first_df[first_df["feature"] == "exon"]
        exon_count = len(exon_df)
        # exactly 12 exon rows are expected in the dataframe
        assert exon_count == 12, \
            "Expected 12 exons, got %d: %s" % (exon_count, exon_df)
        second_df = parsed.dataframe(save_to_disk=True)
        assert len(first_df) == len(second_df), \
            "Got different length DataFrame"
        assert list(first_df.columns) == list(second_df.columns)
Exemplo n.º 3
0
def test_ucsc_refseq_gtf():
    """
    Exercise the GTF object on a small RefSeq GTF file obtained from
    http://genome.ucsc.edu/cgi-bin/hgTables

    Expects 16 exon rows, and the same row count and columns from
    dataframe() regardless of the save_to_disk argument.
    """
    with TemporaryDirectory() as cache_dir:
        refseq_gtf = GTF(UCSC_REFSEQ_PATH, cache_directory_path=cache_dir)
        unsaved_df = refseq_gtf.dataframe(save_to_disk=False)
        is_exon = unsaved_df["feature"] == "exon"
        exon_rows = unsaved_df[is_exon]
        # the GTF should contribute exactly 16 exon rows
        assert len(exon_rows) == 16, "Expected 16 exons, got %d: %s" % (
            len(exon_rows),
            exon_rows,
        )
        saved_df = refseq_gtf.dataframe(save_to_disk=True)
        # both calls must agree on row count and column names
        assert len(unsaved_df) == len(saved_df), "Got different length DataFrame"
        assert list(unsaved_df.columns) == list(saved_df.columns)
Exemplo n.º 4
0
def test_ucsc_refseq_gtf():
    """
    Run the GTF object against a small RefSeq GTF file downloaded from
    http://genome.ucsc.edu/cgi-bin/hgTables

    The dataframe must contain 16 exon rows, and a second dataframe() call
    with save_to_disk=True must match the first in length and columns.
    """
    with TemporaryDirectory() as scratch_dir:
        parsed = GTF(UCSC_REFSEQ_PATH, cache_directory_path=scratch_dir)
        first_df = parsed.dataframe(save_to_disk=False)
        exon_df = first_df[first_df["feature"] == "exon"]
        exon_count = len(exon_df)
        # exactly 16 exon rows are expected from the GTF
        assert exon_count == 16, \
            "Expected 16 exons, got %d: %s" % (exon_count, exon_df)
        second_df = parsed.dataframe(save_to_disk=True)
        assert len(first_df) == len(second_df), \
            "Got different length DataFrame"
        assert list(first_df.columns) == list(second_df.columns)
Exemplo n.º 5
0
def test_gtf_object_path():
    """
    A GTF object should report exactly the path it was constructed with.

    It must not do anything surprising behind the scenes (for example
    copying local files into a cache directory); that kind of work belongs
    at the Genome/DownloadCache level.
    """
    stored_path = GTF(gtf_path).gtf_path
    assert gtf_path == stored_path
Exemplo n.º 6
0
def test_gtf_creates_csv_files_in_cache_dir():
    """
    Check that GTF honours its cache_directory_path argument: the reported
    data subset path must contain the cache directory, and calling
    dataframe() must leave files matching "*mouse*" inside it.
    """
    with TemporaryDirectory() as cache_dir:
        gtf = GTF(gtf_path, cache_directory_path=cache_dir)
        subset_csv_path = gtf.data_subset_path()
        assert cache_dir in subset_csv_path, \
            "Expected CSV path %s to contain cache_dir" % subset_csv_path

        mouse_glob = join(cache_dir, "*mouse*")
        files_before = glob(mouse_glob)
        assert len(files_before) == 0, \
            "Temporary directory should be empty"

        # building the dataframe triggers GTF parsing, after which the
        # parsed results are expected to be written out as a csv file
        gtf.dataframe()
        files_after = glob(mouse_glob)
        assert len(files_after) > 0, \
            "Expected GTF to save files in cache_dir"
Exemplo n.º 7
0
def test_gtf_creates_csv_files_in_cache_dir():
    """
    Verify that GTF respects the cache directory it is given by writing
    its index files there: no "*mouse*" files exist before dataframe() is
    called, and at least one exists afterwards.
    """
    with TemporaryDirectory() as scratch_dir:
        parsed = GTF(gtf_path, cache_directory_path=scratch_dir)
        expected_csv = parsed.data_subset_path()
        assert scratch_dir in expected_csv, \
            "Expected CSV path %s to contain cache_dir" % expected_csv

        pattern = join(scratch_dir, "*mouse*")

        def matching_file_count():
            # number of entries in the cache directory matching the pattern
            return len(glob(pattern))

        assert matching_file_count() == 0, \
            "Temporary directory should be empty"
        # requesting the dataframe should parse the GTF and then save the
        # parsed results in a csv file
        parsed.dataframe()
        assert matching_file_count() > 0, \
            "Expected GTF to save files in cache_dir"