Пример #1
0
def test_create_downloadjob_symlink_only(req, tmpdir):
    """Check create_downloadjob when the local files are already present.

    The helper is asked to pre-create the local files and to use the
    human-readable layout; the resulting jobs are expected to carry neither
    a URL nor a checksum, only the local file and its symlink path.
    """
    entry, config, joblist = prepare_create_downloadjob(
        req, tmpdir, human_readable=True, create_local_file=True)

    jobs = core.create_downloadjob(entry, 'bacteria', config)

    # Only the '_genomic.gbff.gz' files are expected to show up in the result.
    expected = []
    for prepared in joblist:
        if not prepared.local_file.endswith('_genomic.gbff.gz'):
            continue
        expected.append(
            core.DownloadJob(None, prepared.local_file, None,
                             prepared.symlink_path))

    assert jobs == expected
Пример #2
0
def test_download_entry_symlink_only(req, tmpdir):
    """Check download_entry when the local files are already present.

    Each expected result is a ``(DownloadJob, None)`` pair whose job has no
    URL and no checksum, only the local file and its symlink path.
    """
    entry, outdir, joblist = prepare_download_entry(
        req, tmpdir, human_readable=True, create_local_file=True)

    jobs = core.download_entry(entry, 'refseq', 'bacteria', str(outdir),
                               'genbank', True)

    # Entries of joblist are indexable; the prepared job sits at index 0.
    expected = []
    for item in joblist:
        prepared = item[0]
        if not prepared.local_file.endswith('_genomic.gbff.gz'):
            continue
        symlink_job = core.DownloadJob(None, prepared.local_file, None,
                                       prepared.symlink_path)
        expected.append((symlink_job, None))

    assert jobs == expected
Пример #3
0
def prepare_download_entry(req, tmpdir, format_map=core.FORMAT_NAME_MAP):
    """Set up a fake assembly entry together with mocked remote files.

    For every ``(key, value)`` pair in ``format_map`` a file named
    ``fake<value>`` containing ``key`` is written below ``tmpdir``, its URL is
    registered on ``req`` and a matching ``core.DownloadJob`` is recorded.
    Finally an ``md5checksums.txt`` covering all files is registered on
    ``req`` as well.

    Returns a tuple ``(entry, outdir, download_jobs)``.
    """
    entry = {
        'assembly_accession': 'FAKE0.1',
        'ftp_path': 'http://fake/genomes/FAKE0.1'
    }
    outdir = tmpdir.mkdir('output')

    download_jobs = []
    checksum_lines = []
    for fmt_key, suffix in format_map.items():
        source = tmpdir.join('fake{}'.format(suffix))
        source.write(fmt_key)

        digest = core.md5sum(str(source))
        name = path.basename(str(source))
        url = 'http://fake/genomes/FAKE0.1/{}'.format(name)
        target = str(outdir.join('refseq', 'bacteria', 'FAKE0.1', name))

        download_jobs.append(core.DownloadJob(url, target, digest))
        checksum_lines.append('{}\t./{}\n'.format(digest, name))
        # Serve the file content under its fake URL.
        req.get(url, text=source.read())

    req.get('http://fake/genomes/FAKE0.1/md5checksums.txt',
            text=''.join(checksum_lines))

    return entry, outdir, download_jobs
Пример #4
0
def test_download_defaults(monkeypatch, mocker):
    """Check download() with defaults calls _download once per supported group."""
    worker_stub = mocker.MagicMock()
    placeholder_job = core.DownloadJob(None, None, None, None)
    download_stub = mocker.MagicMock(return_value=[placeholder_job])
    monkeypatch.setattr(core, '_download', download_stub)
    monkeypatch.setattr(core, 'worker', worker_stub)

    exit_code = core.download()

    assert exit_code == 0
    assert download_stub.call_count == len(core.SUPPORTED_TAXONOMIC_GROUPS)
Пример #5
0
def test_download_metadata(monkeypatch, mocker, req, tmpdir):
    """Test creating the metadata file works.

    The bacteria assembly summary URL is mocked with the contents of the
    ``partial_summary.txt`` fixture, ``create_downloadjob`` is patched to
    return a placeholder job, and the test then checks the call counts and
    that the metadata table file exists afterwards.
    """
    metadata_file = tmpdir.join('metadata.tsv')
    # Read the fixture inside a context manager so the handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(_get_file('partial_summary.txt'), 'r') as summary_handle:
        summary_contents = summary_handle.read()
    req.get('https://ftp.ncbi.nih.gov/genomes/refseq/bacteria/assembly_summary.txt',
            text=summary_contents)
    # Spies keep the real implementations but record how often they are called.
    mocker.spy(core, 'get_summary')
    mocker.spy(core, 'parse_summary')
    mocker.patch('ncbi_genome_download.core.create_downloadjob',
                 return_value=[core.DownloadJob(None, None, None, None)])
    core.download(group='bacteria', output='/tmp/fake', metadata_table=str(metadata_file))
    assert core.get_summary.call_count == 1
    assert core.parse_summary.call_count == 1
    # NOTE(review): 4 presumably matches the number of entries in
    # partial_summary.txt -- confirm against the fixture.
    assert core.create_downloadjob.call_count == 4
    assert metadata_file.check()
Пример #6
0
def prepare_create_downloadjob(req,
                               tmpdir,
                               format_map=NgdConfig._FORMATS,
                               human_readable=False,
                               create_local_file=False):
    """Set up a fake entry, a config and the matching list of download jobs.

    For every ``(key, value)`` pair in ``format_map`` a file named
    ``fake<value>`` containing ``key`` is written below ``tmpdir`` and its URL
    is registered on ``req``. With ``create_local_file`` the same content is
    also written to the job's local file path; with ``human_readable`` each
    job additionally gets a symlink path below ``human_readable``. An
    ``md5checksums.txt`` covering all files is registered on ``req`` last.

    Returns a tuple ``(entry, config, download_jobs)``.
    """
    entry = {
        'assembly_accession': 'FAKE0.1',
        'organism_name': 'Example species',
        'infraspecific_name': 'strain=ABC 1234',
        'ftp_path': 'https://fake/genomes/FAKE0.1'
    }

    config = NgdConfig()
    outdir = tmpdir.mkdir('output')
    config.output = str(outdir)
    config.human_readable = human_readable

    download_jobs = []
    checksum_lines = []
    for fmt_key, suffix in format_map.items():
        source = tmpdir.join('fake{}'.format(suffix))
        source.write(fmt_key)

        digest = core.md5sum(str(source))
        name = path.basename(str(source))
        url = 'https://fake/genomes/FAKE0.1/{}'.format(name)

        target = outdir.join('refseq', 'bacteria', 'FAKE0.1', name)
        if create_local_file:
            # ensure=True creates the missing parent directories.
            target.write(source.read(), ensure=True)

        link = str(outdir.join('human_readable', 'refseq', 'bacteria',
                               'Example', 'species', 'ABC_1234',
                               name)) if human_readable else None

        job = core.DownloadJob(url, str(target), digest, link)
        download_jobs.append(job)
        checksum_lines.append('{}\t./{}\n'.format(digest, name))
        # Serve the file content under its fake URL.
        req.get(url, text=source.read())

    req.get('https://fake/genomes/FAKE0.1/md5checksums.txt',
            text=''.join(checksum_lines))

    return entry, config, download_jobs
Пример #7
0
def test_args_download_defaults(monkeypatch, mocker):
    """Check args_download with an empty Namespace.

    The call must return 0, hand ``select_candidates`` an object whose
    ``group`` equals ``SUPPORTED_TAXONOMIC_GROUPS`` and pass the selected
    entry on to ``create_downloadjob``.
    """
    entry = {
        'assembly_accession': 'FAKE0.1',
        'organism_name': 'Example species',
        'infraspecific_name': 'strain=ABC 1234',
        'ftp_path': 'https://fake/genomes/FAKE0.1'
    }
    worker_stub = mocker.MagicMock()
    candidates_stub = mocker.MagicMock(return_value=[(entry, 'bacteria')])
    placeholder_job = core.DownloadJob(None, None, None, None)
    jobs_stub = mocker.MagicMock(return_value=[placeholder_job])

    replacements = (
        ('select_candidates', candidates_stub),
        ('create_downloadjob', jobs_stub),
        ('worker', worker_stub),
    )
    for attr_name, stub in replacements:
        monkeypatch.setattr(core, attr_name, stub)

    assert core.args_download(Namespace()) == 0

    # Each recorded call unpacks into (positional args, keyword args).
    select_args, _ = candidates_stub.call_args_list[0]
    assert select_args[0].group == SUPPORTED_TAXONOMIC_GROUPS
    create_args, _ = jobs_stub.call_args_list[0]
    assert create_args[0] == entry
Пример #8
0
def test_download_dry_run(monkeypatch, mocker):
    """Check a dry run selects candidates but creates and runs no jobs."""
    entry = {
        'assembly_accession': 'FAKE0.1',
        'organism_name': 'Example species',
        'infraspecific_name': 'strain=ABC 1234',
        'ftp_path': 'https://fake/genomes/FAKE0.1'
    }
    worker_stub = mocker.MagicMock()
    candidates_stub = mocker.MagicMock(return_value=[(entry, 'bacteria')])
    placeholder_job = core.DownloadJob(None, None, None, None)
    jobs_stub = mocker.MagicMock(return_value=[placeholder_job])

    replacements = (
        ('select_candidates', candidates_stub),
        ('create_downloadjob', jobs_stub),
        ('worker', worker_stub),
    )
    for attr_name, stub in replacements:
        monkeypatch.setattr(core, attr_name, stub)

    exit_code = core.download(dry_run=True)

    assert exit_code == 0
    assert candidates_stub.call_count == 1
    # Neither job creation nor the worker may run during a dry run.
    assert jobs_stub.call_count == 0
    assert worker_stub.call_count == 0