def prepare_download_entry(req, tmpdir, format_map=core.format_name_map):
    """Create a fake assembly entry and the download jobs expected for it.

    For every ``(key, suffix)`` pair in *format_map* a file named
    ``fake<suffix>`` containing ``key`` is written to *tmpdir*, a GET for its
    URL is registered on *req*, and a matching ``core.DownloadJob`` is
    recorded.  A ``md5checksums.txt`` URL listing every file is registered
    last.

    Args:
        req: object whose ``get(url, text=...)`` registers a canned response
            (presumably a requests-mock fixture -- confirm).
        tmpdir: temporary directory object offering ``mkdir``/``join``
            (presumably pytest's ``tmpdir`` fixture -- confirm).
        format_map: mapping whose values are used as file-name suffixes and
            whose keys become the fake file contents.

    Returns:
        Tuple ``(entry, outdir, download_jobs)``.
    """
    # Set up test env
    entry = {
        'assembly_accession': 'FAKE0.1',
        'ftp_path': 'http://fake/genomes/FAKE0.1',
    }
    output_dir = tmpdir.mkdir('output')

    expected_jobs = []
    checksum_lines = []
    for fmt_key, suffix in format_map.items():
        fake_seq = tmpdir.join('fake{}'.format(suffix))
        fake_seq.write(fmt_key)
        digest = core.md5sum(str(fake_seq))

        base_name = path.basename(str(fake_seq))
        url = 'http://fake/genomes/FAKE0.1/{}'.format(base_name)
        target = str(output_dir.join('refseq', 'bacteria', 'FAKE0.1', base_name))

        expected_jobs.append(core.DownloadJob(url, target, digest))
        checksum_lines.append('{}\t./{}\n'.format(digest, base_name))
        req.get(url, text=fake_seq.read())

    # The checksum listing is served from the same fake directory.
    req.get('http://fake/genomes/FAKE0.1/md5checksums.txt',
            text=''.join(checksum_lines))

    return entry, output_dir, expected_jobs
def prepare_download_entry(req, tmpdir, format_map=core.FORMAT_NAME_MAP):
    """Create a fake assembly entry and the download jobs expected for it.

    For every ``(key, suffix)`` pair in *format_map* a file named
    ``fake<suffix>`` containing ``key`` is written to *tmpdir*, a GET for its
    URL is registered on *req*, and a matching ``core.DownloadJob`` is
    recorded.  A ``md5checksums.txt`` URL listing every file is registered
    last.

    Args:
        req: object whose ``get(url, text=...)`` registers a canned response
            (presumably a requests-mock fixture -- confirm).
        tmpdir: temporary directory object offering ``mkdir``/``join``
            (presumably pytest's ``tmpdir`` fixture -- confirm).
        format_map: mapping whose values are used as file-name suffixes and
            whose keys become the fake file contents.

    Returns:
        Tuple ``(entry, outdir, download_jobs)``.
    """
    # Set up test env
    entry = {
        'assembly_accession': 'FAKE0.1',
        'ftp_path': 'http://fake/genomes/FAKE0.1',
    }
    output_dir = tmpdir.mkdir('output')

    expected_jobs = []
    checksum_lines = []
    for fmt_key, suffix in format_map.items():
        fake_seq = tmpdir.join('fake{}'.format(suffix))
        fake_seq.write(fmt_key)
        digest = core.md5sum(str(fake_seq))

        base_name = path.basename(str(fake_seq))
        url = 'http://fake/genomes/FAKE0.1/{}'.format(base_name)
        target = str(output_dir.join('refseq', 'bacteria', 'FAKE0.1', base_name))

        expected_jobs.append(core.DownloadJob(url, target, digest))
        checksum_lines.append('{}\t./{}\n'.format(digest, base_name))
        req.get(url, text=fake_seq.read())

    # The checksum listing is served from the same fake directory.
    req.get('http://fake/genomes/FAKE0.1/md5checksums.txt',
            text=''.join(checksum_lines))

    return entry, output_dir, expected_jobs
def test_download_file_fasta(req, tmpdir):
    """The 'fasta' download job succeeds when a decoy file is also listed.

    The checksum list names both a ``cds_from_genomic`` file and the plain
    genomic FASTA file, but only the latter has a response registered on
    *req*.
    """
    decoy = tmpdir.join('fake_cds_from_genomic.fna.gz')
    decoy.write("we don't want this one")
    decoy_md5 = core.md5sum(str(decoy))

    wanted = tmpdir.join('fake_genomic.fna.gz')
    wanted.write('foo')
    assert wanted.check()
    wanted_md5 = core.md5sum(str(wanted))

    checksums = [
        {'checksum': decoy_md5, 'file': decoy.basename},
        {'checksum': wanted_md5, 'file': wanted.basename},
    ]
    download_dir = tmpdir.mkdir('download')

    # NOTE(review): the entry uses an ftp:// path while the response is
    # registered under https:// -- presumably the job rewrites the scheme.
    req.get('https://fake/path/fake_genomic.fna.gz', text=wanted.read())

    entry = {'ftp_path': 'ftp://fake/path'}
    job = core.download_file_job(entry, str(download_dir), checksums, 'fasta')
    assert core.worker(job)
def test_download_file_genbank(req, tmpdir):
    """A download job built without an explicit format fetches the .gbff.gz file."""
    source = tmpdir.join('fake_genomic.gbff.gz')
    source.write('foo')
    assert source.check()

    digest = core.md5sum(str(source))
    checksums = [{'checksum': digest, 'file': source.basename}]
    download_dir = tmpdir.mkdir('download')

    # NOTE(review): the entry uses an ftp:// path while the response is
    # registered under https:// -- presumably the job rewrites the scheme.
    req.get('https://fake/path/fake_genomic.gbff.gz', text=source.read())

    entry = {'ftp_path': 'ftp://fake/path'}
    job = core.download_file_job(entry, str(download_dir), checksums)
    assert core.worker(job)
def test_download_file_fasta(req, tmpdir):
    """``core.download_file`` with format 'fasta' yields a job the worker completes."""
    source = tmpdir.join('fake_genomic.fna.gz')
    source.write('foo')
    assert source.check()

    digest = core.md5sum(str(source))
    checksums = [{'checksum': digest, 'file': source.basename}]
    download_dir = tmpdir.mkdir('download')

    # NOTE(review): the entry uses an ftp:// path while the response is
    # registered under http:// -- presumably the scheme is rewritten.
    req.get('http://fake/path/fake_genomic.fna.gz', text=source.read())

    entry = {'ftp_path': 'ftp://fake/path'}
    job = core.download_file(entry, str(download_dir), checksums, 'fasta')
    assert core.worker(job)
def test_has_file_changed_unchanged(tmpdir):
    """``has_file_changed`` is False when the on-disk file matches its checksum."""
    existing = tmpdir.join('fake_genomic.gbff.gz')
    existing.write('foo')
    assert existing.check()

    digest = core.md5sum(str(existing))
    # The first record names a file that does not exist in tmpdir.
    skipped_record = {'checksum': 'fake', 'file': 'skipped'}
    matching_record = {'checksum': digest, 'file': existing.basename}
    checksums = [skipped_record, matching_record]

    changed = core.has_file_changed(str(tmpdir), checksums)
    assert changed is False
def test_create_symlink_job(tmpdir):
    """Running a symlink job makes the file appear in the symlink directory."""
    download_dir = tmpdir.mkdir('download')
    downloaded = download_dir.join('fake_genomic.gbff.gz')
    downloaded.write('foo')
    assert downloaded.check()

    digest = core.md5sum(str(downloaded))
    checksums = [{'checksum': digest, 'file': downloaded.basename}]
    link_dir = tmpdir.mkdir('symlink')

    job = core.create_symlink_job(
        str(download_dir), checksums, 'genbank', str(link_dir))
    assert core.worker(job)

    expected_link = link_dir.join('fake_genomic.gbff.gz')
    assert expected_link.check()
def test_need_to_create_symlink(tmpdir):
    """``need_to_create_symlink`` is truthy while the target directory is empty."""
    existing = tmpdir.join('fake_genomic.gbff.gz')
    existing.write('foo')
    assert existing.check()

    digest = core.md5sum(str(existing))
    link_root = tmpdir.mkdir('human_readable')

    # The first record names a file that does not exist in tmpdir.
    skipped_record = {'checksum': 'fake', 'file': 'skipped'}
    matching_record = {'checksum': digest, 'file': existing.basename}
    checksums = [skipped_record, matching_record]

    needed = core.need_to_create_symlink(
        str(tmpdir), checksums, 'genbank', str(link_root))
    assert needed
def test_download_file_symlink_path(req, tmpdir):
    """A download job given ``symlink_path`` leaves an entry in that directory."""
    source = tmpdir.join('fake_genomic.gbff.gz')
    source.write('foo')
    assert source.check()

    digest = core.md5sum(str(source))
    checksums = [{'checksum': digest, 'file': source.basename}]
    download_dir = tmpdir.mkdir('download')
    link_dir = tmpdir.mkdir('symlink')

    # NOTE(review): the entry uses an ftp:// path while the response is
    # registered under https:// -- presumably the job rewrites the scheme.
    req.get('https://fake/path/fake_genomic.gbff.gz', text=source.read())

    entry = {'ftp_path': 'ftp://fake/path'}
    job = core.download_file_job(
        entry, str(download_dir), checksums, symlink_path=str(link_dir))
    assert core.worker(job)

    expected_link = link_dir.join('fake_genomic.gbff.gz')
    assert expected_link.check()
def prepare_create_downloadjob(req, tmpdir, format_map=NgdConfig._FORMATS,
                               human_readable=False, create_local_file=False):
    """Build a fake entry, a config and the download jobs expected for them.

    For every ``(key, suffix)`` pair in *format_map* a file ``fake<suffix>``
    containing ``key`` is written to *tmpdir*, its URL is registered on
    *req*, and a ``core.DownloadJob`` is recorded.  A ``md5checksums.txt``
    URL listing every file is registered last.

    Args:
        req: object whose ``get(url, text=...)`` registers a canned response
            (presumably a requests-mock fixture -- confirm).
        tmpdir: temporary directory object offering ``mkdir``/``join``
            (presumably pytest's ``tmpdir`` fixture -- confirm).
        format_map: mapping whose values are used as file-name suffixes and
            whose keys become the fake file contents.
        human_readable: stored on the config; when true each job also
            carries a symlink path under ``human_readable/``.
        create_local_file: when true, the fake content is also written to
            the job's local target path.

    Returns:
        Tuple ``(entry, config, download_jobs)``.
    """
    # Set up test env
    entry = {
        'assembly_accession': 'FAKE0.1',
        'organism_name': 'Example species',
        'infraspecific_name': 'strain=ABC 1234',
        'ftp_path': 'https://fake/genomes/FAKE0.1',
    }

    config = NgdConfig()
    output_dir = tmpdir.mkdir('output')
    config.output = str(output_dir)
    config.human_readable = human_readable

    expected_jobs = []
    checksum_lines = []
    for fmt_key, suffix in format_map.items():
        fake_seq = tmpdir.join('fake{}'.format(suffix))
        fake_seq.write(fmt_key)
        digest = core.md5sum(str(fake_seq))

        base_name = path.basename(str(fake_seq))
        url = 'https://fake/genomes/FAKE0.1/{}'.format(base_name)
        target = output_dir.join('refseq', 'bacteria', 'FAKE0.1', base_name)
        if create_local_file:
            # ensure=True creates the missing parent directories.
            target.write(fake_seq.read(), ensure=True)

        link_target = (
            str(output_dir.join('human_readable', 'refseq', 'bacteria',
                                'Example', 'species', 'ABC_1234', base_name))
            if human_readable else None
        )

        expected_jobs.append(
            core.DownloadJob(url, str(target), digest, link_target))
        checksum_lines.append('{}\t./{}\n'.format(digest, base_name))
        req.get(url, text=fake_seq.read())

    # The checksum listing is served from the same fake directory.
    req.get('https://fake/genomes/FAKE0.1/md5checksums.txt',
            text=''.join(checksum_lines))

    return entry, config, expected_jobs
def test_download_file_symlink_path_existed(req, tmpdir):
    """A download job still succeeds when the symlink location is already taken."""
    source = tmpdir.join('fake_genomic.gbff.gz')
    source.write('foo')
    assert source.check()

    digest = core.md5sum(str(source))
    checksums = [{'checksum': digest, 'file': source.basename}]
    download_dir = tmpdir.mkdir('download')
    link_dir = tmpdir.mkdir('symlink')

    # Pre-create a link at the destination, pointing at an unrelated path.
    stale_link = link_dir.join('fake_genomic.gbff.gz')
    os.symlink("/foo/bar", str(stale_link))

    # NOTE(review): the entry uses an ftp:// path while the response is
    # registered under https:// -- presumably the job rewrites the scheme.
    req.get('https://fake/path/fake_genomic.gbff.gz', text=source.read())

    entry = {'ftp_path': 'ftp://fake/path'}
    job = core.download_file_job(
        entry, str(download_dir), checksums, symlink_path=str(link_dir))
    assert core.worker(job)
    assert stale_link.check()
def test_download_file_rna_fasta(req, tmpdir):
    """``core.download_file`` with format 'rna-fasta' yields a job the worker completes."""
    source = tmpdir.join('fake_rna_from_genomic.fna.gz')
    source.write('foo')
    assert source.check()

    digest = core.md5sum(str(source))
    checksums = [{'checksum': digest, 'file': source.basename}]
    download_dir = tmpdir.mkdir('download')

    # NOTE(review): the entry uses an ftp:// path while the response is
    # registered under http:// -- presumably the scheme is rewritten.
    req.get('http://fake/path/fake_rna_from_genomic.fna.gz',
            text=source.read())

    entry = {'ftp_path': 'ftp://fake/path'}
    job = core.download_file(entry, str(download_dir), checksums, 'rna-fasta')
    assert core.worker(job)
def prepare_create_downloadjob(req, tmpdir, format_map=NgdConfig._FORMATS,
                               human_readable=False, create_local_file=False):
    """Build a fake entry, a config and the download jobs expected for them.

    For every ``(key, suffix)`` pair in *format_map* a file ``fake<suffix>``
    containing ``key`` is written to *tmpdir*, its URL is registered on
    *req*, and a ``core.DownloadJob`` is recorded.  A ``md5checksums.txt``
    URL listing every file is registered last.

    Args:
        req: object whose ``get(url, text=...)`` registers a canned response
            (presumably a requests-mock fixture -- confirm).
        tmpdir: temporary directory object offering ``mkdir``/``join``
            (presumably pytest's ``tmpdir`` fixture -- confirm).
        format_map: mapping whose values are used as file-name suffixes and
            whose keys become the fake file contents.
        human_readable: stored on the config; when true each job also
            carries a symlink path under ``human_readable/``.
        create_local_file: when true, the fake content is also written to
            the job's local target path.

    Returns:
        Tuple ``(entry, config, download_jobs)``.
    """
    # Set up test env
    entry = {
        'assembly_accession': 'FAKE0.1',
        'organism_name': 'Example species',
        'infraspecific_name': 'strain=ABC 1234',
        'ftp_path': 'https://fake/genomes/FAKE0.1',
    }

    config = NgdConfig()
    output_dir = tmpdir.mkdir('output')
    config.output = str(output_dir)
    config.human_readable = human_readable

    expected_jobs = []
    checksum_lines = []
    for fmt_key, suffix in format_map.items():
        fake_seq = tmpdir.join('fake{}'.format(suffix))
        fake_seq.write(fmt_key)
        digest = core.md5sum(str(fake_seq))

        base_name = path.basename(str(fake_seq))
        url = 'https://fake/genomes/FAKE0.1/{}'.format(base_name)
        target = output_dir.join('refseq', 'bacteria', 'FAKE0.1', base_name)
        if create_local_file:
            # ensure=True creates the missing parent directories.
            target.write(fake_seq.read(), ensure=True)

        link_target = (
            str(output_dir.join('human_readable', 'refseq', 'bacteria',
                                'Example', 'species', 'ABC_1234', base_name))
            if human_readable else None
        )

        expected_jobs.append(
            core.DownloadJob(url, str(target), digest, link_target))
        checksum_lines.append('{}\t./{}\n'.format(digest, base_name))
        req.get(url, text=fake_seq.read())

    # The checksum listing is served from the same fake directory.
    req.get('https://fake/genomes/FAKE0.1/md5checksums.txt',
            text=''.join(checksum_lines))

    return entry, config, expected_jobs
def test_md5sum():
    """``core.md5sum`` returns the known digest of the partial summary fixture."""
    fixture_path = _get_file('partial_summary.txt')
    digest = core.md5sum(fixture_path)
    assert digest == '74d72df33d621f5eb6300dc9a2e06573'
def create_checksum_line(filename):
    """Return a ``<md5>\\t./<basename>\\n`` checksum line for *filename*.

    Args:
        filename: path of the file to hash with ``core.md5sum``.

    Returns:
        The digest and the file's base name, tab-separated, with the name
        prefixed by ``./`` and the line terminated by a newline.
    """
    digest = core.md5sum(filename)
    base_name = path.basename(filename)
    return '{}\t./{}\n'.format(digest, base_name)