def test_filter_entries():
    """Check filter_entries with a default config and an accession filter."""
    config = NgdConfig()
    with open(_get_file('assembly_status.txt'), 'r') as summary_fh:
        entries = list(core.parse_summary(summary_fh))

    # With an untouched config, every parsed entry is expected to pass.
    unfiltered = core.filter_entries(entries, config)
    assert unfiltered == entries

    # After setting an accession on the config, only the last parsed entry
    # is expected to remain.
    expected = entries[-1:]
    config.assembly_accessions = "GCF_000203835.1"
    filtered = core.filter_entries(entries, config)
    assert filtered == expected
def prepare_create_downloadjob(req, tmpdir, format_map=NgdConfig._FORMATS, human_readable=False,
                               create_local_file=False):
    """Prepare a fake entry, config and expected jobs for download-job tests.

    For every ``(key, val)`` pair in ``format_map`` a file named
    ``fake<val>`` containing ``key`` is written into ``tmpdir``, a GET
    handler serving its content is registered on ``req``, and the matching
    ``core.DownloadJob`` is recorded. Finally a GET handler for
    ``md5checksums.txt`` listing all checksums is registered.

    Args:
        req: object exposing ``get(url, text=...)`` for registering fake
            responses (presumably a requests-mock mocker; confirm against
            the caller).
        tmpdir: directory object providing ``mkdir`` and ``join``
            (presumably pytest's ``tmpdir`` fixture; confirm against the
            caller).
        format_map: mapping whose keys become file contents and whose
            values become file name suffixes.
        human_readable: when true, set ``config.human_readable`` and give
            each job a symlink path under ``human_readable/``.
        create_local_file: when true, also write each fake file to its
            download destination inside the output directory.

    Returns:
        Tuple ``(entry, config, download_jobs)``.
    """
    # Set up test env
    entry = {
        'assembly_accession': 'FAKE0.1',
        'organism_name': 'Example species',
        'infraspecific_name': 'strain=ABC 1234',
        'ftp_path': 'https://fake/genomes/FAKE0.1'
    }

    config = NgdConfig()
    outdir = tmpdir.mkdir('output')
    config.output = str(outdir)
    config.human_readable = human_readable

    expected_jobs = []
    checksum_lines = []

    for content, suffix in format_map.items():
        source_file = tmpdir.join('fake{}'.format(suffix))
        source_file.write(content)
        source_path = str(source_file)

        digest = core.md5sum(source_path)
        filename = path.basename(source_path)
        full_url = 'https://fake/genomes/FAKE0.1/{}'.format(filename)

        target_file = outdir.join('refseq', 'bacteria', 'FAKE0.1', filename)
        if create_local_file:
            # ensure=True creates the missing parent directories first.
            target_file.write(source_file.read(), ensure=True)

        symlink_path = str(
            outdir.join('human_readable', 'refseq', 'bacteria', 'Example', 'species',
                        'ABC_1234', filename)) if human_readable else None

        job = core.DownloadJob(full_url, str(target_file), digest, symlink_path)
        expected_jobs.append(job)
        checksum_lines.append('{}\t./{}\n'.format(digest, filename))

        # Serve the fake file's content at its download URL.
        req.get(full_url, text=source_file.read())

    # Serve a checksum listing that covers every fake file created above.
    req.get('https://fake/genomes/FAKE0.1/md5checksums.txt', text=''.join(checksum_lines))

    return entry, config, expected_jobs