import os

import ncbi_genome_download as ngd
import sh


def genome_download(name, output_path):
    # Per-genus download folder, e.g. <output_path>Escherichia_coli
    path = output_path + name.replace(" ", "_")
    os.makedirs(path)
    # Dry run first to list what would be fetched, then the real download
    ngd.download(group="bacteria", genus=name, file_format="fasta", parallel=10, dry_run=True)
    ngd.download(group="bacteria", genus=name, file_format="fasta", parallel=10, dry_run=False, output=path)
    # Collect and decompress every downloaded .gz file
    files = []
    for r, d, f in os.walk(path):
        for file in f:
            if '.gz' in file:
                files.append(os.path.join(r, file))
    for f in files:
        sh.gunzip(f)
    # Concatenate all extracted .fna files into a single FASTA for the genus
    files2 = []
    for r, d, f in os.walk(path):
        for file in f:
            if '.fna' in file:
                files2.append(os.path.join(r, file))
    out = output_path + "/" + name.replace(" ", "_") + ".fasta"
    sh.cat(files2, _out=out)
    return path

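# A hypothetical invocation of the helper above ("Nitrospira" and the directory are
# placeholders, not values from the original code). output_path is concatenated as a
# plain string, so the trailing slash matters; the merged FASTA is written next to
# the returned per-genus download folder.
genus_dir = genome_download("Nitrospira", "/data/genomes/")
print("Per-assembly downloads kept under:", genus_dir)
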
def ngd_download(dir_path, acc_ID, data_folder):
    download = False
    print('+ Check data for ID: ', acc_ID)
    if os.path.exists(dir_path):
        print('+ Folder already exists: ', dir_path)
        ## get files download
        (genome, prot, gff, gbk) = get_files_download(dir_path)
        if all([genome, prot, gff, gbk]):
            download = False
        else:
            print('+ Not all necessary data is available. Download it again.')
            download = True
    else:
        download = True

    if download:
        print('+ Downloading:')
        ## download in data folder provided
        ngd.download(section='genbank',
                     file_formats='fasta,gff,protein-fasta,genbank',
                     assembly_accessions=acc_ID,
                     output=data_folder,
                     groups='bacteria')

        ## check if files are gunzip
        files = os.listdir(dir_path)
        files_list = []
        for f in files:
            if f.endswith('gz'):
                files_list.append(f)
                print("\t- Extracting files: ", f)
                HCGB_files.extract(dir_path + '/' + f, dir_path)
                #os.remove(dir_path + '/' + f)
    else:
        print('+ Data is already available, no need to download it again')

def main(name=None, odir=None, formats='fasta', ids_list=None, size_of_batch=30, parallel=10):
    # name = "Nitrospirae;"
    # formats = 'fasta,protein-fasta'
    # odir = '/share/home-user/thliao/data/NCBI_genbank'
    # db_dir
    formats = formats.split(',')
    odir = realpath(odir)
    if ids_list:
        domain2aids, cinfos = id2domain_to_ids(ids_list)
    else:
        domain2aids, cinfos = from_name2ids(name)

    # filter with existing files
    downloaded_aids = []
    new_domain2aids = {}
    for d, aids in domain2aids.items():
        old_d = aids[::]
        curr_dir = join(db_dir, 'genbank', d)
        if 'fasta' in formats:
            # check whether other kinds of files have been downloaded
            sub_aids = [
                _ for _ in tqdm(aids)
                if not glob(join(curr_dir, _, '*.fna.gz'))
            ]
            new_domain2aids[d] = sub_aids
            downloaded_aids.extend(new_domain2aids[d])
            print(
                f"domain: {d}, original number of ids: {len(old_d)}, now ids: {len(new_domain2aids[d])} "
            )
    _d = {
        "assembly_accessions": '',
        "dry_run": False,
        "section": "genbank",
        "parallel": parallel,
        "output": db_dir,  # all genomes were downloaded to db_dir
        "file_formats": formats
    }
    print(f'params is {_d}')
    for batch_aids in tqdm(batch_iter(downloaded_aids, size_of_batch)):
        ngd.download(
            **{
                "assembly_accessions": ','.join(batch_aids),
                "dry_run": False,
                "section": "genbank",
                "parallel": parallel,
                "output": db_dir,  # all genomes were downloaded to db_dir
                "file_formats": formats
            })
    with open(join(odir, 'metadata.csv'), 'w') as f1:
        f1.write('\n'.join(cinfos))

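# The main() above (and the variant further down) iterates accessions in batches via a
# batch_iter() helper that is not shown here. A minimal sketch, assuming it simply
# yields fixed-size chunks of a list:
def batch_iter(items, batch_size):
    """Yield successive chunks of at most batch_size items (hypothetical helper)."""
    for start in range(0, len(items), batch_size):
        yield items[start:start + batch_size]
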
def main():
    '''Build and parse command line'''
    parser = argparse.ArgumentParser()
    parser.add_argument('domain',
                        choices=['all'] + ncbi_genome_download.SUPPORTED_DOMAINS,
                        help='The NCBI "domain" to download')
    parser.add_argument('-s', '--section', dest='section',
                        default='refseq', choices=['refseq', 'genbank'],
                        help='NCBI section to download')
    parser.add_argument('-F', '--format', dest='file_format',
                        default='genbank',
                        choices=['all'] + list(ncbi_genome_download.FORMAT_NAME_MAP.keys()),
                        help='Which format to download (default: genbank)')
    parser.add_argument('-l', '--assembly-level', dest='assembly_level',
                        default='all',
                        choices=['all'] + list(ncbi_genome_download.ASSEMBLY_LEVEL_MAP.keys()),
                        help='Assembly level of genomes to download (default: all)')
    parser.add_argument('-g', '--genus', dest='genus', default='',
                        help='Only download sequences of the provided genus. (default: unset, download all)')
    parser.add_argument('-T', '--species-taxid', dest='species_taxid',
                        help='Only download sequences of the provided species NCBI taxonomy ID. (default: unset, download all)')
    parser.add_argument('-t', '--taxid', dest='taxid',
                        help='Only download sequences of the provided NCBI taxonomy ID. (default: unset, download all)')
    parser.add_argument('-o', '--output-folder', dest='output', default=os.getcwd(),
                        help='Create output hierarchy in specified folder (default: current directory)')
    parser.add_argument('-u', '--uri', dest='uri', default=ncbi_genome_download.NCBI_URI,
                        help='NCBI base URI to use')
    parser.add_argument('-p', '--parallel', dest='parallel', default=1, type=int, metavar="N",
                        help='Run N downloads in parallel (default: 1)')
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='increase output verbosity')
    parser.add_argument('-d', '--debug', action='store_true', default=False,
                        help='print debugging information')
    parser.add_argument('-V', '--version', action='version',
                        version=ncbi_genome_download.__version__,
                        help='print version information')
    args = parser.parse_args()

    if args.debug:
        log_level = logging.DEBUG
    elif args.verbose:
        log_level = logging.INFO
    else:
        log_level = logging.WARNING
    logging.basicConfig(format='%(levelname)s: %(message)s', level=log_level)

    ncbi_genome_download.download(args)

def download_Refseq_files(outdir, cpus=1, names=False, taxids=False):
    assemblies, species_tags = check_db(outdir)
    files = ["fasta", "protein-fasta", "assembly-stats"]
    if not (names or taxids):
        print("Must specify a name or a taxid.")
    elif os.path.exists(os.path.join(outdir, "refseq")):
        print("Refseq download already exists at", os.path.join(outdir, "refseq"))
        print("Delete before proceeding.")
    else:
        if names:
            for name in names.split(","):
                print("Downloading files for {}...".format(name))
                for f in files:
                    print("\tworking on {} files...".format(f))
                    if cpus == 1:
                        ngd.download(group="bacteria", genus=name, file_format=f,
                                     section="refseq", output=outdir)
                    else:
                        ngd.download(group="bacteria", genus=name, file_format=f,
                                     section="refseq", output=outdir, parallel=cpus)
        if taxids:
            for taxid in taxids.split(","):
                print("Downloading files for {}...".format(str(taxid)))
                for f in files:
                    print("\tworking on {} files...".format(f))
                    if cpus == 1:
                        ngd.download(group="bacteria", taxid=taxid, file_format=f,
                                     section="refseq", output=outdir)
                    else:
                        ngd.download(group="bacteria", taxid=taxid, file_format=f,
                                     section="refseq", output=outdir, parallel=cpus)
        process_Refseq(outdir, assemblies, species_tags)
        if os.path.exists(os.path.join(outdir, "refseq")):
            shutil.rmtree(os.path.join(outdir, "refseq"))
    return

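# A hypothetical call of the function above: fetch RefSeq data for two genera using
# four parallel downloads (the genus names and output directory are placeholders).
download_Refseq_files("refseq_db", cpus=4, names="Listeria,Campylobacter")
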
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('domain',
                        choices=['all'] + ncbi_genome_download.supported_domains,
                        help='The NCBI "domain" to download')
    parser.add_argument('-s', '--section', dest='section',
                        default='refseq', choices=['refseq', 'genbank'],
                        help='NCBI section to download')
    parser.add_argument('-F', '--format', dest='file_format',
                        default='genbank',
                        # list() so the dict view can be concatenated under Python 3
                        choices=['all'] + list(ncbi_genome_download.format_name_map.keys()),
                        help='Which format to download (default: genbank)')
    parser.add_argument('-o', '--output-folder', dest='output', default=os.getcwd(),
                        help='Create output hierarchy in specified folder (default: current directory)')
    parser.add_argument('-u', '--uri', dest='uri', default=ncbi_genome_download.NCBI_URI,
                        help='NCBI base URI to use')
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='increase output verbosity')
    parser.add_argument('-d', '--debug', action='store_true', default=False,
                        help='print debugging information')
    parser.add_argument('-V', '--version', action='version',
                        version=ncbi_genome_download.__version__,
                        help='print version information')
    args = parser.parse_args()

    if args.debug:
        log_level = logging.DEBUG
    elif args.verbose:
        log_level = logging.INFO
    else:
        log_level = logging.WARNING
    logging.basicConfig(format='%(levelname)s: %(message)s', level=log_level)

    ncbi_genome_download.download(args)

def main():
    args = parse_args()
    meta_file = "{}.meta".format(args.name.replace(" ", "_"))
    logger.info(args)
    if not args.build:
        if pathlib.Path(args.outdir).exists():
            sys.exit(
                "The folder {} exists. Please choose another name \n or rename that folder and run again"
                .format(args.outdir))
        print("Start downloading {}".format(args.taxid))
        print("Location {}".format(args.outdir))
        ngd.download(section=args.section,
                     taxid=args.taxid,
                     group=args.group,
                     output=args.outdir,
                     file_format='genbank',
                     assembly_level=args.assembly_level,
                     metadata_table=meta_file,
                     parallel=args.parallel)
        if not pathlib.Path(meta_file).exists():
            sys.exit("Download error! Please check the log file")
        num_lines = sum(1 for line in open(meta_file))
        logger.info("Downloaded {} files for {}".format((num_lines - 1), args.taxid))
    if not pathlib.Path(args.outdir).exists():
        sys.exit("Folder {} does not exist!".format(args.outdir))
    print("Start building DB for: {}".format(args.name))
    make_database(path=args.outdir,
                  db_name=args.name,
                  ext=args.ext,
                  parallel=args.parallel)
    print("Finished!")

def main(): """Build and parse command line""" parser = argparse.ArgumentParser() parser.add_argument( 'group', choices=dflt.TAXONOMIC_GROUPS.choices, default=dflt.TAXONOMIC_GROUPS.default, help='The NCBI taxonomic group to download (default: %(default)s)') parser.add_argument('-s', '--section', dest='section', choices=dflt.SECTIONS.choices, default=dflt.SECTIONS.default, help='NCBI section to download (default: %(default)s)') parser.add_argument('-F', '--format', dest='file_format', choices=dflt.FORMATS.choices, default=dflt.FORMATS.default, help='Which format to download (default: %(default)s)') parser.add_argument( '-l', '--assembly-level', dest='assembly_level', choices=dflt.ASSEMBLY_LEVELS.choices, default=dflt.ASSEMBLY_LEVELS.default, help='Assembly level of genomes to download (default: %(default)s)') parser.add_argument( '-g', '--genus', dest='genus', default=dflt.GENUS.default, help= 'Only download sequences of the provided genus. (default: %(default)s)' ) parser.add_argument( '-T', '--species-taxid', dest='species_taxid', default=dflt.SPECIES_TAXID.default, help='Only download sequences of the provided species NCBI taxonomy ID. ' '(default: %(default)s)') parser.add_argument( '-t', '--taxid', dest='taxid', default=dflt.TAXID.default, help='Only download sequences of the provided NCBI taxonomy ID. (' 'default: %(default)s)') parser.add_argument( '-R', '--refseq-category', dest='refseq_category', choices=dflt.REFSEQ_CATEGORIES.choices, default=dflt.REFSEQ_CATEGORIES.default, help= 'Only download sequences of the provided refseq category (default: %(default)s)' ) parser.add_argument( '-o', '--output-folder', dest='output', default=dflt.OUTPUT.default, help= 'Create output hierarchy in specified folder (default: %(default)s)') parser.add_argument( '-H', '--human-readable', dest='human_readable', action='store_true', help='Create links in human-readable hierarchy (might fail on Windows)' ) parser.add_argument('-u', '--uri', dest='uri', default=dflt.URI.default, help='NCBI base URI to use (default: %(default)s)') parser.add_argument( '-p', '--parallel', dest='parallel', type=int, metavar="N", default=dflt.NB_PROCESSES.default, help='Run %(metavar)s downloads in parallel (default: %(default)s)') parser.add_argument( '-r', '--retries', dest='retries', type=int, metavar="N", default=0, help='Retry download %(metavar)s times when connection to NCBI fails (' 'default: %(default)s)') parser.add_argument('-m', '--metadata-table', type=str, help='Save tab-delimited file with genome metadata') parser.add_argument('-v', '--verbose', action='store_true', help='increase output verbosity') parser.add_argument('-d', '--debug', action='store_true', help='print debugging information') parser.add_argument('-V', '--version', action='version', version=__version__, help='print version information') args = parser.parse_args() if args.debug: log_level = logging.DEBUG elif args.verbose: log_level = logging.INFO else: log_level = logging.WARNING logging.basicConfig(format='%(levelname)s: %(message)s', level=log_level) kwargs = vars(args) del kwargs['debug'] del kwargs['verbose'] max_retries = kwargs.pop( 'retries') # Default value is set in parser argument attempts = 0 ret = download(**kwargs) while ret == 75 and attempts < max_retries: attempts += 1 logging.error( 'Downloading from NCBI failed due to a connection error, retrying. Retries so far: %s', attempts) ret = download(**kwargs) return ret
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('domain',
                        choices=['all'] + ncbi_genome_download.supported_domains,
                        help='The NCBI "domain" to download')
    parser.add_argument('-s', '--section', dest='section',
                        default='refseq', choices=['refseq', 'genbank'],
                        help='NCBI section to download')
    parser.add_argument('-F', '--format', dest='file_format',
                        default='genbank',
                        choices=['all'] + list(ncbi_genome_download.format_name_map.keys()),
                        help='Which format to download (default: genbank)')
    parser.add_argument('-l', '--assembly-level', dest='assembly_level',
                        default='all',
                        choices=['all'] + list(ncbi_genome_download.assembly_level_map.keys()),
                        help='Assembly level of genomes to download (default: all)')
    parser.add_argument('-g', '--genus', dest='genus', default='',
                        help='Only download sequences of the provided genus. (default: unset, download all)')
    parser.add_argument('-o', '--output-folder', dest='output', default=os.getcwd(),
                        help='Create output hierarchy in specified folder (default: current directory)')
    parser.add_argument('-u', '--uri', dest='uri', default=ncbi_genome_download.NCBI_URI,
                        help='NCBI base URI to use')
    parser.add_argument('-p', '--parallel', dest='parallel', default=1, type=int, metavar="N",
                        help='Run N downloads in parallel (default: 1)')
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='increase output verbosity')
    parser.add_argument('-d', '--debug', action='store_true', default=False,
                        help='print debugging information')
    parser.add_argument('-V', '--version', action='version',
                        version=ncbi_genome_download.__version__,
                        help='print version information')
    args = parser.parse_args()

    if args.debug:
        log_level = logging.DEBUG
    elif args.verbose:
        log_level = logging.INFO
    else:
        log_level = logging.WARNING
    logging.basicConfig(format='%(levelname)s: %(message)s', level=log_level)

    ncbi_genome_download.download(args)

def main(name=None, odir=None, taxons=None, formats='fasta', ids_list=None,
         size_of_batch=30, parallel=10, enable_check=True, section='genbank',
         group='bacteria', dry_run=False):
    formats = formats.split(',')
    if odir is None:
        odir = db_dir
    else:
        odir = realpath(odir)

    if enable_check:
        if ids_list:
            # should be assembly ID list
            domain2aids, cinfos = id2domain_to_ids(ids_list)
        elif name is not None:
            domain2aids, cinfos = from_name2ids(name, dataset=section)
        elif taxons is not None:
            domain2aids, cinfos = from_tid2ids(taxons)
        # filter with existing files
        downloaded_aids = []
        new_domain2aids = {}
        for d, aids in domain2aids.items():
            sub_aids = check_not_down(formats, aids, d, odir)
            new_domain2aids[d] = sub_aids
            downloaded_aids.extend(new_domain2aids[d])
            tqdm.write(
                f"domain: {d}, original number of ids: {len(aids)}, now ids: {len(new_domain2aids[d])} "
            )
    elif not enable_check and ids_list:
        # check disabled and an explicit list of assembly IDs given
        downloaded_aids = ids_list[::]

    if dry_run:
        with open(f'{odir}/downloaded_aids.list', 'w') as f1:
            f1.write('\n'.join(downloaded_aids))

    _d = {
        "dry_run": dry_run,
        "section": section,
        "groups": group,
        "parallel": parallel,
        "output": odir,
        "file_formats": formats
    }
    tqdm.write(f'params is {_d}')
    for batch_aids in tqdm(batch_iter(downloaded_aids, size_of_batch)):
        ngd.download(
            **{
                "assembly_accessions": ','.join(batch_aids),
                "dry_run": dry_run,
                "use_cache": True,  # reuse the cached assembly summary instead of re-downloading it
                "section": section,
                "parallel": parallel,
                "output": odir,
                "groups": group,  # without a group, every taxonomic group is scanned, which is slow
                "file_formats": formats
            })
    if enable_check:
        # cinfos only exists when the lookup above ran
        with open(join(odir, 'metadata.csv'), 'w') as f1:
            f1.write('\n'.join(cinfos))

def download_from_ncbi(species_linked, section, ncbi_species_name, ncbi_species_taxid,
                       ncbi_taxid, spe_strains, levels, outdir, threads):
    """
    Download NCBI genomes of a given species.

    Parameters
    ----------
    species_linked : str
        given NCBI species with '_' instead of spaces, or NCBI taxID if species name not given
    section : str
        genbank or only refseq (default = refseq)
    ncbi_species_name : str or None
        name of species to download: user given NCBI species. None if no species name given
    ncbi_species_taxid : int
        species taxid given in NCBI (-T option)
    ncbi_taxid : int
        taxid given in NCBI (-t option)
    spe_strains : str
        specific strain name, or comma-separated strain names (or name of a file with one strain name per line)
    levels : str
        assembly level(s) to restrict the download to, if any
    outdir : str
        Directory where downloaded sequences must be saved
    threads : int
        Number of threads to use to download genome sequences

    Returns
    -------
    tuple (str, int)
        path to the database folder with downloaded sequences, and the number of genomes downloaded
    """
    # Name of summary file, with metadata for each strain:
    sumfile = os.path.join(outdir, f"assembly_summary-{species_linked}.txt")
    abs_sumfile = os.path.abspath(sumfile)

    # arguments needed to download all genomes of the given species
    abs_outdir = os.path.abspath(outdir)
    keyargs = {"section": section, "file_formats": "fasta", "output": abs_outdir,
               "parallel": threads, "groups": "bacteria", "metadata_table": abs_sumfile}
    message = f"From {section}: "

    # Specific strains: downloaded only if compatible with ncbi species/taxids
    if spe_strains:
        keyargs["strains"] = spe_strains
        if os.path.isfile(spe_strains):
            message += f"Downloading all strains specified in {spe_strains} file"
        else:
            message += f"Downloading the following specified strain(s): {spe_strains}"
        if ncbi_species_name or ncbi_species_taxid or ncbi_taxid:
            message += ", which also have: "
            if ncbi_species_name:
                keyargs["genera"] = ncbi_species_name
                message += f"\n\t-NCBI species = {ncbi_species_name}"
            if ncbi_species_taxid:
                keyargs["species_taxids"] = ncbi_species_taxid
                message += f"\n\t-NCBI_species_taxid = {ncbi_species_taxid}"
            if ncbi_taxid:
                keyargs["taxids"] = ncbi_taxid
                message += f"\n\t-NCBI_taxid = {ncbi_taxid})."

    # Not downloading specific strains, but a sub-species: must be compatible with species given
    elif ncbi_taxid:
        keyargs["taxids"] = ncbi_taxid
        message += f"Downloading genomes with NCBI_taxid = {ncbi_taxid}"
        if ncbi_species_name or ncbi_species_taxid:
            message += ", which also have: "
            if ncbi_species_name:
                keyargs["genera"] = ncbi_species_name
                message += f"\n\t-NCBI species = {ncbi_species_name}"
            if ncbi_species_taxid:
                keyargs["species_taxids"] = ncbi_species_taxid
                message += f"\n\t-NCBI_species_taxid = {ncbi_species_taxid}"

    # Downloading all genomes of a species
    else:
        message += "Downloading all genomes of "
        # If NCBI species given, add it to arguments to download genomes,
        # and write it to info message
        if ncbi_species_name:
            keyargs["genera"] = ncbi_species_name
            message += f"NCBI species = {ncbi_species_name}"
        # If NCBI species taxid given, add it to arguments to download genomes,
        # and write it to info message
        if ncbi_species_taxid:
            keyargs["species_taxids"] = ncbi_species_taxid
            if ncbi_species_name:
                message += f" (NCBI_species_taxid = {ncbi_species_taxid})."
            else:
                message += f"NCBI_species_taxid = {ncbi_species_taxid}"

    # If assembly level(s) given, add it to arguments, and write to info message
    if levels:
        keyargs["assembly_levels"] = levels
        message += f" (Only those assembly levels: {levels})."

    logger.info(f"Metadata for all genomes will be saved in {sumfile}")
    logger.info(message)

    # Download genomes
    max_retries = 15  # If connection to NCBI fails, how many retry downloads must be done
    error_message = ("No strain corresponds to your request. If you are sure there should be "
                     "some, check that you gave a valid NCBI taxid and/or "
                     "NCBI species name and/or NCBI strain name. If you gave several, check that "
                     "the given taxIDs and names are compatible.")
    # widgets = [progressbar.BouncingBar(marker=progressbar.RotatingMarker(markers="◐◓◑◒")),
    #            " - ", progressbar.Timer()]
    # bar = progressbar.ProgressBar(widgets=widgets, max_value=20, term_width=50)
    try:
        # Download genomes
        # ret = None
        # while True:
        #     if ret:
        #         break
        #     bar.update()
        ret = ngd.download(**keyargs)
    except:  # pragma: no cover
        # Error message if crash during execution of ncbi_genome_download
        logger.error(error_message)
        # bar.finish()
        sys.exit(1)
    attempts = 0
    while ret == 75 and attempts < max_retries:  # pragma: no cover
        # bar.update()
        attempts += 1
        logging.error(('Downloading from NCBI failed due to a connection error, '
                       'retrying. Already retried so far: %s'), attempts)
        ret = ngd.download(**keyargs)
    # bar.finish()
    # Message if NGD did not manage to download the genomes (wrong species name/taxid)
    if ret != 0:
        # Error message
        logger.error(error_message)
        sys.exit(1)
    nb_gen, db_dir = to_database(outdir, section)
    return db_dir, nb_gen

# This snippet appears to be the body of a Snakemake rule: `input`, `output`, and
# `threads` are assumed to be provided by Snakemake.
from ncbi_genome_download import download
from subprocess import run
from glob import glob

accession_dict = {
    'genbank': [],
    'refseq': []
}

# Sort requested accessions by section, skipping those already present on disk
# (GCF_* accessions live in refseq, GCA_* in genbank)
with open(input[0]) as fp:
    for accession in fp:
        accession = accession.strip()
        prefix, skip = accession.split('_', 1)
        section = 'refseq' if prefix == 'GCF' else 'genbank'
        if not glob(f'data/{section}/*/{accession}/*.fna*'):
            accession_dict[section].append(accession)

for section, accessions in accession_dict.items():
    if accessions:
        download(section=section,
                 file_formats='fasta,protein-fasta',
                 assembly_accessions=accessions,
                 output='data',
                 parallel=threads)
        gzip_files = glob(f'data/{section}/*/*/*.gz')
        if gzip_files:
            run(['gunzip'] + gzip_files)

with open(output[0], 'w') as fp:
    fp.write('OK')

def ngd_download(section_given, acc_ID, data_folder, debug,
                 section='genbank', assembly_level='complete', group_given='bacteria'):
    '''
    Function that calls and retrieves data from NCBI using the python package ngd.

    :param acc_ID: NCBI assembly accession ID to download.
    :param data_folder: Folder to store data.
    :param debug: True/False for debugging messages.

    :attention: Module ngd downloads data into a bacteria/archaea subfolder under the genbank or refseq folder.
    '''

    ##################################
    ## check if necessary to download
    ##################################

    ## get path
    print('+ Check data for ID: ', acc_ID)
    dir_path = os.path.join(data_folder, section_given, group_given, acc_ID)

    ## check if previously downloaded
    download = False
    if os.path.exists(dir_path):
        print('+ Folder already exists: ', dir_path)
        ## get files download
        (genome, prot, gff, gbk) = BacDup.scripts.functions.get_files_annotation(dir_path, debug)
        if (gbk):
            ## Only genbank format file is required
            download = False
        else:
            print('+ Not all necessary data is available. Download it again.')
            download = True
    else:
        download = True

    ## download data
    if download:
        print("\n+ Downloading data for: " + colored(acc_ID, 'green'))

        ## download in data folder provided
        if (debug):
            debug_message("ngd.download call", color="yellow")
            debug_message("dir_path: " + dir_path, color="yellow")
            debug_message("section_given: " + section_given, color="yellow")

        ## download
        if debug:
            debug_message(
                "section='%s', file_formats='genbank', assembly_level=%s, assembly_accessions=%s, output=%s, groups=%s"
                % (section_given, assembly_level, acc_ID, data_folder, group_given),
                color="yellow")
        try:
            ngd.download(section=section_given,
                         file_formats='genbank',
                         assembly_levels=assembly_level,
                         assembly_accessions=acc_ID,
                         output=data_folder,
                         groups=group_given)
        except Exception:
            # raise a proper exception object (the original raised a plain string)
            raise Exception(
                "A problem occurred when contacting NCBI for downloading id (%s) from %s"
                % (acc_ID, section_given))

        ## return empty
        if not os.path.isdir(dir_path):
            return False

        ## check if files are gunzip
        files = os.listdir(dir_path)
        files_list = []
        for f in files:
            if f.endswith('gz'):
                files_list.append(f)
                print("\t- Extracting files: ", f)
                HCGB.functions.files_functions.extract(dir_path + '/' + f, dir_path)
                #os.remove(dir_path + '/' + f) ## skip
    else:
        print('\t+ Data is already available, no need to download it again')

    print()
    ## return path where data is
    return (dir_path)

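# A hypothetical call of ngd_download(): download the GenBank record for one assembly
# accession into ./ncbi_data (the accession and folder are illustrative placeholders).
data_path = ngd_download("genbank", "GCA_000005845.2", "./ncbi_data", debug=False)
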
def main():
    '''Build and parse command line'''
    parser = argparse.ArgumentParser()
    parser.add_argument('domain',
                        choices=['all'] + ncbi_genome_download.SUPPORTED_DOMAINS,
                        help='The NCBI "domain" to download')
    parser.add_argument('-s', '--section', dest='section',
                        default='refseq', choices=['refseq', 'genbank'],
                        help='NCBI section to download')
    parser.add_argument('-F', '--format', dest='file_format',
                        default='genbank',
                        choices=['all'] + list(ncbi_genome_download.FORMAT_NAME_MAP.keys()),
                        help='Which format to download (default: genbank)')
    parser.add_argument('-l', '--assembly-level', dest='assembly_level',
                        default='all',
                        choices=['all'] + list(ncbi_genome_download.ASSEMBLY_LEVEL_MAP.keys()),
                        help='Assembly level of genomes to download (default: all)')
    parser.add_argument('-g', '--genus', dest='genus', default='',
                        help='Only download sequences of the provided genus. (default: unset, download all)')
    parser.add_argument('-T', '--species-taxid', dest='species_taxid',
                        help='Only download sequences of the provided species NCBI taxonomy ID. '
                             '(default: unset, download all)')
    parser.add_argument('-t', '--taxid', dest='taxid',
                        help='Only download sequences of the provided NCBI taxonomy ID. (default: unset, download all)')
    parser.add_argument('-o', '--output-folder', dest='output', default=os.getcwd(),
                        help='Create output hierarchy in specified folder (default: current directory)')
    parser.add_argument('-H', '--human-readable', dest='human_readable',
                        default=False, action='store_true',
                        help='Create links in human-readable hierarchy (might fail on Windows)')
    parser.add_argument('-u', '--uri', dest='uri', default=ncbi_genome_download.NCBI_URI,
                        help='NCBI base URI to use')
    parser.add_argument('-p', '--parallel', dest='parallel', default=1, type=int, metavar="N",
                        help='Run N downloads in parallel (default: 1)')
    parser.add_argument('-r', '--retries', dest='retries', default=0, type=int, metavar="N",
                        help='Retry download N times when connection to NCBI fails (default: 0)')
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='increase output verbosity')
    parser.add_argument('-d', '--debug', action='store_true', default=False,
                        help='print debugging information')
    parser.add_argument('-V', '--version', action='version',
                        version=ncbi_genome_download.__version__,
                        help='print version information')
    args = parser.parse_args()

    if args.debug:
        log_level = logging.DEBUG
    elif args.verbose:
        log_level = logging.INFO
    else:
        log_level = logging.WARNING
    logging.basicConfig(format='%(levelname)s: %(message)s', level=log_level)

    retries = 0
    ret = ncbi_genome_download.download(args)
    while ret == 75 and retries < args.retries:
        retries += 1
        logging.error('Downloading from NCBI failed due to a connection error, retrying. Retries so far: %s',
                      retries)
        ret = ncbi_genome_download.download(args)
    return ret