def download_file(file_url, dest_dir, aspera):
    """Download a single file, retrying once over FTP if the first try fails.

    Args:
        file_url: Location of the file without a scheme; ``'ftp://'`` is
            prepended for FTP transfers.
        dest_dir: Directory handed to the ``utils`` download helpers.
        aspera: When truthy the first attempt uses
            ``utils.get_aspera_file``; otherwise ``utils.get_ftp_file``.

    Returns:
        None. A failure after both attempts is only reported on stdout.
    """
    # A single string argument prints the same text whether ``print`` is the
    # Python 2 statement or the Python 3 function (a two-item argument list
    # would be rendered as a tuple under Python 2).
    print('Downloading file ' + file_url)
    if aspera:
        success = utils.get_aspera_file(file_url, dest_dir)
    else:
        success = utils.get_ftp_file('ftp://' + file_url, dest_dir)
    # Second attempt always goes over FTP, whichever transport failed first.
    if not success:
        success = utils.get_ftp_file('ftp://' + file_url, dest_dir)
    if not success:
        print('Failed to download file')
def download_versioned_wgs(dest_dir, accession, format):
    """Fetch the WGS set file for a versioned WGS accession.

    The public set is tried first and the suppressed set second; if neither
    download succeeds a message is printed to stdout.

    Args:
        dest_dir: Directory handed to ``utils.get_ftp_file``.
        accession: WGS accession; its first six characters form the set
            prefix used to build the FTP URLs.
        format: Requested file format; ``None`` selects
            ``utils.EMBL_FORMAT``. (The name shadows the builtin but is kept
            because it is part of the signature callers use.)

    Returns:
        None.
    """
    prefix = accession[:6]
    if format is None:
        format = utils.EMBL_FORMAT
    public_set_url = utils.get_wgs_ftp_url(prefix, utils.PUBLIC, format)
    supp_set_url = utils.get_wgs_ftp_url(prefix, utils.SUPPRESSED, format)
    success = utils.get_ftp_file(public_set_url, dest_dir)
    if not success:
        # Fall back to the suppressed set when the public one is unavailable.
        success = utils.get_ftp_file(supp_set_url, dest_dir)
    if not success:
        print('No WGS set file available for ' + accession + ', format ' + format)
        # Point users at a reachable support page rather than an unusable
        # placeholder address.
        print('Please contact ENA (https://www.ebi.ac.uk/ena/browser/support) '
              'if you feel this set should be available')
def download_unversioned_wgs(dest_dir, accession, format):
    """Fetch the WGS set file for an unversioned WGS accession.

    Looks up the public set URL first; when none is found the suppressed set
    URL is looked up instead. If neither lookup yields a URL a message is
    printed to stdout. The result of the download call itself is not checked.

    Args:
        dest_dir: Directory handed to ``utils.get_ftp_file``.
        accession: WGS accession; its first four characters form the set
            prefix used for the URL lookups.
        format: Requested file format; ``None`` selects
            ``utils.EMBL_FORMAT``. (The name shadows the builtin but is kept
            because it is part of the signature callers use.)

    Returns:
        None.
    """
    prefix = accession[:4]
    if format is None:
        format = utils.EMBL_FORMAT

    public_set_url = utils.get_nonversioned_wgs_ftp_url(prefix, utils.PUBLIC, format)
    if public_set_url is not None:
        utils.get_ftp_file(public_set_url, dest_dir)
        return

    # No public set: try the suppressed set before giving up.
    supp_set_url = utils.get_nonversion_supp_wgs_ftp_url(prefix, format)
    if supp_set_url is not None:
        utils.get_ftp_file(supp_set_url, dest_dir)
        return

    print('No WGS set file available for ' + accession + ', format ' + format)
    # Point users at a reachable support page rather than an unusable
    # placeholder address.
    print('Please contact ENA (https://www.ebi.ac.uk/ena/browser/support) '
          'if you feel this set should be available')
def download_versioned_wgs(dest_dir, accession, output_format):
    """Download the WGS set for a versioned accession, public set first.

    Args:
        dest_dir: Directory handed to ``utils.get_ftp_file``.
        accession: WGS accession; the leading six characters identify the set.
        output_format: Format passed through to ``utils.get_wgs_ftp_url``.

    Returns:
        None. When neither the public nor the suppressed set can be fetched,
        a notice is printed to stdout.
    """
    set_prefix = accession[:6]
    # Both URLs are resolved before any transfer is attempted.
    public_url = utils.get_wgs_ftp_url(set_prefix, utils.PUBLIC, output_format)
    suppressed_url = utils.get_wgs_ftp_url(set_prefix, utils.SUPPRESSED, output_format)

    if utils.get_ftp_file(public_url, dest_dir):
        return
    if utils.get_ftp_file(suppressed_url, dest_dir):
        return

    notice = 'No WGS set file available for {0}, format {1}'.format(accession, output_format)
    print(notice)
    print('Please contact ENA (https://www.ebi.ac.uk/ena/browser/support) if you feel this set should be available')
def download_unversioned_wgs(dest_dir, accession, output_format):
    """Fetch the WGS set file for an unversioned WGS accession.

    Looks up the public set URL first; when none is found the suppressed set
    URL is looked up instead. If neither lookup yields a URL a message is
    printed to stdout. The result of the download call itself is not checked.

    Args:
        dest_dir: Directory handed to ``utils.get_ftp_file``.
        accession: WGS accession; its first four characters form the set
            prefix used for the URL lookups.
        output_format: Requested file format; ``None`` selects
            ``utils.EMBL_FORMAT``.

    Returns:
        None.
    """
    prefix = accession[:4]
    if output_format is None:
        output_format = utils.EMBL_FORMAT

    public_set_url = utils.get_nonversioned_wgs_ftp_url(prefix, utils.PUBLIC, output_format)
    if public_set_url is not None:
        utils.get_ftp_file(public_set_url, dest_dir)
        return

    # No public set: try the suppressed set before giving up.
    supp_set_url = utils.get_nonversioned_wgs_ftp_url(prefix, utils.SUPPRESSED, output_format)
    if supp_set_url is not None:
        utils.get_ftp_file(supp_set_url, dest_dir)
        return

    # Single-argument print(...) calls behave the same as the Python 2
    # statement and are also valid Python 3.
    print('No WGS set file available for {0}, format {1}'.format(accession, output_format))
    # Point users at a reachable support page rather than an unusable
    # placeholder address.
    print('Please contact ENA (https://www.ebi.ac.uk/ena/browser/support) '
          'if you feel this set should be available')
def download_assembly(dest_dir, accession, output_format, fetch_wgs, extract_wgs, expanded, quiet=False):
    """Download an assembly's XML record, sequence report, sequences and WGS set.

    A directory named after the accession is created under ``dest_dir`` and
    everything is downloaded into it.

    Args:
        dest_dir: Parent directory for the per-assembly directory.
        accession: Assembly accession to download.
        output_format: Format for sequence/WGS downloads; ``None`` selects
            ``utils.EMBL_FORMAT``.
        fetch_wgs: Whether to fetch the WGS set. Forced to ``True`` when no
            sequence report could be downloaded, or when the report yields
            WGS scaffolds.
        extract_wgs: When truthy and WGS scaffolds were found,
            ``extract_wgs_scaffolds`` is called on the downloaded set.
        expanded: Passed through to ``download_sequences``.
        quiet: When truthy, progress messages are suppressed.

    Returns:
        None.
    """
    if output_format is None:
        output_format = utils.EMBL_FORMAT
    assembly_dir = os.path.join(dest_dir, accession)
    utils.create_dir(assembly_dir)
    # download xml
    utils.download_record(assembly_dir, accession, utils.XML_FORMAT)
    local_xml = utils.get_destination_file(assembly_dir, accession, utils.XML_FORMAT)
    # get wgs and sequence report info
    wgs_set, sequence_report = parse_assembly_xml(local_xml)
    has_sequence_report = False
    # download sequence report
    if sequence_report is not None:
        has_sequence_report = utils.get_ftp_file(sequence_report, assembly_dir)
    # parse sequence report and download sequences
    wgs_scaffolds = []
    wgs_scaffold_cnt = 0
    if has_sequence_report:
        # The report was saved under its basename inside assembly_dir.
        wgs_scaffolds = download_sequences(sequence_report.split('/')[-1], assembly_dir,
                                           output_format, expanded, quiet)
        wgs_scaffold_cnt = len(wgs_scaffolds)
        if wgs_scaffold_cnt > 0:
            if not quiet:
                # print(...) with one argument is valid on Python 2 and 3.
                print('Assembly contains {} WGS scaffolds, will fetch WGS set'.format(wgs_scaffold_cnt))
            fetch_wgs = True
    else:
        # Without a sequence report the WGS set is always fetched.
        fetch_wgs = True
    # download wgs set if needed
    if wgs_set is not None and fetch_wgs:
        if not quiet:
            print('fetching wgs set')
        sequenceGet.download_wgs(assembly_dir, wgs_set, output_format)
    # extract wgs scaffolds from WGS file
    if wgs_scaffold_cnt > 0 and extract_wgs:
        extract_wgs_scaffolds(assembly_dir, wgs_scaffolds, wgs_set, output_format, quiet)
def download_assembly(dest_dir, accession, output_format, fetch_wgs, quiet=False):
    """Download an assembly's XML record, sequence report, WGS set and sequences.

    A directory named after the accession is created under ``dest_dir`` and
    everything is downloaded into it.

    Args:
        dest_dir: Parent directory for the per-assembly directory.
        accession: Assembly accession to download.
        output_format: Format for sequence/WGS downloads; ``None`` selects
            ``utils.EMBL_FORMAT``.
        fetch_wgs: When truthy and the record names a WGS set, that set is
            downloaded.
        quiet: When truthy, progress messages are suppressed.

    Returns:
        None.
    """
    if output_format is None:
        output_format = utils.EMBL_FORMAT
    assembly_dir = os.path.join(dest_dir, accession)
    utils.create_dir(assembly_dir)
    # download xml
    utils.download_record(assembly_dir, accession, utils.XML_FORMAT)
    local_xml = utils.get_destination_file(assembly_dir, accession, utils.XML_FORMAT)
    # get wgs and sequence report info
    wgs_set, sequence_report = parse_assembly_xml(local_xml)
    has_sequence_report = False
    # download sequence report
    if sequence_report is not None:
        has_sequence_report = utils.get_ftp_file(sequence_report, assembly_dir)
    # download wgs set if needed
    if wgs_set is not None and fetch_wgs:
        if not quiet:
            # print(...) with one argument is valid on Python 2 and 3.
            print('fetching wgs set')
        sequenceGet.download_wgs(assembly_dir, wgs_set, output_format)
    # parse sequence report and download sequences
    if has_sequence_report:
        # The report was saved under its basename inside assembly_dir.
        download_sequences(sequence_report.split('/')[-1], assembly_dir, output_format, quiet)
def attempt_file_download(file_url, dest_dir, md5, aspera):
    """Make one download attempt for a file and return the helper's result.

    Args:
        file_url: Location of the file without a scheme; ``'ftp://'`` is
            prepended for FTP transfers.
        dest_dir: Directory handed to the ``utils`` download helper.
        md5: Expected checksum, or ``None`` to use the helpers that take no
            checksum.
        aspera: Truthy to use the Aspera helpers, otherwise the FTP helpers.

    Returns:
        Whatever the selected ``utils`` helper returns.
    """
    verify_checksum = md5 is not None

    # Announce the transfer first; the wording depends on the checksum mode.
    if verify_checksum:
        print('Downloading file with md5 check:' + file_url)
    else:
        print('Downloading file:' + file_url)

    if aspera:
        if verify_checksum:
            return utils.get_aspera_file_with_md5_check(file_url, dest_dir, md5)
        return utils.get_aspera_file(file_url, dest_dir)

    ftp_location = 'ftp://' + file_url
    if verify_checksum:
        return utils.get_ftp_file_with_md5_check(ftp_location, dest_dir, md5)
    return utils.get_ftp_file(ftp_location, dest_dir)