def download_ucsc_tables(genome, output_dir):
    """
    Fetch the UCSC tables for ``genome`` into the ``ucsc`` subdirectory
    of ``output_dir``.

    The (label, url) pairs come from ``get_ucsc_tables_urls(genome)``.
    For each pair, the download is skipped when the uncompressed file is
    already on disk; otherwise the table is downloaded and gunzipped in
    place. A download whose status is None is reported and skipped.
    """
    ucsc_dir = os.path.join(output_dir, "ucsc")
    utils.make_dir(ucsc_dir)
    print("Download UCSC tables...")
    print(" - Output dir: %s" % (ucsc_dir))
    for label, url in get_ucsc_tables_urls(genome):
        print("Downloading %s" % (label))
        gz_path = os.path.join(ucsc_dir, label)
        # Drop the trailing three characters (presumably the ".gz"
        # suffix -- confirm against get_ucsc_tables_urls) to get the
        # name of the uncompressed table.
        plain_path = gz_path[:-3]
        if os.path.isfile(plain_path):
            # Uncompressed copy already present; nothing to fetch.
            print("Got %s already. Skipping download.." % (plain_path))
        elif download_utils.download_url(url, ucsc_dir) is None:
            # A None status is treated as a failed download.
            print("Failed to get %s, skipping.." % (label))
        else:
            utils.gunzip_file(gz_path, ucsc_dir)
def download_ncbi_fasta(access_id, output_dir):
    """
    Download the NCBI nuccore FASTA record for an accession number and
    save it as ``<access_id>.fa`` in the given output directory.

    Parameters:
      access_id  -- accession identifier substituted into the efetch
                    query's ``id`` field.
      output_dir -- directory handed to download_utils.download_url as
                    the download destination.

    Returns the value of download_utils.download_url unchanged
    (presumably the name of the saved file -- confirm against that
    helper).
    """
    # NCBI E-utilities are served over HTTPS; the plain-http endpoint is
    # no longer the documented base URL, so request https directly
    # instead of relying on a redirect.
    ncbi_url = ("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
                "?db=nuccore&id=%s&rettype=fasta&retmode=text"
                % (access_id))
    # The record is requested as plain text, so download in non-binary
    # mode.
    url_filename = download_utils.download_url(ncbi_url,
                                               output_dir,
                                               basename="%s.fa" % (access_id),
                                               binary=False)
    return url_filename