Exemplo n.º 1
0
def download_ucsc_tables(genome,
                         output_dir):
    """
    Download all relevant UCSC tables for a given genome.

    Tables are fetched into the "ucsc" subdirectory of ``output_dir``
    (created through utils.make_dir) and each downloaded table is then
    handed to utils.gunzip_file. A table is skipped when its uncompressed
    file is already present, or when download_utils.download_url
    returns None for it.
    """
    ucsc_dir = os.path.join(output_dir, "ucsc")
    utils.make_dir(ucsc_dir)
    # Single-argument, parenthesised prints behave the same whether print
    # is a statement or a function.
    print("Download UCSC tables...")
    print("  - Output dir: %s" % (ucsc_dir))
    for label, url in get_ucsc_tables_urls(genome):
        print("Downloading %s" % (label))
        compressed_path = os.path.join(ucsc_dir, label)
        # Dropping the last three characters yields the uncompressed name
        # (assumes every table label ends in ".gz" -- TODO confirm).
        uncompressed_path = compressed_path[:-3]
        if os.path.isfile(uncompressed_path):
            # Uncompressed table is already on disk; nothing to fetch.
            print("Got %s already. Skipping download.." % (uncompressed_path))
            continue
        # A None result from download_url is treated as a failed download.
        if download_utils.download_url(url, ucsc_dir) is None:
            print("Failed to get %s, skipping.." % (label))
            continue
        # Uncompress the freshly downloaded table into the same directory.
        utils.gunzip_file(compressed_path, ucsc_dir)
Exemplo n.º 2
0
def download_ncbi_fasta(access_id, output_dir):
    """
    Download an NCBI FASTA record by accession number and save it
    as <access_id>.fa in the given output directory.

    The record is requested from the nuccore database through the
    E-utilities efetch endpoint, as plain-text FASTA.

    Returns whatever download_utils.download_url returns (presumably
    the path of the downloaded file, or None on failure -- confirm
    against download_utils).
    """
    # NCBI's documented E-utilities base URL uses the https scheme.
    ncbi_url = ("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
                "?db=nuccore&id=%s&rettype=fasta&retmode=text" % (access_id))
    # FASTA is text, so ask the downloader for a non-binary transfer.
    url_filename = download_utils.download_url(ncbi_url,
                                               output_dir,
                                               basename="%s.fa" % (access_id),
                                               binary=False)
    return url_filename