Exemplo n.º 1
0
def ensembl(args):
    """
    %prog ensembl species

    Retrieve genomes and annotations from ensembl FTP. Available species
    listed below. Use comma to give a list of species to download. For example:

    $ %prog ensembl danio_rerio,gasterosteus_aculeatus
    """
    # NOTE: the docstring above is passed to the option parser as usage text,
    # so implementation notes are kept in comments rather than in the docstring.
    parser = OptionParser(ensembl.__doc__)
    parser.add_option("--version", default="75", help="Ensembl version")
    options, positional = parser.parse_args(args)

    release_url = "ftp://ftp.ensembl.org/pub/release-{0}/".format(options.version)

    # Listing entries of the release's fasta/ directory that contain no dot
    # are the names accepted as species.
    available = [
        entry for entry in ls_ftp(release_url + "fasta/") if "." not in entry
    ]

    # Extend the usage text with the tiled species list before the argument
    # check, so the help printed on misuse shows what can be requested.
    parser.set_usage("\n".join([ensembl.__doc__, tile(available)]))

    if len(positional) != 1:
        sys.exit(not parser.print_help())

    # The single positional argument is a comma-separated list of species.
    for name in positional[0].split(","):
        download_species_ensembl(name, available, release_url)
Exemplo n.º 2
0
Arquivo: fetch.py Projeto: yangjl/jcvi
def phytozome(args):
    """
    %prog phytozome species

    Retrieve genomes and annotations from phytozome FTP. Available species
    listed below. Use comma to give a list of species to download. For example:

    $ %prog phytozome Athaliana,Vvinifera,Osativa,Sbicolor,Slycopersicum
    """
    # NOTE: the docstring above is passed to the option parser as usage text,
    # so implementation notes are kept in comments rather than in the docstring.
    parser = OptionParser(phytozome.__doc__)
    parser.add_option(
        "--version",
        default="9.0",
        help="Phytozome version [default: %default]",
    )
    parser.add_option(
        "--assembly",
        default=False,
        action="store_true",
        help="Download assembly [default: %default]",
    )
    options, positional = parser.parse_args(args)

    base_url = "ftp://ftp.jgi-psf.org/pub/compgen/phytozome/v{0}/".format(
        options.version
    )

    # Listing entries without a dot are the names accepted as species.
    available = [entry for entry in ls_ftp(base_url) if "." not in entry]

    # Extend the usage text with the tiled species list before the argument
    # check, so the help printed on misuse shows what can be requested.
    parser.set_usage("\n".join([phytozome.__doc__, tile(available)]))

    if len(positional) != 1:
        sys.exit(not parser.print_help())

    # The single positional argument is a comma-separated list of species.
    for name in positional[0].split(","):
        download_species_phytozome(
            name, available, base_url, assembly=options.assembly
        )
Exemplo n.º 3
0
Arquivo: fetch.py Projeto: yangjl/jcvi
def ensembl(args):
    """
    %prog ensembl species

    Retrieve genomes and annotations from ensembl FTP. Available species
    listed below. Use comma to give a list of species to download. For example:

    $ %prog ensembl danio_rerio,gasterosteus_aculeatus
    """
    # NOTE: the docstring above is passed to the option parser as usage text,
    # so implementation notes are kept in comments rather than in the docstring.
    parser = OptionParser(ensembl.__doc__)
    parser.add_option(
        "--version",
        default="75",
        help="Ensembl version [default: %default]",
    )
    options, positional = parser.parse_args(args)

    release_url = "ftp://ftp.ensembl.org/pub/release-{0}/".format(options.version)
    fasta_listing = ls_ftp(release_url + "fasta/")

    # Listing entries without a dot are the names accepted as species.
    available = [entry for entry in fasta_listing if "." not in entry]

    # Extend the usage text with the tiled species list before the argument
    # check, so the help printed on misuse shows what can be requested.
    usage_text = "\n".join([ensembl.__doc__, tile(available)])
    parser.set_usage(usage_text)

    if len(positional) != 1:
        sys.exit(not parser.print_help())

    # The single positional argument is a comma-separated list of species.
    requested = positional[0].split(",")
    for name in requested:
        download_species_ensembl(name, available, release_url)
Exemplo n.º 4
0
def phytozome(args):
    """
    %prog phytozome species

    Retrieve genomes and annotations from phytozome FTP. Available species
    listed below. Use comma to give a list of species to download. For example:

    $ %prog phytozome Athaliana,Vvinifera,Osativa,Sbicolor,Slycopersicum
    """
    # NOTE: the docstring above is passed to the option parser as usage text,
    # so implementation notes are kept in comments rather than in the docstring.
    from jcvi.formats.gff import bed as gff_bed
    from jcvi.formats.fasta import format as fasta_format

    parser = OptionParser(phytozome.__doc__)
    parser.add_option(
        "--version",
        default="9.0",
        help="Phytozome version [default: %default]",
    )
    parser.add_option(
        "--assembly",
        default=False,
        action="store_true",
        help="Download assembly [default: %default]",
    )
    parser.add_option(
        "--format",
        default=False,
        action="store_true",
        help="Format to CDS and BED for synteny inference",
    )
    options, positional = parser.parse_args(args)

    base_url = "ftp://ftp.jgi-psf.org/pub/compgen/phytozome/v{0}/".format(
        options.version
    )

    # Listing entries without a dot are the names accepted as species.
    available = [entry for entry in ls_ftp(base_url) if "." not in entry]

    # Extend the usage text with the tiled species list before the argument
    # check, so the help printed on misuse shows what can be requested.
    parser.set_usage("\n".join([phytozome.__doc__, tile(available)]))

    if len(positional) != 1:
        sys.exit(not parser.print_help())

    requested = positional[0]
    if requested == "all":
        # "all" expands to every species found in the listing.
        requested = ",".join(available)

    # Species whose BED should be keyed on "ID" instead of "Name" (none so far).
    id_keyed = set()
    # We have to watch out when the gene names and mRNA names mismatch, in which
    # case we just extract the mRNA names
    mrna_typed = {
        "Cclementina", "Creinhardtii", "Csinensis", "Fvesca",
        "Lusitatissimum", "Mesculenta", "Mguttatus", "Ppersica",
        "Pvirgatum", "Rcommunis", "Sitalica", "Tcacao",
        "Thalophila", "Vcarteri", "Vvinifera", "Zmays",
    }

    for name in requested.split(","):
        gff, fa = download_species_phytozome(
            name, available, base_url, assembly=options.assembly
        )
        if options.format:
            # Pick the GFF feature type and attribute key for this species,
            # then write <species>.bed and <species>.cds.
            feature_type = "mRNA" if name in mrna_typed else "gene"
            attr_key = "ID" if name in id_keyed else "Name"
            gff_bed(
                [
                    gff,
                    "--type={}".format(feature_type),
                    "--key={}".format(attr_key),
                    "-o",
                    name + ".bed",
                ]
            )
            fasta_format([fa, name + ".cds", r"--sep=|"])
Exemplo n.º 5
0
def phytozome(args):
    """
    %prog phytozome species

    Retrieve genomes and annotations from phytozome FTP. Available species
    listed below. Use comma to give a list of species to download. For example:

    $ %prog phytozome Athaliana,Vvinifera,Osativa,Sbicolor,Slycopersicum
    """
    # NOTE: the docstring above is passed to the option parser as usage text,
    # so implementation notes are kept in comments rather than in the docstring.
    from jcvi.formats.gff import bed as gff_bed
    from jcvi.formats.fasta import format as fasta_format

    parser = OptionParser(phytozome.__doc__)
    parser.add_option(
        "--version",
        default="9.0",
        help="Phytozome version [default: %default]",
    )
    parser.add_option(
        "--assembly",
        default=False,
        action="store_true",
        help="Download assembly [default: %default]",
    )
    parser.add_option(
        "--format",
        default=False,
        action="store_true",
        help="Format to CDS and BED for synteny inference",
    )
    options, positional = parser.parse_args(args)

    version_url = "ftp://ftp.jgi-psf.org/pub/compgen/phytozome/v{0}/".format(
        options.version
    )

    # Listing entries without a dot are the names accepted as species.
    known_species = [item for item in ls_ftp(version_url) if "." not in item]

    # Extend the usage text with the tiled species list before the argument
    # check, so the help printed on misuse shows what can be requested.
    usage_text = "\n".join([phytozome.__doc__, tile(known_species)])
    parser.set_usage(usage_text)

    if len(positional) != 1:
        sys.exit(not parser.print_help())

    wanted = positional[0]
    if wanted == "all":
        # "all" expands to every species found in the listing.
        wanted = ",".join(known_species)
    wanted = wanted.split(",")

    # Species whose BED should be keyed on "ID" instead of "Name" (none so far).
    keyed_by_id = set()
    # We have to watch out when the gene names and mRNA names mismatch, in which
    # case we just extract the mRNA names
    typed_as_mrna = {
        "Cclementina", "Creinhardtii", "Csinensis", "Fvesca",
        "Lusitatissimum", "Mesculenta", "Mguttatus", "Ppersica",
        "Pvirgatum", "Rcommunis", "Sitalica", "Tcacao",
        "Thalophila", "Vcarteri", "Vvinifera", "Zmays",
    }

    for sp in wanted:
        gff, fa = download_species_phytozome(
            sp, known_species, version_url, assembly=options.assembly
        )
        if not options.format:
            continue

        # Per-species choice of GFF feature type and attribute key.
        ttype = "mRNA" if sp in typed_as_mrna else "gene"
        key = "ID" if sp in keyed_by_id else "Name"

        bed_out = sp + ".bed"
        cds_out = sp + ".cds"
        bed_args = [gff, "--type={}".format(ttype), "--key={}".format(key)]
        bed_args += ["-o", bed_out]
        gff_bed(bed_args)
        fasta_format([fa, cds_out, r"--sep=|"])
Exemplo n.º 6
0
def phytozome9(args):
    """
    %prog phytozome9 species

    Retrieve genomes and annotations from phytozome FTP. Available species
    listed below. Use comma to give a list of species to download. For example:

    $ %prog phytozome9 Athaliana,Vvinifera,Osativa,Sbicolor,Slycopersicum
    """
    # NOTE: the docstring above is passed to the option parser as usage text,
    # so implementation notes are kept in comments rather than in the docstring.
    #
    # args: list of command-line tokens (options plus exactly one positional,
    # a comma-separated species list or the literal "all").
    # Exits via sys.exit after printing help when the positional count is wrong.
    p = OptionParser(phytozome9.__doc__)
    p.add_option(
        "--assembly",
        default=False,
        action="store_true",
        help="Download assembly",
    )
    p.add_option(
        "--format",
        default=False,
        action="store_true",
        help="Format to CDS and BED for synteny inference",
    )
    opts, args = p.parse_args(args)

    # This command is pinned to the v9.0 release directory.
    version = "9.0"
    url = "ftp://ftp.jgi-psf.org/pub/compgen/phytozome/v{0}/".format(version)
    # Listing entries without a dot are the names accepted as species.
    valid_species = [x for x in ls_ftp(url) if "." not in x]

    # Extend the usage text with the tiled species list before the argument
    # check, so the help printed on misuse shows what can be requested.
    doc = "\n".join((phytozome9.__doc__, tile(valid_species)))
    p.set_usage(doc)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (species,) = args
    if species == "all":
        # "all" expands to every species found in the listing.
        species = ",".join(valid_species)

    species = species.split(",")

    for s in species:
        res = download_species_phytozome9(
            s, valid_species, url, assembly=opts.assembly
        )
        if not res:
            # Nothing usable came back for this species. Previously execution
            # fell through to res.get(...), which raises AttributeError when
            # res is None and formats with missing paths when res is empty.
            # Report the problem and move on to the next species instead.
            logging.error("No files downloaded")
            continue
        gff, cdsfa = res.get("gff"), res.get("cds")
        if opts.format:
            format_bed_and_cds(s, gff, cdsfa)
Exemplo n.º 7
0
def phytozome(args):
    """
    %prog phytozome species

    Retrieve genomes and annotations from phytozome FTP. Available species
    listed below. Use comma to give a list of species to download. For example:

    $ %prog phytozome Athaliana,Vvinifera,Osativa,Sbicolor,Slycopersicum
    """
    # NOTE: the docstring above is passed to the option parser as usage text,
    # so implementation notes are kept in comments rather than in the docstring.
    from jcvi.formats.gff import bed as gff_bed
    from jcvi.formats.fasta import format as fasta_format

    parser = OptionParser(phytozome.__doc__)
    parser.add_option(
        "--version",
        default="9.0",
        help="Phytozome version [default: %default]",
    )
    parser.add_option(
        "--assembly",
        default=False,
        action="store_true",
        help="Download assembly [default: %default]",
    )
    parser.add_option(
        "--format",
        default=False,
        action="store_true",
        help="Format to CDS and BED for synteny inference",
    )
    options, positional = parser.parse_args(args)

    base_url = "ftp://ftp.jgi-psf.org/pub/compgen/phytozome/v{0}/".format(
        options.version
    )

    # Listing entries without a dot are the names accepted as species.
    available = [entry for entry in ls_ftp(base_url) if "." not in entry]

    # Extend the usage text with the tiled species list before the argument
    # check, so the help printed on misuse shows what can be requested.
    parser.set_usage("\n".join([phytozome.__doc__, tile(available)]))

    if len(positional) != 1:
        sys.exit(not parser.print_help())

    # The single positional argument is a comma-separated list of species.
    for name in positional[0].split(","):
        gff, fa = download_species_phytozome(
            name, available, base_url, assembly=options.assembly
        )
        if options.format:
            # Write <species>.bed and <species>.cds from the downloaded files.
            gff_bed([gff, "--phytozome", "-o", name + ".bed"])
            fasta_format([fa, name + ".cds", r"--sep=|"])
Exemplo n.º 8
0
def phytozome(args):
    """
    %prog phytozome species

    Retrieve genomes and annotations from phytozome FTP. Available species
    listed below. Use comma to give a list of species to download. For example:

    $ %prog phytozome Athaliana,Vvinifera,Osativa,Sbicolor,Slycopersicum
    """
    # NOTE: the docstring above is passed to the option parser as usage text,
    # so implementation notes are kept in comments rather than in the docstring.
    parser = OptionParser(phytozome.__doc__)
    parser.add_option(
        "--version",
        default="9.0",
        help="Phytozome version [default: %default]",
    )
    parser.add_option(
        "--assembly",
        default=False,
        action="store_true",
        help="Download assembly [default: %default]",
    )
    options, positional = parser.parse_args(args)

    version_url = "ftp://ftp.jgi-psf.org/pub/compgen/phytozome/v{0}/".format(
        options.version
    )

    # Listing entries without a dot are the names accepted as species.
    known_species = [item for item in ls_ftp(version_url) if "." not in item]

    # Extend the usage text with the tiled species list before the argument
    # check, so the help printed on misuse shows what can be requested.
    usage_text = "\n".join([phytozome.__doc__, tile(known_species)])
    parser.set_usage(usage_text)

    if len(positional) != 1:
        sys.exit(not parser.print_help())

    # The single positional argument is a comma-separated list of species.
    wanted = positional[0].split(",")
    for sp in wanted:
        download_species_phytozome(
            sp, known_species, version_url, assembly=options.assembly
        )
Exemplo n.º 9
0
def phytozome(args):
    """
    %prog phytozome species

    Retrieve genomes and annotations from phytozome using Globus API. Available
    species listed below. Use comma to give a list of species to download. For
    example:

    $ %prog phytozome Athaliana,Vvinifera,Osativa,Sbicolor,Slycopersicum

    The downloader will prompt you to enter Phytozome user name and password
    during downloading. Please register for a login at:
    https://phytozome.jgi.doe.gov/pz/portal.html.
    """
    # NOTE: the docstring above is passed to the option parser as usage text,
    # so implementation notes are kept in comments rather than in the docstring.
    #
    # args: list of command-line tokens (options plus exactly one positional,
    # a comma-separated species list or the literal "all").
    # Exits with status 1 when cookies or the directory listing cannot be
    # obtained, and via sys.exit after printing help when the positional
    # count is wrong.
    from jcvi.apps.biomart import GlobusXMLParser

    p = OptionParser(phytozome.__doc__)
    p.add_option(
        "--version",
        default="12",
        choices=("9", "10", "11", "12", "12_unrestricted", "13"),
        help="Phytozome version",
    )
    p.add_option(
        "--assembly",
        default=False,
        action="store_true",
        help="Download assembly",
    )
    p.add_option(
        "--format",
        default=False,
        action="store_true",
        help="Format to CDS and BED for synteny inference",
    )
    p.set_downloader()
    opts, args = p.parse_args(args)

    downloader = opts.downloader
    # Local file that receives the per-version directory listing
    directory_listing = ".phytozome_directory_V{}.xml".format(opts.version)
    # Get directory listing
    base_url = "http://genome.jgi.doe.gov"
    dlist = "{}/ext-api/downloads/get-directory?organism=PhytozomeV{}".format(
        base_url, opts.version
    )

    # Make sure we have valid cookies
    cookies = get_cookies()
    if cookies is None:
        logging.error("Error fetching cookies ... cleaning up")
        FileShredder([directory_listing])
        sys.exit(1)

    # Proceed to use the cookies and download the species list
    try:
        download(
            dlist,
            filename=directory_listing,
            cookies=cookies,
            downloader=downloader,
        )
        g = GlobusXMLParser(directory_listing)
    except BaseException:
        # Explicit spelling of the former bare "except:". Any failure here,
        # including an interrupt, removes the listing and the cookies and
        # exits with status 1.
        logging.error("Error downloading directory listing ... cleaning up")
        FileShredder([directory_listing, cookies])
        sys.exit(1)

    genomes = g.get_genomes()
    valid_species = genomes.keys()
    species_tile = tile(valid_species)
    # Extend the usage text with the tiled species list before the argument
    # check, so the help printed on misuse shows what can be requested.
    p.set_usage("\n".join((phytozome.__doc__, species_tile)))

    if len(args) != 1:
        sys.exit(not p.print_help())

    (species,) = args
    if species == "all":
        # "all" expands to every species found in the listing.
        species = ",".join(valid_species)

    species = species.split(",")
    for s in species:
        res = download_species_phytozome(
            genomes,
            s,
            valid_species,
            base_url,
            cookies,
            assembly=opts.assembly,
            downloader=downloader,
        )
        if not res:
            # Nothing usable came back for this species. Previously execution
            # fell through to res.get(...), which raises AttributeError when
            # res is None and formats with missing paths when res is empty.
            # Report the problem and move on to the next species instead.
            logging.error("No files downloaded")
            continue
        gff, fa = res.get("gff"), res.get("cds")
        if opts.format:
            format_bed_and_cds(s, gff, fa)