示例#1
0
def test_write_file_yaml(tmpdir):
    """Check that write_file emits the expected YAML text for VALUE."""
    target = tmpdir.mkdir("sub").join("outfile.yaml")
    tofile.write_file(abspath(target), VALUE)
    # The "sub" directory must be the only entry directly under tmpdir.
    top_level = tmpdir.listdir()
    assert len(top_level) == 1
    assert target.read() == EXAMPLE_YAML
    tmpdir.remove()
示例#2
0
def test_write_file_json(tmpdir):
    """Check that write_file emits the expected JSON text for VALUE."""
    target = tmpdir.mkdir("sub").join("outfile.json")
    tofile.write_file(abspath(target), VALUE)
    # The "sub" directory must be the only entry directly under tmpdir.
    top_level = tmpdir.listdir()
    assert len(top_level) == 1
    assert target.read() == EXAMPLE_JSON
    tmpdir.remove()
示例#3
0
def test_write_file_yaml_gz(tmpdir):
    """Check that write_file gzips output when the path ends in ``.yaml.gz``."""
    target = tmpdir.mkdir("sub").join("outfile.yaml.gz")
    target_name = abspath(target)
    tofile.write_file(target_name, VALUE)
    top_level = tmpdir.listdir()
    assert len(top_level) == 1
    # The first two bytes of the written file must be the gzip magic number.
    with open(target_name, "rb") as handle:
        magic = handle.read(2)
    assert binascii.hexlify(magic) == b"1f8b"
    tmpdir.remove()
示例#4
0
def test_write_file_plain(tmpdir):
    """Check that write_file stores a plain string unchanged in a text file."""
    content = "file content\n"
    target = tmpdir.mkdir("sub").join("outfile.txt")
    tofile.write_file(abspath(target), content)
    top_level = tmpdir.listdir()
    assert len(top_level) == 1
    # Reading the file back must return exactly the string that was written.
    assert target.read() == content
    tmpdir.remove()
示例#5
0
def test_write_file_invalid_path():
    """Check that write_file returns False for bad paths, plain and ``.gz``."""
    content = "file content\n"
    for bad_path in ("path/does/not/exist", "path/does/not/exist.gz"):
        assert tofile.write_file(bad_path, content) is False
示例#6
0
def main():
    """Assemble the metadata for one accession and write it to config.yaml.

    Command line options are parsed with docopt from the module docstring.
    Assembly, taxon, BUSCO, read and similarity-search settings are collected
    into a single metadata mapping, which is written to
    ``<outdir>/config.yaml``. When ``--download`` is set, the assembly FASTA,
    assembly report, BUSCO lineages and read files are fetched as well.

    Exits with status 1 if no assembly URL can be found for the accession.
    """
    opts = docopt(__doc__)
    accession = opts["<ACCESSION>"]
    outdir = opts["--out"]
    dbdir = opts["--db"]
    download = opts["--download"]

    # Every database directory sits under --db and carries the same
    # optional "_<suffix>" ending.
    db_suffix = opts["--db-suffix"]
    suffix = "_%s" % db_suffix if db_suffix else ""
    buscodir, uniprotdir, ntdir, taxdumpdir = [
        "%s/%s%s" % (dbdir, name, suffix)
        for name in ("busco", "uniprot", "nt", "taxdump")
    ]

    # Keep output in a per-accession directory.
    if not outdir.endswith(accession):
        outdir = "%s/%s" % (outdir, accession)
    os.makedirs(outdir, exist_ok=True)

    meta = parse_assembly_meta(accession)
    assembly_url = fetch_assembly_url(accession, opts["--api-key"])
    if assembly_url is None:
        LOGGER.error("Unable to find assembly URL")
        sys.exit(1)

    # All assembly-related files share the "<outdir>/assembly/<accession>" stem.
    assembly_dir = "%s/assembly" % outdir
    file_stem = "%s/%s" % (assembly_dir, accession)
    assembly_file = file_stem + ".fasta.gz"
    assembly_report = file_stem + ".report.txt"
    syn_filename = file_stem + ".synonyms.tsv"
    cat_filename = file_stem + ".categories.tsv"
    meta["assembly"].update({"file": assembly_file, "url": assembly_url})
    meta["fields"] = {
        "synonyms": {"file": syn_filename, "prefix": "insdc"},
        "categories": {"file": cat_filename},
    }

    if download:
        os.makedirs(buscodir, exist_ok=True)
        os.makedirs(assembly_dir, exist_ok=True)
        fetch_assembly_fasta(assembly_url, assembly_file)
        # The report URL is derived from the FASTA URL by swapping the ending.
        report_url = assembly_url.replace(
            "_genomic.fna.gz", "_assembly_report.txt"
        )
        fetch_assembly_report(
            report_url, assembly_report, cat_filename, syn_filename
        )

    taxon_meta = fetch_goat_data(meta["taxon"]["taxid"])
    add_taxon_to_meta(meta, taxon_meta)
    set_btk_version(meta)

    busco_sets = find_busco_lineages(taxon_meta["lineage"])
    if busco_sets:
        busco_settings = {
            "download_dir": buscodir,
            "lineages": busco_sets,
            "basal_lineages": [
                "eukaryota_odb10",
                "bacteria_odb10",
                "archaea_odb10",
            ],
        }
        meta["busco"].update(busco_settings)
    if download:
        fetch_busco_lineages(busco_sets, buscodir)

    # Search for reads by biosample first, then by any accessions given
    # with --reads.
    read_accessions = []
    biosample = meta["assembly"]["biosample"]
    if biosample:
        read_accessions.append(biosample)
    extra_reads = opts["--reads"]
    if extra_reads:
        read_accessions.extend(extra_reads)
    max_runs = int(opts["--read-runs"])
    sra = assembly_reads(read_accessions, max_runs, opts["--platforms"])
    if sra:
        max_coverage = opts["--coverage"]
        if max_coverage:
            meta["reads"].update({"coverage": {"max": int(max_coverage)}})
        readdir = "%s/reads" % outdir
        add_reads_to_meta(meta, sra, readdir)
        if download:
            os.makedirs(readdir, exist_ok=True)
            for library in sra:
                fetch_read_files(library)

    # Both diamond searches use the uniprot directory; blastn uses nt.
    search_paths = (
        ("blastn", ntdir),
        ("diamond_blastx", uniprotdir),
        ("diamond_blastp", uniprotdir),
    )
    for tool, dbpath in search_paths:
        meta["similarity"][tool].update({"path": dbpath})
    meta["settings"]["taxdump"] = taxdumpdir
    tofile.write_file("%s/config.yaml" % outdir, meta)
示例#7
0
def _report_table(indices, rows):
    """Build a header-plus-rows table from selected report columns.

    Args:
        indices: Mapping of output column name to the 0-based index of that
            column in each row. Must contain an "identifier" entry.
        rows: List of tab-split report rows (lists of strings).

    Returns:
        A list whose first item is the header (a list of column names) and
        whose remaining items hold one list of values per input row.

    Raises:
        IndexError: If a row has fewer fields than a requested index.
    """
    columns = {key: [row[idx] for row in rows] for key, idx in indices.items()}
    # Only columns with more than one distinct value are kept.
    header = [key for key, values in columns.items() if len(set(values)) > 1]
    if not header:
        # With zero or one data rows no column varies; fall back to the
        # identifier column so the table keeps a key column instead of
        # failing with IndexError on an empty header.
        header = ["identifier"]
    table = [header]
    table.extend([row[indices[key]] for key in header] for row in rows)
    return table


def parse_assembly_report(filename, cat_filename, syn_filename):
    """Parse synonyms and assembly level into tsv files.

    Reads the tab-separated report at ``filename``, skipping lines that start
    with "#", and writes two tables via ``tofile.write_file``: category
    columns to ``cat_filename`` and name/synonym columns to ``syn_filename``.
    Each table keeps only the columns holding more than one distinct value;
    if none does, the table contains just the "identifier" column.

    Args:
        filename: Path of the assembly report to read.
        cat_filename: Output path for the categories table.
        syn_filename: Output path for the synonyms table.

    Raises:
        IndexError: If a data row has fewer than eight tab-separated fields.
    """
    # Output column name -> 0-based column index in the report.
    category_indices = {
        "identifier": 4,
        "assembly_role": 1,
        "assembly_level": 3,
        "assembly_unit": 7,
    }
    synonym_indices = {
        "identifier": 4,
        "name": 0,
        "assigned_name": 2,
        "refseq_accession": 6,
    }
    with tofile.open_file_handle(filename) as fh:
        rows = [
            line.rstrip().split("\t")
            for line in fh
            if not line.startswith("#")
        ]
    tofile.write_file(cat_filename, _report_table(category_indices, rows))
    tofile.write_file(syn_filename, _report_table(synonym_indices, rows))
示例#8
0
                "bacteria_odb10",
                "archaea_odb10",
            ],
        })
    if opts["--download"]:
        fetch_busco_lineages(busco_sets, buscodir)
    read_accessions = []
    if meta["assembly"]["biosample"]:
        read_accessions = [meta["assembly"]["biosample"]]
    if opts["--reads"]:
        read_accessions += opts["--reads"]
    sra = assembly_reads(read_accessions, int(opts["--read-runs"]),
                         opts["--platforms"])
    if sra:
        if opts["--coverage"]:
            meta["reads"].update(
                {"coverage": {
                    "max": int(opts["--coverage"])
                }})
        readdir = "%s/reads" % outdir
        add_reads_to_meta(meta, sra, readdir)
        if opts["--download"]:
            os.makedirs(readdir, exist_ok=True)
            for library in sra:
                fetch_read_files(library)
    meta["similarity"]["blastn"].update({"path": ntdir})
    meta["similarity"]["diamond_blastx"].update({"path": uniprotdir})
    meta["similarity"]["diamond_blastp"].update({"path": uniprotdir})
    meta["settings"]["taxdump"] = taxdumpdir
    tofile.write_file("%s/config.yaml" % outdir, meta)