def test_get_various_datatypes(tmpdir):
    """Check that genome data paths exist for several data types.

    For each of the protein FASTA, genomic FASTA and genomic GenBank data
    types, the path returned by ``get_taxid_genome_data_path`` for the phage
    taxid must point to something present on disk.
    """
    genomes = GenomeCollection(data_dir=str(tmpdir))
    wanted_types = ("protein_fasta", "genomic_fasta", "genomic_genbank")
    for wanted in wanted_types:
        data_path = genomes.get_taxid_genome_data_path(
            taxid=PHAGE_TAXID, data_type=wanted
        )
        assert os.path.exists(data_path)
def test_blast_against_taxid(tmpdir):
    """Run ``blastn`` against the phage taxid and check the results file.

    The results file must not exist before the call, and afterwards its size
    must lie strictly between 800 and 1200 bytes.
    """
    workdir = str(tmpdir)
    genomes = GenomeCollection(data_dir=workdir)
    results_path = os.path.join(workdir, "results.txt")
    # Relative path: resolved against the current working directory.
    queries_path = os.path.join("tests", "queries.fa")
    assert not os.path.exists(results_path)

    blast_command = ["blastn", "-query", queries_path, "-out", results_path]
    genomes.blast_against_taxid(PHAGE_TAXID, "nucl", blast_command)

    results_size = os.path.getsize(results_path)
    assert 800 < results_size < 1200
Exemplo n.º 3
0
def test_avoid_matches_with_phage():
    """Check that AvoidMatches breaches against the phage index are resolved.

    A seeded 30bp random sequence is checked against a Bowtie (version "1")
    index of the phage genome with ``match_length=10``. The test expects 5
    breach locations before resolution and all constraints passing after.
    """
    phage_taxid = "697289"
    bowtie_index = GenomeCollection().get_taxid_bowtie_index_path(
        phage_taxid, version="1"
    )
    # Build the sequence before the constraint, as in the original call order.
    sequence = random_dna_sequence(30, seed=123)
    no_phage_matches = AvoidMatches(bowtie_index=bowtie_index, match_length=10)
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[no_phage_matches],
        logger=None,
    )

    breaches = problem.constraints_evaluations().all_locations()
    assert len(breaches) == 5

    problem.resolve_constraints()
    assert problem.all_constraints_pass()
Exemplo n.º 4
0
def test_avoid_phage_blast_matches():
    """Check that AvoidBlastMatches breaches against the phage DB are resolved.

    A seeded 30bp random sequence is checked against a nucleotide BLAST
    database of the phage genome. The constraint must fail before
    ``resolve_constraints`` is called and pass afterwards.
    """
    phage_taxid = "697289"
    phage_blastdb = GenomeCollection().get_taxid_blastdb_path(
        phage_taxid, db_type="nucl"
    )
    # Build the sequence before the constraint, as in the original call order.
    sequence = random_dna_sequence(30, seed=123)
    no_blast_matches = AvoidBlastMatches(
        blast_db=phage_blastdb,
        min_align_length=10,
        word_size=7,
    )
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[no_blast_matches],
        logger=None,
    )

    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
def test_get_blast_database(tmpdir):
    """Check nucleotide and protein BLAST databases for the phage taxid.

    The nucleotide database must not exist before it is requested, must be
    returned at the path given by ``datafile_path``, and its ``.nsq`` file
    must be between 30,000 and 50,000 bytes. The protein database's ``.psq``
    file must be between 40,000 and 60,000 bytes (bounds exclusive).
    """
    genomes = GenomeCollection(data_dir=str(tmpdir))
    expected_path = genomes.datafile_path(PHAGE_TAXID, data_type="blast_nucl")
    assert not os.path.exists(expected_path + ".nsq")

    # Nucleotide database
    nucl_db = genomes.get_taxid_blastdb_path(PHAGE_TAXID, db_type="nucl")
    assert expected_path == nucl_db
    nucl_size = os.stat(nucl_db + ".nsq").st_size
    assert 30_000 < nucl_size < 50_000

    # Protein database
    prot_db = genomes.get_taxid_blastdb_path(PHAGE_TAXID, db_type="prot")
    prot_size = os.stat(prot_db + ".psq").st_size
    assert 40_000 < prot_size < 60_000
def test_delete_all_data_files(tmpdir):
    """Check that ``remove_all_local_data_files`` clears local infos files.

    Infos are requested for three taxids, which must then be listed as
    locally available; after the removal call the listing must be empty.
    """
    genomes = GenomeCollection(data_dir=str(tmpdir))
    for requested_taxid in ("224308", "511145", "559292"):
        genomes.get_taxid_infos(requested_taxid)

    infos_before = genomes.list_locally_available_taxids("infos")
    assert len(infos_before) == 3

    genomes.remove_all_local_data_files()

    infos_after = genomes.list_locally_available_taxids("infos")
    assert len(infos_after) == 0
def test_autodownload_false(tmpdir):
    """Check that lookups raise when ``autodownload`` is switched off.

    With an empty data directory and ``autodownload = False``, requesting
    infos or genome data must raise ``FileNotFoundError`` with a message
    containing "No infos" and "No genome" respectively.
    """
    genomes = GenomeCollection(data_dir=str(tmpdir))
    genomes.autodownload = False
    missing_taxid = "224308"

    with pytest.raises(FileNotFoundError) as infos_error:
        genomes.get_taxid_infos(missing_taxid)
    assert "No infos" in str(infos_error.value)

    with pytest.raises(FileNotFoundError) as genome_error:
        genomes.get_taxid_genome_data_path(missing_taxid)
    assert "No genome" in str(genome_error.value)
Exemplo n.º 8
0
def test_get_biopython_records(tmpdir):
    """Check the records returned for the phage taxid.

    Exactly one record is expected, and its length must lie strictly
    between 168000 and 170000.
    """
    genomes = GenomeCollection(data_dir=str(tmpdir))
    phage_records = genomes.get_taxid_biopython_records(PHAGE_TAXID)
    assert len(phage_records) == 1

    record_length = len(phage_records[0])
    assert 168000 < record_length < 170000
Exemplo n.º 9
0
"""Example of use for AvoidBlastMatches.

In this example we create a 1000bp random sequence, then edit out every match
with E. coli that is 14bp or longer.

"""
from dnachisel import DnaOptimizationProblem, random_dna_sequence, AvoidMatches
from genome_collector import GenomeCollection

# THIS CREATES THE ECOLI BLAST DATABASE ON YOUR MACHINE IF NOT ALREADY HERE

collection = GenomeCollection()
ecoli_index = collection.get_taxid_bowtie_index_path(511145, version="1")

# DEFINE AND SOLVE THE PROBLEM

problem = DnaOptimizationProblem(
    sequence=random_dna_sequence(500, seed=123),
    constraints=[
        AvoidMatches(bowtie_index=ecoli_index, match_length=15, mismatches=1)
    ],
)

print(
    "Constraints validity before optimization\n",
    problem.constraints_text_summary(),
)

print("\nNow resolving the problems\n")
problem.resolve_constraints(final_check=True)
Exemplo n.º 10
0
In this example we create a 500bp random sequence, then edit out every match
with E. coli that is 13bp or longer.

"""
import os
from genome_collector import GenomeCollection
from dnachisel import (
    DnaOptimizationProblem,
    random_dna_sequence,
    AvoidBlastMatches,
)

# THIS CREATES THE ECOLI BLAST DATABASE ON YOUR MACHINE IF NOT ALREADY HERE

collection = GenomeCollection()
ecoli_blastdb = collection.get_taxid_blastdb_path(511145, db_type="nucl")

# DEFINE AND SOLVE THE PROBLEM

problem = DnaOptimizationProblem(
    sequence=random_dna_sequence(500, seed=123),
    constraints=[
        AvoidBlastMatches(
            blast_db=ecoli_blastdb,
            min_align_length=13,
            perc_identity=100,
            word_size=5, # The bigger the word size, the faster
            e_value=1e20,
            # ungapped=False
        )
# This is the basic example of the README to get you started.
#
# It obtains the path of a nucleotide BLAST database for taxid 511145 and
# runs ``blastn`` on the queries in basic_example_queries.fa, writing the
# output to basic_example_results.txt.

from genome_collector import GenomeCollection
import subprocess

# GET A BLAST PATH
collection = GenomeCollection()
db_path = collection.get_taxid_blastdb_path(taxid=511145, db_type="nucl")

# RUN BLASTN AGAINST THE DATABASE
process = subprocess.run(
    [
        "blastn",
        "-db",
        db_path,
        "-query",
        "basic_example_queries.fa",
        "-out",
        "basic_example_results.txt",
    ],
    stderr=subprocess.PIPE,
)
if process.returncode:
    # stderr is captured as bytes: decode it so the error message shows the
    # BLAST output as readable text rather than a b'...' repr.
    raise OSError(
        "BLAST failed: %s" % process.stderr.decode(errors="replace")
    )

print("All good! see basic_example_results.txt for results.")