def test_Otyping(caplog):
    """
    Run ectyper on the Escherichia_O26H11 fasta genome (truncated wzx and wzy genes with
    reference coverage <50) and check the O and H antigen fields of the report.
    The third and fourth tab-separated fields of the second line of output.tsv must be
    "-" (no O-type call) and "H11" respectively.
    :return: None
    """
    caplog.set_level(logging.DEBUG)
    genome_path = os.path.join(TEST_ROOT, 'Data/Escherichia_O26H11.fasta')
    output_dir = tempfile.mkdtemp()
    set_input(input=genome_path,
              output=output_dir,
              cores=4,
              verify=True,
              print_sequence=True,
              debug=False)

    ectyper.run_program()

    report_path = os.path.join(output_dir, "output.tsv")
    with open(report_path) as report:
        report_lines = report.readlines()

    # Line 0 is skipped; the first result row follows it.
    fields = report_lines[1].split("\t")
    Otype, Htype = fields[2], fields[3]

    assert Otype == "-", "Expected no call but reported O-type:" + Otype
    assert Htype == "H11", "Expected H11 but reported H-type:" + Htype
Exemplo n.º 2
0
def test_valid_fastq_file_with_verify(caplog):
    """
    Run ectyper with verify=True on the Escherichia fastq sample (a valid fastq file
    with low genome coverage) and check that "Escherichia coli" is reported in the
    captured log.
    :return: None
    """
    fastq_path = os.path.join(TEST_ROOT, 'Data/Escherichia.fastq')
    set_input(fastq_path, verify=True)
    ectyper.run_program()
    captured_log = caplog.text
    assert "Escherichia coli" in captured_log
Exemplo n.º 3
0
def test_integration_validfasta_noverify(caplog):
    """
    Regression test for issue #76 (https://github.com/phac-nml/ecoli_serotyping/issues/76):
    a fasta file run with the E.coli species verify function (--verify) switched off
    must not fail. Both expected tab-separated result fragments must appear in the
    captured log.
    :return: None
    """
    fasta_path = os.path.join(TEST_ROOT, 'Data/Escherichia.fna')
    set_input(fasta_path, verify=False)
    ectyper.run_program()
    for expected_fragment in ("O103\tH2\tO103:H2", "Escherichia\t-\tO103\tH2"):
        assert expected_fragment in caplog.text
Exemplo n.º 4
0
def test_integration_invalid_file(caplog):
    """
    Run ectyper on a non-fasta file that carries a .fasta file name and check that
    the "Non fasta / fastq file" message shows up in the captured log.
    :return: None
    """
    caplog.set_level(logging.DEBUG)
    bad_fasta_path = os.path.join(TEST_ROOT, 'Data/test_dir/badfasta.fasta')
    set_input(input=bad_fasta_path)
    ectyper.run_program()
    captured_log = caplog.text
    assert "Non fasta / fastq file" in captured_log
Exemplo n.º 5
0
def test_integration_yersinia(caplog):
    """
    Run ectyper on the Yersinia fasta and check that the sample is categorized as a
    non-E. coli: the captured log must name "Yersinia pestis" and carry the
    wrong-species warning.
    :return: None
    """
    yersinia_path = os.path.join(TEST_ROOT, 'Data/Yersinia.fasta')
    set_input(yersinia_path)
    ectyper.run_program()
    for expected_text in ("Yersinia pestis", "WARNING (WRONG SPECIES)"):
        assert expected_text in caplog.text
Exemplo n.º 6
0
def test_valid_fastq_file(caplog):
    """
    Run ectyper with verify=False on a valid fastq file and check that the
    serotype "O22:H8" appears in the captured log.
    :return: None
    """
    fastq_path = os.path.join(TEST_ROOT, 'Data/Escherichia.fastq')
    set_input(fastq_path, verify=False)
    ectyper.run_program()
    captured_log = caplog.text
    assert "O22:H8" in captured_log
Exemplo n.º 7
0
def test_integration_no_file():
    """
    Run ectyper with an empty input string and check that it raises
    FileNotFoundError with the message "No files were found to run on".
    :return: None
    """
    empty_input = ''
    set_input(empty_input)
    with pytest.raises(FileNotFoundError) as raised:
        ectyper.run_program()
    error_message = str(raised.value)
    assert raised.type == FileNotFoundError
    assert error_message == "No files were found to run on"
Exemplo n.º 8
0
def test_integration_valid_file(caplog):
    """
    Run ectyper on a valid E. coli fasta and check that the captured log reports a
    reportable pass, the serotype O103:H2 and the species Escherichia coli.
    :return: None
    """
    fasta_path = os.path.join(TEST_ROOT, 'Data/Escherichia.fna')
    set_input(fasta_path)
    ectyper.run_program()
    # Echo the captured log so it is visible in the test output.
    print(caplog.text)
    expected_texts = ("PASS (REPORTABLE)", "O103:H2", "Escherichia coli")
    for expected_text in expected_texts:
        assert expected_text in caplog.text
def test_non_existing_accession_in_meta(caplog):
    """
    Run ectyper with verify=False on GCA_900059685.2 (Streptococcus pneumoniae,
    ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/900/059/685/GCA_900059685.2_12291_5_44).
    This accession is not present in assembly_summary_refseq.txt, which makes it a good
    candidate for exercising the species identification function.
    The captured log must state that no O and H antigen determinant genes were found.
    :param caplog: Capture logging output for pytest
    :return: None
    """
    caplog.set_level(logging.DEBUG)
    genome_path = os.path.join(TEST_ROOT, 'Data/GCA_900059685.2.fna')
    set_input(input=genome_path, verify=False)
    ectyper.run_program()
    captured_log = caplog.text
    assert "No O and H antigen determinant E.coli genes were found" in captured_log
Exemplo n.º 10
0
def test_multiple_directories(caplog):
    """
    Check a number of small files, some good, some bad,
    within a nested directory structure.

    ectyper is run over ``Data/test_dir`` and every expected pattern below must
    match at least one line of the captured log. ``re.match`` anchors at the
    start of the line, hence the leading ``.+`` in each pattern.

    :param caplog: Capture logging output for pytest
    :return: None
    """
    the_dir = os.path.join(TEST_ROOT, 'Data/test_dir')
    set_input(the_dir, cores=4, verify=True, print_sequence=True)
    ectyper.run_program()

    expected_patterns = (
        # Samples flagged as the wrong species with no species identified.
        r".+sample2.+WARNING\s+\(WRONG\s+SPECIES\).+Sample identified as -",  # O148:H44
        r".+sample3.+WARNING\s+\(WRONG\s+SPECIES\).+Sample identified as -",  # O148:H44
        r".+sample4.+WARNING\s+\(WRONG\s+SPECIES\).+Sample identified as -",  # O148:H44
        # Files rejected because they are not fasta / fastq.
        r".+badfasta.+WARNING\s+\(WRONG\s+SPECIES\).+Non fasta / fastq file",
        # NOTE(review): the '.' in 'sample.fasta' is an unescaped regex wildcard;
        # kept exactly as in the original pattern.
        r".+sample.fasta.+WARNING\s+\(WRONG\s+SPECIES\).+Non fasta / fastq file",
        r".+sampletar.+WARNING\s+\(WRONG\s+SPECIES\).+Non fasta / fastq file",
        r".+test_junk.+WARNING\s+\(WRONG\s+SPECIES\).+Non fasta / fastq file",
    )

    # Split the captured log once instead of once per pattern.
    log_lines = caplog.text.splitlines()
    for pattern in expected_patterns:
        assert any(re.match(pattern, line) for line in log_lines), \
            "No captured log line matches pattern: " + pattern
def test_Ecoli_O17H18(caplog):
    """
    Run ectyper on the EscherichiaO17H18 fasta and check that the second line of
    output.tsv reports Escherichia coli with the mixed O-type call
    O77/O17/O44/O106, H18 and the mixed O-type warning.
    :return: None
    """
    caplog.set_level(logging.DEBUG)
    genome_path = os.path.join(TEST_ROOT, 'Data/EscherichiaO17H18.fasta')
    output_dir = tempfile.mkdtemp()
    set_input(input=genome_path,
              output=output_dir,
              cores=4,
              verify=True,
              print_sequence=False,
              debug=True)

    ectyper.run_program()

    report_path = os.path.join(output_dir, "output.tsv")
    with open(report_path) as report:
        report_lines = report.readlines()
    # Only the second line (first result row) is checked.
    result_row = report_lines[1]
    assert "Escherichia coli\tO77/O17/O44/O106\tH18\tO77/O17/O44/O106:H18\tWARNING MIXED O-TYPE" in result_row
def test_Shigella_typing(caplog):
    """
    Run ectyper on the DRR015915 Shigella boydii fasta and check that the second
    tab-separated field of the second line of output.tsv is "Shigella boydii".
    :return: None
    """
    caplog.set_level(logging.DEBUG)
    genome_path = os.path.join(TEST_ROOT, 'Data/DRR015915_Shigella_boydii.fasta')
    output_dir = tempfile.mkdtemp()
    set_input(input=genome_path,
              output=output_dir,
              cores=4,
              verify=True,
              print_sequence=True,
              debug=True)
    ectyper.run_program()

    report_path = os.path.join(output_dir, "output.tsv")
    with open(report_path) as report:
        report_lines = report.readlines()
    species = report_lines[1].split("\t")[1]
    assert species == "Shigella boydii"
def test_closeOalles_O42_O28(caplog):
    """
    Run ectyper on the EscherichiaO28H5 fasta and check that the second line of
    output.tsv reports Escherichia coli with the combined O42/O28 call and H25.
    :return: None
    """
    caplog.set_level(logging.DEBUG)
    genome_path = os.path.join(TEST_ROOT, 'Data/EscherichiaO28H5.fasta')

    output_dir = tempfile.mkdtemp()
    set_input(input=genome_path,
              output=output_dir,
              cores=4,
              verify=True,
              print_sequence=True,
              debug=False)
    ectyper.run_program()

    report_path = os.path.join(output_dir, "output.tsv")
    with open(report_path) as report:
        report_lines = report.readlines()
    result_row = report_lines[1]
    # Echo the row so it is visible in the test output.
    print(result_row)
    expected_row = r".+Escherichia coli.+O42\/O28\tH25\tO42\/O28:H25"
    assert re.match(expected_row, result_row)
def test_Ealbertii_1(caplog):  #error
    """
    Test 1 of 3 on EnteroBase sample ESC_HA8355AA_AS (Escherichia albertii O65:H5).
    The second line of output.tsv must name Escherichia albertii and carry the
    wrong-species warning.
    :return: None
    """
    LOG.info(
        "Starting 1 of 3 test on EnteroBase on sample ESC_HA8355AA_AS: Escherichia albertii O65:H5"
    )
    caplog.set_level(logging.DEBUG)
    genome_path = os.path.join(TEST_ROOT, 'Data/ESC_HA8355AA_AS_Ealberii_O65H5.fasta')
    output_dir = tempfile.mkdtemp()
    set_input(input=genome_path,
              output=output_dir,
              cores=4,
              verify=True,
              print_sequence=True)

    ectyper.run_program()

    report_path = os.path.join(output_dir, "output.tsv")
    with open(report_path) as report:
        report_lines = report.readlines()
    # Skip the header line; only the first result row is checked.
    result_row = report_lines[1]
    for expected_text in ("Escherichia albertii", "WARNING (WRONG SPECIES)"):
        assert expected_text in result_row
def test_Ealbertii_3(caplog):
    """
    Test 3 of 3 on Escherichia albertii O49:NM.
    O49 can not be typed, due to poor sequence quality or uncertainty of the wet-lab
    O49 typing, so only the species and the QC flag are checked: the second line of
    output.tsv must name Escherichia albertii and carry the wrong-species warning.
    :return: None
    """
    LOG.info(
        "Starting 3 of 3 test Escherichia albertii O49:NM"
    )
    caplog.set_level(logging.DEBUG)
    genome_path = os.path.join(TEST_ROOT, 'Data/Ealbertii_O49NM.fasta')

    output_dir = tempfile.mkdtemp()
    set_input(input=genome_path,
              output=output_dir,
              cores=4,
              verify=True,
              print_sequence=True)
    ectyper.run_program()

    report_path = os.path.join(output_dir, "output.tsv")
    with open(report_path) as report:
        report_lines = report.readlines()
    # Only the second line (first result row) is checked.
    result_row = report_lines[1]
    for expected_text in ("Escherichia albertii", "WARNING (WRONG SPECIES)"):
        assert expected_text in result_row
def test_Ealbertii_2():  #error
    """
    Test 2 of 3 on EnteroBase sample ESC_HA8509AA_AS (Escherichia albertii O5:H5).
    The second line of output.tsv must name Escherichia albertii and carry the
    wrong-species warning.
    :return: None
    """
    LOG.info(
        "Starting 2 of 3 test on EnteroBase on sample on ESC_HA8509AA_AS: Escherichia albertii O5:H5"
    )

    genome_path = os.path.join(TEST_ROOT, 'Data/ESC_HA8509AA_AS_EalbertiiO5H5.fasta')
    output_dir = tempfile.mkdtemp()
    set_input(input=genome_path,
              output=output_dir,
              cores=4,
              verify=True,
              print_sequence=True)
    ectyper.run_program()

    report_path = os.path.join(output_dir, "output.tsv")
    with open(report_path) as report:
        report_lines = report.readlines()
    # Only the second line (first result row) is checked.
    result_row = report_lines[1]

    for expected_text in ("Escherichia albertii", "WARNING (WRONG SPECIES)"):
        assert expected_text in result_row
def test_mixofspecies(caplog):
    """
    Run ectyper on a comma-separated mix of Campylobacter, Salmonella and
    Escherichia inputs and check the serovar, species and QC flag reported for
    each result row of output.tsv.

    Fields are read by position from each tab-separated row after the header:
    index 1 (species), index 4 (serovar) and index 5 (QC flag).

    :param caplog: Capture logging output for pytest
    :return: None
    """
    caplog.set_level(logging.DEBUG)
    file = ",".join(
        os.path.join(TEST_ROOT, relative_path)
        for relative_path in ('Data/Campylobacter.fasta',
                              'Data/Salmonella.fasta',
                              'Data/Escherichia.fastq'))
    tmpdir = tempfile.mkdtemp()
    set_input(input=file,
              cores=4,
              print_sequence=True,
              verify=True,
              output=tmpdir)

    ectyper.run_program()

    with open(os.path.join(tmpdir, "output.tsv")) as outfp:
        rows = outfp.readlines()[1:]  # drop the header line

    serovars = []
    species = []
    QCflag = []
    for row in rows:
        rowlist = row.split("\t")
        print(rowlist)
        species.append(rowlist[1])
        serovars.append(rowlist[4])
        QCflag.append(rowlist[5])

    # This also pins the number of result rows to three, so the zip below
    # cannot silently skip an expected species.
    assert serovars == ['-:-', 'O22:H8', '-:-']

    expectedspecies_list = [
        "Campylobacter jejuni", "Escherichia coli", "Salmonella enterica"
    ]
    # Prefix match: the reported species only has to start with the expected name.
    for expected, reported in zip(expectedspecies_list, species):
        assert re.match(expected, reported), \
            "Expected species " + expected + " but reported: " + reported

    assert QCflag == [
        "WARNING (WRONG SPECIES)", "PASS (REPORTABLE)",
        "WARNING (WRONG SPECIES)"
    ]
def test_failed_species_identification_nospeciesverify(caplog):
    """
    Run ectyper with verify=False on GCF_001672015.1 and check that the captured
    log contains the expected tab-separated result row, with "-" in the species
    and O-type fields and H8 as the H-type.
    :return: None
    """
    caplog.set_level(logging.DEBUG)
    genome_path = os.path.join(TEST_ROOT, 'Data/GCF_001672015.1.fna')
    set_input(input=genome_path, verify=False)
    ectyper.run_program()
    expected_row = "GCF_001672015.1\t-\t-\tH8\t-:H8\t-"
    assert expected_row in caplog.text
def test_failed_species_identification(caplog):
    """
    Run ectyper with verify=True on GCF_001672015.1 and check that the captured
    log contains the expected tab-separated result row: Escherichia coli, no
    O-type call, H8 and the "-:H TYPING" warning.
    :return: None
    """
    caplog.set_level(logging.DEBUG)
    genome_path = os.path.join(TEST_ROOT, 'Data/GCF_001672015.1.fna')
    set_input(input=genome_path, verify=True)
    ectyper.run_program()
    expected_row = "GCF_001672015.1\tEscherichia coli\t-\tH8\t-:H8\tWARNING (-:H TYPING)"
    assert expected_row in caplog.text