Exemplo n.º 1
0
def test_create_gen_lst(caplog):
    """
    Run create_gene_lst on a prodigal ffn file, and compare the generated gen and lst
    files to the expected ones.

    In the test file, all genomes have names different from gembase name.
    This test file contains the following aspects:
    - gene in D strand (start < end)
    - gene in C strand (start > end)
    - CDS features
    - contigs with more than 2 genes
    - contig with only 2 genes (both 'b' loc)
    - contig with 1 gene ('b' loc)
    - contig without gene (should be skipped)
    """
    caplog.set_level(logging.DEBUG)
    gembase_name = "test.0417.00002"
    # Original contig header -> gembase contig name
    contigs = {
        "JGIKIPgffgIJ": "test.0417.00002.0001",
        "toto": "test.0417.00002.0002",
        "other_header": "test.0417.00002.0003",
        "my_contig": "test.0417.00002.0004",
        "bis": "test.0417.00002.0005",
        "ter": "test.0417.00002.0006",
        "contname": "test.0417.00002.0007",
    }
    ffn_in = os.path.join(TEST_ANNOTE, "original_name.fna-prodigalRes",
                          "prodigal.outtest.ok.ffn")
    gen_out = os.path.join(GENEPATH, "prodigal_res.gen")
    lst_out = os.path.join(GENEPATH, "prodigal_res.lst")
    assert prodigalfunc.create_gene_lst(contigs, ffn_in, gen_out, lst_out,
                                        "original_genome_name", gembase_name)
    # Compare each generated file with its expected version (lst first, then gen)
    to_compare = (
        ("res_create_gene_lst_prodigal.lst", lst_out),
        ("res_create_gene_lst_prodigal.gen", gen_out),
    )
    for exp_name, generated in to_compare:
        expected = os.path.join(EXP_ANNOTE, exp_name)
        assert tutil.compare_order_content(expected, generated)
Exemplo n.º 2
0
def test_run_prokka_out_doesnt_exist_ok():
    """
    Test that when the output directory does not exist, it creates it, and runs prokka
    with all expected outfiles.

    Checks done:
    - tbl file: exists, with the expected number of contigs and CDS
    - faa and ffn files: exist, with the expected content
    - the 3 messages put in the logger queue (logger[0])
    """
    logger = my_logger("test_run_prokka_out_doesnt_exist")
    utils.init_logger(LOGFILE_BASE, 0, 'test_run_prokka_out_doesnt_exist')
    gpath = os.path.join(GEN_PATH, "H299_H561.fasta")
    out_dir = os.path.join(GENEPATH, "H299_H561.fasta-prokkaRes")
    cores_prokka = 2
    name = "test_runprokka_H299"
    force = False
    nbcont = 3
    trn_file = "nofile.trn"
    arguments = (gpath, GENEPATH, cores_prokka, name, force, nbcont, trn_file,
                 logger[0])
    assert afunc.run_prokka(arguments)
    # Check content of tbl, ffn and faa files
    # exp_dir is the path prefix (without extension) of the expected files
    exp_dir = os.path.join(EXP_DIR,
                           "H299_H561.fasta-short-contig.fna-prokkaRes",
                           "test_runprokka_H299")
    out_tbl = os.path.join(out_dir, name + ".tbl")
    out_faa = os.path.join(out_dir, name + ".faa")
    out_ffn = os.path.join(out_dir, name + ".ffn")
    assert os.path.isfile(out_tbl)
    # For tbl file, check that, at least, the 3 contigs were considered,
    # and that the number of CDS is as expected.
    # Before, we checked that the output
    # was exactly as expected. But it changes with the different versions of prokka, so
    # we cannot compare the whole file.
    with open(out_tbl, "r") as outt:
        lines = [line.strip() for line in outt]
    # Check that there are 3 contigs (lines containing 'Feature')
    assert sum('Feature' in line for line in lines) == 3
    # Check that there are 16 CDS (lines containing 'CDS')
    assert sum("CDS" in line for line in lines) == 16
    # Check that faa and ffn files are as expected
    assert os.path.isfile(out_faa)
    assert tutil.compare_order_content(exp_dir + ".faa", out_faa)
    assert os.path.isfile(out_ffn)
    assert tutil.compare_order_content(exp_dir + ".ffn", out_ffn)
    # Check the messages sent to the logger queue, in order
    q = logger[0]
    assert q.qsize() == 3
    assert q.get().message.startswith("Start annotating")
    assert q.get().message == (
        "Prokka command: prokka "
        "--outdir test/data/annotate/generated_by_unit-tests/"
        "H299_H561.fasta-prokkaRes --cpus 2 --prefix test_runprokka_H299 "
        "--centre prokka test/data/annotate/genomes/H299_H561.fasta")
    assert q.get().message.startswith("End annotating")
Exemplo n.º 3
0
def test_run_prodigal_out_exists_force():
    """
    Test that when the output directory already exists with wrong files, but force is on,
    prodigal is rerun and outputs the right files.

    Empty gff, faa and ffn files are created beforehand in the prodigal output directory.
    After the run, faa and ffn files must have the expected content, and the logger queue
    (logger[0]) must contain the 4 expected messages.
    """
    logger = my_logger("test_run_prodigal_out_exists_force")
    utils.init_logger(LOGFILE_BASE, 0, 'force')
    gpath = os.path.join(GEN_PATH, "H299_H561.fasta")
    out_prokdir = os.path.join(GENEPATH, "H299_H561.fasta-prodigalRes")
    name = "test_runprodigal_H299"
    out_gff = os.path.join(out_prokdir, name + ".gff")
    out_faa = os.path.join(out_prokdir, name + ".faa")
    out_ffn = os.path.join(out_prokdir, name + ".ffn")
    # Put empty gff, faa, ffn files in prodigal output dir, to check that they are overridden
    os.makedirs(out_prokdir)
    for empty_file in (out_gff, out_faa, out_ffn):
        open(empty_file, "w").close()
    cores_prodigal = 2
    force = True
    nbcont = 3
    trn_file = os.path.join(TEST_DIR, "A_H738-and-B2_A3_5.fna.trn")
    arguments = (gpath, GENEPATH, cores_prodigal, name, force, nbcont,
                 trn_file, logger[0])
    assert afunc.run_prodigal(arguments)
    # As we used 'force', the faa and ffn files, which were empty, must have been replaced
    # by the prodigal output
    # exp_dir is the path prefix (without extension) of the expected files
    exp_dir = os.path.join(EXP_DIR, "H299_H561.fasta-prodigalRes",
                           "ESCO.1015.00001")
    # Check that faa and ffn files are as expected
    assert os.path.isfile(out_faa)
    assert tutil.compare_order_content(exp_dir + ".faa", out_faa)
    assert os.path.isfile(out_ffn)
    assert tutil.compare_order_content(exp_dir + ".ffn", out_ffn)
    # Check the messages sent to the logger queue, in order
    q = logger[0]
    assert q.qsize() == 4
    assert q.get().message.startswith(
        "Prodigal results folder already exists, but is "
        "removed because --force option was used")
    assert q.get().message.startswith(
        "Start annotating test_runprodigal_H299 (from test/data/"
        "annotate/genomes/H299_H561.fasta sequence) "
        "with Prodigal")
    assert q.get().message.startswith(
        "Prodigal command: prodigal -i test/data/annotate/genomes/"
        "H299_H561.fasta -d test/data/annotate/"
        "generated_by_unit-tests/H299_H561.fasta-prodigalRes/"
        "test_runprodigal_H299.ffn -a test/data/annotate/"
        "generated_by_unit-tests/H299_H561.fasta-prodigalRes/"
        "test_runprodigal_H299.faa -f gff -o test/data/annotate/"
        "generated_by_unit-tests/H299_H561.fasta-prodigalRes/"
        "test_runprodigal_H299.gff -t "
        "test/data/annotate/test_files/A_H738-and-B2_A3_5.fna.trn "
        "-q")
    assert q.get().message.startswith(
        "End annotating test_runprodigal_H299 "
        "(from test/data/annotate/genomes/H299_H561.fasta)")
Exemplo n.º 4
0
def test_convert_phylip(caplog):
    """
    Test that when giving a valid fasta alignment file, it converts it to PHYLIP format,
    as expected.
    """
    caplog.set_level(logging.DEBUG)
    outfile = os.path.join(GENEPATH, "test_2phylip")
    fme.convert2phylip(ALIGNMENT, outfile)
    exp_phylip = os.path.join(EXPPATH, "exp_align_phylip.ph")
    assert os.path.isfile(outfile)
    # The comparison result must be asserted, otherwise a wrong content goes unnoticed
    assert tutil.compare_order_content(outfile, exp_phylip)
    assert "Converting fasta alignment to PHYLIP-relaxed format" in caplog.text
Exemplo n.º 5
0
def test_handle_genome_formatok_prodigal(caplog):
    """
    Format a genome which was annotated by prodigal without any problem:
    handle_genome must return True, associated with the (original) genome name,
    and all result files must have the expected content.
    """
    caplog.set_level(logging.DEBUG)
    name_orig = "prodigal.outtest.ok"
    name = "test.0417.00002"
    # Folder containing prodigal results, and path to the original genome
    # (the one given to prodigal for annotation)
    prodi_path = os.path.join(ANNOTEDIR, "test_files")
    gpath = os.path.join(ANNOTEDIR, "test_files", "original_name.fna")
    # Create result directories
    prot_dir = os.path.join(GENEPATH, "Proteins")
    lst_dir = os.path.join(GENEPATH, "LSTINFO")
    rep_dir = os.path.join(GENEPATH, "Replicons")
    gene_dir = os.path.join(GENEPATH, "Genes")
    gff_dir = os.path.join(GENEPATH, "gff")
    for result_dir in (prot_dir, lst_dir, rep_dir, gene_dir, gff_dir):
        os.makedirs(result_dir)
    # Run the function, with all its arguments given in a single tuple
    args = (name_orig, name, gpath, prodi_path, lst_dir, prot_dir, gene_dir,
            rep_dir, gff_dir, True, my_logger()[0])
    ok_format, genome = ffunc.handle_genome(args)
    assert ok_format == True
    assert genome == name_orig
    # Check generated files: (expected file name, result folder, result file name)
    # Order: Replicons, Proteins, Genes, LSTINFO, gff
    generated = (
        ("res_created_rep-prokka.fna", rep_dir, "test.0417.00002.fna"),
        ("res_create_prt_prodigal.faa", prot_dir, "test.0417.00002.prt"),
        ("res_create_gene_lst_prodigal.gen", gene_dir, "test.0417.00002.gen"),
        ("res_create_gene_lst_prodigal.lst", lst_dir, "test.0417.00002.lst"),
        ("res_create_gff_prodigal.gff", gff_dir, "test.0417.00002.gff"),
    )
    for exp_name, res_dir, res_name in generated:
        exp_file = os.path.join(EXP_ANNOTE, exp_name)
        res_file = os.path.join(res_dir, res_name)
        assert tutil.compare_order_content(exp_file, res_file)
Exemplo n.º 6
0
def test_main_onexistingprodigaldir_train_exists(capsys):
    """
    Test that, when the pipeline is run with a given prodigal dir, where prodigal results already
    exist, and are ok, all runs well, no re-annotation, just format

    - no train (a training file already exists in the result folder)
    - no reannote
    - format

    2 genomes in list file: B2_A3_5.fasta-changeName.fna and H299_H561.fasta
    """
    # FOLDER with all results
    # Create result folder, with existing prodigal folders (which are OK)
    res_folder = os.path.join(GENEPATH, "results-prodigal")
    os.makedirs(res_folder)
    # copy prodigalRes folders
    B2_A3_5_folder = os.path.join(EXP_DIR, "B2_A3_5.fasta-changeName.fna-prodigalRes")
    H299_folder = os.path.join(EXP_DIR, "H299_H561.fasta-prodigalRes")
    res_B2_A3_5_folder = os.path.join(res_folder, "B2_A3_5.fasta-changeName.fna-prodigalRes")
    res_H299_folder = os.path.join(res_folder, "H299_H561.fasta-prodigalRes")
    shutil.copytree(B2_A3_5_folder, res_B2_A3_5_folder)
    shutil.copytree(H299_folder, res_H299_folder)
    # Add a (empty) training file in result folder
    trn_file = os.path.join(res_folder, "H299_H561.fasta.trn")
    open(trn_file, "w").close()

    # Function arguments
    list_file = os.path.join(TEST_DIR, "list_genomes-func-test-exist_dir.txt")
    name = "ESCO"
    date = "0417"
    lstout = os.path.join(GENEPATH, "LSTINFO-list_genomes-func-test-exist_dir.lst")
    lstexp = os.path.join(EXP_DIR, "exp_LSTINFO-func-annot_exists-prokkadir.lst")
    assert annot.main("cmd", list_file, GEN_PATH, GENEPATH, name, date, cutn=0,
                      res_annot_dir=res_folder, verbose=3, prodigal_only=True) == (lstout, 2)
    # Read the captured stdout/stderr (their content is not checked in this test)
    capsys.readouterr()
    # Check that tmp files folder is empty (prodigal res are somewhere else)
    assert len(os.listdir(os.path.join(GENEPATH, "tmp_files"))) == 0
    # Test that result files are in result dir
    assert os.path.isfile(lstout)
    assert tutil.compare_order_content(lstout, lstexp)
    logfile = os.path.join(GENEPATH,
                           "PanACoTA-annotate_list_genomes-func-test-exist_dir.log.details")
    # Join all log lines once, so that each expected message can be searched in the
    # whole log content
    with open(logfile, "r") as lc:
        log_text = " ".join(lc.readlines())
    assert ("A training file already exists (test/data/annotate/generated_by_func-tests/"
            "results-prodigal/H299_H561.fasta.trn). It will be used to annotate "
            "all genomes.") in log_text
    assert ("Prodigal results folder test/data/annotate/generated_by_func-tests/"
            "results-prodigal/H299_H561.fasta-prodigalRes "
            "already exists") in log_text
    assert ("Prodigal results folder test/data/annotate/generated_by_func-tests/"
            "results-prodigal/B2_A3_5.fasta-changeName.fna-prodigalRes "
            "already exists") in log_text
    assert ("Prodigal did not run again. Formatting step will use already generated results of "
            "Prodigal in test/data/annotate/generated_by_func-tests/results-prodigal/"
            "H299_H561.fasta-prodigalRes. "
            "If you want to re-run Prodigal, first remove this result folder, or use '-F' or "
            "'--force' option.") in log_text
    assert "Formatting all genomes" in log_text
    assert "Annotation step done" in log_text
Exemplo n.º 7
0
def test_analyse1genome_cut_prodigal():
    """
    Analyse the given genome, cutting at stretches of 5N, in order to annotate it.
    A new genome file is created in outdir, and the genome size, nb contigs and L90 are
    calculated and added to the genomes dict, as well as the paths to the genome files.
    """
    gs = ["genome1.fasta", "genome2.fasta", "genome3.fasta"]
    # Only the analysed genome (genome2) must get new information in this dict
    genomes = {
        gs[0]: ["SAEN.1113"],
        gs[1]: ["SAEN.1114"],
        gs[2]: ["ESCO.0416"],
    }
    # Positional arguments after the folders: cut, pattern to cut at, genomes, software
    assert gfunc.analyse_genome(gs[1], GEN_PATH, GENEPATH, True, "NNNNN+",
                                genomes, "prodigal", logger)

    # Path to the initial genome
    initf = os.path.join(GEN_PATH, "genome2.fasta")
    # Path to the generated (split) genome, and to the expected one
    outf = os.path.join(GENEPATH, "genome2.fasta_prodigal-split5N.fna")
    exp_out = os.path.join(EXP_DIR, "genome2-split5N.fna")
    assert os.path.isfile(outf)
    assert tutil.compare_order_content(outf, exp_out)
    # Check that information on the analysed genome is correct: both paths, followed by
    # 3 numeric values. Entries of the other genomes are unchanged.
    assert genomes == {
        gs[0]: ["SAEN.1113"],
        gs[1]: ["SAEN.1114", initf, outf, 55, 5, 4],
        gs[2]: ["ESCO.0416"],
    }
Exemplo n.º 8
0
def test_main_default(capsys):
    """
    Run corepers main with default parameters, and check that it creates the expected
    core genome, the binary pangenome file, the log files and the expected messages.
    """
    out_pers = os.path.join(GENEPATH, "PersGenome_pangenome.lst-all_1.lst")
    # Positional arguments: cmd, pangenome, tol, multi, mixed, outdir, lstinfo,
    # floor, verbose, quiet
    returned = corepers.main("cmd", UPAN, 1, False, False, GENEPATH, "",
                             False, 0, False)
    assert returned == out_pers
    # Check creation of binary file for pangenome
    assert os.path.isfile(UPAN + ".bin")
    # Check presence of persistent genome, and its content
    assert os.path.isfile(out_pers)
    assert tutil.compare_order_content(out_pers,
                                       os.path.join(EXP_PATH, "exp_coregenome.txt"))
    # Check presence of log files
    logfile = os.path.join(GENEPATH, "PanACoTA-corepers.log")
    for log in (logfile, logfile + ".err"):
        assert os.path.isfile(log)
    # Check log messages written to stdout
    out, err = capsys.readouterr()
    expected_messages = (
        "Will generate a CoreGenome.",
        "Saving all information to a binary file for later use",
        "Generating Persistent genome of a dataset containing 4 genomes",
        ("The core genome contains 2 families, each one having exactly "
         "4 members, from the 4 different genomes."),
    )
    for message in expected_messages:
        assert message in out
Exemplo n.º 9
0
def test_main_frominfo(capsys):
    """
    Test that annotate main runs well when giving an info file instead of
    list file + db etc.
    It does not re-calculate L90 and nbcont
    """
    infofile = os.path.join(TEST_DIR, "lstinfo.lst")
    out_infofile = os.path.join(GENEPATH, "LSTINFO-lstinfo.lst")
    # No list file and no db path (both None): information comes from the info file
    returned = annot.main("cmd", None, None, GENEPATH, "TOTO", "1205",
                          from_info=infofile, prodigal_only=True)
    assert returned == (out_infofile, 3)
    out, err = capsys.readouterr()
    # Check logs
    assert ("Generating distribution of L90 and #contigs graphs.") in out

    # Check output files present: 3 files in each of those result folders
    for folder in ("Proteins", "gff3", "LSTINFO"):
        assert len(os.listdir(os.path.join(GENEPATH, folder))) == 3

    # Check genomes are renamed as expected, and with expected L90/nbcont values
    exp_lstinfo = os.path.join(EXP_DIR, "exp_LSTINFO-test-main-frominfo.lst")
    assert tutil.compare_order_content(exp_lstinfo, out_infofile)
Exemplo n.º 10
0
def test_create_gff(caplog):
    """
    Check generated gff file. Must have all sequences in header (even replicons without gene),
    and then 1 line per gene
    """
    caplog.set_level(logging.DEBUG)
    gfffile = os.path.join(TEST_ANNOTE, "original_name.fna-prodigalRes",
                           "prodigal.outtest.ok.gff")
    # Original contig header -> gembase contig name
    contigs = {
        "JGIKIPgffgIJ": "test.0417.00002.0001",
        "toto": "test.0417.00002.0002",
        "other_header": "test.0417.00002.0003",
        "my_contig": "test.0417.00002.0004",
        "bis": "test.0417.00002.0005",
        "ter": "test.0417.00002.0006",
        "contname": "test.0417.00002.0007"
    }
    # Gembase contig name -> contig size
    sizes = {
        "test.0417.00002.0001": 84,
        "test.0417.00002.0002": 103,
        "test.0417.00002.0003": 122,
        "test.0417.00002.0004": 35,
        "test.0417.00002.0005": 198,
        "test.0417.00002.0006": 128,
        "test.0417.00002.0007": 85,
    }
    res_gff_file = os.path.join(GENEPATH, "prodigal_res.gff")
    # The expected lst file is given as input to create_gff
    exp_lst = os.path.join(EXP_ANNOTE, "res_create_gene_lst_prodigal.lst")
    gpath = "original_genome_name"
    assert prodigalfunc.create_gff(gpath, gfffile, res_gff_file, exp_lst,
                                   contigs, sizes)
    exp_gff = os.path.join(EXP_ANNOTE, "res_create_gff_prodigal.gff")
    assert tutil.compare_order_content(exp_gff, res_gff_file)
Exemplo n.º 11
0
def test_format_contig_cut():
    """
    For a given contig, if we want to annotate it, and cut at each stretch of 5 'N'
    check that it writes this contig, split, in the expected file
    """
    cut = True
    pat = 'NNNNN+'
    cur_seq = "AACTGCTTTTTAAGCGCGCTCCTGCGNNNNNGGTTGTGTGGGCCCAGAGCGAGNCG"
    cur_contig_name = ">my_contig_name for_my_sequence"
    contig_sizes = {}
    resfile = os.path.join(GENEPATH, "test_format_cont_cut5N.fna")
    num = 2

    # 'with' guarantees that the result file is closed (and flushed) even if the
    # assertion on the returned value fails
    with open(resfile, "w") as gresf:
        # 2 pieces are written, numbered from 'num' (2 and 3): 4 is expected in return
        assert gfunc.format_contig(cut,
                                   pat,
                                   cur_seq,
                                   cur_contig_name,
                                   "genome",
                                   contig_sizes,
                                   gresf,
                                   num,
                                   logger=None) == 4

    exp_file = os.path.join(EXP_DIR, "exp_split_contig_cut3N.fna")
    assert os.path.exists(resfile)
    assert tutil.compare_order_content(resfile, exp_file)
    # contig_sizes must have been filled with the header and size of each piece
    assert contig_sizes == {
        ">2_my_contig_name for_my_sequence\n": 26,
        ">3_my_contig_name for_my_sequence\n": 25
    }
Exemplo n.º 12
0
def test_concat_quiet(caplog):
    """
    Given a list of families, and a directory containing alignment files, check that
    the files corresponding to the given families are concatenated as expected, and that
    the function returns the expected output filename together with "Done".
    """
    caplog.set_level(logging.DEBUG)
    aldir = os.path.join(GENEPATH, "test_concat_aldir")
    dname = "TESTconcat"
    prefix = os.path.join(aldir, dname)
    # Prepare aldir with all needed alignment files:
    # (original alignment file, name of its copy in aldir)
    needed_files = (
        (os.path.join(EXPPATH, "exp_aldir", "mafft-prt2nuc.1.aln"),
         dname + "-mafft-prt2nuc.1.aln"),
        (os.path.join(EXPPATH, "exp_aldir-pers", "mafft-prt2nuc.8.aln"),
         dname + "-mafft-prt2nuc.8.aln"),
        (os.path.join(EXPPATH, "exp_aldir-pers", "mafft-prt2nuc.11.aln"),
         dname + "-mafft-prt2nuc.11.aln"),
    )
    os.makedirs(aldir)
    for original, copy_name in needed_files:
        shutil.copyfile(original, os.path.join(aldir, copy_name))
    # Run concatenation for families 1, 8 and 11, with quiet=True
    output, mess = pal.concat_alignments([1, 8, 11], prefix, "nucl", True)
    # Check returned values, and content of the concatenated file
    assert output == os.path.join(aldir, dname + "-complete.nucl.cat.aln")
    ref_concat = os.path.join(EXPPATH, "exp_concat_4genomes-fam1-8-11.aln")
    assert tutil.compare_order_content(output, ref_concat)
    assert mess == "Done"
    assert "Concatenating all nucl alignment files" in caplog.text
Exemplo n.º 13
0
def test_tbl_to_lst_changed_names(caplog):
    """
    Check that generated lstinfo file is as expected, when the genome name is not the same as
    it already was in the genome given to prokka.
    The test tblfile contains the following aspects:
    - gene in D strand (start < end)
    - gene in C strand (start > end)
    - CDS features (some with all info = ECnumber, gene name, product etc. ;
    some with missing info)
    - tRNA type
    - repeat_region type (*2)
    - contigs with more than 2 genes
    - contig with only 2 genes (both 'b' loc)
    - contig with 1 gene ('b' loc)
    - contig without gene (should be skipped)
    """
    caplog.set_level(logging.DEBUG)
    tblfile = os.path.join(TEST_ANNOTE, "prokka_out_tbl_changed-contnames.tbl")
    lstfile = os.path.join(GENEPATH, "res_test_tbl2lst.lst")
    # Contig name in the tbl file -> gembase contig name
    contigs = {
        "toto_1": "test.0417.00002.0001",
        "toto_2": "test.0417.00002.0002",
        "toto_3": "test.0417.00002.0003",
        "toto_4": "test.0417.00002.0004",
        "toto_5": "test.0417.00002.0005",
        "toto_6": "test.0417.00002.0006",
        "toto_7": "test.0417.00002.0007",
    }
    name = "test.0417.00002"
    gpath = "path_to_genome"
    assert prokkafunc.tbl2lst(tblfile, lstfile, contigs, name, gpath)
    exp_lst = os.path.join(EXP_ANNOTE, "res_create_lst-prokka.lst")
    assert tutil.compare_order_content(exp_lst, lstfile)
Exemplo n.º 14
0
def test_write_gene():
    """
    Test that lstinfo line is written as expected when writing info for
    a gene (CDS).

    Both the returned line and the content of the written file are checked.
    """
    gtype = "CDS"
    locus_num = "5621221"
    gene_name = "abc"
    product = "new product"
    cont_loc = "i"
    genome = "ESCO.0216.00005"
    cont_num = 15
    ecnum = "454.12.5"
    # Fields containing '|' characters: the expected line has '_' instead
    inf2 = "more information... dfd | with | pipe|characters..."
    db_xref = "mydb|pipe"
    strand = "C"
    start = str(154)
    end = str(656)
    lstfile = os.path.join(GENEPATH, "toto.lst")
    # 'with' guarantees that the file is closed (and flushed) even if write_gene raises
    with open(lstfile, "w") as lstopenfile:
        lst_line = ffunc.write_gene(gtype, locus_num, gene_name, product, cont_loc,
                                    genome, cont_num, ecnum, inf2, db_xref, strand,
                                    start, end, lstopenfile)
    assert lst_line == (
        "154\t656\tC\tCDS\tESCO.0216.00005.0015i_5621221\tabc\t| new product "
        "| 454.12.5 | more information... dfd _ with _ pipe_characters... | "
        "mydb_pipe")
    exp_file = os.path.join(EXP_ANNOTE, "res_test_write_geneCDS.lst")
    assert tutil.compare_order_content(exp_file, lstfile)
Exemplo n.º 15
0
def test_build_bank(caplog):
    """
    Build a protein bank from a list of genomes, and check that it is created at the
    same place as the database, with the expected content.
    """
    caplog.set_level(logging.DEBUG)
    name = "EXEM"
    lstinfo = os.path.join(PATH_TEST_FILES, "list_to_pan.txt")
    # Use a copy of the example database, located in the folder of generated files
    cur_dbpath = os.path.join(GENEPATH, "Proteins")
    shutil.copytree(os.path.join(PATH_TEST_FILES, "example_db", "Proteins"),
                    cur_dbpath)
    # 2 last arguments: spedir (None) and quiet (True)
    outfile = psf.build_prt_bank(lstinfo, cur_dbpath, name, None, True)

    # Check prt bank filename: must be in the database folder
    exp_out = os.path.join(cur_dbpath, name + ".All.prt")
    assert outfile == exp_out
    # Check content of bank created
    exp_file = os.path.join(PATH_EXP_FILES, "exp_EXEM.All.prt")
    assert tutil.compare_order_content(exp_file, exp_out)

    # Check logs
    expected_log = ("Building bank with all proteins to test/data/pangenome/"
                    "generated_by_unit-tests/Proteins/EXEM.All.prt")
    assert expected_log in caplog.text
Exemplo n.º 16
0
def test_run_all_pangenome_panexists_ok(caplog):
    """
    Check that, given a prt bank, and a pangenome file,
    it says that pangenome file already exists, and just reads families from it
    """
    caplog.set_level(15)
    min_id = 0.8
    clust_mode = 1
    outdir = os.path.join(GENEPATH, "test_run_allpangenome")
    os.makedirs(outdir)
    prt_path = os.path.join(PATH_EXP_FILES, "exp_EXEM.All.prt")
    threads = 2
    panfile = None
    quiet = False
    # Create pangenome file (copy of the expected one), with the name expected in
    # the returned value
    exp_pan = os.path.join(PATH_EXP_FILES, "exp_pangenome-4genomes.lst")
    panfile_out = os.path.join(
        outdir, "PanGenome-exp_EXEM.All.prt-clust-0.8-mode1-th2.lst")
    shutil.copyfile(exp_pan, panfile_out)
    fams, outfile = mmseqs.run_all_pangenome(min_id,
                                             clust_mode,
                                             outdir,
                                             prt_path,
                                             threads,
                                             panfile=panfile,
                                             quiet=quiet)
    # check that tmp dir was not created (the existing pangenome file is only read)
    tmp_dir = os.path.join(outdir, "tmp_exp_EXEM.All.prt_0.8-mode1-th2")
    assert not os.path.isdir(tmp_dir)

    # check that pangenome file is present with expected name
    assert panfile_out == outfile
    assert os.path.isfile(panfile_out)

    # Check content of output pangenome file
    assert tutil.compare_order_content(exp_pan, outfile)

    # Check families returned in fams dict: each family number must be one of
    # '1' to '16', and each family must be one of the expected families.
    exp_nums = [str(i) for i in range(1, 17)]
    for num, fam in fams.items():
        assert num in exp_nums
        assert any(fam == expfam for expfam in FAMILIES4G)

    # Check logs
    assert ("Will run MMseqs2 with:\n\t- minimum sequence identity = 80.0%\n"
            "\t- cluster mode 1") in caplog.text
    assert (
        "Pangenome file "
        "test/data/pangenome/generated_by_unit-tests/test_run_allpangenome/PanGenome-exp_EXEM.All.prt-clust-0.8-mode1-th2.lst "
        "already exists. PanACoTA will read it to get families."
    ) in caplog.text
    assert "Reading and getting information from pangenome file" in caplog.text
    assert caplog.records[0].levelname == "INFO"
    assert caplog.records[1].levelname == "WARNING"
    assert caplog.records[2].levelname == "INFO"
Exemplo n.º 17
0
def test_main_given_tmp_verbose3(capsys):
    """
    Test that when a tmp folder is given by user, tmp files are saved in it,
    and prokka files too.
    + check that, with verbose=3, warnings are written to stderr

    Giving 4 genomes in list_files
    - for 1 genome, toto.fst does not exist, and will not be in the concatenated file
    - 2 concatenated files
    - 4 files to annotate
    - 4 prokkaRes
    - 1 genome with problems: no CDS found
    - 3 genomes in result dirs
    """
    tmpdir = os.path.join(GENEPATH, "tmp_funcGivenTmp")
    list_file = os.path.join(TEST_DIR, "list_genomes-func-test-default.txt")
    info_file = os.path.join(GENEPATH, "LSTINFO-list_genomes-func-test-default.lst")
    # Positional arguments after the folders: name ("ESCO"), date ("0417"), l90 (10)
    returned = annot.main("cmd", list_file, GEN_PATH, GENEPATH, "ESCO", "0417", 10,
                          cutn=3, tmp_dir=tmpdir, verbose=3)
    assert returned == (info_file, 3)
    _, err = capsys.readouterr()
    # Check that warnings are written to stderr
    assert "WARNING" in err
    missing_genome_warning = (
        "toto.fst genome file does not exist. Its file will be ignored when "
        "concatenating ['A_H738.fasta', 'genome1.fasta', 'toto.fst']")
    assert missing_genome_warning in err
    # Check that tmp files exist in the right folder:
    # -> the 2 concatenated files
    # -> 6 fna files in total, 4 of them being 'split3N' files
    for concatenated in ("A_H738.fasta-all.fna", "H299_H561.fasta-all.fna"):
        assert os.path.isfile(os.path.join(tmpdir, concatenated))
    assert len(glob.glob(os.path.join(tmpdir, '*.fna'))) == 6
    assert len(glob.glob(os.path.join(tmpdir, '*split3N.fna'))) == 4
    # Check that split contigs were renamed with unique ID at the beginning of the header,
    # and that, even for the complete genome, the contig was renamed with ID:
    # (expected file name, result file name)
    split_files = (
        ("exp_A_H738.fasta-all.fna_prokka-split3N.fna",
         "A_H738.fasta-all.fna_prokka-split3N.fna"),
        ("exp_complete_genome.fna_prokka-split3N.fna",
         "complete_genome.fna_prokka-split3N.fna"),
    )
    for exp_name, res_name in split_files:
        exp_file = os.path.join(EXP_DIR, exp_name)
        res_file = os.path.join(tmpdir, res_name)
        assert tutil.compare_order_content(exp_file, res_file)
    # Test that prokka folder is in the right directory: a prokkaRes folder exists for
    # the split A_H738 genome, but not for H299_H561.fasta-all.fna
    assert os.path.isdir(os.path.join(tmpdir, "A_H738.fasta-all.fna_prokka-split3N.fna-prokkaRes"))
    assert not os.path.isdir(os.path.join(tmpdir, "H299_H561.fasta-all.fna-prokkaRes"))
Exemplo n.º 18
0
def test_format_1pb_prodigal(caplog):
    """
    Test that when giving a list of genomes, 1 that is correctly formatted, and 1 with
    a problem, format_genomes returns the problematic one in skipped_format
    """
    caplog.set_level(logging.DEBUG)
    # GENOME 1: empty original genome file
    genome1 = "wrong.fasta"
    gpath1 = os.path.join(GENEPATH, "wrong.fasta")
    open(gpath1, "w").close()
    # Add prodigal (empty) result files to its prodigalRes directory
    prodi_path = gpath1 + "-prodigalRes"
    os.makedirs(prodi_path)
    for empty_res in ("toto.gff", "toto.ffn", "toto.faa"):
        open(os.path.join(prodi_path, empty_res), "w").close()
    # Create output directory for .fna file
    os.makedirs(os.path.join(GENEPATH, "Replicons"))
    # GENOME 2: genome with existing prodigal results
    genome2 = "H299_H561.fasta"
    gpath2 = os.path.join(ANNOTEDIR, "genomes", genome2)
    # Copy results of prodigal for this genome to output dir (GENEPATH)
    shutil.copytree(os.path.join(EXP_ANNOTE, genome2 + '-prodigalRes'),
                    os.path.join(GENEPATH, genome2 + "-prodigalRes"))
    # genomes = {genome: [name, gpath, to_annot, size, nbcont, l90]}
    genomes = {
        genome1: ["test_genome1", gpath1, gpath1, 12656, 3, 1],
        genome2: ["test_runprokka_H299", gpath2, gpath2, 456464645, 5, 1],
    }
    # Results and annotation folders are both GENEPATH
    res_path = GENEPATH
    # Format both genomes: only the wrong one must be skipped
    skipped_format = ffunc.format_genomes(genomes, res_path, GENEPATH, True, threads=2)
    assert skipped_format == ["wrong.fasta"]
    # Check that output files are created, and contain what is expected:
    # (result folder, extension of the file it contains)
    exp_dir = os.path.join(EXP_ANNOTE, "res_formatAll", "prodigal")
    expected_outputs = (
        ("LSTINFO", ".lst"),
        ("Proteins", ".prt"),
        ("Genes", ".gen"),
        ("Replicons", ".fna"),
        ("gff3", ".gff"),
    )
    for fol, ext in expected_outputs:
        exp_file = os.path.join(exp_dir, fol, "test_runprokka_H299" + ext)
        res_file = os.path.join(res_path, fol, "test_runprokka_H299" + ext)
        assert os.path.isfile(res_file)
        assert tutil.compare_order_content(res_file, exp_file)
    # Check log
    expected_logs = (
        "Formatting all genomes",
        ("Your genome test/data/annotate/generated_by_unit-tests/wrong.fasta does not "
         "contain any sequence, or is not in fasta format."),
        "Problems while generating Replicon file for test_genome1",
    )
    for message in expected_logs:
        assert message in caplog.text
Exemplo n.º 19
0
def test_format_1genome(caplog):
    """
    Format one genome with prokkafunc.format_one_genome, and check that the call
    succeeds and that the 5 generated files (Replicons, Proteins, Genes, LSTINFO, gff)
    have the expected content.
    """
    caplog.set_level(logging.DEBUG)
    name = "test.0417.00002"
    # Path to the original genome file (first argument of format_one_genome)
    gpath = os.path.join(TEST_ANNOTE, "original_name.fna")
    prok_path = TEST_ANNOTE
    # Build the result directories paths, then create them
    prot_dir, lst_dir, rep_dir, gene_dir, gff_dir = [
        os.path.join(GENEPATH, folder)
        for folder in ("Proteins", "LSTINFO", "Replicons", "Genes", "gff")
    ]
    for folder_path in (prot_dir, lst_dir, rep_dir, gene_dir, gff_dir):
        os.makedirs(folder_path)

    formatted = prokkafunc.format_one_genome(gpath, name, prok_path, lst_dir, prot_dir,
                                             gene_dir, rep_dir, gff_dir)
    assert formatted

    # Check output files content.
    # Each entry: (expected file in EXP_ANNOTE, output directory, generated filename)
    comparisons = [
        ("res_created_rep-prokka.fna", rep_dir, "test.0417.00002.fna"),
        ("res_create_prt_prokka.faa", prot_dir, "test.0417.00002.prt"),
        ("res_create_gene_prokka.gen", gene_dir, "test.0417.00002.gen"),
        ("res_create_lst-prokka.lst", lst_dir, "test.0417.00002.lst"),
        ("res_create_gff-prokka.gff", gff_dir, "test.0417.00002.gff"),
    ]
    for exp_name, res_dir, res_name in comparisons:
        exp_file = os.path.join(EXP_ANNOTE, exp_name)
        res_file = os.path.join(res_dir, res_name)
        assert tutil.compare_order_content(exp_file, res_file)
Exemplo n.º 20
0
def test_run_prodigal_out_doesnt_exist():
    """
    Test that when the output directory does not exist, it creates it, and runs prodigal
    with all expected outfiles (faa, ffn and gff). Also check the 3 messages put in the
    logger queue: start of annotation, prodigal command line, end of annotation.
    """
    logger = my_logger("test_run_prodigal_out_doesnt_exist")
    utils.init_logger(LOGFILE_BASE, 0, 'test_run_prodigal_out_doesnt_exist')
    gpath = os.path.join(GEN_PATH, "H299_H561.fasta")
    out_dir = os.path.join(GENEPATH, "H299_H561.fasta-prodigalRes")
    cores_prodigal = 2
    name = "test_runprodigal_H299"
    force = False
    trn_file = os.path.join(TEST_DIR, "A_H738-and-B2_A3_5.fna.trn")
    nbcont = 3
    # logger[0] is the queue in which run_prodigal puts its log records (read below)
    arguments = (gpath, GENEPATH, cores_prodigal, name, force, nbcont,
                 trn_file, logger[0])
    assert afunc.run_prodigal(arguments)
    # Prefix (path without extension) shared by the expected faa, ffn and gff files
    exp_dir = os.path.join(EXP_DIR, "H299_H561.fasta-prodigalRes",
                           "ESCO.1015.00001")
    # Check that each output file exists and is as expected. Looping over the
    # extensions guarantees that the file checked for existence is the same one
    # whose content is compared (gff existence was previously never checked).
    for ext in (".faa", ".ffn", ".gff"):
        out_file = os.path.join(out_dir, name + ext)
        assert os.path.isfile(out_file)
        assert tutil.compare_order_content(exp_dir + ext, out_file)
    # Check logs
    q = logger[0]
    assert q.qsize() == 3
    assert q.get().message.startswith("Start annotating")
    assert q.get().message == (
        "Prodigal command: prodigal -i test/data/annotate/genomes/"
        "H299_H561.fasta -d test/data/annotate/"
        "generated_by_unit-tests/H299_H561.fasta-prodigalRes/"
        "test_runprodigal_H299.ffn -a test/data/annotate/"
        "generated_by_unit-tests/H299_H561.fasta-prodigalRes/"
        "test_runprodigal_H299.faa -f gff -o test/data/annotate/"
        "generated_by_unit-tests/H299_H561.fasta-prodigalRes/"
        "test_runprodigal_H299.gff -t "
        "test/data/annotate/test_files/A_H738-and-B2_A3_5.fna.trn "
        "-q")
    assert q.get().message.startswith("End annotating")
Exemplo n.º 21
0
def test_run_prodigal_small():
    """
    Test that when the output directory does not exist, it creates it, and runs prodigal
    with all expected outfiles (faa, ffn and gff). Here, we run prodigal with --small
    option (on a small genome): the expected command line contains '-p meta'.
    """
    logger = my_logger("test_run_prodigal_small")
    utils.init_logger(LOGFILE_BASE, 0, 'test_run_prodigal_small')
    gpath = os.path.join(GEN_PATH, "H299_H561.fasta")
    out_dir = os.path.join(GENEPATH, "H299_H561.fasta-prodigalRes")
    cores_prodigal = 2
    name = "test_runprodigal_small_H299"
    force = False
    # Given in place of a training file path
    trn_file = "small option"
    nbcont = 3
    # logger[0] is the queue in which run_prodigal puts its log records (read below)
    arguments = (gpath, GENEPATH, cores_prodigal, name, force, nbcont,
                 trn_file, logger[0])
    assert afunc.run_prodigal(arguments)

    # Prefix (path without extension) shared by the expected faa, ffn and gff files
    exp_dir = os.path.join(EXP_DIR, "H299_H561.fasta_small-prodigalRes",
                           "test_runprodigal_small_H299")
    # Check that each output file exists and is as expected. Looping over the
    # extensions guarantees that the file checked for existence is the same one
    # whose content is compared (gff existence was previously never checked).
    for ext in (".faa", ".ffn", ".gff"):
        out_file = os.path.join(out_dir, name + ext)
        assert os.path.isfile(out_file)
        assert tutil.compare_order_content(exp_dir + ext, out_file)
    # Check logs
    q = logger[0]
    assert q.qsize() == 3
    assert q.get().message.startswith("Start annotating")
    prodigal_cmd = q.get().message
    assert ("Prodigal command: prodigal -i test/data/annotate/genomes/"
            "H299_H561.fasta -d test/data/annotate/"
            "generated_by_unit-tests/H299_H561.fasta-prodigalRes/"
            "test_runprodigal_small_H299.ffn -a test/data/annotate/"
            "generated_by_unit-tests/H299_H561.fasta-prodigalRes/"
            "test_runprodigal_small_H299.faa -f gff -o test/data/annotate/"
            "generated_by_unit-tests/H299_H561.fasta-prodigalRes/"
            "test_runprodigal_small_H299.gff -p meta -q") in prodigal_cmd
    assert q.get().message.startswith("End annotating")
Exemplo n.º 22
0
def test_create_prt(caplog):
    """
    Call prodigalfunc.create_prt on a prodigal faa file and a lst file, and check
    that the call succeeds and that the prt file written has the expected content.
    """
    caplog.set_level(logging.DEBUG)
    # Input files: prodigal protein output, and lst file
    prodigal_dir = os.path.join(TEST_ANNOTE, "original_name.fna-prodigalRes")
    protfile = os.path.join(prodigal_dir, "prodigal.outtest.ok.faa")
    lst_in = os.path.join(EXP_ANNOTE, "res_create_gene_lst_prodigal.lst")
    # Output file
    res_prt_file = os.path.join(GENEPATH, "prodigal_res.prt")
    created = prodigalfunc.create_prt(protfile, res_prt_file, lst_in)
    assert created
    # Compare generated prt file to the expected one
    exp_prt = os.path.join(EXP_ANNOTE, "res_create_prt_prodigal.faa")
    same_content = tutil.compare_order_content(exp_prt, res_prt_file)
    assert same_content
Exemplo n.º 23
0
def test_create_gen_missingSeq(caplog):
    """
    Check create gen file. A gene in lst does not have a sequence in ffn.
    Just skip it, and go to next sequence for gen file.

    The call to create_gen must still succeed, and the gen file written must be the
    same as the expected one.
    """
    caplog.set_level(logging.DEBUG)
    # Input files: ffn file used for this "missing sequence" case, and lst file
    ffnfile = os.path.join(TEST_ANNOTE, "prokka_out_for_test-noSeqFor1gene.ffn")
    lstfile = os.path.join(EXP_ANNOTE, "res_create_lst-prokka.lst")
    # Output gen file
    res_gen_file = os.path.join(GENEPATH, "prodigal_res.gen")
    assert prokkafunc.create_gen(ffnfile, lstfile, res_gen_file)
    # Compare generated gen file to the expected one
    exp_gen = os.path.join(EXP_ANNOTE, "res_create_gene_prokka-missGene.gen")
    assert tutil.compare_order_content(exp_gen, res_gen_file)
Exemplo n.º 24
0
def test_create_gen(caplog):
    """
    Check create gen file.

    The call to create_gen must succeed, and the gen file written must be the same as
    the expected one.
    """
    caplog.set_level(logging.DEBUG)
    # Input files: ffn file from the prokka results folder, and lst file
    ffnfile = os.path.join(TEST_ANNOTE, "original_name.fna-prokkaRes",
                           "prokka_out_for_test.ffn")
    lstfile = os.path.join(EXP_ANNOTE, "res_create_lst-prokka.lst")
    # Output gen file
    res_gen_file = os.path.join(GENEPATH, "prodigal_res.gen")
    assert prokkafunc.create_gen(ffnfile, lstfile, res_gen_file)
    # Compare generated gen file to the expected one
    exp_gen = os.path.join(EXP_ANNOTE, "res_create_gene_prokka.gen")
    assert tutil.compare_order_content(exp_gen, res_gen_file)
Exemplo n.º 25
0
def test_all_post():
    """
    Run post.post_treat on FAMILIES, and check that the quali matrix, quanti matrix
    and summary files exist with the expected content, and that the bin pangenome
    file exists.
    """
    pangenome = os.path.join(GENEPATH, "test_all_post")
    post.post_treat(FAMILIES, pangenome)

    # For each text output: (suffix added to pangenome filename, expected file)
    text_outputs = (
        (".quali.txt", EXP_QUALIF),
        (".quanti.txt", EXP_QUANTIF),
        (".summary.txt", EXP_SUMF),
    )
    for suffix, expected in text_outputs:
        generated = pangenome + suffix
        # Check presence, then content
        assert os.path.isfile(generated)
        assert tutil.compare_order_content(generated, expected)

    # Bin pangenome file: only its presence is checked
    bin_file = pangenome + ".bin"
    assert os.path.isfile(bin_file)
Exemplo n.º 26
0
def test_write_pers():
    """
    Write persistent families to a file with persf.write_persistent, and compare
    this file to the expected one.
    """
    # Families: {family number: [members]}
    families = {
        9: ["member_3", "member_12", "other_member_2"],
        3: ["member_10", "member_100", "member_1"],
        10: ["member_1", "member_2", "member_3"],
        1: ["my_protein_3", "my_protein_12", "my_protein_2"],
        5: [
            "ESCO.1216.00003.i001_01001",
            "SAEN.0215.00003.i009_00001",
            "ESCO.1017.00003.b001_00001",
            "ESCO.0812.00002.i002_02000",
            "ESCO.0812.00003.i002_02000",
        ],
    }
    written = os.path.join(GENEPATH, "test-persistent_families.txt")
    persf.write_persistent(families, written)
    expected = os.path.join(EXP_PATH, "exp_persgenome1.txt")
    assert tutils.compare_order_content(written, expected)
Exemplo n.º 27
0
def test_open_out():
    """
    Check that, given some families and a pangenome file, open_outputs_to_write
    returns the expected (qualis, quantis, sums) values, and creates 3 output files
    with the expected content (quali, quanti, summary).
    """
    pangenome = os.path.join(GENEPATH, "test_open_out_pangenome.txt")
    qualis, quantis, sums = post.open_outputs_to_write(FAMS_BY_STRAIN, FAMILIES,
                                                       ALL_STRAINS, pangenome)

    # Check values returned by the function
    assert qualis == EXP_QUALIS
    assert quantis == EXP_QUANTIS
    assert sums == EXP_SUMS

    # Check presence and content of each output file:
    # quali matrix, quanti matrix, and summary
    for suffix, expected in ((".quali.txt", EXP_QUALIF),
                             (".quanti.txt", EXP_QUANTIF),
                             (".summary.txt", EXP_SUMF)):
        generated = pangenome + suffix
        assert os.path.isfile(generated)
        assert tutil.compare_order_content(generated, expected)
Exemplo n.º 28
0
def test_only_mash(capsys):
    """
    Run prepare.main with only_mash=True, giving an info file (LSTINFO) instead of
    NCBI information. Check the returned filename, the messages written to
    stdout/stderr, and the files found in the output directory.
    """
    # NCBI-related parameters: all empty, except the section
    ncbi_species_name = ""
    ncbi_species_taxid = ""
    ncbi_taxid = ""
    ncbi_strains = ""
    ncbi_section = "refseq"
    levels = ""
    # Directories and run options
    outdir = GENEPATH
    tmp_dir = ""
    db_dir = ""
    threads = 1
    norefseq = False
    only_mash = True
    info_file = os.path.join(TEST_DIR, "test_lstinfo_onlymash.lst")
    # Thresholds
    l90 = 100
    nbcont = 999
    cutn = 5
    min_dist = 1e-4
    max_dist = 0.06
    # Verbosity
    verbose = 1
    quiet = False

    out_info_file = os.path.join(outdir, "LSTINFO-NA-filtered-0.0001_0.06.txt")
    returned = prepare.main("cmd", ncbi_species_name, ncbi_species_taxid,
                            ncbi_taxid, ncbi_strains, levels, ncbi_section, outdir,
                            tmp_dir, threads, norefseq, db_dir, only_mash,
                            info_file, l90, nbcont, cutn, min_dist, max_dist,
                            verbose, quiet)
    assert returned == out_info_file

    # Check messages: one expected in stderr, all others in stdout
    out, err = capsys.readouterr()
    assert "You asked to run only mash steps" in err
    expected_stdout = (
        "You want to run only mash steps. Getting information from "
        "test/data/prepare/test_files/test_lstinfo_onlymash.lst",
        "Found 5 genomes in total",
        "Computing pairwise distances between all genomes",
        "Sorting all 5 genomes by quality",
        "Final number of genomes in dataset: 1",
    )
    for message in expected_stdout:
        assert message in out

    # tmp_files folder must be empty
    tmp_content = os.listdir(os.path.join(outdir, "tmp_files"))
    assert len(tmp_content) == 0
    # 3 log files must be in the output directory
    log_files = glob.glob(os.path.join(outdir, "*log*"))
    assert len(log_files) == 3
    # Content of output lstinfo file (same path as the one returned by main)
    exp_lst = os.path.join(DBDIR, "exp_files", "exp_lstinfo_run_only-mash.lst")
    assert tutil.compare_order_content(out_info_file, exp_lst)
Exemplo n.º 29
0
def test_group_by_genome(caplog):
    """
    Give pal.group_by_genome a list of genomes, an alignment file and an output
    filename. Check that the call succeeds, that the output file is the same as the
    expected grouped alignment, and that the expected messages are logged.
    """
    caplog.set_level(logging.DEBUG)
    genomes = ["GEN2.1017.00001", "GEN4.1111.00001",
               "GENO.1017.00001", "GENO.1216.00002"]
    aln_in = os.path.join(TESTPATH, "complete.cat.fictive4genomes.aln")
    grouped_out = os.path.join(GENEPATH, "test_group_by_genome")
    # group_by_genome takes a single tuple argument
    grouped = pal.group_by_genome((genomes, aln_in, grouped_out))
    assert grouped
    # Compare output to expected file
    expected = os.path.join(EXPPATH, "exp_fictive.grp.aln")
    assert tutil.compare_order_content(grouped_out, expected)
    # Check logs
    for message in ("3 sequences found per genome",
                    "Writing alignments per genome"):
        assert message in caplog.text
Exemplo n.º 30
0
def test_build_bank_spedir_quiet(caplog):
    """
    Build a protein bank from a list of genomes, and create it in a given output directory.

    Checks that build_prt_bank returns the path '<spedir>/<name>.All.prt', that this
    file has the same content as the expected bank, and that the "Building bank"
    message is found in the logs.
    """
    caplog.set_level(logging.DEBUG)
    # Input: file listing the genomes, and folder given as protein database path
    lstinfo = os.path.join(PATH_TEST_FILES, "list_to_pan.txt")
    dbpath = os.path.join(PATH_TEST_FILES, "example_db", "Proteins")
    name = "EXEM"
    # Specific output directory where the bank is expected to be written
    spedir = os.path.join(GENEPATH, "test_build_prt", "toto")
    # NOTE(review): the test name says "quiet", but quiet is False here and a log
    # message is expected below -- confirm this is intended.
    quiet = False
    outfile = psf.build_prt_bank(lstinfo, dbpath, name, spedir, quiet)
    exp_file = os.path.join(PATH_EXP_FILES, "exp_EXEM.All.prt")
    exp_out = os.path.join(spedir, name + ".All.prt")
    # Returned path must be the bank file inside spedir
    assert outfile == exp_out
    assert tutil.compare_order_content(exp_file, exp_out)
    # Check logs
    assert ("Building bank with all proteins to test/data/pangenome/"
            "generated_by_unit-tests/test_build_prt/toto/EXEM.All.prt") in caplog.text