Пример #1
0
def test_all_use_counts():
    """
    Run the "all" subroutine from one argument string with the
    --use_salmon_counts / --salmon_dir options set, then check that the
    individual BUSCO summary file for the first listed sample exists.
    """
    aux_root = os.path.join(os.path.dirname(__file__), '..', 'aux_data')
    sample_dir = os.path.join(aux_root, test_reference, "samples_METs_small")
    salmon_dir = os.path.join(aux_root, test_reference, "samples_METs",
                              "salmon_quant")
    reference_dir = os.path.join(aux_root, test_reference, "sample_ref")
    output_dir = os.path.join(aux_root, "test_out_V")

    # Start from a clean slate so the final check cannot pass on stale files.
    os.system("rm -rf " + output_dir)

    cli_tokens = [
        "all",
        "--database", "mmetsp",
        "--sample_dir", sample_dir,
        "--mets_or_mags", "mets",
        "--out_dir", output_dir,
        "--busco_threshold", "30",
        "--individual_or_summary", "individual",
        '--organisms', 'Chromera',
        "--filter_metric", "pid",
        '--taxonomy_organisms', 'genus',
        "--ref_fasta", "reference.pep.fa",
        "--reference_dir", reference_dir,
        "--use_salmon_counts",
        "--salmon_dir", salmon_dir,
    ]
    eukulele(string_arguments=" ".join(cli_tokens))

    # The sample name is the file name up to its first dot.
    first_sample = os.listdir(sample_dir)[0].split(".")[0]
    summary_name = "summary_" + first_sample + ".tsv"
    busco_out = os.path.join(output_dir, "busco_assessment", first_sample,
                             "individual", summary_name)
    assert os.path.isfile(busco_out)
Пример #2
0
def test_individual():
    '''
    Run the "all" subroutine on the MAG samples with the -i flag and
    explicitly listed organisms, then check that the individual BUSCO
    summary file for the first listed sample exists.
    '''
    aux_root = os.path.join(os.path.dirname(__file__), '..', 'aux_data')
    sample_dir = os.path.join(aux_root, test_reference, "samples_MAGs")
    reference_dir = os.path.join(aux_root, test_reference, "sample_ref_MAG")
    output_dir = os.path.join(aux_root, "test_out_E")

    # Remove output left behind by an earlier run.
    os.system("rm -rf " + output_dir)

    cli_tokens = [
        "all",
        "--database", "mmetsp",
        "--sample_dir", sample_dir,
        "--mets_or_mags", "mags",
        "--out_dir", output_dir,
        "-i",
        '--organisms', 'Chromera',
        '--taxonomy_organisms', 'genus',
        "--reference_dir", reference_dir,
    ]
    eukulele(string_arguments=" ".join(cli_tokens))

    # The sample name is the file name up to its first dot.
    first_sample = os.listdir(sample_dir)[0].split(".")[0]
    summary_name = "summary_" + first_sample + ".tsv"
    busco_out = os.path.join(output_dir, "busco_assessment", first_sample,
                             "individual", summary_name)
    assert os.path.isfile(busco_out)
Пример #3
0
def test_all_commandline():
    """
    Run the "alignment" subroutine from a single argument string and check
    that the estimated-taxonomy file for the first listed sample exists.
    """
    aux_root = os.path.join(os.path.dirname(__file__), '..', 'aux_data')
    sample_dir = os.path.join(aux_root, test_reference, "samples_METs_small")
    reference_dir = os.path.join(aux_root, test_reference, "sample_ref")
    output_dir = os.path.join(aux_root, "test_out_R")

    # Shell form noted by the original author (approximate):
    #   EUKulele alignment --database mmetsp
    #       --sample_dir tests/aux_data/mmetsp/samples_METs_small
    #       --mets_or_mags mets --out_dir tests/test_out
    #       --organisms Chromera --taxonomy_organisms genus
    #       --reference_dir tests/aux_data/mmetsp
    cli_tokens = [
        "alignment",
        "--database", "mmetsp",
        "--sample_dir", sample_dir,
        "--mets_or_mags", "mets",
        "--out_dir", output_dir,
        "--organisms", "Chromera",
        "--ref_fasta", "reference.pep.fa",
        "--taxonomy_organisms", "genus",
        "--reference_dir", reference_dir,
    ]
    eukulele(string_arguments=" ".join(cli_tokens))

    # The sample name is the file name up to its first dot.
    first_sample = os.listdir(sample_dir)[0].split(".")[0]
    est_out = os.path.join(output_dir, "taxonomy_estimation",
                           first_sample + "-estimated-taxonomy.out")
    assert os.path.isfile(est_out)
Пример #4
0
def test_busco_file():
    '''
    Tests the functionality of using a file to specify the organisms
    to explore via BUSCO.

    Runs eukulele (no explicit subroutine token) on the MAG samples with
    --busco_file pointing at "test_busco.tsv" inside the sample directory,
    then checks that the individual BUSCO summary for the first listed
    sample exists.
    '''

    base_dir = os.path.join(os.path.dirname(__file__), '..', 'aux_data')
    sample_dir = os.path.join(base_dir, test_reference, "samples_MAGs")
    output_dir = os.path.join(base_dir, "test_out_K")
    # Remove output from earlier runs so the assertion reflects this run.
    os.system("rm -rf " + output_dir)
    reference_dir = os.path.join(base_dir, test_reference, "sample_ref_MAG")

    string_arguments = " ".join([
        "--database", "mmetsp", "--sample_dir", sample_dir, "--mets_or_mags",
        "mags", "--out_dir", output_dir, "-i", '--busco_file',
        os.path.join(base_dir, test_reference, "samples_MAGs",
                     "test_busco.tsv"), "--reference_dir", reference_dir
    ])
    eukulele(string_arguments=string_arguments)

    # Sample names are the file names up to their first dot.
    samplenames = [curr.split(".")[0] for curr in os.listdir(sample_dir)]
    busco_out = os.path.join(output_dir, "busco_assessment", samplenames[0],
                             "individual",
                             "summary_" + samplenames[0] + ".tsv")

    assert os.path.isfile(busco_out)
Пример #5
0
def test_error_busco():
    '''
    Tests that we get an error when we use a BUSCO file that does
    not exist.

    The --busco_file argument points at "busco_file_fake.tsv"; the test
    passes only if the eukulele call fails.
    '''

    base_dir = os.path.join(os.path.dirname(__file__), '..', 'aux_data')
    sample_dir = os.path.join(base_dir, test_reference, "samples_MAGs")
    output_dir = os.path.join(base_dir, "test_out_K")
    os.system("rm -rf " + output_dir)
    reference_dir = os.path.join(base_dir, test_reference, "sample_ref")

    string_arguments = " ".join([
        "--database", "mmetsp", "--sample_dir", sample_dir, "--mets_or_mags",
        "mags", "--out_dir", output_dir, "-i", '--busco_file',
        os.path.join(base_dir, test_reference, "samples_MAGs",
                     "busco_file_fake.tsv"), "--reference_dir", reference_dir
    ])

    raised = False
    try:
        eukulele(string_arguments=string_arguments)
    except (Exception, SystemExit):
        # SystemExit is listed so that a failure reported through
        # sys.exit() still counts, as it did with the previous bare
        # except; KeyboardInterrupt is no longer swallowed.
        raised = True

    assert raised
Пример #6
0
def test_tester():
    """
    Run the "setup" subroutine with the --test flag and check that the
    requested output directory does not exist afterwards.
    """
    aux_root = os.path.join(os.path.dirname(__file__), '..', 'aux_data')
    sample_dir = os.path.join(aux_root, test_reference, "samples_MAGs")
    reference_dir = os.path.join(aux_root, test_reference, "sample_ref")
    output_dir = os.path.join(aux_root, "test_out")

    # Make sure a directory from an earlier run cannot fail the check.
    os.system("rm -rf " + output_dir)

    cli_tokens = [
        "setup", "--test",
        "--database", "mmetsp",
        "--sample_dir", sample_dir,
        "--mets_or_mags", "mags",
        "--out_dir", output_dir,
        "--ref_fasta", "reference.pep.fa",
        "--reference_dir", reference_dir,
    ]
    eukulele(string_arguments=" ".join(cli_tokens))

    output_exists = os.path.isdir(output_dir)
    assert not output_exists
    
#def test_cleanup():
#    base_dir = os.path.join(os.path.dirname(__file__), '..', 'aux_data')
#    config_path = os.path.join(os.path.dirname(__file__), '..', 'aux_data', 'test_configs')
#    base_configs = [os.path.join(config_path, 'curr_config_alignment.yaml'),\
#                    os.path.join(config_path, 'curr_config_setup.yaml')]
    
#    successful_test = True
#    for base_config in base_configs:
#        with open(base_config) as f:
#            config = yaml.load(f, Loader=yaml.FullLoader)

#        config["reference"] = os.path.join(base_dir, test_reference)
#       os.system("rm -rf " + os.path.join(config["output"]))

#        successful_test = successful_test & (not os.path.isdir(os.path.join(config["output"])))
#        successful_test = True      
#    assert successful_test
       
Пример #7
0
def test_all():
    '''
    Combined test case.

    Copies the base config, overrides it to run the "all" subroutine in
    "summary" mode on the real-world MAG samples, writes the result to a
    scratch config file and runs eukulele with --config. The test passes
    when both the species-level combined BUSCO summary and the
    species-level MAG file exist for the first listed sample.
    '''

    base_dir = os.path.join(os.path.dirname(__file__), '..', 'aux_data')
    base_config = os.path.join(os.path.dirname(__file__), '..', 'aux_data',
                               'config.yaml')
    base_config_curr = os.path.join(os.path.dirname(__file__), '..',
                                    'aux_data', 'config_O.yaml')
    # Work on a copy so the shared base config is never modified.
    os.system("cp " + base_config + " " + base_config_curr)
    with open(base_config_curr) as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    config["mets_or_mags"] = "mags"
    config["reference"] = os.path.join(base_dir, test_reference,
                                       "sample_ref_MAG")
    config["samples"] = os.path.join(base_dir, "real-world-samples", "MAGs")
    config["subroutine"] = "all"
    config["individual_or_summary"] = "summary"
    config["cutoff"] = "tax-cutoffs.yaml"
    config["output"] = os.path.join(base_dir, "test_out_all_K")
    config["database"] = test_reference
    config["organisms"] = ["Chromera"]
    config["taxonomy_organisms"] = ["genus"]
    config["download_reference"] = 0
    config["column"] = "SOURCE_ID"
    config["ref_fasta"] = "reference.pep.fa"
    config["protein_map"] = "prot-map.json"
    config["tax_table"] = "tax-table.txt"

    config_path = os.path.join(base_dir, 'test_configs')
    os.system("mkdir -p " + config_path)
    config_file = os.path.join(config_path, 'curr_config_busco_O.yaml')
    with open(config_file, 'w') as f:
        yaml.dump(config, f)

    eukulele(string_arguments=" ".join(["--config", config_file]))

    # Sample names are the file names up to their first dot.
    samplenames = [
        curr.split(".")[0] for curr in os.listdir(config["samples"])
    ]
    out_prefix = samplenames[0]
    busco_out = os.path.join(config["output"], "busco_assessment",
                             out_prefix, "species_combined",
                             "summary_species_" + out_prefix + ".tsv")
    mag_file = os.path.join(config["output"], "levels_mags",
                            out_prefix + '.' + "species")

    # Separate asserts so a failure names the file that is missing.
    assert os.path.isfile(busco_out)
    assert os.path.isfile(mag_file)
Пример #8
0
def test_busco():
    """
    Run the "alignment" and then the "busco" subroutine from a generated
    config file, and check that the individual BUSCO summary for the first
    listed sample exists.
    """
    aux_root = os.path.join(os.path.dirname(__file__), '..', 'aux_data')
    template_config = os.path.join(aux_root, 'config.yaml')
    working_config = os.path.join(aux_root, 'config_all.yaml')

    # Load settings from a copy of the template config.
    os.system("cp " + template_config + " " + working_config)
    with open(working_config) as handle:
        config = yaml.load(handle, Loader=yaml.FullLoader)

    config["mets_or_mags"] = "mags"
    config["reference"] = os.path.join(aux_root, test_reference, "sample_ref")
    config["samples"] = os.path.join(aux_root, test_reference, "samples_MAGs")
    config["cutoff"] = "tax-cutoffs.yaml"
    config["output"] = os.path.join(aux_root, "test_out_all")
    config["database"] = test_reference
    config["organisms"] = ["Chromera"]
    config["taxonomy_organisms"] = ["genus"]
    config["download_reference"] = 0
    config["column"] = "SOURCE_ID"
    config["ref_fasta"] = "reference.pep.fa"
    config["protein_map"] = "prot-map.json"
    config["tax_table"] = "tax-table.txt"

    config_dir = os.path.join(aux_root, 'test_configs')
    os.system("mkdir -p " + config_dir)
    config_file = os.path.join(config_dir, 'curr_config_busco_all.yaml')

    # Same config file, rewritten and rerun once per subroutine.
    for stage in ("alignment", "busco"):
        config["subroutine"] = stage
        with open(config_file, 'w') as handle:
            yaml.dump(config, handle)
        eukulele(config=config_file)

    # The sample name is the file name up to its first dot.
    first_sample = os.listdir(config["samples"])[0].split(".")[0]
    busco_out = os.path.join(config["output"], "busco_assessment",
                             first_sample, "individual",
                             "summary_" + first_sample + ".tsv")
    assert os.path.isfile(busco_out)
Пример #9
0
def test_error_busco_no_orgs():
    """
    Expect eukulele to fail when -i is passed without --organisms,
    --taxonomy_organisms or --busco_file.

    The test passes only if the eukulele call fails.
    """
    base_dir = os.path.join(os.path.dirname(__file__), '..', 'aux_data')
    sample_dir = os.path.join(base_dir, test_reference, "samples_MAGs")
    output_dir = os.path.join(base_dir, "test_out_H")
    os.system("rm -rf " + output_dir)
    reference_dir = os.path.join(base_dir, test_reference, "sample_ref")

    string_arguments = " ".join([
        "--database", "mmetsp", "--sample_dir", sample_dir,
        "--mets_or_mags", "mags", "--out_dir", output_dir, "-i",
        "--reference_dir", reference_dir
    ])

    raised = False
    try:
        eukulele(string_arguments=string_arguments)
    except (Exception, SystemExit):
        # SystemExit is listed so that a failure reported through
        # sys.exit() still counts, as it did with the previous bare
        # except; KeyboardInterrupt is no longer swallowed.
        raised = True

    assert raised
Пример #10
0
def test_setup_blast():
    '''
    Tests setup followed by a BLAST subroutine call.

    Stage 1 runs the "setup" subroutine with alignment_choice "blast" and
    checks that a "blast" directory exists under the reference directory.
    Stage 2 reruns the same config with the "alignment" subroutine and
    checks for the all-species counts CSV in the output directory.
    '''

    base_dir = os.path.join(os.path.dirname(__file__), '..', 'aux_data')
    base_config = os.path.join(os.path.dirname(__file__), '..', 'aux_data',
                               'config.yaml')
    base_config_curr = os.path.join(os.path.dirname(__file__), '..',
                                    'aux_data', 'config_C.yaml')
    # Work on a copy so the shared base config is never modified.
    os.system("cp " + base_config + " " + base_config_curr)
    with open(base_config_curr) as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    config["mets_or_mags"] = "mags"
    config["reference"] = os.path.join(base_dir, test_reference,
                                       "sample_ref_MAG")
    config["samples"] = os.path.join(base_dir, test_reference, "samples_MAGs")
    config["subroutine"] = "setup"
    config["alignment_choice"] = "blast"
    config["cutoff"] = "tax-cutoffs.yaml"
    config["output"] = os.path.join(base_dir, "test_out_C")
    config["database"] = test_reference
    config["download_reference"] = 0
    config["column"] = "SOURCE_ID"
    config["ref_fasta"] = "reference.pep.fa"
    config["protein_map"] = "prot-map.json"
    config["tax_table"] = "tax-table.txt"

    config_path = os.path.join(base_dir, 'test_configs')
    os.system("mkdir -p " + config_path)
    config_file = os.path.join(config_path, 'curr_config_alignment_C.yaml')
    with open(config_file, 'w') as f:
        yaml.dump(config, f)

    eukulele(config=config_file)
    assert os.path.isdir(os.path.join(config["reference"], "blast"))

    config["subroutine"] = "alignment"
    with open(config_file, 'w') as f:
        yaml.dump(config, f)

    eukulele(config=config_file)
    # The counts file is prefixed with the output directory's own name;
    # basename follows the platform separator rather than a literal "/".
    outprefix = os.path.basename(config["output"])
    assert os.path.isfile(
        os.path.join(config["output"], "taxonomy_counts",
                     outprefix + "_all_species_counts.csv"))
Пример #11
0
def test_setup_commandline():
    """
    Run the "setup" subroutine from a single argument string and check
    that "tax-table.txt" exists in the reference directory afterwards.
    """
    aux_root = os.path.join(os.path.dirname(__file__), '..', 'aux_data')
    sample_dir = os.path.join(aux_root, test_reference, "samples_METs_small")
    reference_dir = os.path.join(aux_root, test_reference, "sample_ref")
    output_dir = os.path.join(aux_root, "test_out_Q")

    # Remove output left behind by an earlier run.
    os.system("rm -rf " + output_dir)

    cli_tokens = [
        "setup",
        "--database", "mmetsp",
        "--sample_dir", sample_dir,
        "--mets_or_mags", "mets",
        "--out_dir", output_dir,
        "--ref_fasta", "reference.pep.fa",
        "--reference_dir", reference_dir,
    ]
    eukulele(string_arguments=" ".join(cli_tokens))

    tax_table = os.path.join(reference_dir, "tax-table.txt")
    assert os.path.isfile(tax_table)
Пример #12
0
def test_error_required_input():
    """
    Expect eukulele to fail when the argument string has no
    --mets_or_mags option (presumably the required input under test).

    The test passes only if the eukulele call fails.
    """
    base_dir = os.path.join(os.path.dirname(__file__), '..', 'aux_data')
    sample_dir = os.path.join(base_dir, test_reference, "samples_MAGs")
    output_dir = os.path.join(base_dir, "test_out_G")
    os.system("rm -rf " + output_dir)
    reference_dir = os.path.join(base_dir, test_reference, "sample_ref")

    string_arguments = " ".join([
        "--database", "mmetsp", "--sample_dir", sample_dir,
        "--out_dir", output_dir, "-i",
        '--organisms', 'Chromera', '--taxonomy_organisms', 'genus',
        "--reference_dir", reference_dir
    ])

    raised = False
    try:
        eukulele(string_arguments=string_arguments)
    except (Exception, SystemExit):
        # SystemExit is listed so that a failure reported through
        # sys.exit() still counts, as it did with the previous bare
        # except; KeyboardInterrupt is no longer swallowed.
        raised = True

    assert raised
Пример #13
0
def test_setup():
    '''
    Tests the setup subroutine within EUKulele.

    Stage 1 runs the "setup" subroutine from a generated config and checks
    that the taxonomy table exists in the reference directory. Stage 2
    reruns the same config with the "alignment" subroutine and checks for
    the all-species counts CSV in the output directory.
    '''

    base_dir = os.path.join(os.path.dirname(__file__), '..', 'aux_data')
    base_config = os.path.join(os.path.dirname(__file__), '..', 'aux_data',
                               'config.yaml')
    with open(base_config) as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    config["mets_or_mags"] = "mags"
    config["reference"] = os.path.join(base_dir, test_reference,
                                       "sample_ref_MAG")
    config["samples"] = os.path.join(base_dir, test_reference, "samples_MAGs")
    config["subroutine"] = "setup"
    config["output"] = os.path.join(base_dir, "test_out_A")
    config["database"] = test_reference
    config["download_reference"] = 0
    config["column"] = "SOURCE_ID"
    config["ref_fasta"] = "reference.pep.fa"
    config["protein_map"] = "prot-map.json"
    config["tax_table"] = "tax-table.txt"

    config_path = os.path.join(base_dir, 'test_configs')
    os.system("mkdir -p " + config_path)
    # Remove output from earlier runs so the assertions reflect this run.
    os.system("rm -rf " + config["output"])
    config_file = os.path.join(config_path, 'curr_config_setup.yaml')
    with open(config_file, 'w') as f:
        yaml.dump(config, f)

    eukulele(config=config_file)
    assert os.path.isfile(
        os.path.join(config["reference"], config["tax_table"]))

    config["subroutine"] = "alignment"
    with open(config_file, 'w') as f:
        yaml.dump(config, f)

    eukulele(config=config_file)
    # The counts file is prefixed with the output directory's own name;
    # basename follows the platform separator rather than a literal "/".
    outprefix = os.path.basename(config["output"])
    assert os.path.isfile(
        os.path.join(config["output"], "taxonomy_counts",
                     outprefix + "_all_species_counts.csv"))
Пример #14
0
def test_tester():
    '''
    Invoke the "setup" subroutine with --test and verify that the output
    directory passed via --out_dir is absent once the call returns.
    '''
    here = os.path.dirname(__file__)
    base_dir = os.path.join(here, '..', 'aux_data')
    output_dir = os.path.join(base_dir, "test_out")
    sample_dir = os.path.join(base_dir, test_reference, "samples_MAGs")
    reference_dir = os.path.join(base_dir, test_reference, "sample_ref")

    # A directory left by a previous run would make the check fail.
    os.system("rm -rf " + output_dir)

    arguments = ["setup", "--test"]
    arguments += ["--database", "mmetsp"]
    arguments += ["--sample_dir", sample_dir]
    arguments += ["--mets_or_mags", "mags"]
    arguments += ["--out_dir", output_dir]
    arguments += ["--ref_fasta", "reference.pep.fa"]
    arguments += ["--reference_dir", reference_dir]

    eukulele(string_arguments=" ".join(arguments))
    assert not os.path.isdir(output_dir)
Пример #15
0
def test_all_commandline_busco_individual():
    """
    Run the "setup", "alignment" and "busco" subroutines one after another,
    each from an argument string that differs only in its first token, and
    check that the individual BUSCO summary for the first listed sample
    exists.
    """
    aux_root = os.path.join(os.path.dirname(__file__), '..', 'aux_data')
    sample_dir = os.path.join(aux_root, test_reference, "samples_METs_small")
    reference_dir = os.path.join(aux_root, test_reference, "sample_ref")
    output_dir = os.path.join(aux_root, "test_out_T")

    # Options shared by all three subroutine calls.
    shared_options = [
        "--database", "mmetsp",
        "--sample_dir", sample_dir,
        "--mets_or_mags", "mets",
        "--out_dir", output_dir,
        "--individual_or_summary", "individual",
        '--organisms', 'Chromera',
        '--taxonomy_organisms', 'genus',
        "--ref_fasta", "reference.pep.fa",
        "--reference_dir", reference_dir,
    ]

    for subroutine in ("setup", "alignment", "busco"):
        eukulele(string_arguments=" ".join([subroutine] + shared_options))

    # The sample name is the file name up to its first dot.
    first_sample = os.listdir(sample_dir)[0].split(".")[0]
    busco_out = os.path.join(output_dir, "busco_assessment", first_sample,
                             "individual",
                             "summary_" + first_sample + ".tsv")
    assert os.path.isfile(busco_out)
Пример #16
0
def test_error_input():
    '''
    Checking that we can catch an expected error in the input formatting.

    The argument string passes "mmm" as the --mets_or_mags value
    (presumably an invalid choice); the test passes only if the eukulele
    call fails.
    '''

    base_dir = os.path.join(os.path.dirname(__file__), '..', 'aux_data')
    sample_dir = os.path.join(base_dir, test_reference, "samples_MAGs")
    output_dir = os.path.join(base_dir, "test_out_F")
    os.system("rm -rf " + output_dir)
    reference_dir = os.path.join(base_dir, test_reference, "sample_ref_MAGs")

    string_arguments = " ".join([
        "--database", "mmetsp", "--sample_dir", sample_dir, "--mets_or_mags",
        "mmm", "--out_dir", output_dir, "-i", '--organisms', 'Chromera',
        '--taxonomy_organisms', 'genus', "--reference_dir", reference_dir
    ])

    raised = False
    try:
        eukulele(string_arguments=string_arguments)
    except (Exception, SystemExit):
        # SystemExit is listed so that a failure reported through
        # sys.exit() still counts, as it did with the previous bare
        # except; KeyboardInterrupt is no longer swallowed.
        raised = True

    assert raised
Пример #17
0
def test_error_p_extension():
    '''
    Tests that we get an error when we use an improper protein
    extension for the files.

    The argument string passes ".hello" as the --p_ext value; the test
    passes only if the eukulele call fails.
    '''

    base_dir = os.path.join(os.path.dirname(__file__), '..', 'aux_data')
    sample_dir = os.path.join(base_dir, test_reference, "samples_MAGs")
    output_dir = os.path.join(base_dir, "test_out_J")
    os.system("rm -rf " + output_dir)
    reference_dir = os.path.join(base_dir, test_reference, "sample_ref")

    string_arguments = " ".join([
        "--database", "mmetsp", "--sample_dir", sample_dir, "--mets_or_mags",
        "mags", "--out_dir", output_dir, "-i", "--p_ext", ".hello",
        "--reference_dir", reference_dir
    ])

    raised = False
    try:
        eukulele(string_arguments=string_arguments)
    except (Exception, SystemExit):
        # SystemExit is listed so that a failure reported through
        # sys.exit() still counts, as it did with the previous bare
        # except; KeyboardInterrupt is no longer swallowed.
        raised = True

    assert raised
Пример #18
0
def test_setup():
    """
    Run the "setup" subroutine from a generated config file for the small
    METs sample set and check that the taxonomy table exists in the
    reference directory afterwards.
    """
    aux_root = os.path.join(os.path.dirname(__file__), '..', 'aux_data')
    template_config = os.path.join(aux_root, 'config.yaml')
    working_config = os.path.join(aux_root, 'config_P.yaml')

    # Load settings from a copy of the template config.
    os.system("cp " + template_config + " " + working_config)
    with open(working_config) as handle:
        config = yaml.load(handle, Loader=yaml.FullLoader)

    # Clear output left behind by an earlier run.
    outputdir = os.path.join(aux_root, "test_out_P")
    os.system("rm -rf " + outputdir)

    config["mets_or_mags"] = "mets"
    config["reference"] = os.path.join(aux_root, test_reference, "sample_ref")
    config["samples"] = os.path.join(aux_root, test_reference,
                                     "samples_METs_small")
    config["subroutine"] = "setup"
    config["output"] = outputdir
    config["database"] = test_reference
    config["download_reference"] = 0
    config["column"] = "SOURCE_ID"
    config["nucleotide_extension"] = ".fasta"
    config["ref_fasta"] = "reference.pep.fa"
    config["protein_map"] = "prot-map.json"
    config["tax_table"] = "tax-table.txt"

    config_dir = os.path.join(aux_root, 'test_configs')
    os.system("mkdir -p " + config_dir)
    config_file = os.path.join(config_dir, 'curr_config_setup_P.yaml')
    with open(config_file, 'w') as handle:
        yaml.dump(config, handle)

    eukulele(config=config_file)

    tax_table_path = os.path.join(config["reference"], config["tax_table"])
    assert os.path.isfile(tax_table_path)