Exemplo n.º 1
0
def test_get_core_multi(caplog):
    """
    Multi core genome: ``get_pers`` is called for 4 genomes with ``multi=True`` and the
    default tolerance. Families '1', '3', '5' and '12' must be returned, and the log must
    report 4 families present in at least 4 different genomes (100%).
    """
    caplog.set_level(logging.DEBUG)
    found = persf.get_pers(FAMS_BY_STRAIN, FAMILIES, 4, multi=True)
    # Families expected in the result, picked from the reference FAMILIES dict
    wanted_ids = ('3', '5', '1', '12')
    expected = {fam_id: FAMILIES[fam_id] for fam_id in FAMILIES if fam_id in wanted_ids}
    assert found == expected
    expected_log = ("The persistent genome contains 4 families with members present in "
                    "at least 4 different genomes (100% of the total number of genomes)")
    assert expected_log in caplog.text
Exemplo n.º 2
0
def test_get_core_strict(caplog):
    """
    Strict core genome: ``get_pers`` is called for 4 genomes with default options.
    Only families '3' and '5' must be returned, and the log must report 2 families with
    exactly 4 members coming from the 4 different genomes.
    """
    caplog.set_level(logging.DEBUG)
    found = persf.get_pers(FAMS_BY_STRAIN, FAMILIES, 4)
    # Families expected in the result, picked from the reference FAMILIES dict
    wanted_ids = ('3', '5')
    expected = {fam_id: FAMILIES[fam_id] for fam_id in FAMILIES if fam_id in wanted_ids}
    assert found == expected
    expected_log = ("The core genome contains 2 families, each one having "
                    "exactly 4 members, from the 4 different genomes.")
    assert expected_log in caplog.text
Exemplo n.º 3
0
def test_get_99pers_multi(caplog):
    """
    Multi persistent genome at 99% without ``floor``: for 4 genomes the log must report
    a threshold of at least 4 different genomes. Families '1', '3', '5' and '12' must be
    returned.
    """
    caplog.set_level(logging.DEBUG)
    found = persf.get_pers(FAMS_BY_STRAIN, FAMILIES, 4, tol=0.99, multi=True)
    # Families expected in the result, picked from the reference FAMILIES dict
    wanted_ids = ('1', '3', '5', '12')
    expected = {fam_id: FAMILIES[fam_id] for fam_id in FAMILIES if fam_id in wanted_ids}
    assert found == expected
    expected_log = ("The persistent genome contains 4 families with members present in "
                    "at least 4 different genomes (99.0% of the total number of genomes).")
    assert expected_log in caplog.text
Exemplo n.º 4
0
def test_get_99pers_mixed(caplog):
    """
    Mixed persistent genome at 99% without ``floor``: for 4 genomes the log must report
    that exactly 1 member is required in 4 genomes. Only families '3' and '5' must be
    returned.
    """
    caplog.set_level(logging.DEBUG)
    found = persf.get_pers(FAMS_BY_STRAIN, FAMILIES, 4, tol=0.99, mixed=True)
    # Families expected in the result, picked from the reference FAMILIES dict
    wanted_ids = ('3', '5')
    expected = {fam_id: FAMILIES[fam_id] for fam_id in FAMILIES if fam_id in wanted_ids}
    assert found == expected
    expected_log = ("The persistent genome contains 2 families, each one having exactly 1 member from at least "
                    "99.0% of the genomes (4 genomes). In the remaining "
                    "1.0% genomes, there can be 0, 1 or several members.")
    assert expected_log in caplog.text
Exemplo n.º 5
0
def test_get_99pers_strict(caplog):
    """
    Strict persistent genome at 99% without ``floor``: for 4 genomes the log must report
    that exactly 1 member is required in 4 genomes, other genomes being absent from the
    family. Only families '3' and '5' must be returned.
    """
    caplog.set_level(logging.DEBUG)
    found = persf.get_pers(FAMS_BY_STRAIN, FAMILIES, 4, tol=0.99)
    # Families expected in the result, picked from the reference FAMILIES dict
    wanted_ids = ('3', '5')
    expected = {fam_id: FAMILIES[fam_id] for fam_id in FAMILIES if fam_id in wanted_ids}
    assert found == expected
    expected_log = ("The persistent genome contains 2 families, each one "
                    "having exactly 1 member from at least 99.0% of the 4 different genomes "
                    "(that is 4 genomes). The other genomes are absent from the family.")
    assert expected_log in caplog.text
Exemplo n.º 6
0
def test_get_99pers_floor_multi(caplog):
    """
    Multi persistent genome at 99% with ``floor=True``: for 4 genomes the log must report
    a threshold of at least 3 different genomes. Eight families
    ('1', '3', '5', '6', '8', '10', '11', '12') must be returned.
    """
    caplog.set_level(logging.DEBUG)
    found = persf.get_pers(FAMS_BY_STRAIN, FAMILIES, 4, tol=0.99, floor=True, multi=True)
    # Families expected in the result, picked from the reference FAMILIES dict
    wanted_ids = ('1', '3', '5', '6', '8', '10', '11', '12')
    expected = {fam_id: FAMILIES[fam_id] for fam_id in FAMILIES if fam_id in wanted_ids}
    assert found == expected
    expected_log = ("The persistent genome contains 8 families with members present in "
                    "at least 3 different genomes (99.0% of the total number of genomes).")
    assert expected_log in caplog.text
Exemplo n.º 7
0
def test_get_99pers_floor_strict(caplog):
    """
    Strict persistent genome at 99% with ``floor=True``: for 4 genomes the log must report
    that exactly 1 member is required in 3 genomes, other genomes being absent from the
    family. Families '3', '5', '8', '10' and '11' must be returned.
    """
    caplog.set_level(logging.DEBUG)
    found = persf.get_pers(FAMS_BY_STRAIN, FAMILIES, 4, tol=0.99, floor=True)
    # Families expected in the result, picked from the reference FAMILIES dict
    wanted_ids = ('3', '5', '8', '10', '11')
    expected = {fam_id: FAMILIES[fam_id] for fam_id in FAMILIES if fam_id in wanted_ids}
    assert found == expected
    expected_log = ("The persistent genome contains 5 families, each one "
                    "having exactly 1 member from at least 99.0% of the 4 different genomes "
                    "(that is 3 genomes). The other genomes are absent from the family.")
    assert expected_log in caplog.text
Exemplo n.º 8
0
def test_get_99pers_floor_mixed(caplog):
    """
    Mixed persistent genome at 99% with ``floor=True``: for 4 genomes the log must report
    that exactly 1 member is required in 3 genomes, the remaining genomes having 0, 1 or
    several members. Families '1', '3', '5', '8', '10', '11' and '12' must be returned.
    """
    caplog.set_level(logging.DEBUG)
    found = persf.get_pers(FAMS_BY_STRAIN, FAMILIES, 4, tol=0.99, floor=True, mixed=True)
    # Families expected in the result, picked from the reference FAMILIES dict
    wanted_ids = ('1', '3', '5', '8', '10', '11', '12')
    expected = {fam_id: FAMILIES[fam_id] for fam_id in FAMILIES if fam_id in wanted_ids}
    assert found == expected
    expected_log = ("The persistent genome contains 7 families, each one having exactly 1 member from at least "
                    "99.0% of the genomes (3 genomes). In the remaining "
                    "1.0% genomes, there can be 0, 1 or several members.")
    assert expected_log in caplog.text
Exemplo n.º 9
0
def main(cmd, pangenome, tol, multi, mixed, outputdir, lstinfo_file, floor,
         verbose, quiet):
    """
    Read pangenome and deduce Persistent genome according to the user criteria

    Parameters
    ----------
    cmd : str
        command line used to launch the program (written to the log)
    pangenome : str
        file containing pangenome
    tol : float
        min % of genomes present in a family to consider it as persistent (between 0 and 1)
    multi : bool
        True if multigenic families are allowed, False otherwise
    mixed : bool
        True if mixed families are allowed, False otherwise
    outputdir : str or None
        Specific directory for the generated persistent genome. If not given, pangenome
        directory is used (current directory if the pangenome path has no directory part).
    lstinfo_file : str
        list of genomes to include in the core/persistent genome. If not given, include all
        genomes of pan
    floor : bool
        Require at least floor(nb_genomes*tol) genomes if True, ceil(nb_genomes*tol) if False
    verbose : int
        verbosity:
        - default 0 : stdout contains INFO, stderr contains ERROR.
        - 1: stdout contains INFO, stderr contains WARNING and ERROR
        - 2: stdout contains (DEBUG), DETAIL and INFO, stderr contains WARNING and ERROR
        - >=15: Add DEBUG in stdout
    quiet : bool
        True if nothing must be sent to stdout/stderr, False otherwise

    Returns
    -------
    str
        path to the file in which the persistent genome was written
    """
    # import needed packages
    import logging
    from PanACoTA import utils
    from PanACoTA import utils_pangenome as utilsp
    import PanACoTA.corepers_module.persistent_functions as pers
    from PanACoTA import __version__ as version

    # get pangenome name info (the directory is kept as fallback output directory)
    path_pan, base_pan = os.path.split(pangenome)
    base_lst = os.path.basename(lstinfo_file) if lstinfo_file else "all"

    # Define output filename: PersGenome_<pan>-<lst>_[F]<tol>[-multi|-mixed].lst
    # 'multi' takes precedence over 'mixed' when both are set.
    if multi:
        suffix = "-multi.lst"
    elif mixed:
        suffix = "-mixed.lst"
    else:
        suffix = ".lst"
    floor_tag = "F" if floor else ""
    output_name = f"PersGenome_{base_pan}-{base_lst}_{floor_tag}{tol}{suffix}"

    # Define output directory and filename path.
    # No output directory given: use the one containing the pangenome, as documented
    # ("." when the pangenome path is a bare filename, since makedirs('') would fail).
    if not outputdir:
        outputdir = path_pan or "."
    # exist_ok avoids failing if the directory appears between a check and its creation
    os.makedirs(outputdir, exist_ok=True)
    outputfile = os.path.join(outputdir, output_name)
    logfile_base = os.path.join(outputdir, "PanACoTA-corepers")

    # level is the minimum level that will be considered.
    if verbose >= 15:
        # write everything
        level = logging.DEBUG
    elif verbose >= 2:
        # ignore only debug
        level = 15  # int corresponding to detail level
    else:
        # verbose = 0 or 1: ignore details and debug, start from info
        level = logging.INFO
    utils.init_logger(logfile_base,
                      level,
                      'corepers',
                      verbose=verbose,
                      quiet=quiet)
    logger = logging.getLogger("corepers")
    logger.info(f'PanACoTA version {version}')
    logger.info("Command used\n \t > " + cmd)

    logger.info(get_info(tol, multi, mixed, floor))

    # Read pangenome
    fams_by_strain, families, all_strains = utilsp.read_pangenome(
        pangenome, logger)
    # If list of genomes given, get subset of previous dicts, including only the genomes asked
    if lstinfo_file:
        fams_by_strain, families, all_strains = pers.get_subset_genomes(
            fams_by_strain, families, lstinfo_file)
    # Generate persistent genome
    fams = pers.get_pers(fams_by_strain, families, len(all_strains), tol,
                         multi, mixed, floor)
    # Write persistent genome to file
    pers.write_persistent(fams, outputfile)
    logger.info("Persistent genome step done.")
    return outputfile