Exemplo n.º 1
0
def check_cfg_for_restart_from(cfg):
    if options_storage.restart_from == 'ec' and ("error_correction" not in cfg):
        support.error("failed to restart from read error correction because this stage was not specified!")
    if options_storage.restart_from == 'mc' and ("mismatch_corrector" not in cfg):
        support.error("failed to restart from mismatch correction because this stage was not specified!")
    if options_storage.restart_from == 'as' or options_storage.restart_from.startswith('k'):
        if "assembly" not in cfg:
            support.error("failed to restart from assembling because this stage was not specified!")
        if options_storage.restart_from.startswith('k'):
            correct_k = False
            k_to_check = options_storage.k_mers
            if not k_to_check:
                if options_storage.auto_K_allowed():
                    k_to_check = list(set(options_storage.K_MERS_SHORT + options_storage.K_MERS_150 + options_storage.K_MERS_250))
                else:
                    k_to_check = options_storage.K_MERS_SHORT
            for k in k_to_check:
                if options_storage.restart_from == ("k%d" % k) or options_storage.restart_from.startswith("k%d:" % k):
                    correct_k = True
                    break
            if not correct_k:
                k_str = options_storage.restart_from[1:]
                if k_str.find(":") != -1:
                    k_str = k_str[:k_str.find(":")]
                support.error("failed to restart from K=%s because this K was not specified!" % k_str)
def update_k_mers_in_special_cases(cur_k_mers, RL, log, silent=False):
    if options_storage.auto_K_allowed():
        if RL >= 250:
            if not silent:
                log.info(
                    "Default k-mer sizes were set to %s because estimated "
                    "read length (%d) is equal to or greater than 250" %
                    (str(options_storage.K_MERS_250), RL))
            return options_storage.K_MERS_250
        if RL >= 150:
            if not silent:
                log.info(
                    "Default k-mer sizes were set to %s because estimated "
                    "read length (%d) is equal to or greater than 150" %
                    (str(options_storage.K_MERS_150), RL), log)
            return options_storage.K_MERS_150
    if RL <= max(cur_k_mers):
        new_k_mers = [k for k in cur_k_mers if k < RL]
        if not silent:
            log.info(
                "K-mer sizes were set to %s because estimated "
                "read length (%d) is less than %d" %
                (str(new_k_mers), RL, max(cur_k_mers)), log)
        return new_k_mers
    return cur_k_mers
Exemplo n.º 3
0
def generateK(cfg, log, dataset_data, silent=False):
    if options_storage.args.rna:
        generateK_for_rna(cfg, dataset_data, log)
    elif not options_storage.args.iontorrent:
        RL = support.get_primary_max_reads_length(dataset_data, log, ["merged reads"],
                                                  options_storage.READS_TYPES_USED_IN_CONSTRUCTION)
        if options_storage.auto_K_allowed():
            if RL >= 250:
                if not silent:
                    log.info("Default k-mer sizes were set to %s because estimated "
                             "read length (%d) is equal to or greater than 250" % (str(options_storage.K_MERS_250), RL))
                cfg.iterative_K = options_storage.K_MERS_250
            elif RL >= 150:
                if not silent:
                    log.info("Default k-mer sizes were set to %s because estimated "
                             "read length (%d) is equal to or greater than 150" % (str(options_storage.K_MERS_150), RL))
                cfg.iterative_K = options_storage.K_MERS_150

        if RL <= max(cfg.iterative_K):
            new_k_mers = [k for k in cfg.iterative_K if k < RL]
            if not silent:
                log.info("K-mer sizes were set to %s because estimated "
                         "read length (%d) is less than %d" % (str(new_k_mers), RL, max(cfg.iterative_K)))
            cfg.iterative_K = new_k_mers

    if not isinstance(cfg.iterative_K, list):
        cfg.iterative_K = [cfg.iterative_K]
    cfg.iterative_K = sorted(cfg.iterative_K)
def update_k_mers_in_special_cases(cur_k_mers, RL, log, silent=False):
    if options_storage.auto_K_allowed():
        if RL >= 250:
            if not silent:
                support.warning("Default k-mer sizes were set to %s because estimated "
                                "read length (%d) is equal to or greater than 250" % (str(options_storage.K_MERS_250), RL), log)
            return options_storage.K_MERS_250
        if RL >= 150:
            if not silent:
                support.warning("Default k-mer sizes were set to %s because estimated "
                                "read length (%d) is equal to or greater than 150" % (str(options_storage.K_MERS_150), RL), log)
            return options_storage.K_MERS_150
    return cur_k_mers
Exemplo n.º 5
0
def update_k_mers_in_special_cases(cur_k_mers, RL, log, silent=False):
    if options_storage.auto_K_allowed():
        if RL >= 250:
            if not silent:
                support.warning("Default k-mer sizes were set to %s because estimated "
                                "read length (%d) is equal to or greater than 250" % (str(options_storage.K_MERS_250), RL), log)
            return options_storage.K_MERS_250
        if RL >= 150:
            if not silent:
                support.warning("Default k-mer sizes were set to %s because estimated "
                                "read length (%d) is equal to or greater than 150" % (str(options_storage.K_MERS_150), RL), log)
            return options_storage.K_MERS_150
    return cur_k_mers
Exemplo n.º 6
0
def update_k_mers_in_special_cases(cur_k_mers, RL, log, silent=False):
    if options_storage.auto_K_allowed():
        if RL >= 250:
            if not silent:
                log.info("Default k-mer sizes were set to %s because estimated "
                         "read length (%d) is equal to or greater than 250" % (str(options_storage.K_MERS_250), RL))
            return options_storage.K_MERS_250
        if RL >= 150:
            if not silent:
                log.info("Default k-mer sizes were set to %s because estimated "
                         "read length (%d) is equal to or greater than 150" % (str(options_storage.K_MERS_150), RL))
            return options_storage.K_MERS_150
    if RL <= max(cur_k_mers):
        new_k_mers = [k for k in cur_k_mers if k < RL]
        if not silent:
            log.info("K-mer sizes were set to %s because estimated "
                     "read length (%d) is less than %d" % (str(new_k_mers), RL, max(cur_k_mers)))
        return new_k_mers
    return cur_k_mers
Exemplo n.º 7
0
def check_cfg_for_restart_from(cfg):
    if options_storage.restart_from == 'ec' and ("error_correction"
                                                 not in cfg):
        support.error(
            "failed to restart from read error correction because this stage was not specified!"
        )
    if options_storage.restart_from == 'mc' and ("mismatch_corrector"
                                                 not in cfg):
        support.error(
            "failed to restart from mismatch correction because this stage was not specified!"
        )
    if options_storage.restart_from == 'as' or options_storage.restart_from.startswith(
            'k'):
        if "assembly" not in cfg:
            support.error(
                "failed to restart from assembling because this stage was not specified!"
            )
        if options_storage.restart_from.startswith('k'):
            correct_k = False
            k_to_check = options_storage.k_mers
            if not k_to_check:
                if options_storage.auto_K_allowed():
                    k_to_check = list(
                        set(options_storage.K_MERS_SHORT +
                            options_storage.K_MERS_150 +
                            options_storage.K_MERS_250))
                else:
                    k_to_check = options_storage.K_MERS_SHORT
            for k in k_to_check:
                if options_storage.restart_from == (
                        "k%d" % k) or options_storage.restart_from.startswith(
                            "k%d:" % k):
                    correct_k = True
                    break
            if not correct_k:
                k_str = options_storage.restart_from[1:]
                if k_str.find(":") != -1:
                    k_str = k_str[:k_str.find(":")]
                support.error(
                    "failed to restart from K=%s because this K was not specified!"
                    % k_str)
def check_cfg_for_partial_run(cfg, type='restart-from'):  # restart-from ot stop-after
    if type == 'restart-from':
        check_point = options_storage.restart_from
        action = 'restart from'
        verb = 'was'
    elif type == 'stop-after':
        check_point = options_storage.stop_after
        action = 'stop after'
        verb = 'is'
    else:
        return

    if check_point == 'ec' and ("error_correction" not in cfg):
        support.error("failed to " + action + " 'read error correction' ('" + check_point + "') because this stage " + verb + " not specified!")
    if check_point == 'mc' and ("mismatch_corrector" not in cfg):
        support.error("failed to " + action + " 'mismatch correction' ('" + check_point + "') because this stage " + verb + " not specified!")
    if check_point == 'as' or check_point.startswith('k'):
        if "assembly" not in cfg:
            support.error("failed to " + action + " 'assembling' ('" + check_point + "') because this stage " + verb + " not specified!")
        if check_point.startswith('k'):
            correct_k = False
            k_to_check = options_storage.k_mers
            if not k_to_check:
                if options_storage.auto_K_allowed():
                    k_to_check = list(set(options_storage.K_MERS_SHORT + options_storage.K_MERS_150 + options_storage.K_MERS_250))
                else:
                    k_to_check = options_storage.K_MERS_SHORT
            for k in k_to_check:
                if check_point == ("k%d" % k) or check_point.startswith("k%d:" % k):
                    correct_k = True
                    break
            if not correct_k:
                k_str = check_point[1:]
                if k_str.find(":") != -1:
                    k_str = k_str[:k_str.find(":")]
                support.error("failed to " + action + " K=%s because this K " % k_str + verb + " not specified!")
Exemplo n.º 9
0
def check_cfg_for_partial_run(cfg, type='restart-from'):  # restart-from ot stop-after
    if type == 'restart-from':
        check_point = options_storage.restart_from
        action = 'restart from'
        verb = 'was'
    elif type == 'stop-after':
        check_point = options_storage.stop_after
        action = 'stop after'
        verb = 'is'
    else:
        return

    if check_point == 'ec' and ("error_correction" not in cfg):
        support.error("failed to " + action + " 'read error correction' ('" + check_point + "') because this stage " + verb + " not specified!")
    if check_point == 'mc' and ("mismatch_corrector" not in cfg):
        support.error("failed to " + action + " 'mismatch correction' ('" + check_point + "') because this stage " + verb + " not specified!")
    if check_point == 'as' or check_point.startswith('k'):
        if "assembly" not in cfg:
            support.error("failed to " + action + " 'assembling' ('" + check_point + "') because this stage " + verb + " not specified!")
        if check_point.startswith('k'):
            correct_k = False
            k_to_check = options_storage.k_mers
            if not k_to_check:
                if options_storage.auto_K_allowed():
                    k_to_check = list(set(options_storage.K_MERS_SHORT + options_storage.K_MERS_150 + options_storage.K_MERS_250))
                else:
                    k_to_check = options_storage.K_MERS_SHORT
            for k in k_to_check:
                if check_point == ("k%d" % k) or check_point.startswith("k%d:" % k):
                    correct_k = True
                    break
            if not correct_k:
                k_str = check_point[1:]
                if k_str.find(":") != -1:
                    k_str = k_str[:k_str.find(":")]
                support.error("failed to " + action + " K=%s because this K " % k_str + verb + " not specified!")
Exemplo n.º 10
0
def print_used_values(cfg, log):
    def print_value(cfg, section, param, pretty_param="", margin="  "):
        if not pretty_param:
            pretty_param = param.capitalize().replace('_', ' ')
        line = margin + pretty_param
        if param in cfg[section].__dict__:
            line += ": " + str(cfg[section].__dict__[param])
        else:
            if "offset" in param:
                line += " will be auto-detected"
        log.info(line)

    log.info("")

    # system info
    log.info("System information:")
    try:
        log.info("  SPAdes version: " + str(spades_version).strip())
        log.info("  Python version: " + ".".join(map(str, sys.version_info[0:3])))
        # for more details: '[' + str(sys.version_info) + ']'
        log.info("  OS: " + platform.platform())
        # for more details: '[' + str(platform.uname()) + ']'
    except Exception:
        log.info("  Problem occurred when getting system information")
    log.info("")

    # main
    print_value(cfg, "common", "output_dir", "", "")
    if ("error_correction" in cfg) and (not "assembly" in cfg):
        log.info("Mode: ONLY read error correction (without assembling)")
    elif (not "error_correction" in cfg) and ("assembly" in cfg):
        log.info("Mode: ONLY assembling (without read error correction)")
    else:
        log.info("Mode: read error correction and assembling")
    if ("common" in cfg) and ("developer_mode" in cfg["common"].__dict__):
        if cfg["common"].developer_mode:
            log.info("Debug mode is turned ON")
        else:
            log.info("Debug mode is turned OFF")
    log.info("")

    # dataset
    if "dataset" in cfg:
        log.info("Dataset parameters:")

        if options_storage.args.iontorrent:
            log.info("  IonTorrent data")
        if options_storage.args.bio:
            log.info("  BiosyntheticSPAdes mode")
        if options_storage.args.meta:
            log.info("  Metagenomic mode")
        elif options_storage.args.large_genome:
            log.info("  Large genome mode")
        elif options_storage.args.truseq_mode:
            log.info("  Illumina TruSeq mode")
        elif options_storage.args.isolate:
            log.info("  Isolate mode")
        elif options_storage.args.rna:
            log.info("  RNA-seq mode")
        elif options_storage.args.single_cell:
            log.info("  Single-cell mode")
        else:
            log.info("  Standard mode")
            log.info("  For multi-cell/isolate data we recommend to use '--isolate' option;" \
                     " for single-cell MDA data use '--sc';" \
                     " for metagenomic data use '--meta';" \
                     " for RNA-Seq use '--rna'.")

        log.info("  Reads:")
        dataset_data = pyyaml.load(open(cfg["dataset"].yaml_filename))
        dataset_data = support.relative2abs_paths(dataset_data, os.path.dirname(cfg["dataset"].yaml_filename))
        support.pretty_print_reads(dataset_data, log)

    # error correction
    if "error_correction" in cfg:
        log.info("Read error correction parameters:")
        print_value(cfg, "error_correction", "max_iterations", "Iterations")
        print_value(cfg, "error_correction", "qvoffset", "PHRED offset")

        if cfg["error_correction"].gzip_output:
            log.info("  Corrected reads will be compressed")
        else:
            log.info("  Corrected reads will NOT be compressed")

    # assembly
    if "assembly" in cfg:
        log.info("Assembly parameters:")
        if options_storage.auto_K_allowed():
            log.info("  k: automatic selection based on read length")
        else:
            print_value(cfg, "assembly", "iterative_K", "k")
        if options_storage.args.plasmid:
            log.info("  Plasmid mode is turned ON")
        if cfg["assembly"].disable_rr:
            log.info("  Repeat resolution is DISABLED")
        else:
            log.info("  Repeat resolution is enabled")
        if options_storage.args.careful:
            log.info("  Mismatch careful mode is turned ON")
        else:
            log.info("  Mismatch careful mode is turned OFF")
        if "mismatch_corrector" in cfg:
            log.info("  MismatchCorrector will be used")
        else:
            log.info("  MismatchCorrector will be SKIPPED")
        if cfg["assembly"].cov_cutoff == "off":
            log.info("  Coverage cutoff is turned OFF")
        elif cfg["assembly"].cov_cutoff == "auto":
            log.info("  Coverage cutoff is turned ON and threshold will be auto-detected")
        else:
            log.info("  Coverage cutoff is turned ON and threshold is %f" % cfg["assembly"].cov_cutoff)

    log.info("Other parameters:")
    print_value(cfg, "common", "tmp_dir", "Dir for temp files")
    print_value(cfg, "common", "max_threads", "Threads")
    print_value(cfg, "common", "max_memory", "Memory limit (in Gb)", "  ")
    log.info("")
Exemplo n.º 11
0
def print_used_values(cfg, log):
    def print_value(cfg, section, param, pretty_param="", margin="  "):
        if not pretty_param:
            pretty_param = param.capitalize().replace('_', ' ')
        line = margin + pretty_param
        if param in cfg[section].__dict__:
            line += ": " + str(cfg[section].__dict__[param])
        else:
            if param.find("offset") != -1:
                line += " will be auto-detected"
        log.info(line)

    log.info("")

    # system info
    log.info("System information:")
    try:
        log.info("  SPAdes version: " + str(spades_version).strip())
        log.info("  Python version: " + ".".join(map(str, sys.version_info[0:3])))
        # for more details: '[' + str(sys.version_info) + ']'
        log.info("  OS: " + platform.platform())
        # for more deatils: '[' + str(platform.uname()) + ']'
    except Exception:
        log.info("  Problem occurred when getting system information")
    log.info("")

    # main
    print_value(cfg, "common", "output_dir", "", "")
    if ("error_correction" in cfg) and (not "assembly" in cfg):
        log.info("Mode: ONLY read error correction (without assembling)")
    elif (not "error_correction" in cfg) and ("assembly" in cfg):
        log.info("Mode: ONLY assembling (without read error correction)")
    else:
        log.info("Mode: read error correction and assembling")
    if ("common" in cfg) and ("developer_mode" in cfg["common"].__dict__):
        if cfg["common"].developer_mode:
            log.info("Debug mode is turned ON")
        else:
            log.info("Debug mode is turned OFF")
    log.info("")

    # dataset
    if "dataset" in cfg:
        log.info("Dataset parameters:")

        if cfg["dataset"].single_cell:
            log.info("  Single-cell mode")
        else:
            log.info("  Multi-cell mode (you should set '--sc' flag if input data"\
                     " was obtained with MDA (single-cell) technology")
        if cfg["dataset"].iontorrent:
            log.info("  IonTorrent data")

        log.info("  Reads:")
        dataset_data = pyyaml.load(open(cfg["dataset"].yaml_filename, 'r'))
        dataset_data = support.relative2abs_paths(dataset_data, os.path.dirname(cfg["dataset"].yaml_filename))
        support.pretty_print_reads(dataset_data, log)

    # error correction
    if "error_correction" in cfg:
        log.info("Read error correction parameters:")
        print_value(cfg, "error_correction", "max_iterations", "Iterations")
        print_value(cfg, "error_correction", "qvoffset", "PHRED offset")

        if cfg["error_correction"].gzip_output:
            log.info("  Corrected reads will be compressed (with gzip)")
        else:
            log.info("  Corrected reads will NOT be compressed (with gzip)")

    # assembly
    if "assembly" in cfg:
        log.info("Assembly parameters:")
        if options_storage.auto_K_allowed():
            log.info("  k: automatic selection based on read length")
        else:
            print_value(cfg, "assembly", "iterative_K", "k")
        if cfg["assembly"].careful:
            log.info("  Mismatch careful mode is turned ON")
        else:
            log.info("  Mismatch careful mode is turned OFF")
        if cfg["assembly"].disable_rr:
            log.info("  Repeat resolution is DISABLED")
        else:
            log.info("  Repeat resolution is enabled")
        if "mismatch_corrector" in cfg:
            log.info("  MismatchCorrector will be used")
        else:
            log.info("  MismatchCorrector will be SKIPPED")

    log.info("Other parameters:")
    print_value(cfg, "common", "tmp_dir", "Dir for temp files")
    print_value(cfg, "common", "max_threads", "Threads")
    print_value(cfg, "common", "max_memory", "Memory limit (in Gb)", "  ")
    log.info("")
Exemplo n.º 12
0
def print_used_values(cfg, log):
    def print_value(cfg, section, param, pretty_param="", margin="  "):
        if not pretty_param:
            pretty_param = param.capitalize().replace('_', ' ')
        line = margin + pretty_param
        if param in cfg[section].__dict__:
            line += ": " + str(cfg[section].__dict__[param])
        else:
            if param.find("offset") != -1:
                line += " will be auto-detected"
        log.info(line)

    log.info("")

    # system info
    log.info("System information:")
    try:
        log.info("  SPAdes version: " + str(spades_version).strip())
        log.info("  Python version: " +
                 ".".join(map(str, sys.version_info[0:3])))
        # for more details: '[' + str(sys.version_info) + ']'
        log.info("  OS: " + platform.platform())
        # for more deatils: '[' + str(platform.uname()) + ']'
    except Exception:
        log.info("  Problem occurred when getting system information")
    log.info("")

    # main
    print_value(cfg, "common", "output_dir", "", "")
    if ("error_correction" in cfg) and (not "assembly" in cfg):
        log.info("Mode: ONLY read error correction (without assembling)")
    elif (not "error_correction" in cfg) and ("assembly" in cfg):
        log.info("Mode: ONLY assembling (without read error correction)")
    else:
        log.info("Mode: read error correction and assembling")
    if ("common" in cfg) and ("developer_mode" in cfg["common"].__dict__):
        if cfg["common"].developer_mode:
            log.info("Debug mode is turned ON")
        else:
            log.info("Debug mode is turned OFF")
    log.info("")

    # dataset
    if "dataset" in cfg:
        log.info("Dataset parameters:")

        if cfg["dataset"].single_cell:
            log.info("  Single-cell mode")
        else:
            log.info("  Multi-cell mode (you should set '--sc' flag if input data"\
                     " was obtained with MDA (single-cell) technology")
        if cfg["dataset"].iontorrent:
            log.info("  IonTorrent data")

        log.info("  Reads:")
        dataset_data = pyyaml.load(open(cfg["dataset"].yaml_filename, 'r'))
        dataset_data = support.relative2abs_paths(
            dataset_data, os.path.dirname(cfg["dataset"].yaml_filename))
        support.pretty_print_reads(dataset_data, log)

    # error correction
    if "error_correction" in cfg:
        log.info("Read error correction parameters:")
        print_value(cfg, "error_correction", "max_iterations", "Iterations")
        print_value(cfg, "error_correction", "qvoffset", "PHRED offset")

        if cfg["error_correction"].gzip_output:
            log.info("  Corrected reads will be compressed (with gzip)")
        else:
            log.info("  Corrected reads will NOT be compressed (with gzip)")

    # assembly
    if "assembly" in cfg:
        log.info("Assembly parameters:")
        if options_storage.auto_K_allowed():
            log.info("  k: automatic selection based on read length")
        else:
            print_value(cfg, "assembly", "iterative_K", "k")
        if cfg["assembly"].careful:
            log.info("  Mismatch careful mode is turned ON")
        else:
            log.info("  Mismatch careful mode is turned OFF")
        if cfg["assembly"].disable_rr:
            log.info("  Repeat resolution is DISABLED")
        else:
            log.info("  Repeat resolution is enabled")
        if "mismatch_corrector" in cfg:
            log.info("  MismatchCorrector will be used")
        else:
            log.info("  MismatchCorrector will be SKIPPED")
        if cfg["assembly"].cov_cutoff == 'off':
            log.info("  Coverage cutoff is turned OFF")
        elif cfg["assembly"].cov_cutoff == 'auto':
            log.info(
                "  Coverage cutoff is turned ON and threshold will be auto-detected"
            )
        else:
            log.info("  Coverage cutoff is turned ON and threshold is " +
                     str(cfg["assembly"].cov_cutoff))

    log.info("Other parameters:")
    print_value(cfg, "common", "tmp_dir", "Dir for temp files")
    print_value(cfg, "common", "max_threads", "Threads")
    print_value(cfg, "common", "max_memory", "Memory limit (in Gb)", "  ")
    log.info("")