Пример #1
0
        os.makedirs(options_storage.output_dir)

    if options_storage.dataset_yaml_filename:
        try:
            dataset_data = pyyaml.load(file(options_storage.dataset_yaml_filename, "r"))
        except pyyaml.YAMLError, exc:
            support.error(
                "exception caught while parsing YAML file (" + options_storage.dataset_yaml_filename + "):\n" + str(exc)
            )
    else:
        dataset_data = support.correct_dataset(dataset_data)
        options_storage.dataset_yaml_filename = os.path.join(options_storage.output_dir, "input_dataset.yaml")
        pyyaml.dump(dataset_data, file(options_storage.dataset_yaml_filename, "w"))

    support.check_dataset_reads(dataset_data, log)
    if support.dataset_has_only_mate_pairs_libraries(dataset_data):
        support.error(
            "you should specify at least one paired-end or unpaired library (only mate-pairs libraries were found)!"
        )
    if options_storage.rectangles and (len(dataset_data) > 1):
        support.error("rectangle graph algorithm for repeat resolution cannot work with multiple libraries!")

    ### FILLING cfg
    cfg["common"] = empty_config()
    cfg["dataset"] = empty_config()
    if not options_storage.only_assembler:
        cfg["error_correction"] = empty_config()
    if not options_storage.only_error_correction:
        cfg["assembly"] = empty_config()

    # common
Пример #2
0
def fill_cfg(options_to_parse, log):
    try:
        options, not_options = getopt.gnu_getopt(options_to_parse,
                                                 options_storage.short_options,
                                                 options_storage.long_options)
    except getopt.GetoptError:
        _, exc, _ = sys.exc_info()
        sys.stderr.write(str(exc) + "\n")
        sys.stderr.flush()
        options_storage.usage(spades_version)
        sys.exit(1)

    if not options:
        options_storage.usage(spades_version)
        sys.exit(1)

    # all parameters are stored here
    cfg = dict()
    # dataset is stored here. We are prepared for up to MAX_LIBS_NUMBER paired-end libs and MAX_LIBS_NUMBER mate-pair libs
    dataset_data = [{} for i in range(options_storage.MAX_LIBS_NUMBER * 2)]

    options_storage.continue_mode = False
    for opt, arg in options:
        if opt == '-o':
            options_storage.output_dir = arg
        elif opt == "--tmp-dir":
            options_storage.tmp_dir = arg
        elif opt == "--reference":
            options_storage.reference = support.check_file_existence(
                arg, 'reference', log)
        elif opt == "--dataset":
            options_storage.dataset_yaml_filename = support.check_file_existence(
                arg, 'dataset', log)

        elif opt in options_storage.reads_options:
            support.add_to_dataset(opt, arg, dataset_data)

        elif opt == '-k':
            options_storage.k_mers = list(map(int, arg.split(",")))
            for k in options_storage.k_mers:
                if k > 127:
                    support.error(
                        'wrong k value ' + str(k) +
                        ': all k values should be less than 128', log)
                if k % 2 == 0:
                    support.error(
                        'wrong k value ' + str(k) +
                        ': all k values should be odd', log)

        elif opt == "--sc":
            options_storage.single_cell = True
        elif opt == "--disable-gzip-output":
            options_storage.disable_gzip_output = True

        elif opt == "--only-error-correction":
            if options_storage.only_assembler:
                support.error(
                    'you cannot specify --only-error-correction and --only-assembler simultaneously'
                )
            options_storage.only_error_correction = True
        elif opt == "--only-assembler":
            if options_storage.only_error_correction:
                support.error(
                    'you cannot specify --only-error-correction and --only-assembler simultaneously'
                )
            options_storage.only_assembler = True

        elif opt == "--bh-heap-check":
            options_storage.bh_heap_check = arg
        elif opt == "--spades-heap-check":
            options_storage.spades_heap_check = arg

        elif opt == "--continue":
            options_storage.continue_mode = True

        elif opt == '-t' or opt == "--threads":
            options_storage.threads = int(arg)
        elif opt == '-m' or opt == "--memory":
            options_storage.memory = int(arg)
        elif opt == "--phred-offset":
            if int(arg) in [33, 64]:
                options_storage.qvoffset = int(arg)
            else:
                support.error(
                    'wrong PHRED quality offset value ' + str(arg) +
                    ': should be either 33 or 64', log)
        elif opt == '-i' or opt == "--iterations":
            options_storage.iterations = int(arg)

        elif opt == "--debug":
            options_storage.developer_mode = True

        elif opt == "--rectangles":
            options_storage.rectangles = True

        #corrector
        elif opt == "--mismatch-correction":
            options_storage.mismatch_corrector = True

        elif opt == "--careful":
            options_storage.mismatch_corrector = True
            options_storage.careful = True

        elif opt == '-h' or opt == "--help":
            options_storage.usage(spades_version)
            sys.exit(0)
        elif opt == "--help-hidden":
            options_storage.usage(spades_version, True)
            sys.exit(0)

        elif opt == "--test":
            options_storage.set_test_options()
            support.add_to_dataset(
                '-1', os.path.join(spades_home,
                                   "test_dataset/ecoli_1K_1.fq.gz"),
                dataset_data)
            support.add_to_dataset(
                '-2', os.path.join(spades_home,
                                   "test_dataset/ecoli_1K_2.fq.gz"),
                dataset_data)
            #break
        else:
            raise ValueError

    if not options_storage.output_dir:
        support.error(
            "the output_dir is not set! It is a mandatory parameter (-o output_dir).",
            log)
    if not os.path.isdir(options_storage.output_dir):
        if options_storage.continue_mode:
            support.error("the output_dir should exist for --continue!", log)
        os.makedirs(options_storage.output_dir)
    if options_storage.continue_mode:
        return None, None

    if options_storage.dataset_yaml_filename:
        try:
            dataset_data = pyyaml.load(
                open(options_storage.dataset_yaml_filename, 'r'))
        except pyyaml.YAMLError:
            _, exc, _ = sys.exc_info()
            support.error('exception caught while parsing YAML file (' +
                          options_storage.dataset_yaml_filename + '):\n' +
                          str(exc))
        dataset_data = support.relative2abs_paths(
            dataset_data,
            os.path.dirname(options_storage.dataset_yaml_filename))
    else:
        dataset_data = support.correct_dataset(dataset_data)
        dataset_data = support.relative2abs_paths(dataset_data, os.getcwd())
        options_storage.dataset_yaml_filename = os.path.join(
            options_storage.output_dir, "input_dataset.yaml")
        pyyaml.dump(dataset_data,
                    open(options_storage.dataset_yaml_filename, 'w'))

    support.check_dataset_reads(dataset_data, options_storage.only_assembler,
                                log)
    if support.dataset_has_only_mate_pairs_libraries(dataset_data):
        support.error(
            'you should specify at least one paired-end or unpaired library (only mate-pairs libraries were found)!'
        )
    if options_storage.rectangles and (len(dataset_data) > 1):
        support.error(
            'rectangle graph algorithm for repeat resolution cannot work with multiple libraries!'
        )

    ### FILLING cfg
    cfg["common"] = empty_config()
    cfg["dataset"] = empty_config()
    if not options_storage.only_assembler:
        cfg["error_correction"] = empty_config()
    if not options_storage.only_error_correction:
        cfg["assembly"] = empty_config()

    # common
    cfg["common"].__dict__["output_dir"] = os.path.abspath(
        options_storage.output_dir)
    cfg["common"].__dict__["max_threads"] = options_storage.threads
    cfg["common"].__dict__["max_memory"] = options_storage.memory
    cfg["common"].__dict__["developer_mode"] = options_storage.developer_mode

    # dataset section
    cfg["dataset"].__dict__["single_cell"] = options_storage.single_cell
    cfg["dataset"].__dict__["yaml_filename"] = os.path.abspath(
        options_storage.dataset_yaml_filename)
    if options_storage.developer_mode and options_storage.reference:
        cfg["dataset"].__dict__["reference"] = options_storage.reference

    # error correction
    if (not options_storage.only_assembler) and (options_storage.iterations >
                                                 0):
        cfg["error_correction"].__dict__["output_dir"] = os.path.join(
            cfg["common"].output_dir, "corrected")
        cfg["error_correction"].__dict__[
            "max_iterations"] = options_storage.iterations
        cfg["error_correction"].__dict__[
            "gzip_output"] = not options_storage.disable_gzip_output
        if options_storage.qvoffset:
            cfg["error_correction"].__dict__[
                "qvoffset"] = options_storage.qvoffset
        if options_storage.bh_heap_check:
            cfg["error_correction"].__dict__[
                "heap_check"] = options_storage.bh_heap_check
        if options_storage.tmp_dir:
            cfg["error_correction"].__dict__[
                "tmp_dir"] = options_storage.tmp_dir
        else:
            cfg["error_correction"].__dict__["tmp_dir"] = cfg[
                "error_correction"].output_dir
        cfg["error_correction"].tmp_dir = os.path.join(
            os.path.abspath(cfg["error_correction"].tmp_dir), 'tmp')

    # assembly
    if not options_storage.only_error_correction:
        if options_storage.k_mers:
            cfg["assembly"].__dict__["iterative_K"] = options_storage.k_mers
        else:
            cfg["assembly"].__dict__[
                "iterative_K"] = options_storage.k_mers_short
        cfg["assembly"].__dict__["careful"] = options_storage.careful
        if options_storage.spades_heap_check:
            cfg["assembly"].__dict__[
                "heap_check"] = options_storage.spades_heap_check

    #corrector can work only if contigs exist (not only error correction)
    if (not options_storage.only_error_correction
        ) and options_storage.mismatch_corrector:
        cfg["mismatch_corrector"] = empty_config()
        cfg["mismatch_corrector"].__dict__["skip-masked"] = ""
        cfg["mismatch_corrector"].__dict__["bwa"] = os.path.join(
            bin_home, "bwa-spades")
        cfg["mismatch_corrector"].__dict__["threads"] = options_storage.threads
        cfg["mismatch_corrector"].__dict__[
            "output-dir"] = options_storage.output_dir

    return cfg, dataset_data
Пример #3
0
def fill_cfg(options_to_parse, log):
    try:
        options, not_options = getopt.gnu_getopt(options_to_parse, options_storage.short_options, options_storage.long_options)
    except getopt.GetoptError:
        _, exc, _ = sys.exc_info()
        sys.stderr.write(str(exc) + "\n")
        sys.stderr.flush()
        options_storage.usage(spades_version)
        sys.exit(1)

    if not options:
        options_storage.usage(spades_version)
        sys.exit(1)

    # all parameters are stored here
    cfg = dict()
    # dataset is stored here. We are prepared for up to MAX_LIBS_NUMBER paired-end libs and MAX_LIBS_NUMBER mate-pair libs
    dataset_data = [{} for i in range(options_storage.MAX_LIBS_NUMBER * 2)]

    options_storage.continue_mode = False
    for opt, arg in options:
        if opt == '-o':
            options_storage.output_dir = arg
        elif opt == "--tmp-dir":
            options_storage.tmp_dir = arg
        elif opt == "--reference":
            options_storage.reference = support.check_file_existence(arg, 'reference', log)
        elif opt == "--dataset":
            options_storage.dataset_yaml_filename = support.check_file_existence(arg, 'dataset', log)

        elif opt in options_storage.reads_options:
            support.add_to_dataset(opt, arg, dataset_data)

        elif opt == '-k':
            options_storage.k_mers = list(map(int, arg.split(",")))
            for k in options_storage.k_mers:
                if k > 127:
                    support.error('wrong k value ' + str(k) + ': all k values should be less than 128', log)
                if k % 2 == 0:
                    support.error('wrong k value ' + str(k) + ': all k values should be odd', log)

        elif opt == "--sc":
            options_storage.single_cell = True
        elif opt == "--disable-gzip-output":
            options_storage.disable_gzip_output = True

        elif opt == "--only-error-correction":
            if options_storage.only_assembler:
                support.error('you cannot specify --only-error-correction and --only-assembler simultaneously')
            options_storage.only_error_correction = True
        elif opt == "--only-assembler":
            if options_storage.only_error_correction:
                support.error('you cannot specify --only-error-correction and --only-assembler simultaneously')
            options_storage.only_assembler = True

        elif opt == "--bh-heap-check":
            options_storage.bh_heap_check = arg
        elif opt == "--spades-heap-check":
            options_storage.spades_heap_check = arg

        elif opt == "--continue":
            options_storage.continue_mode = True

        elif opt == '-t' or opt == "--threads":
            options_storage.threads = int(arg)
        elif opt == '-m' or opt == "--memory":
            options_storage.memory = int(arg)
        elif opt == "--phred-offset":
            if int(arg) in [33, 64]:
                options_storage.qvoffset = int(arg)
            else:
                support.error('wrong PHRED quality offset value ' + str(arg) + ': should be either 33 or 64', log)
        elif opt == '-i' or opt == "--iterations":
            options_storage.iterations = int(arg)

        elif opt == "--debug":
            options_storage.developer_mode = True

        elif opt == "--rectangles":
            options_storage.rectangles = True

        #corrector
        elif opt == "--mismatch-correction":
            options_storage.mismatch_corrector = True

        elif opt == "--careful":
            options_storage.mismatch_corrector = True
            options_storage.careful = True

        elif opt == '-h' or opt == "--help":
            options_storage.usage(spades_version)
            sys.exit(0)
        elif opt == "--help-hidden":
            options_storage.usage(spades_version, True)
            sys.exit(0)

        elif opt == "--test":
            options_storage.set_test_options()
            support.add_to_dataset('-1', os.path.join(spades_home, "test_dataset/ecoli_1K_1.fq.gz"), dataset_data)
            support.add_to_dataset('-2', os.path.join(spades_home, "test_dataset/ecoli_1K_2.fq.gz"), dataset_data)
            #break
        else:
            raise ValueError


    if not options_storage.output_dir:
        support.error("the output_dir is not set! It is a mandatory parameter (-o output_dir).", log)
    if not os.path.isdir(options_storage.output_dir):
        if options_storage.continue_mode:
            support.error("the output_dir should exist for --continue!", log)
        os.makedirs(options_storage.output_dir)
    if options_storage.continue_mode:
        return None, None

    if options_storage.dataset_yaml_filename:
        try:
            dataset_data = pyyaml.load(open(options_storage.dataset_yaml_filename, 'r'))
        except pyyaml.YAMLError:
            _, exc, _ = sys.exc_info()
            support.error('exception caught while parsing YAML file (' + options_storage.dataset_yaml_filename + '):\n' + str(exc))
        dataset_data = support.relative2abs_paths(dataset_data, os.path.dirname(options_storage.dataset_yaml_filename))
    else:
        dataset_data = support.correct_dataset(dataset_data)
        dataset_data = support.relative2abs_paths(dataset_data, os.getcwd())
        options_storage.dataset_yaml_filename = os.path.join(options_storage.output_dir, "input_dataset.yaml")
        pyyaml.dump(dataset_data, open(options_storage.dataset_yaml_filename, 'w'))

    support.check_dataset_reads(dataset_data, options_storage.only_assembler, log)
    if support.dataset_has_only_mate_pairs_libraries(dataset_data):
        support.error('you should specify at least one paired-end or unpaired library (only mate-pairs libraries were found)!')
    if options_storage.rectangles and (len(dataset_data) > 1):
        support.error('rectangle graph algorithm for repeat resolution cannot work with multiple libraries!')

    ### FILLING cfg
    cfg["common"] = empty_config()
    cfg["dataset"] = empty_config()
    if not options_storage.only_assembler:
        cfg["error_correction"] = empty_config()
    if not options_storage.only_error_correction:
        cfg["assembly"] = empty_config()

    # common
    cfg["common"].__dict__["output_dir"] = os.path.abspath(options_storage.output_dir)
    cfg["common"].__dict__["max_threads"] = options_storage.threads
    cfg["common"].__dict__["max_memory"] = options_storage.memory
    cfg["common"].__dict__["developer_mode"] = options_storage.developer_mode

    # dataset section
    cfg["dataset"].__dict__["single_cell"] = options_storage.single_cell
    cfg["dataset"].__dict__["yaml_filename"] = os.path.abspath(options_storage.dataset_yaml_filename)
    if options_storage.developer_mode and options_storage.reference:
        cfg["dataset"].__dict__["reference"] = options_storage.reference

    # error correction
    if (not options_storage.only_assembler) and (options_storage.iterations > 0):
        cfg["error_correction"].__dict__["output_dir"] = os.path.join(cfg["common"].output_dir, "corrected")
        cfg["error_correction"].__dict__["max_iterations"] = options_storage.iterations
        cfg["error_correction"].__dict__["gzip_output"] = not options_storage.disable_gzip_output
        if options_storage.qvoffset:
            cfg["error_correction"].__dict__["qvoffset"] = options_storage.qvoffset
        if options_storage.bh_heap_check:
            cfg["error_correction"].__dict__["heap_check"] = options_storage.bh_heap_check
        if options_storage.tmp_dir:
            cfg["error_correction"].__dict__["tmp_dir"] = options_storage.tmp_dir
        else:
            cfg["error_correction"].__dict__["tmp_dir"] = cfg["error_correction"].output_dir
        cfg["error_correction"].tmp_dir = os.path.join(os.path.abspath(cfg["error_correction"].tmp_dir), 'tmp')

    # assembly
    if not options_storage.only_error_correction:
        if options_storage.k_mers:
            cfg["assembly"].__dict__["iterative_K"] = options_storage.k_mers
        else:
            cfg["assembly"].__dict__["iterative_K"] = options_storage.k_mers_short
        cfg["assembly"].__dict__["careful"] = options_storage.careful
        if options_storage.spades_heap_check:
            cfg["assembly"].__dict__["heap_check"] = options_storage.spades_heap_check

    #corrector can work only if contigs exist (not only error correction)
    if (not options_storage.only_error_correction) and options_storage.mismatch_corrector:
        cfg["mismatch_corrector"] = empty_config()
        cfg["mismatch_corrector"].__dict__["skip-masked"] = ""
        cfg["mismatch_corrector"].__dict__["bwa"] = os.path.join(bin_home, "bwa-spades")
        cfg["mismatch_corrector"].__dict__["threads"] = options_storage.threads
        cfg["mismatch_corrector"].__dict__["output-dir"] = options_storage.output_dir

    return cfg, dataset_data