Exemplo n.º 1
0
def run(args):
    """
    Configure logging and update a VCF from grmpy output.

    The log level is picked from the first flag that is set, in the order
    ``args.verbose`` (INFO), ``args.quiet`` (ERROR), ``args.debug`` (DEBUG);
    WARNING is used when none is set.  The root logger is then reset so that
    messages go to ``args.logfile``.

    The value returned by ``vcfupdate.read_grmpy(args.input_grm)`` is handed
    to ``vcfupdate.update_vcf_from_grmpy`` together with ``args.input`` and
    ``args.output``.  Any exception has its traceback written through a
    ``LoggingWriter`` at ERROR level and is then re-raised.
    """
    # Conditional chain keeps the original precedence: verbose > quiet > debug.
    loglevel = (
        logging.INFO if args.verbose
        else logging.ERROR if args.quiet
        else logging.DEBUG if args.debug
        else logging.WARNING
    )

    # Drop every handler already attached to the root logger; basicConfig is
    # a no-op while handlers are present.
    for existing_handler in list(logging.root.handlers):
        logging.root.removeHandler(existing_handler)
    logging.basicConfig(
        filename=args.logfile,
        format='%(asctime)s %(levelname)-8s %(message)s',
        level=loglevel,
    )

    try:
        genotypes = vcfupdate.read_grmpy(args.input_grm)
        vcfupdate.update_vcf_from_grmpy(args.input, genotypes, args.output)
    except Exception:  # pylint: disable=W0703
        # Route the traceback into the log before propagating the error.
        error_sink = LoggingWriter(logging.ERROR)
        traceback.print_exc(file=error_sink)
        raise
Exemplo n.º 2
0
def run(args):
    """
    Run grmpy for the inputs described by ``args`` and post-process VCF input.

    Steps, in order:

    1. Create ``args.output`` (and ``args.scratch_dir`` when set) and reset
       the root logger to write to ``args.logfile``.
    2. Validate the header line of ``args.manifest``.
    3. Write all grmpy options to a response file in ``args.scratch_dir`` and
       run ``args.grmpy --response-file=<file>`` through the shell.
    4. If ``args.input`` ends with ``vcf`` or ``vcf.gz``, read the resulting
       ``genotypes.json.gz`` with ``vcfupdate.read_grmpy`` and write
       ``genotypes.vcf.gz`` via ``vcfupdate.update_vcf_from_grmpy``.

    :param args: parsed command line options (attribute access only).
    :raises Exception: if the manifest is empty or its header is invalid, or
        if the alignment output directory cannot be created.
    :raises subprocess.CalledProcessError: if grmpy exits with a non-zero code.

    Exceptions raised in steps 3 and 4 have their traceback written through a
    ``LoggingWriter`` at ERROR level before being re-raised.
    """
    # Function-local import: shlex.quote replaces pipes.quote (the pipes
    # module was removed in Python 3.13).
    import shlex

    loglevel, grmpy_loglevel = _select_log_levels(args)

    os.makedirs(args.output, exist_ok=True)
    if args.scratch_dir:
        os.makedirs(args.scratch_dir, exist_ok=True)

    # reinitialize logging (basicConfig does nothing while handlers exist)
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
    logging.basicConfig(filename=args.logfile, format='%(asctime)s %(levelname)-8s %(message)s', level=loglevel)

    # check format of manifest
    _check_manifest_header(args.manifest)

    # prepare input graph description and run grmpy
    result_json_path = os.path.join(args.output, "genotypes.json.gz")
    try:
        graph_files = load_graph_description(args)
        response_text = _build_grmpy_options(args, graph_files, result_json_path, grmpy_loglevel)

        # Close the response file before starting grmpy so the content is
        # fully on disk and no handle is leaked.  delete=False keeps the file
        # around after closing, as before.
        with tempfile.NamedTemporaryFile(dir=args.scratch_dir, mode="wt", suffix=".txt",
                                         delete=False) as response_file:
            response_file.write(response_text)

        commandline = args.grmpy + " --response-file=%s" % shlex.quote(response_file.name)

        logging.info("Starting: %s", commandline)

        # NOTE(review): shell=True is kept because args.grmpy is inserted
        # unquoted and may rely on shell parsing — confirm before changing.
        subprocess.check_call(commandline, shell=True, stderr=subprocess.STDOUT)

    except Exception:  # pylint: disable=W0703
        traceback.print_exc(file=LoggingWriter(logging.ERROR))
        raise

    try:
        if args.input.endswith(("vcf", "vcf.gz")):
            grmpy_output = vcfupdate.read_grmpy(result_json_path)
            result_vcf_path = os.path.join(args.output, "genotypes.vcf.gz")
            # Prefer variants.vcf.gz from the output folder when it exists as
            # a regular file; otherwise fall back to the original input.
            vcf_input_path = os.path.join(args.output, "variants.vcf.gz")
            if not os.path.isfile(vcf_input_path):
                vcf_input_path = args.input
            vcfupdate.update_vcf_from_grmpy(vcf_input_path, grmpy_output, result_vcf_path)
    except Exception:  # pylint: disable=W0703
        traceback.print_exc(file=LoggingWriter(logging.ERROR))
        raise


# Column names accepted in the manifest header, in the order they are listed
# in the "Illegal header name" error message.
_MANIFEST_HEADERS = ("id", "path", "idxdepth", "depth", "read length", "sex")


def _select_log_levels(args):
    """Return ``(logging level, grmpy --log-level name)`` for the CLI flags."""
    # Precedence: verbose, then quiet, then debug; warning is the default.
    if args.verbose:
        return logging.INFO, "info"
    if args.quiet:
        return logging.ERROR, "error"
    if args.debug:
        return logging.DEBUG, "debug"
    return logging.WARNING, "warning"


def _check_manifest_header(manifest_path):
    """Validate the first line of the manifest; raise Exception if it is unusable."""
    with open(manifest_path) as manifest_file:
        header_line = manifest_file.readline()

    # readline() returns "" only at end of file, i.e. the manifest has no
    # lines at all and therefore no header to validate.
    if not header_line:
        raise Exception("Manifest %s is empty; a header line is required." % manifest_path)

    # A leading "#" on the header line is optional.
    if header_line.startswith("#"):
        header_line = header_line[1:]

    # Columns may be separated by tabs or commas.
    present = set()
    for field in re.split(r'\t|,', header_line.strip()):
        if field not in _MANIFEST_HEADERS:
            header_str = ",".join(_MANIFEST_HEADERS)
            raise Exception("Illegal header name %s. Allowed headers:\n%s" % (field, header_str))
        present.add(field)

    if "id" not in present or "path" not in present:
        raise Exception("Missing header \"id\" or \"path\" in manifest")
    # Either "idxdepth" alone, or both "depth" and "read length", must be given.
    if "idxdepth" not in present:
        if "depth" not in present or "read length" not in present:
            raise Exception("Missing header \"idxdepth\", or \"depth\" and \"read length\" in manifest.")


def _build_grmpy_options(args, graph_files, result_json_path, grmpy_loglevel):
    """Return the response-file text holding every grmpy option and graph path."""
    import shlex

    options = [
        " -r %s" % shlex.quote(args.reference),
        " -m %s" % shlex.quote(args.manifest),
        " -o %s" % shlex.quote(result_json_path),
        " -z",
    ]

    if args.genotyping_parameters:
        options.append(" -G %s" % shlex.quote(args.genotyping_parameters))
    if args.max_reads_per_event:
        options.append(" -M %s" % shlex.quote(str(args.max_reads_per_event)))
    if args.threads > 1:
        options.append(" -t %s" % shlex.quote(str(args.threads)))
    if args.graph_sequence_matching:
        options.append(" --graph-sequence-matching %s" % shlex.quote(str(args.graph_sequence_matching)))
    if args.klib_sequence_matching:
        options.append(" --klib-sequence-matching %s" % shlex.quote(str(args.klib_sequence_matching)))
    if args.kmer_sequence_matching:
        options.append(" --kmer-sequence-matching %s" % shlex.quote(str(args.kmer_sequence_matching)))
    if int(args.bad_align_uniq_kmer_len):
        options.append(" --bad-align-uniq-kmer-len %s" % shlex.quote(str(args.bad_align_uniq_kmer_len)))
    if args.write_alignments:
        alignment_directory = os.path.join(args.output, "alignments")
        os.makedirs(alignment_directory, exist_ok=True)
        if not os.path.isdir(alignment_directory):
            raise Exception(f"Cannot create alignment output directory: {alignment_directory}")
        # Pass exactly the directory that was created and checked above.
        options.append(" --alignment-output-folder %s" % shlex.quote(alignment_directory))
    if args.infer_read_haplotypes:
        options.append(" --infer-read-haplotypes")

    options.append(" --log-level=" + grmpy_loglevel)
    options.append(" --log-file " + shlex.quote(os.path.join(args.output, "grmpy.log")))
    options.append(" --log-async no")

    # Graph paths follow -g, one per line.
    options.append(" -g")
    options.extend("\n%s" % shlex.quote(graph) for graph in graph_files)

    return "".join(options)