Exemplo n.º 1
0
        good_genomes = args.good_genomes.split(",")
        if args.trim_names:
            good_genomes = [
                genome_name.split(args.trimmer_char, 1)[0]
                for genome_name in good_genomes
            ]
        for genome_name in list(genomes.keys()):
            if genome_name not in good_genomes:
                del genomes[genome_name]
    if args.bad_genomes != "":
        bad_genomes = args.bad_genomes.split(",")
        if args.trim_names:
            bad_genomes = [
                genome_name.split(args.trimmer_char, 1)[0]
                for genome_name in bad_genomes
            ]
        for genome_name in list(genomes.keys()):
            if genome_name in bad_genomes:
                del genomes[genome_name]
    if args.filter_duplications:
        remove_duplications(genomes=genomes)
    result = get_assembly_points(genomes=genomes)
    logger.info(
        "Writing output to file \"{file_name}\"".format(file_name=args.output))
    camsa_io.write_assembly_points(assembly_points=result,
                                   destination=args.output,
                                   output_setup=args.o_format,
                                   delimiter=args.o_delimiter)
    logger.info("Elapsed time: {el_time}".format(
        el_time=str(datetime.datetime.now() - start_time)))
Exemplo n.º 2
0
        print("# NOTE: this is not a valid config, but rather a summary of the utilized options", file=destination)
        print(parser.format_values(), file=destination)
    for pairs_path in args.points:
        full_path = os.path.abspath(os.path.expanduser(pairs_path))
        base_name = os.path.basename(full_path)
        shutil.copyfile(src=full_path, dst=os.path.join(input_report_dir, base_name))

    # "merged" subdir of the report
    # will contain the assembly points that constitute the merged assembly
    merged_report_dir = os.path.join(args.output_dir, "merged")
    camsa_io.remove_dir(dir_path=merged_report_dir)
    os.makedirs(merged_report_dir)
    merged_report_points_path = os.path.join(merged_report_dir, "merged.camsa.points")
    with open(merged_report_points_path, "wt") as destination:
        camsa_io.write_assembly_points(destination=destination,
                                       assembly_points=[ap for ap in merged_assembly_points if ap.participates_in_merged],
                                       output_setup=args.o_merged_format)

    # "comparative" subdir of the report
    # will contain assembly points divided into subgroups based on their agreement across the input assemblies
    comparative_report_dir = os.path.join(args.output_dir, "comparative")
    camsa_io.remove_dir(dir_path=comparative_report_dir)
    subgroups_report_dir = os.path.join(comparative_report_dir, "subgroups")
    camsa_io.remove_dir(comparative_report_dir)
    os.makedirs(comparative_report_dir)
    os.makedirs(subgroups_report_dir)
    for group in grouped_assemblies:
        comparative_report_group_points_path = os.path.join(subgroups_report_dir, "{group_name}.camsa.points".format(group_name=".".join(group.name)))
        with open(comparative_report_group_points_path, "wt") as destination:
            camsa_io.write_assembly_points(assembly_points=group.aps,
                                           destination=destination,
Exemplo n.º 3
0
    ch.setFormatter(logging.Formatter(args.c_logging_formatter_entry))
    logger.info("Starting the converting process")

    paths = []
    for file_name in args.agouti:
        logger.info("Processing file \"{file_name}\"".format(file_name=file_name))
        with open(file_name, "rt") as source:
            for line in source:
                line = line.strip()
                if len(line) == 0 or line.startswith("#") or line.startswith(">"):
                    continue
                path = line.split(args.i_delimiter)
                if args.source is not None:
                    source = args.source
                else:
                    source = os.path.basename(file_name)
                    source = os.path.splitext(source)[0]
                paths.append((source, path))
    logger.info("A total of {paths_cnt} paths were extracted from {file_cnt} files"
                "".format(paths_cnt=len(paths), file_cnt=len(args.agouti)))
    assembly_points = []
    for source, path in paths:
        if len(path) <= 1:
            logger.warning("Encountered a path of length <= 1 {{{path}}}; skipping"
                           "".format(path=",".join(path)))
            continue
        assembly_points.extend(get_assembly_points(agouti_path=path, source=source, oriented=args.oriented))
    logger.info("Writing output to file \"{file_name}\"".format(file_name=args.output))
    camsa_io.write_assembly_points(assembly_points=assembly_points, destination=args.output, output_setup=args.o_format, delimiter=args.o_delimiter)
    logger.info("Elapsed time: {el_time}".format(el_time=str(datetime.datetime.now() - start_time)))
Exemplo n.º 4
0
            data = line.split("\t", 8)
            if Component.is_scaffold_component(data[4]):
                logger.debug(
                    "Processing a non-gap data line: {line}".format(line=line))
                component = ScaffoldComponent.from_agp_data(data=data)
            else:
                logger.debug(
                    "Processing a gap data line: {line}".format(line=line))
                component = GapComponent.from_agp_data(data=data)
            objects[component.object_id].append(component)

    for object_id in objects.keys():
        logger.debug(
            "Processing object {object_id}".format(object_id=object_id))
        objects[object_id] = sorted(
            objects[object_id],
            key=lambda component: (component.object_beg, component.object_end))
        components = objects[object_id]
        camsa_points = object_as_camsa_points(object_as_components=components,
                                              extra_data=args)
        assembly_points.extend(camsa_points)

    logger.info("Writing CAMSA formatted assembly poitns to {file}".format(
        file=args.output.name))
    camsa_io.write_assembly_points(assembly_points=assembly_points,
                                   destination=args.output,
                                   output_setup=args.o_format)
    logger.info("Finished the conversion.")
    logger.info("Elapsed time: {el_time}".format(
        el_time=str(datetime.datetime.now() - start_time)))
Exemplo n.º 5
0
        full_path = os.path.abspath(os.path.expanduser(pairs_path))
        base_name = os.path.basename(full_path)
        shutil.copyfile(src=full_path,
                        dst=os.path.join(input_report_dir, base_name))

    # "merged" subdir of the report
    # will contain the assembly points that constitute the merged assembly
    merged_report_dir = os.path.join(args.output_dir, "merged")
    camsa_io.remove_dir(dir_path=merged_report_dir)
    os.makedirs(merged_report_dir)
    merged_report_points_path = os.path.join(merged_report_dir,
                                             "merged.camsa.points")
    with open(merged_report_points_path, "wt") as destination:
        camsa_io.write_assembly_points(destination=destination,
                                       assembly_points=[
                                           ap for ap in merged_assembly_points
                                           if ap.participates_in_merged
                                       ],
                                       output_setup=args.o_merged_format)

    # "comparative" subdir of the report
    # will contain assembly points divided into subgroups based on their agreement across the input assemblies
    comparative_report_dir = os.path.join(args.output_dir, "comparative")
    camsa_io.remove_dir(dir_path=comparative_report_dir)
    subgroups_report_dir = os.path.join(comparative_report_dir, "subgroups")
    camsa_io.remove_dir(comparative_report_dir)
    os.makedirs(comparative_report_dir)
    os.makedirs(subgroups_report_dir)
    for group in grouped_assemblies:
        comparative_report_group_points_path = os.path.join(
            subgroups_report_dir, "{group_name}.camsa.points".format(
                group_name=".".join(group.name)))