Exemplo n.º 1
0
                    "--output",
                    action="store",
                    dest="output",
                    required=True,
                    help="File to write clusters with labeled elements")
# Options controlling how the label is attached to each cluster element.
# Fix: the long flag was spelled "--label position"; the embedded space meant
# it could not be typed on a command line without quoting. The short flag
# (-p) and the destination attribute (label_position) are unchanged.
parser.add_argument(
    "-p",
    "--label_position",
    action="store",
    dest="label_position",
    default="first",
    help="Position of label. Allowed - first, last. Default - first")
# Character(s) placed between the label and the element id.
parser.add_argument("-s",
                    "--separator",
                    action="store",
                    dest="separator",
                    default="@",
                    help="Separator to use. default - '@'")

# Parse the command line now that every option is registered.
args = parser.parse_args()

# Prefer the label given on the command line; when it is missing/empty fall
# back to element [1] of FileRoutines.split_filename() for the cluster file
# (presumably the file's base name - confirm against FileRoutines).
label = args.label or FileRoutines.split_filename(args.cluster_file)[1]

# Write the cluster file back out with every element labeled.
SequenceClusterRoutines.label_cluster_elements_from_file(
    args.cluster_file,
    label,
    args.output,
    separator=args.separator,
    label_position=args.label_position,
)
                    dest="label_ids",
                    help="Label ids by species names. Default - don't label")

# Separator inserted between the label and the id ("store" is argparse's
# default action, so it is not spelled out).
parser.add_argument(
    "-g", "--separator_for_labeling",
    dest="separator_for_labeling",
    default="@",
    help="Separator to use for labeling. Default - '@'",
)
# Inverted flag: label_first stays True unless -r/--label_last is given.
parser.add_argument(
    "-r", "--label_last",
    dest="label_first",
    action="store_false",
    default=True,
    help="Place label at the end of id",
)

# Parse the command line now that every option is registered.
args = parser.parse_args()

# Make sure the output directory is there before anything is written to it.
FileRoutines.safe_mkdir(args.output_dir)

# Hand all parsed options over to the extraction routine.
SequenceClusterRoutines.extract_sequences_by_clusters(
    args.input_cluster_dir,
    args.input_seq_dir,
    args.output_dir,
    file_with_white_list_cluster_ids=args.white_list_ids,
    mode=args.mode,
    sequence_file_extension=args.seq_extension,
    sequence_file_format=args.format,
    label_species=args.label_ids,
    separator_for_labeling=args.separator_for_labeling,
    species_label_first=args.label_first,
)
# Where and under which name the results are written ("store" is argparse's
# default action, so it is not spelled out).
parser.add_argument(
    "-o", "--output_prefix",
    dest="output",
    help="Output prefix to use",
)
parser.add_argument(
    "-d", "--output_directory",
    dest="out_dir",
    default="./",
    help="Directory to write output",
)
# Off by default; the value is negated before being handed to the routine.
parser.add_argument(
    "-n", "--dont_skip_cluster_if_absent_element",
    dest="dont_skip_cluster_if_absent_element",
    action="store_true",
    default=False,
    help="Don't skip cluster with absent sequences",
)

# Parse the command line now that every option is registered.
args = parser.parse_args()

# The CLI flag is phrased negatively ("don't skip"), while the routine expects
# the positive form, hence the `not` on the last argument.
SequenceClusterRoutines.extract_sequences_from_selected_clusters(
    args.cluster_id_file,
    args.cluster_file,
    args.seq_file,
    output_dir=args.out_dir,
    seq_format=args.seq_file_format,
    out_prefix=args.output,
    create_dir_for_each_cluster=args.create_dir_for_each_cluster,
    skip_cluster_if_no_sequence_for_element=(
        not args.dont_skip_cluster_if_absent_element
    ),
)
Exemplo n.º 4
0
                    "--output",
                    action="store",
                    dest="output",
                    required=True,
                    help="File to write clusters with single-copy clusters")
# Options controlling how the label is attached to each cluster element.
# Fix: the long flag was spelled "--label position"; the embedded space meant
# it could not be typed on a command line without quoting. The short flag
# (-p) and the destination attribute (label_position) are unchanged.
parser.add_argument(
    "-p",
    "--label_position",
    action="store",
    dest="label_position",
    default="first",
    help="Position of label. Allowed - first, last. Default - first")
# Character(s) placed between the label and the element id.
parser.add_argument("-s",
                    "--separator",
                    action="store",
                    dest="separator",
                    default="@",
                    help="Separator to use. default - '@'")

# Parse the command line now that every option is registered.
args = parser.parse_args()

# Turn the --input value into a list of cluster file paths.
list_of_cluster_files = FileRoutines.make_list_of_path_to_files(args.input)

# Extract the single-copy clusters and write them to the output file; the
# returned collection is only used for the summary line below.
single_copy_clusters = SequenceClusterRoutines.extract_single_copy_clusters_from_files(
    list_of_cluster_files,
    args.output,
    label_elements=args.label,
    separator=args.separator,
    label_position=args.label_position)

# Parenthesised form: prints the same text under Python 2 (single expression)
# and is also valid Python 3, unlike the bare `print` statement.
print("Was found %i single-copy clusters" % len(single_copy_clusters))
Exemplo n.º 5
0
from Routines import SequenceClusterRoutines, FileRoutines

# Command-line interface of the monocluster id extraction script.
parser = argparse.ArgumentParser()

# Input directory; the value is passed through FileRoutines.check_path before
# being stored ("store" is argparse's default action, so it is not spelled out).
parser.add_argument(
    "-i", "--input_cluster_dir",
    dest="input_cluster_dir",
    type=FileRoutines.check_path,
    required=True,
    help="Directory with files with clusters",
)
parser.add_argument(
    "-o", "--output",
    dest="output",
    required=True,
    help="File to write ids of monoclusters",
)
# Optional: left as None when the flag is not given.
parser.add_argument(
    "-w", "--white_list_ids",
    dest="white_list_ids",
    help="File with ids from white list. ",
)

# Parse the command line now that every option is registered.
args = parser.parse_args()

# Extract the monocluster ids, optionally restricted by the white-list file.
SequenceClusterRoutines.extract_monocluster_ids_from_file(
    args.input_cluster_dir,
    args.output,
    file_with_white_list_ids=args.white_list_ids,
)
Exemplo n.º 6
0
                    dest="cluster_file",
                    required=True,
                    help="File with clusters")
# Required input with element ids and required output file ("store" is
# argparse's default action, so it is not spelled out).
parser.add_argument(
    "-d", "--element_file",
    dest="element_file",
    required=True,
    help="File with ids of elements",
)
parser.add_argument(
    "-o", "--output",
    dest="output",
    required=True,
    help="File to write extracted clusters",
)
# Extraction mode; the value is forwarded to the routine unchanged.
parser.add_argument(
    "-m", "--mode",
    dest="mode",
    default="w",
    help=(
        "extraction mode. Allowed - 'w' - if elements from element_id_list are present "
        "in cluster extracts only that elements; 'a' - if elements from element_id_list are present "
        "in cluster extracts all elements. Default - 'w'"
    ),
)

# Parse the command line now that every option is registered.
args = parser.parse_args()

# Extract the clusters that contain the listed element ids.
SequenceClusterRoutines.extract_clusters_by_element_ids_from_file(
    args.cluster_file,
    args.element_file,
    args.output,
    mode=args.mode,
)
Exemplo n.º 7
0
                    "--separator",
                    action="store",
                    dest="column_separator",
                    default='\t',
                    help="Column separator in synonym file. Default: \\t")

# Required destination for the renamed clusters ("store" is argparse's default
# action, so it is not spelled out).
parser.add_argument(
    "-o", "--output_cluster_file",
    dest="output_cluster_file",
    required=True,
    help="File to write clusters with renamed elements",
)
# Boolean switch; False unless the flag is present.
parser.add_argument(
    "-r", "--remove_clusters_with_not_renamed_elements",
    dest="remove_clusters_with_not_renamed_elements",
    action="store_true",
    help="Remove clusters with not renamed elements. Default: false ",
)

# Parse the command line now that every option is registered.
args = parser.parse_args()

# Rename cluster elements using the synonym file; the syn_file_* keywords
# describe which columns of that file to read and how they are separated.
SequenceClusterRoutines.rename_elements_in_clusters(
    args.input_cluster_file,
    args.syn_file,
    args.output_cluster_file,
    remove_clusters_with_not_renamed_elements=(
        args.remove_clusters_with_not_renamed_elements
    ),
    syn_file_key_column_index=args.key_column_index,
    syn_file_value_column_index=args.value_column_index,
    syn_file_column_separator=args.column_separator,
)
# Format of the label in the output file.
# Fix: the long flag was spelled "--output_label position"; the embedded space
# meant it could not be typed on a command line without quoting. The short
# flag (-a) and the destination attribute (output_label_position) are unchanged.
parser.add_argument(
    "-a",
    "--output_label_position",
    action="store",
    dest="output_label_position",
    default="first",
    help=
    "Position of label in output file. Allowed - first, last. Default - first")
# Character(s) written between the label and the element id.
parser.add_argument(
    "-r",
    "--output_separator",
    action="store",
    dest="output_separator",
    default="@",
    help="Separator between label and element id in output file. Default - '@'"
)

# Parse the command line now that every option is registered.
args = parser.parse_args()

# NOTE(review): `label` is not passed to replace_label_from_file() below and
# nothing else in the visible code reads it; the call works on input_file /
# output_file / syn_file rather than cluster_file. This looks like a leftover
# from a sibling script - confirm that args.label and args.cluster_file exist
# for this parser, or remove the assignment.
label = args.label or FileRoutines.split_filename(args.cluster_file)[1]

# Rewrite labels, converting from the input separator/position convention to
# the output one.
SequenceClusterRoutines.replace_label_from_file(
    args.input_file,
    args.output_file,
    args.syn_file,
    old_separator=args.input_separator,
    old_label_position=args.input_label_position,
    new_separator=args.output_separator,
    new_label_position=args.output_label_position,
)