Пример #1
0
    def extract_proteins_from_alignments(dir_with_alignments, output_dir):
        """Extract sequences from alignment files into ``output_dir``.

        ``dir_with_alignments`` is either a single path given as ``str`` or a
        collection of paths; in both cases it is expanded with
        ``make_list_of_path_to_files``.  For every resulting file,
        ``MultipleAlignmentRoutines.extract_sequences_from_alignment`` is
        called with an output path built from ``output_dir`` plus elements 1
        and 2 of ``FileRoutines.split_filename`` (presumably the base name and
        the extension of the input file -- verify against ``split_filename``).
        """
        # NOTE(review): the output path below is built by plain concatenation,
        # so check_path presumably returns the directory with a trailing
        # separator -- confirm.
        out_dir = FileRoutines.check_path(output_dir)

        # A lone path is wrapped in a list so both call styles share one code path.
        if isinstance(dir_with_alignments, str):
            paths = [dir_with_alignments]
        else:
            paths = dir_with_alignments
        input_files = make_list_of_path_to_files(paths)

        FileRoutines.safe_mkdir(out_dir)
        from Routines import MultipleAlignmentRoutines
        for filename in input_files:
            name_parts = FileRoutines.split_filename(filename)
            output_file = "%s%s%s" % (out_dir, name_parts[1], name_parts[2])
            MultipleAlignmentRoutines.extract_sequences_from_alignment(
                filename, output_file)
Пример #2
0
__author__ = 'Sergei F. Kliver'
import os
import argparse

from Bio import AlignIO
from Routines import MultipleAlignmentRoutines
from Routines.File import check_path, make_list_of_path_to_files, save_mkdir, split_filename

# Command-line interface of the script.
parser = argparse.ArgumentParser()

# -i: the raw string is split on commas and the pieces are handed to
# make_list_of_path_to_files, so args.input holds that function's result.
parser.add_argument("-i", "--input",
                    dest="input",
                    action="store",
                    type=lambda raw: make_list_of_path_to_files(raw.split(",")),
                    help="Comma-separated list of files or directory with files "
                         "containing alignments(one alignment per file)")

# -n: integer threshold, 0 when the option is omitted.
parser.add_argument("-n", "--max_gap_number",
                    dest="max_gap_number",
                    action="store",
                    type=int,
                    default=0,
                    help="Maximum number of gaps to retain column")

# -o: stored as args.output after being passed through check_path.
parser.add_argument("-o", "--output_directory",
                    dest="output",
                    action="store",
                    type=check_path,
                    help="Output directory")
Пример #3
0
                    dest="draw_distribution",
                    help="Draw distribution of kmers")
parser.add_argument("-j", "--jellyfish_path",
                    dest="jellyfish_path",
                    action="store",
                    help="Path to jellyfish")
parser.add_argument("-n", "--dont_extract_kmer_list",
                    dest="dont_extract_kmer_list",
                    action="store_true",
                    help="Don't extract kmer list")

args = parser.parse_args()

# Expand the parsed input value into a list of file paths.
args.input = make_list_of_path_to_files(args.input)

# The two strand-related options cannot be combined.
if args.add_rev_com and args.count_both_strands:
    raise ValueError(
        "Options -b/--count_both_strands and -r/--add_reverse_complement are not compatible"
    )

if args.add_rev_com:
    # Write the records produced by rev_com_generator to a new fasta file and
    # continue with a prefix that carries the "_with_rev_com" suffix.
    # NOTE(review): yield_original_record=True presumably keeps the original
    # records next to their reverse complements -- confirm in rev_com_generator.
    file_with_rev_com = args.base_prefix + "_with_rev_com.fasta"
    record_dict = SeqIO.index_db("temp_index.idx", args.input, format="fasta")
    rev_com_records = rev_com_generator(record_dict, yield_original_record=True)
    SeqIO.write(rev_com_records, file_with_rev_com, "fasta")
    args.base_prefix = args.base_prefix + "_with_rev_com"

# All derived file names share the "<prefix>_<k>_mer" stem.
prefix_and_k = (args.base_prefix, args.kmer_length)
base_file = "%s_%i_mer.jf" % prefix_and_k
kmer_table_file = "%s_%i_mer.counts" % prefix_and_k
kmer_file = "%s_%i_mer.kmer" % prefix_and_k
Пример #4
0
#!/usr/bin/env python
__author__ = 'Sergei F. Kliver'
import argparse

from Tools.Annotation import Exonerate
from Routines.File import make_list_of_path_to_files

# Command-line interface: collect the input files and the output prefix, then
# hand both to Exonerate.split_output.
parser = argparse.ArgumentParser()

# -i (mandatory): the raw string is split on commas and expanded by
# make_list_of_path_to_files.
parser.add_argument("-i", "--input",
                    dest="input",
                    action="store",
                    required=True,
                    type=lambda raw: make_list_of_path_to_files(raw.split(",")),
                    help="Input comma-separated list of files/directories with exonerate output")
parser.add_argument("-o", "--output_prefix",
                    dest="output_prefix",
                    action="store",
                    help="Prefix of output files")

args = parser.parse_args()

Exonerate.split_output(args.input, args.output_prefix)
Пример #5
0
def make_list_of_path_to_files_from_comma_sep_string(string):
    """Split ``string`` on commas and pass the pieces to ``make_list_of_path_to_files``.

    The pieces are forwarded exactly as ``str.split(",")`` produces them (no
    whitespace stripping); the return value is whatever
    ``make_list_of_path_to_files`` returns for that list.
    """
    comma_separated_entries = string.split(",")
    return make_list_of_path_to_files(comma_separated_entries)
Пример #6
0
                    type=float,
                    help="Maximum number of iterations")
parser.add_argument("-f", "--offset",
                    dest="offset",
                    action="store",
                    type=float,
                    help="Offset (works like gap extension penalty)")
parser.add_argument("-g", "--gap_open_penalty",
                    dest="gap_open_penalty",
                    action="store",
                    type=float,
                    help="Gap open penalty")

args = parser.parse_args()

# Create the output directory before anything is written into it.
save_mkdir(args.output)

# The thread count is set as an attribute on MAFFT; the remaining settings
# are passed to the call itself.
MAFFT.threads = args.threads
alignment_inputs = make_list_of_path_to_files(args.input)
MAFFT.parallel_align(alignment_inputs,
                     args.output,
                     output_suffix="alignment",
                     mode=args.mode,
                     maxiterate=args.maxiterate,
                     offset=args.offset,
                     gap_open_penalty=args.gap_open_penalty,
                     number_of_processes=args.processes,
                     quiet=args.quiet,
                     anysymbol=True)
Пример #7
0
    "--write_dir_path",
    action="store_true",
    dest="write_dir_path",
    help=
    "Write directory name(if directory is source of files) in output file. Default: false"
)
parser.add_argument("-e", "--write_ext",
                    dest="write_ext",
                    action="store_true",
                    help="Write extensions of files with sequences in output file. Default: false")
args = parser.parse_args()

# Process the input files in sorted order.
files_list = sorted(make_list_of_path_to_files(args.input, file_filter))

# The literal name "stdout" selects standard output; any other value is
# opened (and truncated) as a file.
if args.output == "stdout":
    out_fd = sys.stdout
else:
    out_fd = open(args.output, "w")

# Optional header line of the tab-separated report.
if args.write_header:
    out_fd.write("#file/sample\tnumber_of_sequences\n")
for filename in files_list:
    if args.output != "stdout":
        print("Counting variants in %s ..." % filename)
    directory, prefix, extension = split_filename(filename)
    filetype = detect_filetype_by_extension(filename)
    number_of_sequences = 0
    with open(filename, "r") as seq_fd:
        try:
            for record in SeqIO.parse(seq_fd, filetype):
                number_of_sequences += 1