def extract_proteins_from_alignments(self, dir_with_alignments, output_dir):
    """
    Run MultipleAlignmentRoutines.extract_sequences_from_alignment on every input file.

    :param dir_with_alignments: a single path given as str, or a collection of
        paths; either form is expanded with self.make_list_of_path_to_files.
    :param output_dir: directory for the results; it is passed through
        self.check_path and then created with self.safe_mkdir.

    The output path of each file is the checked output directory followed by
    elements 1 and 2 of self.split_filename(input_path)
    (presumably base name and extension - verify against split_filename).
    """
    out_dir = self.check_path(output_dir)

    # A lone string is wrapped into a list before expansion to files.
    if isinstance(dir_with_alignments, str):
        sources = [dir_with_alignments]
    else:
        sources = dir_with_alignments
    input_files = self.make_list_of_path_to_files(sources)

    self.safe_mkdir(out_dir)

    # Kept as a function-level import, exactly where the original performed it.
    from RouToolPa.Routines import MultipleAlignmentRoutines

    for alignment_path in input_files:
        name_parts = self.split_filename(alignment_path)
        # No separator is inserted between the directory and the file name.
        destination = "%s%s%s" % (out_dir, name_parts[1], name_parts[2])
        MultipleAlignmentRoutines.extract_sequences_from_alignment(alignment_path, destination)
args = parser.parse_args()

# First-level key: element 1 of split_filename(alignment file);
# value: the result of count_unique_positions_per_sequence_from_file for that file.
unique_position_dict = TwoLvlDict()

FileRoutines.safe_mkdir(args.output_dir)

for alignment_file in args.input:
    alignment_name_list = FileRoutines.split_filename(alignment_file)
    alignment_name = alignment_name_list[1]
    output_prefix = "%s/%s.unique_positions" % (args.output_dir, alignment_name)

    unique_position_dict[alignment_name] = \
        MultipleAlignmentRoutines.count_unique_positions_per_sequence_from_file(
            alignment_file,
            output_prefix,
            format=args.format,
            gap_symbol="-",
            return_mode="relative",
            verbose=False,
        )

species_list = unique_position_dict.sl_keys()

# Regroup the per-alignment values by species: one list per species,
# filled in the iteration order of unique_position_dict.
data_dict = OrderedDict((species, []) for species in species_list)
for species in species_list:
    data_dict[species].extend(unique_position_dict[alignment][species]
                              for alignment in unique_position_dict)

data_list = [data_dict[species] for species in data_dict]
required=True, help="Output file with protein alignment") parser.add_argument("-f", "--format", action="store", dest="format", default="fasta", help="Format of alignments. Default: fasta") parser.add_argument("-g", "--gap_symbol", action="store", dest="gap_symbol", default="-", help="Gap symbol in alignment. Default: '-'") parser.add_argument( "-t", "--genetic_code", action="store", dest="genetic_code", default=1, type=int, help="Genetic code to use(NCBI tables) . Default: 1(standart)") args = parser.parse_args() MultipleAlignmentRoutines.translate_codon_alignment(args.codon_alignment, args.protein_alignment, format=args.format, gap_symbol=args.gap_symbol, table=args.genetic_code)
parser.add_argument(
    "-t", "--type",
    dest="type",
    default="nucleotide",
    help="Alignment type. Allowed: nucleotide(default), codon, protein",
)
parser.add_argument(
    "-l", "--flank_length",
    dest="flank_length",
    type=int,
    default=0,
    help="Flank length. Default: 0, i.e no flanks will be included in the output file",
)

args = parser.parse_args()

# verbose is always switched on for this script; everything else comes from the command line.
MultipleAlignmentRoutines.get_specific_positions(
    args.input,
    args.reference_sequence_id,
    args.position_list,
    args.output_prefix,
    format=args.format,
    gap_symbol=args.gap_symbol,
    verbose=True,
    alignment_type=args.type,
    flank_length=args.flank_length,
)
__author__ = 'Sergei F. Kliver'
import argparse

from RouToolPa.Routines import MultipleAlignmentRoutines

# Command-line front end for
# MultipleAlignmentRoutines.prepare_multigene_alignment_for_codeml.
# All three options are mandatory; the alignment format is fixed to "fasta".
parser = argparse.ArgumentParser()

parser.add_argument(
    "-i", "--input",
    dest="input",
    required=True,
    help="File with alignment",
)
parser.add_argument(
    "-c", "--coordinates",
    dest="coordinates",
    required=True,
    help="File with coordinates of gene alignments",
)
parser.add_argument(
    "-o", "--output",
    dest="output",
    required=True,
    help="File to write alignment prepared for Codeml",
)

args = parser.parse_args()

MultipleAlignmentRoutines.prepare_multigene_alignment_for_codeml(
    args.input,
    args.coordinates,
    args.output,
    format="fasta",
)
#!/usr/bin/env python
__author__ = 'Sergei F. Kliver'
import argparse

from RouToolPa.Routines import MultipleAlignmentRoutines

# Command-line front end for
# MultipleAlignmentRoutines.extract_degenerate_sites_from_codon_alignment_from_file.
parser = argparse.ArgumentParser()

parser.add_argument(
    "-i", "--input",
    dest="input",
    required=True,
    help="Input file with alignment",
)
parser.add_argument(
    "-o", "--output_prefix",
    dest="output_prefix",
    required=True,
    help="Prefix of output files",
)
parser.add_argument(
    "-f", "--format",
    dest="format",
    default="fasta",
    help="Alignment format. Default: fasta",
)
parser.add_argument(
    "-g", "--genetic_code_table",
    dest="genetic_code_table",
    type=int,
    default=1,
    help="Genetic code table number",
)
# store_true already defaults to False, so no explicit default is needed.
parser.add_argument(
    "-r", "--remove_Ns",
    dest="remove_Ns",
    action="store_true",
    help="Remove codon columns with Ns. Default:False",
)

args = parser.parse_args()

MultipleAlignmentRoutines.extract_degenerate_sites_from_codon_alignment_from_file(
    args.input,
    args.output_prefix,
    genetic_code_table=args.genetic_code_table,
    format=args.format,
    remove_codon_columns_with_Ns=args.remove_Ns,
)
from RouToolPa.Routines import MultipleAlignmentRoutines

# Command-line front end for
# MultipleAlignmentRoutines.extract_codon_positions_from_file.
parser = argparse.ArgumentParser()

parser.add_argument(
    "-i", "--input",
    dest="input",
    required=True,
    help="Input file with codon alignment",
)
parser.add_argument(
    "-o", "--output_prefix",
    dest="output_prefix",
    required=True,
    help="Prefix of output files",
)
parser.add_argument(
    "-f", "--format",
    dest="format",
    default="fasta",
    help="Format of thr input alignment. Default: fasta",
)

args = parser.parse_args()

MultipleAlignmentRoutines.extract_codon_positions_from_file(
    args.input,
    args.output_prefix,
    format=args.format,
)
type=lambda x: FileRoutines.make_list_of_path_to_files(x.split(",")), help="Comma-separated list of files or directory with files " "containing alignments(one alignment per file)") parser.add_argument("-n", "--max_gap_number", action="store", dest="max_gap_number", default=0, type=int, help="Maximum number of gaps to retain column") parser.add_argument("-o", "--output_directory", action="store", dest="output", type=FileRoutines.check_path, help="Output directory") parser.add_argument("-g", "--gap_symbol", action="store", dest="gap_symbol", default="-", help="Gap symbol") parser.add_argument("-s", "--suffix", action="store", dest="suffix", default=".gaps_removed", help="Suffix to use in output files. Default: '.gaps_removed'") parser.add_argument("-f", "--format", action="store", dest="format", default="fasta", help="Format of alignment") parser.add_argument("-v", "--verbose", action="store_true", dest="verbose", help="Print not found ids. Default - no") args = parser.parse_args() FileRoutines.safe_mkdir(args.output) for alignment_file in args.input: splited_filename = FileRoutines.split_filename(alignment_file) if args.verbose: print ("Handling %s ..." % alignment_file) output_filename = "%s%s%s%s" % (args.output, splited_filename[1], args.suffix, splited_filename[2]) alignment = AlignIO.read(alignment_file, args.format) filtered_alignment = MultipleAlignmentRoutines.remove_columns_with_gaps(alignment, args.max_gap_number, gap_symbol=args.gap_symbol) AlignIO.write(filtered_alignment, output_filename, args.format)
"--output_directory", action="store", dest="output_dir", default="./", help= "Output directory to write resulting files. Default - current directory") parser.add_argument("-f", "--format", action="store", dest="format", default="fasta", help="Format of alignments") parser.add_argument("-g", "--gap_symbol", action="store", dest="gap_symbol", default="-", help="Gap symbol. Default - '-'") args = parser.parse_args() for alignment_file in args.input: alignment_name_list = FileRoutines.split_filename(alignment_file) output_file = "%s/%s.position_matrix" % (args.output_dir, alignment_name_list[1]) MultipleAlignmentRoutines.get_position_presence_matrix_fom_file( alignment_file, output_file, format=args.format, gap_symbol=args.gap_symbol)
dest="format", default="fasta", help="Alignment file format. Default: fasta") parser.add_argument( "-a", "--align_variants", action="store_true", dest="align_variants", help="Align variants between species by coordinate. Default: False") parser.add_argument( "-t", "--target_sequence_id", action="store", dest="target_sequence_id", help="Target sequence id. Variants specific for this sequence will be " "extracted into separated file. Default: not set") args = parser.parse_args() MultipleAlignmentRoutines.call_variants_from_multiple_alignment_from_file( args.input, args.output_prefix, args.reference_sequence_id, gap_symbol=args.gap_symbol, verbose=True, format="fasta", align_variants=args.align_variants, output_type="hgvs", variant_separator=",", target_sequence_id=args.target_sequence_id, absent_symbol="")
"-t", "--type", action="store", dest="type", default="nucleotide", help="Alignment type. Allowed: nucleotide(default), codon, protein") parser.add_argument( "-l", "--flank_length", action="store", dest="flank_length", default=0, type=int, help= "Flank length. Default: 0, i.e no flanks will be included in the output file" ) args = parser.parse_args() MultipleAlignmentRoutines.get_specific_positions_for_multiple_files( args.input_dir, args.position_file, args.reference_sequence_id, args.output_dir, alignment_file_suffix=args.alignment_file_suffix, format=args.format, gap_symbol=args.gap_symbol, verbose=True, alignment_type=args.type, flank_length=args.flank_length)
help="Format of alignments. Default: fasta") parser.add_argument("-n", "--cds_seqs_format", action="store", dest="cds_format", default="fasta", help="Format of cds sequences. Default: fasta") parser.add_argument( "-i", "--cds_index_file", action="store", dest="cds_index", help="Biopython index of cds files. Default - construct new") parser.add_argument( "-r", "--retain_cds_index", action="store_true", dest="retain_cds_index", help="Retain constructed index after analysis. Default - False") args = parser.parse_args() MultipleAlignmentRoutines.get_codon_alignment_from_files( args.pep_alignment, args.cds_seqs, args.output, cds2protein_accordance_file=args.accordance_file, alignment_format=args.alignment_format, nucleotide_sequence_format=args.cds_format, cds_index_file=args.cds_index, retain_cds_index=args.retain_cds_index)
parser.add_argument(
    "-o", "--output_directory", action="store", dest="output_dir", default="./",
    help="Output directory to write resulting files. Default - current directory")
parser.add_argument("-f", "--format", action="store", dest="format", default="fasta",
                    help="Format of alignments")
parser.add_argument("-g", "--gap_symbol", action="store", dest="gap_symbol", default="-",
                    help="Gap symbol. Default - '-'")

args = parser.parse_args()

# One set of "<name>.unique_positions" outputs is produced per input alignment;
# <name> is element 1 of FileRoutines.split_filename.
for alignment_file in args.input:
    alignment_name_list = FileRoutines.split_filename(alignment_file)
    output_prefix = "%s/%s.unique_positions" % (args.output_dir, alignment_name_list[1])
    # The gap symbol comes from -g/--gap_symbol. It used to be hard-coded to
    # "-", which ignored the option; the option's default is "-", so runs
    # that do not set it behave as before.
    MultipleAlignmentRoutines.count_unique_positions_per_sequence_from_file(
        alignment_file, output_prefix, format=args.format, gap_symbol=args.gap_symbol)
"--genetic_code_table", action="store", dest="genetic_code_table", type=int, default=1, help="Genetic code table number") parser.add_argument("-a", "--gap_symbol_list", action="store", dest="gap_symbol_list", type=lambda s: s.split(","), default=["-"], help="Comma-separated list of gap symbols. Default: '-'") parser.add_argument("-b", "--use_ambiguous_table", action="store_true", dest="use_ambiguous_table", default=False, help="Use ambiguous codon table. Default:False") args = parser.parse_args() MultipleAlignmentRoutines.count_dNdS_by_reference_seq_in_codon_alignment_from_file( args.input, args.ref_seq_id, genetic_code_table=args.genetic_code_table, gap_symbol_list=args.gap_symbol_list, use_ambigious_table=args.ambigious_table, output_file=args.output, format="fasta")
""" def expression_hsp(hsp): # hit_span - length of hit for single-fragment HSP(blast etc). DO NOT work with exonerate return (hsp.evalue <= args.max_e_value) and (hsp.hit_span >= args.min_alignment_length) def iterator(blast_results): for query_id in blast_results: filtered_query = blast_results[query_id].hsp_filter(func=expression_hsp) if filtered_query: yield filtered_query """ print("Parsing input file...") blast_results = SearchIO.index(args.input, args.format) gi_ids_list = map(lambda x: x.split("|")[1], MultipleAlignmentRoutines.get_db_ids(blast_results)) #print(gi_ids_list) print("Downloading sequence summaries...") handle = Entrez.esummary(db=args.db, id=",".join(gi_ids_list)) summaries_list = Entrez.read(handle) tax_id_list = set() with open(args.out_prefix + ".taxid", "w") as out_fd: for record in summaries_list: if "TaxId" in record: tax_id_list.add(str(record["TaxId"])) out_fd.write(str(record["TaxId"]) + "\n") print("Downloading species names...") taxa_handle = Entrez.esummary(db="taxonomy", id=",".join(tax_id_list)) taxa_list = Entrez.read(taxa_handle) with open(args.out_prefix + ".sciname", "w") as taxa_fd: with open(args.out_prefix + ".commonname", "w") as com_fd:
required=True, help="Output file") parser.add_argument("-f", "--format", action="store", dest="format", default="fasta", help="Alignment format. Default: fasta") parser.add_argument("-r", "--remove_Ns", action="store_true", dest="remove_Ns", default=False, help="Remove columns with Ns. Default:False") parser.add_argument("-a", "--remove_columns_with_ambigous_nucleotides", action="store_true", dest="remove_columns_with_ambigous_nucleotides", default=False, help="Remove columns with a. Default:False") args = parser.parse_args() MultipleAlignmentRoutines.extract_variable_sites_from_alignment_from_file( args.input, args.output, format=args.format, remove_columns_with_Ns=args.remove_Ns, remove_columns_with_ambigous_nucleotides=args. remove_columns_with_ambigous_nucleotides)
required=True, type=lambda s: FileRoutines.make_list_of_path_to_files(s.split(",")), help="Comma-separated list of files/directories with alignments") parser.add_argument("-o", "--output", action="store", dest="output", required=True, help="File to write merged alignment") parser.add_argument( "-c", "--coordinates_file", action="store", dest="coords_file", required=True, help="File to write file with coordinates of alignments in merged alignment" ) parser.add_argument("-f", "--format", action="store", dest="format", default="fasta", help="Format of alignments") args = parser.parse_args() MultipleAlignmentRoutines.merge_alignment(args.input, args.output, args.coords_file, format=args.format)