def extract_proteins_from_alignments(self, dir_with_alignments, output_dir):
    """
    Run sequence extraction on every alignment file found in the given location(s).

    :param dir_with_alignments: a single path (str) or a collection of paths;
        expanded into a list of files by self.make_list_of_path_to_files
    :param output_dir: directory for the results; passed through
        self.check_path and then created with self.safe_mkdir

    For each input file the output path is the concatenation of the checked
    output directory with elements [1] and [2] of self.split_filename(file)
    (presumably base name and extension - confirm against split_filename).
    """
    out_dir = self.check_path(output_dir)

    # A lone string is wrapped into a list so the path expander always
    # receives a collection of paths.
    if isinstance(dir_with_alignments, str):
        sources = [dir_with_alignments]
    else:
        sources = dir_with_alignments
    input_files = self.make_list_of_path_to_files(sources)

    self.safe_mkdir(out_dir)

    # Function-scope import of the project's alignment routines.
    from Routines import MultipleAlignmentRoutines

    for alignment_path in input_files:
        name_parts = self.split_filename(alignment_path)
        basename, extension = name_parts[1], name_parts[2]
        target_path = "%s%s%s" % (out_dir, basename, extension)
        MultipleAlignmentRoutines.extract_sequences_from_alignment(alignment_path,
                                                                   target_path)
required=True, type=lambda s: FileRoutines.make_list_of_path_to_files(s.split(",")), help="Comma-separated list of files/directories with alignments") parser.add_argument("-o", "--output", action="store", dest="output", required=True, help="File to write merged alignment") parser.add_argument( "-c", "--coordinates_file", action="store", dest="coords_file", required=True, help="File to write file with coordinates of alignments in merged alignment" ) parser.add_argument("-f", "--format", action="store", dest="format", default="fasta", help="Format of alignments") args = parser.parse_args() MultipleAlignmentRoutines.merge_alignment(args.input, args.output, args.coords_file, format=args.format)
action="store", dest="suffix", default=".gaps_removed", help="Suffix to use in output files. Default: '.gaps_removed'") parser.add_argument("-f", "--format", action="store", dest="format", default="fasta", help="Format of alignment") parser.add_argument("-v", "--verbose", action="store_true", dest="verbose", help="Print not found ids. Default - no") args = parser.parse_args() save_mkdir(args.output) for alignment_file in args.input: splited_filename = split_filename(alignment_file) if args.verbose: print("Handling %s ..." % alignment_file) output_filename = "%s%s%s%s" % (args.output, splited_filename[1], args.suffix, splited_filename[2]) alignment = AlignIO.read(alignment_file, args.format) filtered_alignment = MultipleAlignmentRoutines.remove_columns_with_gaps( alignment, args.max_gap_number, gap_symbol=args.gap_symbol) AlignIO.write(filtered_alignment, output_filename, args.format)
from Routines import MultipleAlignmentRoutines, FileRoutines

# Command-line front end around
# MultipleAlignmentRoutines.get_codon_alignment_from_files: takes a protein
# alignment plus CDS sequences and writes a codon alignment.
parser = argparse.ArgumentParser()

# Required inputs and output
parser.add_argument("-p", "--protein_alignment",
                    dest="pep_alignment",
                    required=True,
                    help="File with protein alignment")
parser.add_argument("-c", "--cds_seqs",
                    dest="cds_seqs",
                    required=True,
                    # comma-separated value is split and expanded to a file list
                    type=lambda s: FileRoutines.make_list_of_path_to_files(s.split(",")),
                    help="Comma-separated list of files/directories with cds sequences")
parser.add_argument("-o", "--output",
                    dest="output",
                    required=True,
                    help="File to write codon alignment")

# Optional id mapping and format settings
parser.add_argument("-a", "--accordance_file",
                    dest="accordance_file",
                    help="File with CDS to protein id accordance")
parser.add_argument("-f", "--alignment_format",
                    dest="alignment_format",
                    default="fasta",
                    help="Format of alignments. Default: fasta")
parser.add_argument("-n", "--cds_seqs_format",
                    dest="cds_format",
                    default="fasta",
                    help="Format of cds sequences. Default: fasta")

# CDS index handling
parser.add_argument("-i", "--cds_index_file",
                    dest="cds_index",
                    help="Biopython index of cds files. Default - construct new")
parser.add_argument("-r", "--retain_cds_index",
                    action="store_true",
                    dest="retain_cds_index",
                    help="Retain constructed index after analysis. Default - False")

args = parser.parse_args()

MultipleAlignmentRoutines.get_codon_alignment_from_files(
    args.pep_alignment,
    args.cds_seqs,
    args.output,
    cds2protein_accordance_file=args.accordance_file,
    alignment_format=args.alignment_format,
    nucleotide_sequence_format=args.cds_format,
    cds_index_file=args.cds_index,
    retain_cds_index=args.retain_cds_index)
"--output_directory", action="store", dest="output_dir", default="./", help= "Output directory to write resulting files. Default - current directory") parser.add_argument("-f", "--format", action="store", dest="format", default="fasta", help="Format of alignments") parser.add_argument("-g", "--gap_symbol", action="store", dest="gap_symbol", default="-", help="Gap symbol. Default - '-'") args = parser.parse_args() for alignment_file in args.input: alignment_name_list = FileRoutines.split_filename(alignment_file) output_file = "%s/%s.position_matrix" % (args.output_dir, alignment_name_list[1]) MultipleAlignmentRoutines.get_position_presence_matrix_fom_file( alignment_file, output_file, format=args.format, gap_symbol=args.gap_symbol)
def expression_hsp(hsp): # hit_span - length of hit for single-fragment HSP(blast etc). DO NOT work with exonerate return (hsp.evalue <= args.max_e_value) and (hsp.hit_span >= args.min_alignment_length) def iterator(blast_results): for query_id in blast_results: filtered_query = blast_results[query_id].hsp_filter(func=expression_hsp) if filtered_query: yield filtered_query """ print("Parsing input file...") blast_results = SearchIO.index(args.input, args.format) gi_ids_list = map(lambda x: x.split("|")[1], MultipleAlignmentRoutines.get_db_ids(blast_results)) #print(gi_ids_list) print("Downloading sequence summaries...") handle = Entrez.esummary(db=args.db, id=",".join(gi_ids_list)) summaries_list = Entrez.read(handle) tax_id_list = set() with open(args.out_prefix + ".taxid", "w") as out_fd: for record in summaries_list: if "TaxId" in record: tax_id_list.add(str(record["TaxId"])) out_fd.write(str(record["TaxId"]) + "\n") print("Downloading species names...") taxa_handle = Entrez.esummary(db="taxonomy", id=",".join(tax_id_list)) taxa_list = Entrez.read(taxa_handle) with open(args.out_prefix + ".sciname", "w") as taxa_fd: with open(args.out_prefix + ".commonname", "w") as com_fd:
import argparse

from Routines import MultipleAlignmentRoutines

# Command-line front end around
# MultipleAlignmentRoutines.prepare_multigene_alignment_for_codeml: reads a
# merged alignment and the file with coordinates of the individual gene
# alignments, and writes an alignment prepared for Codeml.
parser = argparse.ArgumentParser()

parser.add_argument("-i", "--input", action="store", dest="input", required=True,
                    help="File with alignment")
parser.add_argument("-c", "--coordinates", action="store", dest="coordinates", required=True,
                    help="File with coordinates of gene alignments")
parser.add_argument("-o", "--output", action="store", dest="output", required=True,
                    help="File to write alignment prepared for Codeml")
# The alignment format is selectable from the command line; the default
# keeps the "fasta" behaviour for existing invocations.
parser.add_argument("-f", "--format", action="store", dest="format", default="fasta",
                    help="Format of alignment. Default: fasta")

args = parser.parse_args()

MultipleAlignmentRoutines.prepare_multigene_alignment_for_codeml(args.input,
                                                                 args.coordinates,
                                                                 args.output,
                                                                 format=args.format)
args = parser.parse_args()

# Per-alignment results, keyed by the alignment name (element [1] of
# FileRoutines.split_filename); values are whatever
# count_unique_positions_per_sequence_from_file returns in "relative" mode.
unique_position_dict = TwoLvlDict()

FileRoutines.safe_mkdir(args.output_dir)

for alignment_file in args.input:
    alignment_name = FileRoutines.split_filename(alignment_file)[1]
    output_prefix = "%s/%s.unique_positions" % (args.output_dir, alignment_name)
    unique_position_dict[alignment_name] = \
        MultipleAlignmentRoutines.count_unique_positions_per_sequence_from_file(
            alignment_file,
            output_prefix,
            format=args.format,
            gap_symbol="-",
            return_mode="relative",
            verbose=False)

# sl_keys() presumably yields the second-level keys (species) - confirm
# against TwoLvlDict.
species_list = unique_position_dict.sl_keys()

# For every species collect its value from each alignment, in the iteration
# order of unique_position_dict.
data_dict = OrderedDict()
for species in species_list:
    data_dict[species] = [unique_position_dict[alignment][species]
                          for alignment in unique_position_dict]

# Per-species value lists, in insertion order of data_dict.
data_list = list(data_dict.values())
# Options controlling where results go and how alignments are read.
parser.add_argument("-o", "--output_directory", action="store", dest="output_dir", default="./",
                    help="Output directory to write resulting files. Default - current directory")
parser.add_argument("-f", "--format", action="store", dest="format", default="fasta",
                    help="Format of alignments")
parser.add_argument("-g", "--gap_symbol", action="store", dest="gap_symbol", default="-",
                    help="Gap symbol. Default - '-'")

args = parser.parse_args()

# Count unique positions per sequence for every input alignment; results go
# to <output_dir>/<alignment name>.unique_positions (used as output prefix).
for alignment_file in args.input:
    alignment_name_list = FileRoutines.split_filename(alignment_file)
    output_prefix = "%s/%s.unique_positions" % (args.output_dir, alignment_name_list[1])
    # Pass the -g/--gap_symbol value through so the option takes effect;
    # its default ("-") matches the symbol used when the option is omitted.
    MultipleAlignmentRoutines.count_unique_positions_per_sequence_from_file(alignment_file,
                                                                            output_prefix,
                                                                            format=args.format,
                                                                            gap_symbol=args.gap_symbol)
#!/usr/bin/env python
__author__ = 'Sergei F. Kliver'

import argparse

from Routines import MultipleAlignmentRoutines

# Command-line front end around
# MultipleAlignmentRoutines.extract_degenerate_sites_from_codon_alignment_from_file.
parser = argparse.ArgumentParser()

# Input alignment and destination file (both mandatory)
parser.add_argument("-i", "--input",
                    dest="input",
                    required=True,
                    help="Input file with alignment")
parser.add_argument("-o", "--output",
                    dest="output",
                    required=True,
                    help="File to write alignment of degenerate sites")

# Optional settings
parser.add_argument("-f", "--format",
                    dest="format",
                    default="fasta",
                    help="Alignment format. Default: fasta")
parser.add_argument("-g", "--genetic_code_table",
                    dest="genetic_code_table",
                    type=int,
                    default=1,
                    help="Genetic code table number")

args = parser.parse_args()

MultipleAlignmentRoutines.extract_degenerate_sites_from_codon_alignment_from_file(
    args.input,
    args.output,
    genetic_code_table=args.genetic_code_table,
    format=args.format)
required=True, help="Output file with protein alignment") parser.add_argument("-f", "--format", action="store", dest="format", default="fasta", help="Format of alignments. Default: fasta") parser.add_argument("-g", "--gap_symbol", action="store", dest="gap_symbol", default="-", help="Gap symbol in alignment. Default: '-'") parser.add_argument( "-t", "--genetic_code", action="store", dest="genetic_code", default=1, type=int, help="Genetic code to use(NCBI tables) . Default: 1(standart)") args = parser.parse_args() MultipleAlignmentRoutines.translate_codon_alignment(args.codon_alignment, args.protein_alignment, format=args.format, gap_symbol=args.gap_symbol, table=args.genetic_code)