def __call__(self, seq_path, result_path=None, log_path=None,
             failure_path=None):
    """ Aligns the sequences in seq_path against the template alignment

    The template alignment is read from self.Params['template_filepath'];
    '.' characters in the template are replaced with '-' and the sequences
    are upper-cased before the alignment is built. The pairwise aligner is
    looked up in pairwise_alignment_methods using
    self.Params['pairwise_alignment_method'].

    Arguments:
    ----------
    seq_path : str
        path to the fasta file of candidate sequences

    Optional:
    ---------
    result_path : str
        if not None, the aligned sequences are written here as fasta
    log_path : str
        passed to NastLogger
    failure_path : str
        if not None, the sequences that failed to align are written here
        as fasta

    Returns:
    --------
    None if result_path is given, otherwise the Alignment of the
    successfully aligned sequences
    """
    # The candidate file is opened first (as in the original) and is kept
    # open until pynast_seqs returns, because parse_fasta may read lazily
    # from the handle. The 'U' mode flag is kept exactly as the original
    # had it.
    with open(seq_path, 'U') as seq_file:
        # load candidate sequences
        candidate_sequences = parse_fasta(seq_file)

        # load template sequences
        template_alignment_fp = self.Params['template_filepath']
        with open(template_alignment_fp) as template_file:
            # replace '.' characters with '-' characters
            template_records = [
                (seq_id, seq.replace('.', '-').upper())
                for seq_id, seq in parse_fasta(template_file)]
        template_alignment = Alignment.from_fasta_records(
            template_records, DNASequence, validate=True)

        # initialize_logger
        logger = NastLogger(log_path)

        # get function for pairwise alignment method
        pairwise_alignment_f = pairwise_alignment_methods[
            self.Params['pairwise_alignment_method']]

        pynast_aligned, pynast_failed = pynast_seqs(
            candidate_sequences,
            template_alignment,
            min_pct=self.Params['min_pct'],
            min_len=self.Params['min_len'],
            align_unaligned_seqs_f=pairwise_alignment_f,
            logger=logger,
            temp_dir=get_qiime_temp_dir())

    logger.record(str(self))

    # convert the PyNAST results into scikit-bio sequence objects
    pynast_failed = SequenceCollection(
        [DNASequence(str(seq), id=seq.Name) for seq in pynast_failed])
    pynast_aligned = Alignment(
        [DNASequence(str(seq), id=seq.Name) for seq in pynast_aligned])

    if failure_path is not None:
        with open(failure_path, 'w') as fail_file:
            fail_file.write(pynast_failed.to_fasta())

    if result_path is not None:
        with open(result_path, 'w') as result_file:
            result_file.write(pynast_aligned.to_fasta())
        return None

    return pynast_aligned
def dna_to_aa(sequence, try_frames=False):
    """ Translates a DNA nucleotide sequence into an amino acid sequence

    The translation table is the one returned by genetic_code(11).

    Arguments:
    ----------
    sequence : str
        DNA nucleotide sequence

    Optional:
    ---------
    try_frames : Bool
        if True, all six reading frames are translated and the translation
        containing the fewest stop codons ('*') is returned; when several
        frames tie, the first of them is returned
        default = False

    Returns:
    --------
    aa_sequence : str
        sequence of one-letter amino acid codes
    """
    code = genetic_code(11)

    if try_frames:
        frames = code.translate_six_frames(DNASequence(sequence))
        # min() keeps the first frame among those with the lowest count
        best_frame = min(frames, key=lambda frame: frame.sequence.count('*'))
        return best_frame.sequence

    # single frame: translate the sequence exactly as given
    return code.translate(sequence).sequence
def _make_mutant(wt_codon, mut_codons):
    """ Picks the mutant codon closest to the wild type codon

    Used when the mutation needs more than one nucleotide change. Prints
    the number of changed bases for the chosen codon.

    NOTE(review): make_single_mutant calls this as self._make_mutant(...)
    although there is no self parameter here -- presumably a @staticmethod
    decorator sits outside this view; confirm.

    Arguments:
    ----------
    wt_codon : sequence object exposing .distance(other)
        3 nucleotide codon from the wild type sequence for the residue to
        be mutated (make_single_mutant passes a DNASequence)
    mut_codons : list(str)
        all codons that translate to desired mutant residue

    Returns:
    --------
    mut_codon : str
        codon selected from mut_codons which requires the fewest changes
        from the wild type codon; the first one wins on a tie
    """
    candidates = [DNASequence(codon) for codon in mut_codons]
    distances = [wt_codon.distance(candidate) for candidate in candidates]

    smallest = min(distances)
    # the distance is scaled by 3 to report a count of changed bases
    changed_bp = int(smallest * 3)
    print("This mutant required " + str(changed_bp) + "bp modifications\n")

    # choose the codon that requires fewest changes
    return candidates[distances.index(smallest)].sequence
def make_single_mutant(self, wt_res, res_num, mut_res):
    """ Builds forward and reverse primers for a single amino acid mutation

    Determines how many nucleotide changes are required for the desired
    amino acid mutation, then constructs a primer centred on the mutant
    codon. The primer starts with up to 11 nucleotides on each side of the
    codon (25 nucleotides when not clipped by the sequence ends) and is
    widened by self._check_melting_temp until that helper reports an
    acceptable melting temperature or the window exceeds 45 nucleotides.
    (The original documentation gives 78C as the minimum melting
    temperature; the threshold itself lives in _check_melting_temp.)

    The DNA sequence needs to start with the first residue of the protein
    (no promoter, etc). A mutation requiring a single nucleotide change is
    preferred; a message is printed if more changes are required.

    Reads from the instance:
        self.sequence    : DNA sequence
        self.aa_sequence : translation of self.sequence
        self.first_res   : residue id number of the first residue
        self.orig_code   : genetic code object providing .synonyms

    Arguments:
    ----------
    wt_res : char
        single letter amino acid code of wildtype residue to be mutated
    res_num : int
        residue id number of residue to be mutated
    mut_res : char
        single letter amino acid code of mutant residue

    Returns:
    --------
    forward_primer : str
        nucleotide sequence (lower case)
    reverse_primer : str
        reverse complement of forward_primer

    Raises:
    -------
    IOError
        if res_num lies before first_res, or the residue found at res_num
        is not wt_res
    """
    aa_sequence = self.aa_sequence
    sequence = self.sequence
    first_res = self.first_res
    orig_code = self.orig_code

    res_ix = res_num - first_res
    # A negative offset would silently wrap around and select a residue
    # counted from the end of the sequence, so it is rejected explicitly.
    if res_ix < 0 or str(wt_res) != aa_sequence[res_ix]:
        raise IOError("Desired residue not found -- check wildtype residue name and id, and first residue id")

    # start of codon of residue of interest is at (res_num - first_res)*3
    codon_start = res_ix * 3
    codon_end = codon_start + 3
    wt_codon = DNASequence(sequence[codon_start:codon_end])
    mut_codons = orig_code.synonyms[mut_res]

    # prefer a codon that differs from the wild type in exactly one base;
    # as in the original, the last such codon in mut_codons is kept
    mut_codon = None
    for codon in mut_codons:
        if wt_codon.distance(DNASequence(codon)) * 3 == 1:
            mut_codon = codon

    if not mut_codon:
        print("Cannot make desired mutant with a single base change")
        mut_codon = self._make_mutant(wt_codon, mut_codons)

    good_melting_temp = False
    start_ix = max(0, codon_start - 11)
    end_ix = min(len(sequence), codon_end + 11)

    while not good_melting_temp:
        if end_ix - start_ix > 45:
            print("Acceptable melting temp was not found")
            break

        forward_primer = (sequence[start_ix:codon_start] + mut_codon +
                          sequence[codon_end:end_ix])
        forward_primer = forward_primer.lower()

        good_melting_temp, new_start_ix, new_end_ix = \
            self._check_melting_temp(forward_primer, start_ix, end_ix,
                                     len(sequence))

        # If the window did not change and the temperature is still not
        # acceptable, the next pass would rebuild the identical primer
        # (assuming _check_melting_temp is deterministic -- TODO confirm),
        # so stop instead of looping forever.
        if (not good_melting_temp and
                (new_start_ix, new_end_ix) == (start_ix, end_ix)):
            print("Acceptable melting temp was not found")
            break

        start_ix, end_ix = new_start_ix, new_end_ix

    forward_sequence = DNASequence(forward_primer)
    reverse_sequence = forward_sequence.rc()
    reverse_primer = reverse_sequence.sequence

    return forward_primer, reverse_primer