def getMapPeptide2Cds(peptide_sequence, cds_sequence, options):
    """get map between peptide sequence and cds sequence.

    Spaces are stripped from the peptide; spaces, ``.`` and ``-`` are
    stripped from the cds. The upper-cased peptide is converted with
    ``Genomics.Protein2Wobble`` and the result is handed, together with
    the upper-cased cds and the peptide, to ``AlignCodonBased``.

    The alignment is in nucleotides.

    NOTE(review): this definition ends right after the codon-based
    alignment and has no ``return`` statement, so it yields ``None``.
    A second definition with the same name follows in this file and
    appears to supersede this one -- confirm whether this copy can go.

    Arguments:
        peptide_sequence: peptide as a string; may contain spaces.
        cds_sequence: coding sequence as a string; may contain spaces
            and the gap characters ``.`` and ``-``.
        options: object providing ``loglevel`` (compared against 6)
            and ``stdlog`` (needs ``write`` and ``flush``).

    Raises:
        ValueError: if ``AlignCodonBased`` raises ``ValueError``; the
            original message is embedded in the new one.
    """

    # remove whitespaces from protein sequence
    p = re.sub(" ", "", peptide_sequence)

    # remove gaps and whitespaces from cds
    c = re.sub("[ .-]", "", cds_sequence)

    w = Genomics.Protein2Wobble(p.upper())

    # dump all three sequences with their lengths at high verbosity
    if options.loglevel >= 6:
        options.stdlog.write("# peptide original (%5i): %s\n" % (len(p), p))
        options.stdlog.write("# cds original (%5i): %s\n" % (len(c), c))
        options.stdlog.write("# wobble sequence (%5i): %s\n" % (len(w), w))
        options.stdlog.flush()

    seq_wobble = alignlib_lite.py_makeSequence(w)
    seq_cds = alignlib_lite.py_makeSequence(c.upper())
    seq_peptide = alignlib_lite.py_makeSequence(p)

    # presumably filled in place by AlignCodonBased -- TODO confirm
    map_p2c = alignlib_lite.py_makeAlignmentVector()

    try:
        AlignCodonBased(seq_wobble, seq_cds, seq_peptide, map_p2c,
                        options=options)
    # "except X as name" works on Python 2.6+ and Python 3; the older
    # "except X, name" spelling is a syntax error on Python 3.
    except ValueError as msg:
        raise ValueError("mapping error for sequence: %s" % (msg))
def getMapPeptide2Cds(peptide_sequence, cds_sequence, options):
    """Build the alignment that maps a peptide onto its cds.

    The peptide (spaces removed) is upper-cased and converted with
    ``Genomics.Protein2Wobble``; that wobble sequence is aligned to the
    cds (spaces, ``.`` and ``-`` removed, upper-cased) with
    ``AlignCodonBased``. When the gap count of that alignment, less
    three per ``-`` in the peptide and less the length difference of
    wobble and cds, exceeds five, the alignment is recomputed with
    ``AlignExhaustive``. Afterwards every row triplet (codon) that has
    at least one unaligned position is removed from the alignment.

    The returned alignment is in nucleotides.

    Arguments:
        peptide_sequence: peptide as a string; may contain spaces.
        cds_sequence: coding sequence as a string; may contain spaces
            and the gap characters ``.`` and ``-``.
        options: object providing ``loglevel`` (thresholds 1, 2 and 6
            are used here) and ``stdlog`` (``write`` and ``flush``).

    Returns:
        The alignment object created by
        ``alignlib_lite.py_makeAlignmentVector``.

    Raises:
        ValueError: if ``AlignCodonBased`` raises ``ValueError``, or if
            the alignment is empty after incomplete codons are removed.
    """
    # blanks out of the protein; blanks and gap characters out of the cds
    peptide = re.sub(" ", "", peptide_sequence)
    cds = re.sub("[ .-]", "", cds_sequence)
    wobble = Genomics.Protein2Wobble(peptide.upper())

    if options.loglevel >= 6:
        for template, residues in (
                ("# peptide original (%5i): %s\n", peptide),
                ("# cds original (%5i): %s\n", cds),
                ("# wobble sequence (%5i): %s\n", wobble)):
            options.stdlog.write(template % (len(residues), residues))
        options.stdlog.flush()

    wobble_seq = alignlib_lite.py_makeSequence(wobble)
    cds_seq = alignlib_lite.py_makeSequence(cds.upper())
    peptide_seq = alignlib_lite.py_makeSequence(peptide)
    alignment = alignlib_lite.py_makeAlignmentVector()

    try:
        AlignCodonBased(wobble_seq, cds_seq, peptide_seq, alignment,
                        options=options)
    except ValueError as error:
        raise ValueError("mapping error for sequence: %s" % error)

    # more than this many unexplained gaps triggers the exhaustive
    # alignment ("more than five frameshifts")
    gap_limit = 5
    peptide_gap_count = peptide.count("-")
    # discount gaps accounted for by peptide gap characters (three
    # nucleotides each) and by the wobble/cds length difference
    unexplained_gaps = (alignment.getNumGaps()
                        - peptide_gap_count * 3
                        - abs(len(wobble) - len(cds)))

    if options.loglevel >= 6:
        options.stdlog.write(
            "# alignment between wobble and cds: ngaps=%i, npeptide_gaps=%i\n"
            % (unexplained_gaps, peptide_gap_count))
        printPrettyAlignment(wobble_seq, cds_seq, peptide, alignment, options)

    if unexplained_gaps > gap_limit:
        if options.loglevel >= 2:
            options.stdlog.write(
                "# too many gaps (%i>%i), realigning exhaustively.\n"
                % (unexplained_gaps, gap_limit))
            options.stdlog.flush()

        exhaustive = alignlib_lite.py_makeAlignmentVector()
        AlignExhaustive(wobble_seq, cds_seq, peptide_seq, exhaustive, options)

        if options.loglevel >= 6:
            options.stdlog.write("# full alignment between wobble and cds:\n")
            options.stdlog.flush()
            printPrettyAlignment(wobble_seq, cds_seq, peptide, exhaustive,
                                 options)

        alignment = exhaustive

    # drop every codon in which at least one of the three rows is unaligned
    for codon_start in range(0, len(peptide) * 3, 3):
        if any(alignment.mapRowToCol(codon_start + offset) < 0
               for offset in range(3)):
            alignment.removeRowRegion(codon_start, codon_start + 3)

    if alignment.getLength() != 0:
        # the alignment must not extend past either sequence
        assert alignment.getRowTo() <= wobble_seq.getLength()
        assert alignment.getColTo() <= cds_seq.getLength()
        return alignment

    # nothing left after codon clean-up: report and fail
    if options.loglevel >= 1:
        options.stdlog.write("# WARNING: empty alignment\n")
        if options.loglevel >= 6:
            options.stdlog.write("# peptide original: %s\n" % peptide)
            options.stdlog.write("# cds original : %s\n" % cds)
            options.stdlog.write("# wobble sequence : %s\n" % wobble)

    raise ValueError("empty alignment")