def _MakeAln(query_id, hit_id, query_string, templ_string, q_offset, t_offset):
    """Build a two-sequence alignment from a query and a template string.

    :param query_id: Name given to the first (query) sequence.
    :param hit_id: Name given to the second (template) sequence.
    :param query_string: Aligned query sequence string.
    :param templ_string: Aligned template sequence string.
    :param q_offset: Query start position; stored on the sequence minus one.
    :param t_offset: Template start position; stored on the sequence minus one.
    :returns: The result of ``seq.CreateAlignment`` for query and template,
              in that order.
    """
    query = seq.CreateSequence(query_id, query_string)
    template = seq.CreateSequence(hit_id, templ_string)
    # The incoming positions are shifted down by one before being stored
    # (presumably 1-based input to 0-based offsets -- confirm with callers).
    query.offset = q_offset - 1
    template.offset = t_offset - 1
    return seq.CreateAlignment(query, template)
def _ParseHsp(query_id, hit_id, hsp, tot_query_len, seqid_thres=0,
              evalue_thres=float("infinity")):
    """Turn one BLAST XML ``Hsp`` node into an ``AlignedPatch``.

    :param query_id: Identifier used as the name of the query sequence.
    :param hit_id: Identifier used as the name of the hit sequence.
    :param hsp: The Hsp node; it is read through ``_GetValue``/``_GetInt``.
    :param tot_query_len: Not used by this function.
    :param seqid_thres: The patch is only returned if the sequence identity
                        (``Hsp_identity / Hsp_align-len``) is strictly larger
                        than this value.
    :param evalue_thres: The patch is only returned if the e-value is strictly
                         smaller than this value.
    :returns: An ``AlignedPatch``, or ``None`` if a threshold is not met or
              an exception occurs while the patch is being built (the
              exception is printed, not raised).
    """
    bit_score = float(_GetValue(hsp, 'Hsp_bit-score'))
    score = float(_GetValue(hsp, 'Hsp_score'))
    evalue = float(_GetValue(hsp, 'Hsp_evalue'))
    try:
        identity = float(_GetValue(hsp, 'Hsp_identity'))
    except AssertionError:
        # Hsp_identity (the number of matching characters) is optional in the
        # BLAST XML format; a missing tag is treated as zero matches.
        identity = 0
    aligned_length = float(_GetValue(hsp, 'Hsp_align-len'))
    seqid = identity / aligned_length

    query_start = _GetInt(hsp, 'Hsp_query-from') - 1
    hit_start = _GetInt(hsp, 'Hsp_hit-from') - 1

    query_seq = seq.CreateSequence(str(query_id),
                                   str(_GetValue(hsp, 'Hsp_qseq')))
    query_seq.offset = query_start
    hit_seq = seq.CreateSequence(str(hit_id),
                                 str(_GetValue(hsp, 'Hsp_hseq')))
    hit_seq.offset = hit_start

    try:
        passes = seqid > float(seqid_thres) and evalue < evalue_thres
        if not passes:
            return None
        patch_aln = seq.CreateAlignment(query_seq, hit_seq)
        return AlignedPatch(patch_aln, bit_score, score, evalue, seqid)
    except Exception as e:
        # Best effort: report the problem and yield no patch for this HSP.
        print(str(e), query_seq, hit_seq)
    return None
def _RunkClust(tmp_dir_name, clustering_thresh, create_alignments):
    """Run the kClust executable on ``fastadb.fasta`` and parse its output.

    The executable is started with an argument list (no shell), so paths
    containing whitespace or shell metacharacters are passed through
    unchanged.

    :param tmp_dir_name: Directory that contains ``fastadb.fasta``; it is also
                         passed to kClust via ``-d`` and afterwards handed to
                         ``_ParseOutput``.
    :param clustering_thresh: Threshold that is converted to the value passed
                              via ``-s`` by a fixed linear formula.
    :param create_alignments: If true, an alignment is attached to every
                              cluster: ClustalW for clusters with more than
                              one sequence, a single-sequence alignment
                              otherwise.
    :returns: The clusters returned by ``_ParseOutput``.
    """
    bitscore = clustering_thresh * 0.060269 - 0.68498

    executable = settings.Locate('kClust')
    cmd = [
        executable,
        '-i', os.path.join(tmp_dir_name, 'fastadb.fasta'),
        '-d', tmp_dir_name,
        '-s', str(bitscore),
    ]

    # Pass the argument vector directly instead of a joined shell string.
    ps = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # Drain both pipes and wait for termination; the output is not used.
    ps.communicate()

    result = _ParseOutput(tmp_dir_name)

    if create_alignments:
        from ost.bindings import clustalw
        for c in result:
            if len(c.sequences) > 1:
                c.alignment = clustalw.ClustalW(c.sequences)
            else:
                aln = seq.CreateAlignment()
                aln.AddSequence(c.sequences[0])
                c.alignment = aln

    return result
def _ParseTmAlign(lines, lines_matrix):
    """Parse TMalign text output into a ``TMAlignResult``.

    The parser relies on fixed positions: alignment length and RMSD come from
    ``lines[12]``, the TM-score from ``lines[14]``, the aligned sequences from
    ``lines[18]`` and ``lines[20]``, and the transformation from rows 2-4 of
    ``lines_matrix``.

    :param lines: Lines of the program output.
    :param lines_matrix: Lines of the matrix output.
    :returns: ``ost.bindings.TMAlignResult`` built from RMSD, TM-score,
              alignment length, transformation and alignment.
    """
    summary = lines[12].split(',')
    aln_length = int(summary[0].split('=')[1].strip())
    rmsd = float(summary[1].split('=')[1].strip())
    tm_score = float(lines[14].split('=')[1].split('(')[0].strip())

    # Each matrix row is: index, translation, then three rotation entries.
    rows = [[float(token) for token in lines_matrix[row_idx].split()]
            for row_idx in (2, 3, 4)]
    rot = geom.Mat3(*[row[col] for row in rows for col in (2, 3, 4)])
    tf = geom.Mat4(rot)
    tf.PasteTranslation(geom.Vec3(rows[0][1], rows[1][1], rows[2][1]))

    first = seq.CreateSequence("1", lines[18].strip())
    second = seq.CreateSequence("2", lines[20].strip())
    # Sequence "2" is added before sequence "1".
    alignment = seq.CreateAlignment()
    alignment.AddSequence(second)
    alignment.AddSequence(first)
    return ost.bindings.TMAlignResult(rmsd, tm_score, aln_length, tf,
                                      alignment)
def testAlnSlice(self):
    """Slicing an alignment selects columns, including negative bounds."""
    rows = (('A', 'abcd'), ('B', 'efgh'), ('C', 'ijkl'), ('D', 'mnop'))
    aln = seq.CreateAlignment(*[seq.CreateSequence(name, text)
                                for name, text in rows])

    # (slice applied to the alignment, columns expected in the result)
    cases = (
        (slice(1, None), ('bfjn', 'cgko', 'dhlp')),
        (slice(None, -1), ('aeim', 'bfjn', 'cgko')),
        (slice(-1, None), ('dhlp',)),
    )
    for window, expected_columns in cases:
        sliced = aln[window]
        self.assertEqual(len(sliced), len(expected_columns))
        for idx, column in enumerate(expected_columns):
            self.assertEqual(str(sliced[idx]), column)
def _ParseiAlign(lines):
    """Parse iAlign text output into an ``iAlignResult``.

    The parser relies on fixed positions: IS-score from ``lines[18]``, number
    of aligned residues and contacts from ``lines[19]`` and ``lines[20]``,
    RMSD from ``lines[21]``, the transformation from ``lines[25]`` to
    ``lines[27]`` and the aligned sequences from ``lines[32]`` and
    ``lines[34]``.

    :param lines: Lines of the program output.
    :returns: ``iAlignResult`` built from RMSD, transformation, alignment,
              IS-score, aligned residues and aligned contacts.
    """
    def _after_equals(text):
        # Value following the first '=' (up to any second '='), trimmed.
        return text.split('=')[1].strip()

    def _matrix_row(text):
        # The first character of a matrix line is skipped before splitting.
        return [float(token) for token in text[1:].split()]

    is_score = float(_after_equals(lines[18].split(',')[0]))
    aln_residues = int(_after_equals(lines[19]))
    aln_contacts = int(_after_equals(lines[20]))
    rmsd = float(_after_equals(lines[21].split(',')[0]))

    # Each matrix row is: index, translation, then three rotation entries.
    rows = [_matrix_row(lines[row_idx]) for row_idx in (25, 26, 27)]
    rot = geom.Mat3(*[row[col] for row in rows for col in (2, 3, 4)])
    tf = geom.Mat4(rot)
    tf.PasteTranslation(geom.Vec3(rows[0][1], rows[1][1], rows[2][1]))

    first = seq.CreateSequence("1", lines[32].strip())
    second = seq.CreateSequence("2", lines[34].strip())
    # Sequence "2" is added before sequence "1".
    alignment = seq.CreateAlignment()
    alignment.AddSequence(second)
    alignment.AddSequence(first)
    return iAlignResult(rmsd, tf, alignment, is_score, aln_residues,
                        aln_contacts)
def __DisplayAlignment(self, ent_list, res_list):
    """Merge pairwise results onto the reference and show them in a viewer.

    Nothing happens if ``res_list`` is empty or the reference sequence is
    not valid.

    :param ent_list: Entities; the first one is the reference, entity
                     ``i + 1`` belongs to ``res_list[i]``.
    :param res_list: Results, each with an ``alignment`` whose sequence at
                     index 1 is the reference.
    """
    if len(res_list) == 0:
        return

    ref_name = "%s (ref)" % ent_list[0].GetName()
    ref_seq = seq.CreateSequence(
        ref_name, res_list[0].alignment.GetSequence(1).GetGaplessString())
    pairwise = seq.AlignmentList()
    if not ref_seq.IsValid():
        return

    for idx, result in enumerate(res_list):
        # WrappedTMAlign returns an alignment with the second sequence being
        # the reference, so the two sequences are swapped here.
        swapped = seq.CreateAlignment()
        swapped.AddSequence(result.alignment.GetSequence(1))
        swapped.AddSequence(result.alignment.GetSequence(0))
        swapped.SetSequenceName(1, ent_list[idx + 1].GetName())
        pairwise.append(swapped)
    alignment = alg.MergePairwiseAlignments(pairwise, ref_seq)

    gosty = gui.GostyApp.Instance()
    # The main area is queried as before; its value is not used here.
    gosty.perspective.GetMainArea()

    # Close the previous viewer, if any, before opening a new one.
    if self.seq_viewer:
        self.seq_viewer.qobject.close()
    viewer = gui.SequenceViewer(True)
    self.seq_viewer = viewer
    viewer.AddAlignment(alignment)
    viewer.ChangeDisplayMode("Highlight conservation 1")
    viewer.Show()
def AlignmentFromChainView(chain, handle_seq_name='handle',
                           view_seq_name='view'):
    """
    Creates and returns the sequence alignment of the given chain view to the
    chain handle. The alignment holds two sequences: the first lists all
    non-ligand peptide-linking residues of the handle, the second lists those
    of them that are part of the view and has a gap everywhere else.

    :param chain: A valid chain
    :type chain: :class:`~ost.mol.ChainView`

    :param handle_seq_name: Name of the handle sequence in the output
                            alignment
    :param view_seq_name: Name of the view sequence in the output alignment
    :returns: The alignment
    :rtype: :class:`~ost.seq.AlignmentHandle`
    """
    from ost import seq

    query = 'ligand=false and peptide=true'
    handle_sel = chain.handle.Select(query)
    view_sel = chain.Select(query)

    handle_seq = seq.CreateSequence(handle_seq_name, '')
    view_seq = seq.CreateSequence(view_seq_name, '')
    handle_seq.AttachView(handle_sel)
    view_seq.AttachView(view_sel)

    view_residues = view_sel.residues
    cursor = 0  # index of the next not yet matched residue of the view
    for residue in handle_sel.residues:
        handle_seq.Append(residue.one_letter_code)
        matched = (cursor < len(view_residues)
                   and view_residues[cursor].handle == residue.handle)
        if not matched:
            view_seq.Append('-')
            continue
        view_seq.Append(view_residues[cursor].one_letter_code)
        cursor += 1
    return seq.CreateAlignment(handle_seq, view_seq)
def AlignToSEQRES(chain, seqres, try_resnum_first=False, validate=True):
    """
    Aligns the residues of chain to the SEQRES sequence, inserting gaps where
    needed. The function uses the connectivity of the protein backbone to
    find consecutive peptide fragments. These fragments are then aligned to
    the SEQRES sequence.

    All the non-ligand, peptide-linking residues of the chain must be listed
    in SEQRES. If there are any additional residues in the chain, the
    function raises a ValueError.

    :param chain: Source of the sequence
    :type chain: :class:`~ost.mol.ChainHandle`
    :param seqres: SEQRES sequence
    :type seqres: :class:`str`
    :param try_resnum_first: If set to True, this first builds an alignment
                             using residue numbers and checks if the
                             one-letter-codes match. If they all match, this
                             alignment is used (and possibly validated).
                             Otherwise, it displays a warning and falls back
                             to the connectivity-based alignment.
    :type try_resnum_first: :class:`bool`
    :param validate: If set to True, the alignment is additionally checked by
                     :func:`~ost.seq.alg.ValidateSEQRESAlignment` and raises
                     a ValueError if the validation failed.
    :type validate: :class:`bool`

    :returns: The alignment of the residues in the chain and the SEQRES
              entries.
    :rtype: :class:`~ost.seq.AlignmentHandle`
    """
    from ost import seq
    from ost import mol
    from ost import LogWarning

    def IsEqual(olc1, olc2):
        # 'X' and '?' on either side match any one-letter code.
        wildcards = ('X', '?')
        return olc1 in wildcards or olc2 in wildcards or olc1 == olc2

    view = chain
    residues = view.residues
    if len(residues) == 0:
        return seq.CreateAlignment()

    use_resnum = try_resnum_first
    if use_resnum:
        aln_seq = seq.CreateSequence('atoms', '-' * len(seqres))
        for residue in residues:
            num = residue.number.num
            # Residue numbers outside of 1..len(seqres) are skipped.
            if not (num <= len(seqres) and num > 0):
                continue
            if IsEqual(seqres[num - 1], residue.one_letter_code):
                aln_seq[num - 1] = residue.one_letter_code
            else:
                LogWarning('Sequence mismatch: chain has "'
                           + residue.one_letter_code
                           + '", while SEQRES is "' + seqres[num - 1]
                           + '" at the corresponding position.')
                # Give up on residue numbers and use connectivity instead.
                use_resnum = False
                break

    if not use_resnum:
        # Split the chain into stretches of consecutively bonded residues.
        fragments = [residues[0].one_letter_code]
        for previous, current in zip(residues[:-1], residues[1:]):
            if not mol.InSequence(previous.handle, current.handle):
                fragments.append('')
            fragments[-1] += current.one_letter_code

        # Place each fragment at its first occurrence after the previous one.
        seqres_str = str(seqres)
        cursor = 0
        aligned = ''
        for fragment in fragments:
            hit = seqres_str.find(fragment, cursor)
            if hit == -1:
                raise ValueError('"%s" is not a substring of "%s"'
                                 % (fragment, seqres_str))
            aligned += '-' * (hit - cursor) + fragment
            cursor = hit + len(fragment)
        trailing_gaps = '-' * (len(seqres) - len(aligned))
        aln_seq = seq.CreateSequence('atoms', aligned + trailing_gaps)

    alignment = seq.CreateAlignment(
        seq.CreateSequence('SEQRES', str(seqres)), aln_seq)
    if validate and not ValidateSEQRESAlignment(alignment, view):
        raise ValueError(
            "SEQRES cannot be aligned with its corresponding chain.")
    return alignment
def ParseHHM(profile):
    '''
    Parse secondary structure information and the MSA out of an HHM profile
    as produced by :meth:`HHblits.A3MToProfile`.

    :param profile: Opened file handle holding the profile.
    :type profile: :class:`file`

    :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
             (:class:`list`), "msa" (:class:`~ost.seq.AlignmentHandle`) and
             "consensus" (:class:`~ost.seq.SequenceHandle`).
    '''
    state = 'NONE'
    pred_parts = []
    conf_parts = []
    consensus_parts = []
    msa_seq = []
    msa_head = []

    for line in profile:
        text = line.rstrip()
        if not text:
            continue
        if text == '>ss_pred PSIPRED predicted secondary structure':
            state = 'sspred'
        elif text == '>ss_conf PSIPRED confidence values':
            state = 'ssconf'
        elif text == '>Consensus':
            state = 'consensus'
        elif line[0] == '>':
            # Any other header opens a new MSA entry; it is only accepted
            # after the consensus section or another MSA entry.
            if state not in ('consensus', 'msa'):
                raise IOError('Profile file "%s" is missing ' % profile.name
                              + 'the "Consensus" section')
            msa_seq.append('')
            msa_head.append(line[1:].rstrip())
            state = 'msa'
        elif line[0] == '#':
            state = 'NONE'
        elif state == 'sspred':
            pred_parts.append(text)
        elif state == 'ssconf':
            conf_parts.append(text)
        elif state == 'msa':
            msa_seq[-1] += text
        elif state == 'consensus':
            consensus_parts.append(text)

    pred_seq_txt = ''.join(pred_parts)
    conf_seq_txt = ''.join(conf_parts)
    consensus_txt = ''.join(consensus_parts)

    profile_dict = {}
    profile_dict['ss_pred'] = list(pred_seq_txt)
    # One confidence digit is read for every predicted position.
    profile_dict['ss_conf'] = [int(conf_seq_txt[pos])
                               for pos in range(len(pred_seq_txt))]

    # MSA: pair every further entry with the first one, then merge.
    profile_dict['msa'] = None
    if len(msa_seq):
        master = msa_seq[0]
        pairwise = seq.AlignmentList()
        for head, row in zip(msa_head[1:], msa_seq[1:]):
            master_chars = []
            row_chars = []
            master_pos = 0
            for char in row:
                if char.islower():
                    # Lowercase marks an insertion: gap in the master.
                    master_chars.append('-')
                    row_chars.append(char.upper())
                else:
                    master_chars.append(master[master_pos])
                    row_chars.append(char)
                    master_pos += 1
            pairwise.append(seq.CreateAlignment(
                seq.CreateSequence(msa_head[0], ''.join(master_chars)),
                seq.CreateSequence(head, ''.join(row_chars))))
        profile_dict['msa'] = seq.alg.MergePairwiseAlignments(
            pairwise, seq.CreateSequence(msa_head[0], master))

    # Consensus
    profile_dict['consensus'] = seq.CreateSequence('Consensus',
                                                   consensus_txt)
    return profile_dict
def ParseA3M(a3m_file):
    '''
    Parse secondary structure information and the multiple sequence alignment
    out of an A3M file as produced by :meth:`HHblits.BuildQueryMSA`.

    :param a3m_file: Iterable containing the lines of the A3M file
    :type a3m_file: iterable (e.g. an open file handle)

    :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
             (:class:`list`) and "msa" (:class:`~ost.seq.AlignmentHandle`).
             "msa" is ``None`` if the file holds fewer than two sequences.
    '''
    state = 'NONE'
    pred_parts = []
    conf_parts = []
    msa_seq = []
    msa_head = []

    for line in a3m_file:
        text = line.rstrip()
        if not text:
            continue
        if line.startswith('>ss_pred'):
            state = 'sspred'
        elif line.startswith('>ss_conf'):
            state = 'ssconf'
        elif line[0] == '>':
            # Any other header opens a new MSA entry; it is only accepted
            # after the ss_conf section or another MSA entry.
            if state not in ('ssconf', 'msa'):
                raise IOError(
                    'The A3M file is missing the "ss_conf" section')
            msa_seq.append('')
            msa_head.append(line[1:].rstrip())
            state = 'msa'
        elif state == 'sspred':
            pred_parts.append(text)
        elif state == 'ssconf':
            conf_parts.append(text)
        elif state == 'msa':
            msa_seq[-1] += text

    pred_seq_txt = ''.join(pred_parts)
    conf_seq_txt = ''.join(conf_parts)

    profile_dict = {}
    profile_dict['ss_pred'] = list(pred_seq_txt)
    # One confidence digit is read for every predicted position.
    profile_dict['ss_conf'] = [int(conf_seq_txt[pos])
                               for pos in range(len(pred_seq_txt))]

    # MSA: pair every further entry with the first one, then merge.
    profile_dict['msa'] = None
    if len(msa_seq) > 1:
        master = msa_seq[0]
        pairwise = seq.AlignmentList()
        for head, row in zip(msa_head[1:], msa_seq[1:]):
            master_chars = []
            row_chars = []
            master_pos = 0
            for char in row:
                if char.islower():
                    # Lowercase marks an insertion: gap in the master.
                    master_chars.append('-')
                    row_chars.append(char.upper())
                else:
                    master_chars.append(master[master_pos])
                    row_chars.append(char)
                    master_pos += 1
            pairwise.append(seq.CreateAlignment(
                seq.CreateSequence(msa_head[0], ''.join(master_chars)),
                seq.CreateSequence(head, ''.join(row_chars))))
        profile_dict['msa'] = seq.alg.MergePairwiseAlignments(
            pairwise, seq.CreateSequence(msa_head[0], master))
    return profile_dict
def testValidateEmptySequenceWorking(self):
    """An empty SEQRES/atoms alignment validates against a default chain."""
    seqres = seq.CreateSequence('SEQRES', '')
    atoms = seq.CreateSequence('atoms', '')
    alignment = seq.CreateAlignment(seqres, atoms)
    chain = mol.ChainHandle()
    is_valid = seq.alg.ValidateSEQRESAlignment(alignment, chain)
    self.assertEqual(is_valid, True)