def _MakeAln(query_id, hit_id, query_string, templ_string,
             q_offset, t_offset):
    """
    Build a pairwise alignment of a query and a hit sequence.

    :param query_id: Name given to the first (query) sequence
    :param hit_id: Name given to the second (hit) sequence
    :param query_string: Aligned query sequence string
    :param templ_string: Aligned hit/template sequence string
    :param q_offset: Start position of the query; stored minus one as the
                     sequence offset
    :param t_offset: Start position of the hit; stored minus one as the
                     sequence offset
    :returns: Alignment created from the query (first) and hit (second)
              sequence
    """
    query_seq = seq.CreateSequence(query_id, query_string)
    query_seq.offset = q_offset - 1
    templ_seq = seq.CreateSequence(hit_id, templ_string)
    templ_seq.offset = t_offset - 1
    return seq.CreateAlignment(query_seq, templ_seq)
def _ParseHsp(query_id, hit_id, hsp, tot_query_len, seqid_thres=0,
              evalue_thres=float("infinity")):
    """
    Turn one BLAST XML ``Hsp`` element into an aligned patch.

    :param query_id: Identifier used as name of the query sequence
    :param hit_id: Identifier used as name of the hit sequence
    :param hsp: The ``Hsp`` node the values are read from
    :param tot_query_len: Not used by this function
    :param seqid_thres: The patch is only returned if its sequence identity
                        is strictly larger than this value
    :param evalue_thres: The patch is only returned if its e-value is
                         strictly smaller than this value
    :returns: An ``AlignedPatch`` or ``None`` if the thresholds are not met or
              building the alignment failed (the error is printed).
    """
    bit_score = float(_GetValue(hsp, 'Hsp_bit-score'))
    raw_score = float(_GetValue(hsp, 'Hsp_score'))
    e_value = float(_GetValue(hsp, 'Hsp_evalue'))
    try:
        n_identical = float(_GetValue(hsp, 'Hsp_identity'))
    except AssertionError:
        # Hsp_identity (the number of matching characters) is optional in the
        # BLAST XML format; a missing tag is treated as zero matches.
        n_identical = 0
    aln_length = float(_GetValue(hsp, 'Hsp_align-len'))
    seq_identity = n_identical / aln_length

    # the XML positions are shifted by one to obtain the sequence offsets
    q_start = _GetInt(hsp, 'Hsp_query-from') - 1
    h_start = _GetInt(hsp, 'Hsp_hit-from') - 1

    q_string = str(_GetValue(hsp, 'Hsp_qseq'))
    query_seq = seq.CreateSequence(str(query_id), q_string)
    query_seq.offset = q_start
    h_string = str(_GetValue(hsp, 'Hsp_hseq'))
    hit_seq = seq.CreateSequence(str(hit_id), h_string)
    hit_seq.offset = h_start

    try:
        if seq_identity > float(seqid_thres) and e_value < evalue_thres:
            patch_aln = seq.CreateAlignment(query_seq, hit_seq)
            return AlignedPatch(patch_aln, bit_score, raw_score, e_value,
                                seq_identity)
    except Exception as e:
        # best effort: report the problem and fall through to return None
        print(str(e), query_seq, hit_seq)
    return None
def testNormalise(self):
    # Normalise() must drop the gap characters and upper-case the letters;
    # both inputs are expected to end up as the same string.
    for raw_string in ("B-D-FGH", "b.d-fgh"):
        seq_a = seq.CreateSequence("A", raw_string)
        # before normalising the string is stored unchanged
        self.assertEqual(raw_string, seq_a.GetString())
        seq_a.Normalise()
        self.assertEqual("BDFGH", seq_a.GetString())
def testViewsFromSequences_03(self):
    # Two gapped sequences with attached views: only the columns where both
    # sequences have a residue are expected in the returned views.
    first = seq.CreateSequence("A", "ABCD--GH")
    first.AttachView(self.ent.Select('rname=A,B,C,D,G,H'))
    second = seq.CreateSequence("B", "ABCD-FGH")
    second.AttachView(self.ent.Select('rname=A,B,C,D,F,G,H'))

    view_a, view_b = seq.ViewsFromSequences(first, second)

    letters_a = ''.join(res.one_letter_code for res in view_a.residues)
    letters_b = ''.join(res.one_letter_code for res in view_b.residues)
    self.assertEqual(letters_a, 'ABCDGH')
    self.assertEqual(letters_b, 'ABCDGH')
def testViewsFromSequences_08(self):
    # The two sequences never have a residue in the same column, hence both
    # resulting views are expected to be empty.
    first = seq.CreateSequence("A", "A-C-E-G")
    first.AttachView(self.ent.Select('rname=A,C,E,G'))
    second = seq.CreateSequence("B", "-B-D-H-")
    second.AttachView(self.ent.Select('rname=B,D,H'))

    view_a, view_b = seq.ViewsFromSequences(first, second)

    letters_a = ''.join(res.one_letter_code for res in view_a.residues)
    letters_b = ''.join(res.one_letter_code for res in view_b.residues)
    self.assertEqual(letters_a, '')
    self.assertEqual(letters_b, '')
def testDeletionInSeqB(self):
    # Global alignment where the second sequence lacks residues of the
    # first one: the missing stretches must show up as gaps in B.
    longer = seq.CreateSequence('A', 'aacdefghiklmn')
    shorter = seq.CreateSequence('B', 'acdhiklmn')
    alns = seq.alg.GlobalAlign(longer, shorter, seq.alg.BLOSUM62)
    self.assertEqual(len(alns), 1)
    aln = alns[0]
    self.assertEqual(aln.sequences[0].name, 'A')
    self.assertEqual(aln.sequences[1].name, 'B')
    self.assertEqual(str(aln.sequences[0]), 'aacdefghiklmn')
    self.assertEqual(str(aln.sequences[1]), '-acd---hiklmn')
    self.assertEqual(aln.sequences[0].offset, 0)
    self.assertEqual(aln.sequences[1].offset, 0)
def testSemiGlobalAlignment(self):
    # Semi-global alignment of a short sequence against a longer one; the
    # expected result pads B with gaps at both ends and in the middle.
    longer = seq.CreateSequence('A', 'abcdefghijklmnok')
    shorter = seq.CreateSequence('B', 'cdehijk')
    alns = seq.alg.SemiGlobalAlign(longer, shorter, seq.alg.BLOSUM62)
    self.assertEqual(len(alns), 1)
    aln = alns[0]
    self.assertEqual(aln.sequences[0].name, 'A')
    self.assertEqual(aln.sequences[1].name, 'B')
    self.assertEqual(str(aln.sequences[0]), 'abcdefghijklmnok')
    self.assertEqual(str(aln.sequences[1]), '--cde--hijk-----')
    self.assertEqual(aln.sequences[0].offset, 0)
    self.assertEqual(aln.sequences[1].offset, 0)
def testOffset(self):
    # Global alignment of two sequences of different length; both aligned
    # sequences are expected to keep an offset of zero.
    longer = seq.CreateSequence('A', 'acdhiklmn')
    shorter = seq.CreateSequence('B', 'ggiklmn')
    alns = seq.alg.GlobalAlign(longer, shorter, seq.alg.BLOSUM62)
    self.assertEqual(len(alns), 1)
    aln = alns[0]
    self.assertEqual(aln.sequences[0].name, 'A')
    self.assertEqual(aln.sequences[1].name, 'B')
    self.assertEqual(str(aln.sequences[0]), 'acdhiklmn')
    self.assertEqual(str(aln.sequences[1]), 'g--giklmn')
    self.assertEqual(aln.sequences[0].offset, 0)
    self.assertEqual(aln.sequences[1].offset, 0)
def testViewsFromSequences_09(self):
    # Same idea as the other ViewsFromSequences tests, but both sequences
    # carry an offset of one and the attached views start with residue A.
    first = seq.CreateSequence("A", "B-D-FGH")
    first.AttachView(self.ent.Select('rname=A,B,D,F,G,H'))
    first.offset = 1
    second = seq.CreateSequence("B", "B-DEF-H")
    second.offset = 1
    second.AttachView(self.ent.Select('rname=A,B,D,E,F,H'))

    view_a, view_b = seq.ViewsFromSequences(first, second)

    letters_a = ''.join(res.one_letter_code for res in view_a.residues)
    letters_b = ''.join(res.one_letter_code for res in view_b.residues)
    self.assertEqual(letters_a, 'BDFH')
    self.assertEqual(letters_b, 'BDFH')
def testSeqSlice(self):
    # Slicing a sequence must give the same result as slicing the plain
    # Python string it was created from.
    seq_string = 'abcdefg'
    s = seq.CreateSequence('A', seq_string)
    windows = (
        slice(1, 5),
        slice(None, -1),
        slice(-3, -2),
        slice(-3, None),
        slice(3, 4),
    )
    for window in windows:
        self.assertEqual(s[window], seq_string[window])
def _ParseTmAlign(lines, lines_matrix):
    """
    Extract the superposition result from TMalign output.

    The values are read from fixed positions: ``lines[12]`` (aligned length
    and RMSD), ``lines[14]`` (TM-score), ``lines[18]``/``lines[20]`` (the
    aligned sequences) and ``lines_matrix[2:5]`` (transformation rows).

    :param lines: Lines of the TMalign output
    :param lines_matrix: Lines of the TMalign matrix output
    :returns: ``ost.bindings.TMAlignResult`` holding RMSD, TM-score, aligned
              length, the 4x4 transformation and the alignment (sequence "2"
              first, sequence "1" second)
    """
    summary_fields = lines[12].split(',')
    aln_length = int(summary_fields[0].split('=')[1].strip())
    rmsd = float(summary_fields[1].split('=')[1].strip())
    tm_score = float(lines[14].split('=')[1].split('(')[0].strip())

    # every matrix row is: <row index> <translation> <3 rotation values>
    rows = [[float(value) for value in lines_matrix[idx].split()]
            for idx in (2, 3, 4)]
    row_x, row_y, row_z = rows
    rotation = geom.Mat3(row_x[2], row_x[3], row_x[4],
                         row_y[2], row_y[3], row_y[4],
                         row_z[2], row_z[3], row_z[4])
    transform = geom.Mat4(rotation)
    transform.PasteTranslation(geom.Vec3(row_x[1], row_y[1], row_z[1]))

    first = seq.CreateSequence("1", lines[18].strip())
    second = seq.CreateSequence("2", lines[20].strip())
    alignment = seq.CreateAlignment()
    # the second sequence goes into the alignment first
    alignment.AddSequence(second)
    alignment.AddSequence(first)
    return ost.bindings.TMAlignResult(rmsd, tm_score, aln_length, transform,
                                      alignment)
def testProfile(self):
    # Build a query MSA (search profile) and compare it to the reference
    # a3m file shipped with the tests.
    query_seq = seq.CreateSequence('Test',
                                   'VLSPADKTNVKAAWGKVGAHAGEYGAEA'
                                   'LERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVAD'
                                   'ALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKL'
                                   'LSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVL'
                                   'TSKYR')
    self.hh = hhblits.HHblits(query_seq, self.hhroot)
    msa_path = self.hh.BuildQueryMSA('testfiles/hhblitsdb/hhblitsdb')
    files_equal = filecmp.cmp(msa_path, "testfiles/testali.a3m")
    self.assertTrue(files_equal)
def testSeqListSlice(self):
    # Slicing a sequence list must return the expected sequences, in order.
    a = seq.CreateSequence('A', 'aaaa')
    b = seq.CreateSequence('B', 'bbbb')
    c = seq.CreateSequence('C', 'cccc')
    d = seq.CreateSequence('D', 'dddd')
    sl = seq.CreateSequenceList(a, b, c, d)

    cases = (
        (slice(1, None), (b, c, d)),
        (slice(None, -1), (a, b, c)),
        (slice(-1, None), (d,)),
    )
    for window, expected in cases:
        sliced = sl[window]
        self.assertEqual(len(sliced), len(expected))
        for idx, expected_seq in enumerate(expected):
            self.assertEqual(str(sliced[idx]), str(expected_seq))
def testAlnSlice(self):
    # Slicing an alignment works on columns: each element of the slice is
    # one column, written top to bottom over the four sequences.
    a = seq.CreateSequence('A', 'abcd')
    b = seq.CreateSequence('B', 'efgh')
    c = seq.CreateSequence('C', 'ijkl')
    d = seq.CreateSequence('D', 'mnop')
    aln = seq.CreateAlignment(a, b, c, d)

    cases = (
        (slice(1, None), ('bfjn', 'cgko', 'dhlp')),
        (slice(None, -1), ('aeim', 'bfjn', 'cgko')),
        (slice(-1, None), ('dhlp',)),
    )
    for window, expected_columns in cases:
        sliced = aln[window]
        self.assertEqual(len(sliced), len(expected_columns))
        for idx, column in enumerate(expected_columns):
            self.assertEqual(str(sliced[idx]), column)
def testSearchNotWorking(self):
    # Searching with an a3m file that does not exist is expected to return
    # None instead of a result file.
    query_seq = seq.CreateSequence('Test',
                                   'VLSPADKTNVKAAWGKVGAHAGEYGAEA'
                                   'LERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVAD'
                                   'ALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKL'
                                   'LSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVL'
                                   'TSKYR')
    self.hh = hhblits.HHblits(query_seq, self.hhroot)
    database = 'testfiles/hhblitsdb/hhblitsdb'
    search_file = self.hh.Search("doesnotexist.a3m", database)
    self.assertEqual(search_file, None)
def testOSTSequenceNoWDir(self):
    # An OST sequence as input must work without giving a working directory.
    query_seq = seq.CreateSequence('1AKE.B',
                                   'MRIILLGAPGAGKGTQAQFIMEKYGIPQISTGDMLRA'
                                   'AVKSGSELGKQAKDIMDAGKLVTDELVIALVKERIAQ'
                                   'EDCRNGFLLDGFPRTIPQADAMKEAGINVDYVLEFDV'
                                   'PDELIVDRIVGRRVHAPSGRVYHVKFNPPKVEGKDDV'
                                   'TGEELTTRKDDQEETVRKRLVEYHQMTAPLIGYYYYS'
                                   'KEAEAGNTKYAKVDGTKPVAEVRADLEKILG')
    self.hh = hhblits.HHblits(query_seq, self.hhroot)
    # NOTE: this only holds as long as utils.TempDirWithFiles() names the
    # first fasta file seq01.fasta
    filename_tail = self.hh.filename[-11:]
    self.assertEqual(filename_tail, 'seq01.fasta')
def testA3mToProfileWithExistingFile(self):
    # A3MToProfile is handed the name of an hmm file that already exists.
    query_seq = seq.CreateSequence('Test',
                                   'VLSPADKTNVKAAWGKVGAHAGEYGAEA'
                                   'LERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVAD'
                                   'ALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKL'
                                   'LSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVL'
                                   'TSKYR')
    self.hh = hhblits.HHblits(query_seq, self.hhroot)
    existing_hmm = "testfiles/test.hmm"
    hhfile = self.hh.A3MToProfile("testfiles/testali.a3m",
                                  hhm_file=existing_hmm)
    # An already existing hmm file is not touched, so the returned file
    # must be completely equal to it.
    self.assertTrue(filecmp.cmp(hhfile, "testfiles/test.hmm"))
def testA3mToProfileWithoutA3m(self):
    # Without an existing a3m file, A3MToProfile has to fail with an IOError
    # carrying a plain message (no errno).
    query_seq = seq.CreateSequence('Test',
                                   'VLSPADKTNVKAAWGKVGAHAGEYGAEA'
                                   'LERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVAD'
                                   'ALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKL'
                                   'LSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVL'
                                   'TSKYR')
    self.hh = hhblits.HHblits(query_seq, self.hhroot)
    with self.assertRaises(IOError) as ioe:
        self.hh.A3MToProfile("doesnotexist.a3m")
    raised = ioe.exception
    self.assertEqual(raised.errno, None)
    self.assertEqual(raised.args[0], "could not convert a3m to hhm file")
def _ParseiAlign(lines):
    """
    Extract the superposition result from iAlign output.

    The values are read from fixed positions: ``lines[18]`` (IS-score),
    ``lines[19]`` (aligned residues), ``lines[20]`` (aligned contacts),
    ``lines[21]`` (RMSD), ``lines[25:28]`` (transformation rows, first
    character skipped) and ``lines[32]``/``lines[34]`` (aligned sequences).

    :param lines: Lines of the iAlign output
    :returns: ``iAlignResult`` built from RMSD, the 4x4 transformation, the
              alignment (sequence "2" first, sequence "1" second), IS-score,
              number of aligned residues and number of aligned contacts
    """
    score_fields = lines[18].split(',')
    is_score = float(score_fields[0].split('=')[1].strip())
    aln_residues = int(lines[19].split('=')[1].strip())
    aln_contacts = int(lines[20].split('=')[1].strip())
    rmsd_fields = lines[21].split(',')
    rmsd = float(rmsd_fields[0].split('=')[1].strip())

    # every matrix row is: <row index> <translation> <3 rotation values>;
    # the leading character of the line is not part of the numbers
    rows = [[float(value) for value in lines[idx][1:].split()]
            for idx in (25, 26, 27)]
    row_x, row_y, row_z = rows
    rotation = geom.Mat3(row_x[2], row_x[3], row_x[4],
                         row_y[2], row_y[3], row_y[4],
                         row_z[2], row_z[3], row_z[4])
    transform = geom.Mat4(rotation)
    transform.PasteTranslation(geom.Vec3(row_x[1], row_y[1], row_z[1]))

    first = seq.CreateSequence("1", lines[32].strip())
    second = seq.CreateSequence("2", lines[34].strip())
    alignment = seq.CreateAlignment()
    # the second sequence goes into the alignment first
    alignment.AddSequence(second)
    alignment.AddSequence(first)
    return iAlignResult(rmsd, transform, alignment, is_score, aln_residues,
                        aln_contacts)
def AlignmentFromChainView(chain, handle_seq_name='handle',
                           view_seq_name='view'):
    """
    Align the residues of a chain view to the residues of its chain handle.

    The returned alignment has two sequences. The first one lists all
    non-ligand, peptide-linking residues of the handle; the second one has the
    one-letter code wherever such a residue is also part of the view and a gap
    everywhere else.

    :param chain: A valid chain
    :type chain: :class:`~ost.mol.ChainView`

    :param handle_seq_name: Name of the handle sequence in the output alignment
    :param view_seq_name: Name of the view sequence in the output alignment
    :returns: The alignment
    :rtype: :class:`~ost.seq.AlignmentHandle`
    """
    from ost import seq
    selection = 'ligand=false and peptide=true'
    handle_view = chain.handle.Select(selection)
    chain_view = chain.Select(selection)

    handle_seq = seq.CreateSequence(handle_seq_name, '')
    view_seq = seq.CreateSequence(view_seq_name, '')
    handle_seq.AttachView(handle_view)
    view_seq.AttachView(chain_view)

    view_residues = chain_view.residues
    n_view = len(view_residues)
    cursor = 0  # next residue of the view that still has to be placed
    for handle_res in handle_view.residues:
        handle_seq.Append(handle_res.one_letter_code)
        in_view = (cursor < n_view
                   and view_residues[cursor].handle == handle_res.handle)
        if in_view:
            view_seq.Append(view_residues[cursor].one_letter_code)
            cursor += 1
        else:
            view_seq.Append('-')
    return seq.CreateAlignment(handle_seq, view_seq)
def testA3mToCSFileExists(self):
    # A3MToCS called with an explicit cs_file name; the returned file is
    # compared against that very file.
    query_seq = seq.CreateSequence('Test',
                                   'VLSPADKTNVKAAWGKVGAHAGEYGAEA'
                                   'LERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVAD'
                                   'ALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKL'
                                   'LSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVL'
                                   'TSKYR')
    self.hh = hhblits.HHblits(query_seq, self.hhroot)
    alphabet = os.path.join(self.hh.hhlib_dir, 'data', 'cs219.lib')
    csfile = self.hh.A3MToCS("testfiles/testali.a3m",
                             cs_file='testfiles/test.seq219',
                             options={'-alphabet': alphabet})
    self.assertTrue(filecmp.cmp(csfile, 'testfiles/test.seq219'))
def testOSTSequenceWDir(self):
    # An OST sequence as input must work together with an explicit working
    # directory; the query is then expected at a fixed file name inside it.
    query_seq = seq.CreateSequence('1AKE.B',
                                   'MRIILLGAPGAGKGTQAQFIMEKYGIPQISTGDMLRA'
                                   'AVKSGSELGKQAKDIMDAGKLVTDELVIALVKERIAQ'
                                   'EDCRNGFLLDGFPRTIPQADAMKEAGINVDYVLEFDV'
                                   'PDELIVDRIVGRRVHAPSGRVYHVKFNPPKVEGKDDV'
                                   'TGEELTTRKDDQEETVRKRLVEYHQMTAPLIGYYYYS'
                                   'KEAEAGNTKYAKVDGTKPVAEVRADLEKILG')
    tmpdir = tempfile.mkdtemp()
    self.hh = hhblits.HHblits(query_seq, self.hhroot, working_dir=tmpdir)
    expected_query_file = os.path.join(tmpdir, 'query_hhblits.fasta')
    self.assertEqual(self.hh.filename, expected_query_file)
    self.assertEqual(self.hh.working_dir, tmpdir)
    # flag the instance for cleanup, the directory was created by this test
    self.hh.needs_cleanup = True
def testA3mToProfileWithoutFileName(self):
    # A3MToProfile called without an output file name; the generated profile
    # is compared line by line to the reference profile.
    query_seq = seq.CreateSequence('Test',
                                   'VLSPADKTNVKAAWGKVGAHAGEYGAEA'
                                   'LERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVAD'
                                   'ALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKL'
                                   'LSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVL'
                                   'TSKYR')
    self.hh = hhblits.HHblits(query_seq, self.hhroot)
    hhfile = self.hh.A3MToProfile("testfiles/testali.a3m")
    try:
        with open(hhfile) as tfh:
            tlst = tfh.readlines()
        with open("testfiles/test.hmm") as efh:
            elst = efh.readlines()
        self.assertEqual(len(elst), len(tlst))
        for expected, actual in zip(elst, tlst):
            # FILE, COM and DATE lines are excluded from the comparison
            if not expected.startswith(('FILE', 'COM', 'DATE')):
                self.assertEqual(expected, actual)
    finally:
        # remove the generated profile even if an assertion above failed, so
        # a failing run does not leave the file behind
        if os.path.exists(hhfile):
            os.remove(hhfile)
def testSearchWorking(self):
    # Successful search: the produced result file is compared line by line
    # to the reference hhr file.
    query_seq = seq.CreateSequence('Test',
                                   'VLSPADKTNVKAAWGKVGAHAGEYGAEA'
                                   'LERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVAD'
                                   'ALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKL'
                                   'LSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVL'
                                   'TSKYR')
    self.hh = hhblits.HHblits(query_seq, self.hhroot)
    search_file = self.hh.Search("testfiles/testali.a3m",
                                 'testfiles/hhblitsdb/hhblitsdb')
    with open(search_file) as result_handle:
        result_lines = result_handle.readlines()
    with open("testfiles/test.hhr") as reference_handle:
        reference_lines = reference_handle.readlines()
    self.assertEqual(len(reference_lines), len(result_lines))
    skipped_prefixes = ('Date', 'Command')
    for expected, actual in zip(reference_lines, result_lines):
        # Date and Command lines are excluded from the comparison
        if expected.startswith(skipped_prefixes):
            continue
        self.assertEqual(expected, actual)
def __DisplayAlignment(self, ent_list, res_list):
    # Merge the pairwise superposition alignments onto the reference (first
    # entity) and show the result in a fresh sequence viewer. Nothing happens
    # for an empty result list or an invalid reference sequence.
    if not len(res_list) > 0:
        return
    ref_name = "%s (ref)" % ent_list[0].GetName()
    ref_string = res_list[0].alignment.GetSequence(1).GetGaplessString()
    ref_seq = seq.CreateSequence(ref_name, ref_string)
    aln_list = seq.AlignmentList()
    if not ref_seq.IsValid():
        return

    for idx, result in enumerate(res_list):
        # WrappedTMAlign returns an alignment with the second sequence being
        # the reference, so the two sequences are swapped here.
        swapped = seq.CreateAlignment()
        swapped.AddSequence(result.alignment.GetSequence(1))
        swapped.AddSequence(result.alignment.GetSequence(0))
        swapped.SetSequenceName(1, ent_list[idx + 1].GetName())
        aln_list.append(swapped)
    alignment = alg.MergePairwiseAlignments(aln_list, ref_seq)

    gosty = gui.GostyApp.Instance()
    # the returned main area is not used; the call is kept as it was
    gosty.perspective.GetMainArea()
    if self.seq_viewer:
        # close the viewer of a previous run before replacing it
        self.seq_viewer.qobject.close()
    viewer = gui.SequenceViewer(True)
    self.seq_viewer = viewer
    viewer.AddAlignment(alignment)
    viewer.ChangeDisplayMode("Highlight conservation 1")
    viewer.Show()
def AlignToSEQRES(chain, seqres, try_resnum_first=False, validate=True):
    """
    Align the residues of *chain* to the SEQRES sequence, adding gaps where
    residues are missing.

    By default, consecutive peptide fragments are identified from the
    connectivity of the backbone (:func:`ost.mol.InSequence`). Each fragment
    is then searched in the SEQRES string, starting behind the previous
    fragment. A fragment that cannot be found this way leads to a ValueError.

    :param chain: Source of the sequence
    :type chain: :class:`~ost.mol.ChainHandle`

    :param seqres: SEQRES sequence
    :type seqres: :class:`str`

    :param try_resnum_first: If True, an alignment is first built by placing
      every residue at the position given by its residue number (numbers
      outside of 1..len(seqres) are skipped). If all placed one-letter codes
      match SEQRES ('X' and '?' match anything), this alignment is used.
      On the first mismatch a warning is logged and the connectivity-based
      procedure is used instead.
    :type try_resnum_first: :class:`bool`

    :param validate: If True, the alignment is additionally checked with
      :func:`~ost.seq.alg.ValidateSEQRESAlignment`.
    :type validate: :class:`bool`

    :returns: The alignment of the SEQRES entries (first sequence, named
              "SEQRES") and the residues in the chain (second sequence, named
              "atoms"). An empty alignment if the chain has no residues.
    :rtype: :class:`~ost.seq.AlignmentHandle`

    :raises: :class:`ValueError` if a fragment is not found in SEQRES or if
             the validation fails.
    """
    from ost import seq
    from ost import mol
    from ost import LogWarning

    def _CodesMatch(olc1, olc2):
        # 'X' and '?' act as wildcards on either side
        wildcards = ('X', '?')
        return olc1 in wildcards or olc2 in wildcards or olc1 == olc2

    residues = chain.residues
    if len(residues) == 0:
        return seq.CreateAlignment()

    use_resnums = try_resnum_first
    atom_seq = None
    if use_resnums:
        atom_seq = seq.CreateSequence('atoms', '-' * len(seqres))
        for res in residues:
            res_num = res.number.num
            if 0 < res_num <= len(seqres):
                column = res_num - 1
                if _CodesMatch(seqres[column], res.one_letter_code):
                    atom_seq[column] = res.one_letter_code
                else:
                    LogWarning('Sequence mismatch: chain has "'
                               + res.one_letter_code
                               + '", while SEQRES is "' + seqres[column]
                               + '" at the corresponding position.')
                    # give up on residue numbers, use connectivity instead
                    use_resnums = False
                    break

    if not use_resnums:
        # split the chain into stretches of connected residues
        fragments = [residues[0].one_letter_code]
        for previous, current in zip(residues[:-1], residues[1:]):
            if not mol.InSequence(previous.handle, current.handle):
                fragments.append('')
            fragments[-1] += current.one_letter_code

        seqres_string = str(seqres)
        search_start = 0
        pieces = []
        for frag in fragments:
            found_at = seqres_string.find(frag, search_start)
            if found_at == -1:
                raise ValueError('"%s" is not a substring of "%s"'
                                 % (frag, seqres_string))
            pieces.append('-' * (found_at - search_start))
            pieces.append(frag)
            search_start = found_at + len(frag)
        aligned = ''.join(pieces)
        # fill up with gaps to the length of SEQRES
        trailing_gaps = '-' * (len(seqres) - len(aligned))
        atom_seq = seq.CreateSequence('atoms', aligned + trailing_gaps)

    seqres_seq = seq.CreateSequence('SEQRES', str(seqres))
    alignment = seq.CreateAlignment(seqres_seq, atom_seq)
    if validate and not ValidateSEQRESAlignment(alignment, chain):
        raise ValueError(
            "SEQRES cannot be aligned with its corresponding chain.")
    return alignment
def ParseA3M(a3m_file):
    '''
    Read the secondary structure prediction and the multiple sequence
    alignment from the lines of an A3M file as produced by
    :meth:`HHblits.BuildQueryMSA`.

    Lines following a ``>ss_pred`` header are collected as prediction, lines
    following ``>ss_conf`` as confidence values. Every other header line
    starts a new sequence of the alignment. In the sequences, lower case
    letters are insertions relative to the first sequence.

    :param a3m_file: Iterable containing the lines of the A3M file
    :type a3m_file: iterable (e.g. an open file handle)

    :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
             (:class:`list`) and "msa" (:class:`~ost.seq.AlignmentHandle`).
             "msa" is None if the file holds less than two sequences.

    :raises: :class:`IOError` if a sequence header shows up before the
             "ss_conf" section.
    '''
    section = 'NONE'
    pred_parts = []
    conf_parts = []
    headers = []
    sequence_parts = []  # one list of line fragments per sequence

    for raw_line in a3m_file:
        content = raw_line.rstrip()
        if len(content) == 0:
            continue
        if raw_line.startswith('>ss_pred'):
            section = 'sspred'
        elif raw_line.startswith('>ss_conf'):
            section = 'ssconf'
        elif raw_line[0] == '>':
            if section not in ('ssconf', 'msa'):
                raise IOError('The A3M file is missing the "ss_conf" section')
            sequence_parts.append([])
            headers.append(raw_line[1:].rstrip())
            section = 'msa'
        elif section == 'sspred':
            pred_parts.append(content)
        elif section == 'ssconf':
            conf_parts.append(content)
        elif section == 'msa':
            sequence_parts[-1].append(content)

    pred_txt = ''.join(pred_parts)
    conf_txt = ''.join(conf_parts)
    profile_dict = dict()
    profile_dict['ss_pred'] = list(pred_txt)
    # the confidence string is indexed by the positions of the prediction
    profile_dict['ss_conf'] = [int(conf_txt[pos])
                               for pos in range(len(pred_txt))]

    # post processing
    # MSA
    profile_dict['msa'] = None
    sequences = [''.join(parts) for parts in sequence_parts]
    if len(sequences) > 1:
        query = sequences[0]
        pairwise = seq.AlignmentList()
        for header, target in zip(headers[1:], sequences[1:]):
            query_row = []
            target_row = []
            query_pos = 0
            for letter in target:
                if letter.islower():
                    # insertion: gap in the query, upper-cased target letter
                    query_row.append('-')
                    target_row.append(letter.upper())
                else:
                    query_row.append(query[query_pos])
                    target_row.append(letter)
                    query_pos += 1
            pair = seq.CreateAlignment(
                seq.CreateSequence(headers[0], ''.join(query_row)),
                seq.CreateSequence(header, ''.join(target_row)))
            pairwise.append(pair)
        profile_dict['msa'] = seq.alg.MergePairwiseAlignments(
            pairwise, seq.CreateSequence(headers[0], query))
    return profile_dict
def ParseHHM(profile):
    '''
    Parse secondary structure information and the MSA out of an HHM profile as
    produced by :meth:`HHblits.A3MToProfile`.

    Lines following the PSIPRED ``>ss_pred``/``>ss_conf`` headers are collected
    as prediction and confidence values, lines following ``>Consensus`` as
    consensus sequence. Every other header line starts a new sequence of the
    alignment; a line starting with ``#`` ends the current section. In the
    sequences, lower case letters are insertions relative to the first
    sequence.

    :param profile: Opened file handle holding the profile. Any iterable of
                    lines works; its ``name`` attribute, if present, is only
                    used in the error message.
    :type profile: :class:`file`

    :return: Dictionary containing "ss_pred" (:class:`list`), "ss_conf"
             (:class:`list`), "msa" (:class:`~ost.seq.AlignmentHandle`) and
             "consensus" (:class:`~ost.seq.SequenceHandle`). "msa" is None if
             the profile holds no sequences.

    :raises: :class:`IOError` if a sequence header shows up before the
             "Consensus" section.
    '''
    profile_dict = dict()
    state = 'NONE'
    pred_parts = []
    conf_parts = []
    consensus_parts = []
    msa_seq = []
    msa_head = []

    for line in profile:
        content = line.rstrip()
        if len(content) == 0:
            continue
        if content == '>ss_pred PSIPRED predicted secondary structure':
            state = 'sspred'
        elif content == '>ss_conf PSIPRED confidence values':
            state = 'ssconf'
        elif content == '>Consensus':
            state = 'consensus'
        elif line[0] == '>':
            if state == 'consensus' or state == 'msa':
                msa_seq.append('')
                msa_head.append(line[1:].rstrip())
            else:
                # in-memory handles (e.g. io.StringIO) have no name; do not
                # let the lookup turn the IOError into an AttributeError
                profile_name = getattr(profile, 'name', '<unknown>')
                raise IOError('Profile file "%s" is missing ' % profile_name +
                              'the "Consensus" section')
            state = 'msa'
        elif line[0] == '#':
            state = 'NONE'
        elif state == 'sspred':
            pred_parts.append(content)
        elif state == 'ssconf':
            conf_parts.append(content)
        elif state == 'msa':
            msa_seq[-1] += content
        elif state == 'consensus':
            consensus_parts.append(content)

    pred_seq_txt = ''.join(pred_parts)
    conf_seq_txt = ''.join(conf_parts)
    consensus_txt = ''.join(consensus_parts)

    profile_dict['ss_pred'] = list(pred_seq_txt)
    # the confidence string is indexed by the positions of the prediction
    profile_dict['ss_conf'] = [int(conf_seq_txt[pos])
                               for pos in range(len(pred_seq_txt))]

    # post processing
    # MSA
    profile_dict['msa'] = None
    if len(msa_seq):
        t = msa_seq[0]
        al = seq.AlignmentList()
        for head, target in zip(msa_head[1:], msa_seq[1:]):
            query_row = []
            target_row = []
            k = 0
            for c in target:
                if c.islower():
                    # insertion: gap in the query, upper-cased target letter
                    query_row.append('-')
                    target_row.append(c.upper())
                else:
                    query_row.append(t[k])
                    target_row.append(c)
                    k += 1
            nl = seq.CreateAlignment(
                seq.CreateSequence(msa_head[0], ''.join(query_row)),
                seq.CreateSequence(head, ''.join(target_row)))
            al.append(nl)
        profile_dict['msa'] = seq.alg.MergePairwiseAlignments(
            al, seq.CreateSequence(msa_head[0], t))
    # Consensus
    profile_dict['consensus'] = seq.CreateSequence('Consensus', consensus_txt)

    return profile_dict
def testSeqIterBZDNG148(self):
    # Iterating over a sequence must run through without raising
    # (presumably a regression test for ticket BZDNG-148, judging by the name).
    s = seq.CreateSequence('A', 'abcdef')
    for _ in s:
        pass
def testValidateEmptySequenceWorking(self):
    # An alignment of two empty sequences validated against a default
    # constructed chain handle is expected to pass.
    seqres_seq = seq.CreateSequence('SEQRES', '')
    atom_seq = seq.CreateSequence('atoms', '')
    alignment = seq.CreateAlignment(seqres_seq, atom_seq)
    chain = mol.ChainHandle()
    is_valid = seq.alg.ValidateSEQRESAlignment(alignment, chain)
    self.assertEqual(is_valid, True)