def _read(self, structure, f_id, distance_cutoff, atom_type):
    """Extract the contacts of a parsed structure into a contact file

    Parameters
    ----------
    structure
       A :obj:`~Bio.PDB.Structure.Structure` instance
    f_id : str
       Unique contact file identifier
    distance_cutoff : int
       Distance cutoff for which to determine contacts, ``0`` keeps
       every contact
    atom_type : str
       Atom type between which distances are calculated

    Returns
    -------
    :obj:`~conkit.core.contactfile.ContactFile`

    Notes
    -----
    A hierarchy is built for every model in the structure, yet only the
    one of the first model is returned. A :obj:`FutureWarning` is issued
    if more than one model was processed.

    """
    hierarchies = []
    for model in structure:
        hierarchy = ContactFile(f_id + '_' + str(model.id))
        chains = list(model)

        # Every chain is pre-processed before any chain pair is evaluated
        for chain in chains:
            self._remove_hetatm(chain)
            self._remove_atom(chain, atom_type)

        # Ordered pairs: (X, X) is an intra-chain map, (X, Y) and (Y, X)
        # are two separate inter-chain maps.
        for chain1, chain2 in itertools.product(chains, chains):
            # Decide intra vs. inter from the chains themselves. The length
            # of the map id is not a reliable indicator, since a chain
            # identifier may be longer than one character.
            is_intra = chain1.id == chain2.id
            if is_intra:
                contact_map = ContactMap(chain1.id)
            else:
                contact_map = ContactMap(chain1.id + chain2.id)

            for (atom1, atom2, distance) in self._chain_contacts(chain1, chain2):
                contact = Contact(
                    atom1.resseq,
                    atom2.resseq,
                    round(1.0 - (distance / 100), 6),
                    distance_bound=(0., float(distance_cutoff)))

                contact.res1_altseq = atom1.resseq_alt
                contact.res2_altseq = atom2.resseq_alt
                contact.res1 = atom1.resname
                contact.res2 = atom2.resname
                contact.res1_chain = atom1.reschain
                contact.res2_chain = atom2.reschain

                # NOTE(review): nesting reconstructed from flattened source;
                # only contacts passing the cutoff are stored - confirm.
                if distance_cutoff == 0 or distance < distance_cutoff:
                    contact.true_positive = True
                    contact_map.add(contact)

            # Chain pairs without a single contact are not stored
            if contact_map.empty:
                continue

            if is_intra:
                contact_map.sequence = self._build_sequence(chain1)
                assert len(contact_map.sequence.seq) == len(chain1)
            else:
                contact_map.sequence = self._build_sequence(chain1) \
                    + self._build_sequence(chain2)
                assert len(contact_map.sequence.seq) \
                    == len(chain1) + len(chain2)
            hierarchy.add(contact_map)

        hierarchy.method = 'Contact map extracted from PDB ' + str(model.id)
        hierarchy.remark = [
            'The model id is the chain identifier, i.e XY equates to chain X and chain Y.',
            'Residue numbers in column 1 are chain X, and numbers in column 2 are chain Y.'
        ]
        hierarchies.append(hierarchy)

    if len(hierarchies) > 1:
        msg = "Super-level to contact file not yet implemented. " \
              "Parser returns hierarchy for top model only!"
        warnings.warn(msg, FutureWarning)
    return hierarchies[0]
def read(self, f_handle, f_id="gremlin"):
    """Parse a contact file into a contact file hierarchy

    Parameters
    ----------
    f_handle
       Open file handle [read permissions]
    f_id : str, optional
       Unique contact file identifier

    Returns
    -------
    :obj:`ContactFile <conkit.core.contactfile.ContactFile>`

    Raises
    ------
    :exc:`ValueError`
       A chain label is longer than two characters

    """
    contact_file = ContactFile(f_id)
    # Blank lines carry no information, drop them up front
    records = [raw.rstrip() for raw in f_handle if raw.rstrip()]

    is_inter = False
    chain_pairs = set()
    contacts = []
    for record in records:
        comment = RE_COMMENT.match(record)
        if comment:
            contact_file.remark = comment.group(1)
            continue
        if RE_HEADER_INTRA.match(record):
            is_inter = False
            continue
        if RE_HEADER_INTER.match(record):
            is_inter = True
            continue

        # Anything else is a contact record; the most recent header decides
        # how many columns it has and whether a chain label is present.
        fields = RE_SPLIT.split(record)
        if is_inter:
            res1_seq, res2_seq, chain, _, _, raw_score, scalar_score, _, _ = fields
        else:
            res1_seq, res2_seq, _, _, raw_score, scalar_score, _ = fields
            chain = 'UNK'

        contact = Contact(int(res1_seq), int(res2_seq), float(raw_score))
        contact.scalar_score = float(scalar_score)

        if chain == 'UNK':
            chain_pairs.add('UNK')
        elif len(chain) > 2:
            raise ValueError('Cannot distinguish between chains')
        elif len(chain) in (1, 2):
            # A one-letter label names both chains, a two-letter label
            # names the first and the second chain respectively.
            contact.res1_chain = chain[0]
            contact.res2_chain = chain[-1]
            chain_pairs.add((contact.res1_chain, contact.res2_chain))
        contacts.append(contact)

    chain_pairs = list(chain_pairs)
    if len(chain_pairs) == 1:
        # A single chain label: every contact belongs to the same map
        only = chain_pairs[0]
        if only == 'UNK':
            map_id = '1'
        elif only[0] == only[1]:
            map_id = only[0]
        else:
            map_id = "".join(only)
        contact_map = ContactMap(map_id)
        for contact in contacts:
            contact_map.add(contact)
        contact_file.add(contact_map)
    else:
        # Several chain labels: one map each, filled with matching contacts
        for pair in chain_pairs:
            map_id = pair[0] if pair[0] == pair[1] else "".join(pair)
            contact_map = ContactMap(map_id)
            for contact in contacts:
                if contact.res1_chain == pair[0] and contact.res2_chain == pair[1]:
                    contact_map.add(contact)
            contact_file.add(contact_map)

    contact_file.sort('id', inplace=True)
    return contact_file
def read(self, f_handle, f_id="casp"):
    """Read a contact file into a :obj:`~conkit.core.contactfile.ContactFile` instance

    Parameters
    ----------
    f_handle
       Open file handle [read permissions]
    f_id : str, optional
       Unique contact file identifier

    Returns
    -------
    :obj:`~conkit.core.contactfile.ContactFile`

    Raises
    ------
    :exc:`ValueError`
       A line outside a model block matches none of the known record types
    :exc:`ValueError`
       A residue entry cannot be split into chain and sequence number

    """

    def split_residue(entry):
        """Split a residue entry into (chain, sequence number)"""
        parts = RE_RES.split(entry)
        if len(parts) == 1:
            # No chain prefix, i.e. intra-molecular contact
            return "", parts[0]
        if len(parts) == 4:
            return parts[1], parts[2]
        # Fail loudly rather than silently reusing the previous residue
        raise ValueError("Unrecognized residue entry: {!r}".format(entry))

    lines = [l.strip() for l in f_handle.readlines()]
    contact_file = ContactFile(f_id)

    # One iterator is shared by the outer and the inner loop, so that the
    # model block consumes its own lines.
    it = iter(lines)
    for line in it:
        if RE_PRFMAT.match(line):
            continue

        elif RE_TARGET.match(line):
            contact_file.remark = RE_TARGET.match(line).group(1)

        elif RE_AUTHOR.match(line):
            contact_file.author = RE_AUTHOR.match(line).group(1)

        elif RE_REMARK.match(line):
            contact_file.remark = RE_REMARK.match(line).group(1)

        elif RE_METHOD.match(line):
            contact_file.method = RE_METHOD.match(line).group(1)

        elif RE_MODEL.match(line):
            contact_map = ContactMap(RE_MODEL.match(line).group(1))

            seq_chunks = []
            for line in it:
                # A blank line or an end record closes the model block
                if not line or RE_ENDMDL.match(line) or RE_END.match(line):
                    break

                if RE_SEQ.match(line):
                    seq_chunks.append(line)
                    continue

                res1_entry, res2_entry, lb, ub, raw_score = RE_SPLIT.split(line)

                # Split in case we have chain in inter-molecular scenarios
                res1_chain, res1_seq = split_residue(res1_entry)
                res2_chain, res2_seq = split_residue(res2_entry)

                contact = Contact(
                    int(res1_seq),
                    int(res2_seq),
                    float(raw_score),
                    distance_bound=(float(lb), float(ub)))
                contact.res1_chain = res1_chain
                contact.res2_chain = res2_chain
                contact.res1_altseq = int(res1_seq)
                contact.res2_altseq = int(res2_seq)
                contact_map.add(contact)

            if seq_chunks:
                seq = "".join(seq_chunks)
                sequence = Sequence("seq_{}".format(contact_map.id), seq)
                contact_map.sequence = sequence
                contact_map.set_sequence_register()

            contact_file.add(contact_map)

        elif RE_END.match(line):
            break

        else:
            raise ValueError(
                "Unrecognized line type. Please report this issue")

    return contact_file
def read(self, f_handle, f_id="pcons"):
    """Read a contact file

    Parameters
    ----------
    f_handle
       Open file handle [read permissions]
    f_id : str, optional
       Unique contact file identifier

    Returns
    -------
    :obj:`~conkit.core.contactfile.ContactFile`

    """
    contact_file = ContactFile(f_id)
    contact_map = ContactMap("1")
    contact_file.add(contact_map)

    # Blank lines are dropped here, so no line below is ever empty
    lines = iter([l.rstrip() for l in f_handle if l.rstrip()])
    done = object()  # sentinel marking the end of the input
    line = next(lines, done)

    seq_chunks = []
    seq_id = 'seq_1'

    while line is not done:

        if RE_GENERATED.match(line):
            contact_file.remark = line

        elif RE_SEQUENCE_NAME.match(line):
            seq_id = RE_SEQUENCE_NAME.match(line).group(1)

        elif RE_SEQUENCE.match(line):
            # Collect sequence lines until a contact-section line shows up
            line = next(lines, done)
            while line is not done:
                if (RE_CONTACT_HEADER.match(line)
                        or RE_PRED_CONTACTS.match(line)
                        or RE_CONTACT.match(line)):
                    break
                seq_chunks.append(line)
                line = next(lines, done)

        # Deliberately a separate check: the line that ended the sequence
        # block may itself be a contact. The block may also have run to the
        # end of the input, in which case ``line`` is the sentinel and must
        # not be handed to the regular expression.
        if line is not done and RE_CONTACT.match(line):
            res1_seq, res2_seq, raw_score = line.split()
            contact = Contact(int(res1_seq), int(res2_seq), float(raw_score))
            contact_map.add(contact)

        line = next(lines, done)

    seq = "".join(seq_chunks)
    if seq:
        contact_map.sequence = Sequence(seq_id, seq)

    contact_file.method = 'Contact map predicted using Pcons'

    return contact_file
def test_remark_5(self):
    """A remark survives the subsequent addition of a contact map"""
    cfile = ContactFile("test")
    cfile.remark = "hello"
    cfile.add(ContactMap("foo"))
    expected = ["hello"]
    self.assertEqual(expected, cfile.remark)
def test_remark_2(self):
    """Consecutive remark assignments accumulate in order"""
    cfile = ContactFile("test")
    for text in ("Hello", "World"):
        cfile.remark = text
    expected = ["Hello", "World"]
    self.assertEqual(expected, cfile.remark)
def test_remark_1(self):
    """A single remark assignment is returned as a one-element list"""
    cfile = ContactFile("test")
    cfile.remark = "Hello"
    expected = ["Hello"]
    self.assertEqual(expected, cfile.remark)