def test_method_3(self):
    """Check that four successive ``method`` assignments are all kept, in order."""
    contact_file = ContactFile("test")
    for entry in ("Hello", "5", "World", "!"):
        contact_file.method = entry
    expected = ["Hello", "5", "World", "!"]
    self.assertEqual(expected, contact_file.method)
def read(self, f_handle, f_id="map_align"):
    """Read a contact file

    Only lines made of exactly two whitespace-separated, digit-only
    fields are turned into contacts; every other line is ignored.

    Parameters
    ----------
    f_handle
       Open file handle [read permissions]
    f_id : str, optional
       Unique contact file identifier

    Returns
    -------
    :obj:`~conkit.core.contactfile.ContactFile`

    """
    contact_file = ContactFile(f_id)
    contact_map = ContactMap("map_1")
    contact_file.add(contact_map)

    for raw in f_handle:
        fields = raw.strip().split()
        if len(fields) != 2:
            continue
        first, second = fields
        if not (first.isdigit() and second.isdigit()):
            continue
        # Al-eigen has no score field so we assume score=0.5
        contact_map.add(Contact(int(first), int(second), 0.5))

    contact_file.method = "Contact map compatible with Al-Eigen"
    return contact_file
def read(self, f_handle, f_id="map_align"):
    """Read a contact file

    Lines of the form ``CON <res1> <res2> <score>`` are converted to
    contacts. Any other line, including blank or truncated ones, is
    skipped.

    Parameters
    ----------
    f_handle
       Open file handle [read permissions]
    f_id : str, optional
       Unique contact file identifier

    Returns
    -------
    :obj:`~conkit.core.contactfile.ContactFile`

    """
    hierarchy = ContactFile(f_id)
    _map = ContactMap("map_1")
    hierarchy.add(_map)

    for line in f_handle:
        fields = line.strip().split()
        # Check the field count before indexing so that blank or short
        # lines are skipped rather than raising an IndexError.
        if len(fields) < 4 or fields[0] != "CON":
            continue
        if fields[1].isdigit() and fields[2].isdigit():
            _contact = Contact(int(fields[1]), int(fields[2]), float(fields[3]))
            _map.add(_contact)

    hierarchy.method = "Contact map compatible with map_align"
    return hierarchy
def test_write_1(self):
    """Write a four-contact map with ``ComsatParser`` and compare the emitted lines."""
    contact_file = ContactFile('RR')
    contact_file.target = 'R9999'
    contact_file.author = '1234-5678-9000'
    contact_file.remark = ['Predictor remarks']
    contact_file.method = [
        'Description of methods used',
        'Description of methods used',
    ]
    contact_map = ContactMap('1')
    contact_file.add(contact_map)
    for c in [(1, 9, 0, 8, 0.7), (1, 10, 0, 8, 0.7), (2, 8, 0, 8, 0.9), (3, 12, 0, 8, 0.4)]:
        contact = Contact(c[0], c[1], c[4], distance_bound=(c[2], c[3]))
        contact_map.add(contact)
    contact_map.sequence = Sequence('1', 'HLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSD')
    contact_map.set_sequence_register()
    content = [
        "1 H 9 L Hx-Hx",
        "1 H 10 L Hx-Hx",
        "2 L 8 I Hx-Hx",
        "3 E 12 K Hx-Hx",
    ]
    f_name = create_tmp_f()
    # Remove the temporary file even when writing or the assertion fails.
    try:
        with open(f_name, 'w') as f_out:
            ComsatParser().write(f_out, contact_file)
        with open(f_name, 'r') as f_in:
            output = f_in.read().splitlines()
        self.assertEqual(content, output)
    finally:
        os.unlink(f_name)
def read(self, f_handle, f_id="ccmpred"):
    """Read a contact file

    The handle is loaded as a numeric matrix; the index pairs returned
    by ``self._get_contact_pairs`` are converted to contacts, keeping
    only pairs whose first index does not exceed the second.

    Parameters
    ----------
    f_handle
       Open file handle [read permissions]
    f_id : str, optional
       Unique contact file identifier

    Returns
    -------
    :obj:`~conkit.core.contactfile.ContactFile`

    """
    hierarchy = ContactFile(f_id)
    hierarchy.method = "Contact map predicted using CCMpred"
    _map = ContactMap("map_1")
    hierarchy.add(_map)

    # Bits ripped from Stefan Seemayer's script shipped with CCMpred
    matrix = np.loadtxt(f_handle)
    if not matrix.size > 0:
        return hierarchy

    pairs = self._get_contact_pairs(matrix)
    for row, col, score in zip(pairs[0], pairs[1], matrix[pairs]):
        if row > col:
            continue
        # Matrix starts count at 0 so increment numbers by one straight away
        _map.add(Contact(int(row + 1), int(col + 1), float(score)))

    return hierarchy
def test_write_1(self):
    """Write a four-contact map with ``PsicovParser`` and compare the file content."""
    contact_file = ContactFile('RR')
    contact_file.target = 'R9999'
    contact_file.author = '1234-5678-9000'
    contact_file.remark = ['Predictor remarks']
    contact_file.method = ['Description of methods used', 'Description of methods used']
    contact_map = ContactMap('1')
    contact_file.add(contact_map)
    for c in [(1, 9, 0, 8, 0.7), (1, 10, 0, 8, 0.7), (2, 8, 0, 8, 0.9), (3, 12, 0, 8, 0.4)]:
        contact = Contact(c[0], c[1], c[4], distance_bound=(c[2], c[3]))
        contact_map.add(contact)
    contact_map.sequence = Sequence('1', 'HLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSD')
    contact_map.assign_sequence_register()
    content = [
        "1 9 0 8 0.700000",
        "1 10 0 8 0.700000",
        "2 8 0 8 0.900000",
        "3 12 0 8 0.400000",
        "",
    ]
    content = os.linesep.join(content)
    f_name = create_tmp_f()
    # Remove the temporary file even when writing or the assertion fails.
    try:
        with open(f_name, 'w') as f_out:
            PsicovParser().write(f_out, contact_file)
        with open(f_name, 'r') as f_in:
            data = "".join(f_in.readlines())
        self.assertEqual(content, data)
    finally:
        os.unlink(f_name)
def read(self, f_handle, f_id="freecontact"):
    """Read a contact file

    Every non-blank line is split with ``RE_SPLIT`` into six fields:
    residue number and residue for both partners, the raw score, and a
    trailing field that is discarded.

    Parameters
    ----------
    f_handle
       Open file handle [read permissions]
    f_id : str, optional
       Unique contact file identifier

    Returns
    -------
    :obj:`~conkit.core.contactfile.ContactFile`

    """
    contact_file = ContactFile(f_id)
    _map = ContactMap("map_1")
    contact_file.add(_map)

    for raw in f_handle:
        record = raw.strip()
        if not record:
            continue
        fields = RE_SPLIT.split(record)
        res1_seq, res1, res2_seq, res2, raw_score, _ = fields
        entry = Contact(int(res1_seq), int(res2_seq), float(raw_score))
        entry.res1 = res1
        entry.res2 = res2
        _map.add(entry)

    contact_file.method = "Contact map predicted using FreeContact"
    return contact_file
def read(self, f_handle, f_id="epcmap"):
    """Read a contact file

    Lines whose first whitespace-separated field is all digits are
    parsed as ``res1 res2 lower upper score``; all other lines are
    skipped.

    Parameters
    ----------
    f_handle
       Open file handle [read permissions]
    f_id : str, optional
       Unique contact file identifier

    Returns
    -------
    :obj:`~conkit.core.contactfile.ContactFile`

    """
    contact_file = ContactFile(f_id)
    contact_map = ContactMap("map_1")
    contact_file.add(contact_map)

    for raw in f_handle:
        fields = raw.strip().split()
        # A digit-only first field can never be alphabetic, so this single
        # test covers both the skip and the parse branch.
        if fields and fields[0].isdigit():
            bounds = (float(fields[2]), float(fields[3]))
            contact_map.add(
                Contact(int(fields[0]), int(fields[1]), float(fields[4]), distance_bound=bounds)
            )

    contact_file.method = "Contact map predicted using EPC-Map"
    return contact_file
def test_write_1(self):
    """Write a four-contact map with ``MemBrainParser`` and compare the emitted lines."""
    contact_file = ContactFile("RR")
    contact_file.target = "R9999"
    contact_file.author = "1234-5678-9000"
    contact_file.remark = ["Predictor remarks"]
    contact_file.method = ["Description of methods used", "Description of methods used"]

    contact_map = ContactMap("1")
    contact_file.add(contact_map)
    records = [(1, 9, 0, 8, 0.7), (1, 10, 0, 8, 0.7), (2, 8, 0, 8, 0.9), (3, 12, 0, 8, 0.4)]
    for res1_seq, res2_seq, lower, upper, score in records:
        contact_map.add(Contact(res1_seq, res2_seq, score, distance_bound=(lower, upper)))
    contact_map.sequence = Sequence("1", "HLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSD")
    contact_map.set_sequence_register()

    f_name = self.tempfile()
    with open(f_name, "w") as f_out:
        MemBrainParser().write(f_out, contact_file)

    expected = [
        "Helix Position Residue Helix Position Residue Probability",
        "Hx 1 H Hx 9 L 0.700000",
        "Hx 1 H Hx 10 L 0.700000",
        "Hx 2 L Hx 8 I 0.900000",
        "Hx 3 E Hx 12 K 0.400000",
    ]
    with open(f_name, "r") as f_in:
        written = f_in.read().splitlines()
    self.assertEqual(expected, written)
def read(self, f_handle, f_id="plmdca"):
    """Read a contact file

    Lines starting with a digit are parsed as comma-separated
    ``res1,res2,score`` triples; every other line is skipped.

    Parameters
    ----------
    f_handle
       Open file handle [read permissions]
    f_id : str, optional
       Unique contact file identifier

    Returns
    -------
    :obj:`~conkit.core.contactfile.ContactFile`

    """
    hierarchy = ContactFile(f_id)
    _map = ContactMap("map_1")
    hierarchy.add(_map)

    for raw in f_handle:
        record = raw.strip()
        # A leading digit excludes a leading letter, so one test suffices.
        if record and record[0].isdigit():
            res1_seq, res2_seq, raw_score = record.split(',')
            _map.add(Contact(int(res1_seq), int(res2_seq), float(raw_score)))

    hierarchy.method = 'Contact map predicted using plmDCA'
    return hierarchy
def read(self, f_handle, f_id="ncont"):
    """Read a contact file

    Lines matching ``RE_CONTACT`` become contacts with a raw score of
    1.0 and identical lower/upper distance bounds. A residue pair that
    is already in the map triggers a warning and is skipped.

    Parameters
    ----------
    f_handle
       Open file handle [read permissions]
    f_id : str, optional
       Unique contact file identifier

    Returns
    -------
    :obj:`~conkit.core.contactfile.ContactFile`

    """
    hierarchy = ContactFile(f_id)
    _map = ContactMap("map_1")
    hierarchy.add(_map)

    for raw in f_handle:
        hit = RE_CONTACT.match(raw.strip())
        if not hit:
            continue

        res1_seq = int(hit.group(2))
        res2_seq = int(hit.group(5))
        distance = float(hit.group(7))

        if (res1_seq, res2_seq) in _map:
            msg = (
                "This parser cannot handle multiple atoms of the same residue. "
                "If your contact map contains such entries, only the first will be stored!"
            )
            warnings.warn(msg, Warning)
            continue

        entry = Contact(res1_seq, res2_seq, 1.0, distance_bound=(distance, distance))
        entry.res1_chain = hit.group(1)
        entry.res2_chain = hit.group(4)
        entry.res1 = hit.group(3)
        entry.res2 = hit.group(6)
        _map.add(entry)

    hierarchy.method = "Contact map generated using Ncont"
    return hierarchy
def test_write_1(self):
    """Write a four-contact map with ``PconsParser`` and compare the file content."""
    contact_file = ContactFile('RR')
    contact_file.target = 'R9999'
    contact_file.author = '1234-5678-9000'
    contact_file.remark = ['Predictor remarks']
    contact_file.method = [
        'Description of methods used',
        'Description of methods used',
    ]
    contact_map = ContactMap('1')
    contact_file.add(contact_map)
    for c in [(1, 9, 0, 8, 0.7), (1, 10, 0, 8, 0.7), (2, 8, 0, 8, 0.9), (3, 12, 0, 8, 0.4)]:
        contact = Contact(c[0], c[1], c[4], distance_bound=(c[2], c[3]))
        contact_map.add(contact)
    contact_map.sequence = Sequence('sequence_1', 'HLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSD')
    contact_map.assign_sequence_register()
    content = [
        "##############################################################################",
        "PconsC3 result file",
        "Generated using ConKit",
        "##############################################################################",
        "Sequence number: 1",
        "Sequence name: sequence_1",
        "Sequence length: 33 aa.",
        "Sequence:",
        "HLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSD",
        "",
        "",
        "Predicted contacts:",
        "Res1 Res2 Score",
        " 1 9 0.700000",
        " 1 10 0.700000",
        " 2 8 0.900000",
        " 3 12 0.400000",
        "",
        "##############################################################################",
        "",
    ]
    content = os.linesep.join(content)
    f_name = create_tmp_f()
    # Remove the temporary file even when writing or the assertion fails.
    try:
        with open(f_name, 'w') as f_out:
            PconsParser().write(f_out, contact_file)
        with open(f_name, 'r') as f_in:
            data = "".join(f_in.readlines())
        self.assertEqual(content, data)
    finally:
        os.unlink(f_name)
def read(self, f_handle, f_id="bbcontacts", del_one_two=False):
    """Read a contact file

    Non-blank, non-comment lines are split into eight whitespace-separated
    fields; the 4th is the raw score, the 6th the strand position marker,
    and the 7th/8th the second/first residue numbers.

    Parameters
    ----------
    f_handle
       Open file handle [read permissions]
    f_id : str, optional
       Unique contact file identifier
    del_one_two : bool
       Remove one- & two-strand sheets

    Returns
    -------
    :obj:`~conkit.core.contactfile.ContactFile`

    """
    hierarchy = ContactFile(f_id)
    _map = ContactMap("map_1")
    hierarchy.add(_map)

    previous = "first"
    for raw in f_handle:
        record = raw.strip()
        if not record or record.startswith("#"):
            continue

        _, _, _, raw_score, _, current, res2_seq, res1_seq = record.split()

        if del_one_two and previous == "first" and current == "last":
            # A "first" directly followed by "last": drop the entry stored last
            # and do not store the current one either.
            _map.child_list.pop()
        elif "NA" not in (raw_score, res2_seq, res1_seq):
            _map.add(Contact(int(res1_seq), int(res2_seq), float(raw_score)))

        previous = current

    # The file ended on a "first" marker: drop that trailing entry as well.
    if del_one_two and previous == "first" and len(_map) > 0:
        _map.child_list.pop()

    hierarchy.method = "Contact map predicted using Bbcontacts"
    return hierarchy
def test_write_1(self):
    """Write a four-contact map with ``PconsParser`` and compare the emitted lines."""
    contact_file = ContactFile("RR")
    contact_file.target = "R9999"
    contact_file.author = "1234-5678-9000"
    contact_file.remark = ["Predictor remarks"]
    contact_file.method = ["Description of methods used", "Description of methods used"]

    contact_map = ContactMap("1")
    contact_file.add(contact_map)
    records = [(1, 9, 0, 8, 0.7), (1, 10, 0, 8, 0.7), (2, 8, 0, 8, 0.9), (3, 12, 0, 8, 0.4)]
    for res1_seq, res2_seq, lower, upper, score in records:
        contact_map.add(Contact(res1_seq, res2_seq, score, distance_bound=(lower, upper)))
    contact_map.sequence = Sequence("sequence_1", "HLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSD")
    contact_map.set_sequence_register()

    f_name = self.tempfile()
    with open(f_name, "w") as f_out:
        PconsParser().write(f_out, contact_file)

    rule = "##############################################################################"
    expected = [
        rule,
        "PconsC3 result file",
        "Generated using ConKit",
        rule,
        "Sequence number: 1",
        "Sequence name: sequence_1",
        "Sequence length: 33 aa.",
        "Sequence:",
        "HLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSD",
        "",
        "",
        "Predicted contacts:",
        "Res1 Res2 Score",
        " 1 9 0.700000",
        " 1 10 0.700000",
        " 2 8 0.900000",
        " 3 12 0.400000",
        "",
        rule,
    ]
    with open(f_name, "r") as f_in:
        written = f_in.read().splitlines()
    self.assertEqual(expected, written)
def read(self, f_handle, f_id="membrain"):
    """Read a contact file

    Blank lines and lines matching ``RE_HEADER`` are skipped. Every
    other line is split with ``RE_SPLIT`` into seven fields, of which
    the residue numbers, residues and the score are used.

    Parameters
    ----------
    f_handle
       Open file handle [read permissions]
    f_id : str, optional
       Unique contact file identifier

    Returns
    -------
    :obj:`~conkit.core.contactfile.ContactFile`

    """
    contact_file = ContactFile(f_id)
    _map = ContactMap("map_1")
    contact_file.add(_map)

    for raw in f_handle:
        record = raw.rstrip()
        if not record or RE_HEADER.match(record):
            continue
        fields = RE_SPLIT.split(record)
        _, res1_seq, res1, _, res2_seq, res2, raw_score = fields
        entry = Contact(int(res1_seq), int(res2_seq), float(raw_score))
        entry.res1 = res1
        entry.res2 = res2
        _map.add(entry)

    contact_file.method = 'Contact map predicted using MemBrain'
    return contact_file
def test_write_1(self):
    """Write a four-contact map with ``ComsatParser`` and compare the emitted lines."""
    contact_file = ContactFile("RR")
    contact_file.target = "R9999"
    contact_file.author = "1234-5678-9000"
    contact_file.remark = ["Predictor remarks"]
    contact_file.method = ["Description of methods used", "Description of methods used"]

    contact_map = ContactMap("1")
    contact_file.add(contact_map)
    records = [(1, 9, 0, 8, 0.7), (1, 10, 0, 8, 0.7), (2, 8, 0, 8, 0.9), (3, 12, 0, 8, 0.4)]
    for res1_seq, res2_seq, lower, upper, score in records:
        contact_map.add(Contact(res1_seq, res2_seq, score, distance_bound=(lower, upper)))
    contact_map.sequence = Sequence("1", "HLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSD")
    contact_map.set_sequence_register()

    f_name = self.tempfile()
    with open(f_name, "w") as f_out:
        ComsatParser().write(f_out, contact_file)

    expected = [
        "1 H 9 L Hx-Hx",
        "1 H 10 L Hx-Hx",
        "2 L 8 I Hx-Hx",
        "3 E 12 K Hx-Hx",
    ]
    with open(f_name, "r") as f_in:
        written = f_in.read().splitlines()
    self.assertEqual(expected, written)
def test_write_1(self):
    """Write a four-contact map with ``CaspParser`` and compare the emitted lines."""
    contact_file = ContactFile('RR')
    contact_file.target = 'R9999'
    contact_file.author = '1234-5678-9000'
    contact_file.remark = ['Predictor remarks']
    contact_file.method = [
        'Description of methods used',
        'Description of methods used',
    ]
    contact_map = ContactMap('1')
    contact_file.add(contact_map)
    for c in [(1, 9, 0, 8, 0.7), (1, 10, 0, 8, 0.7), (2, 8, 0, 8, 0.9), (3, 12, 0, 8, 0.4)]:
        contact = Contact(c[0], c[1], c[4], distance_bound=(c[2], c[3]))
        contact_map.add(contact)
    contact_map.sequence = Sequence('1', 'HLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSD')
    contact_map.set_sequence_register()
    content = [
        "PFRMAT RR",
        "TARGET R9999",
        "AUTHOR 1234-5678-9000",
        "REMARK Predictor remarks",
        "METHOD Description of methods used",
        "METHOD Description of methods used",
        "MODEL 1",
        "HLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSD",
        "1 9 0 8 0.700000",
        "1 10 0 8 0.700000",
        "2 8 0 8 0.900000",
        "3 12 0 8 0.400000",
        "ENDMDL",
        "END",
    ]
    f_name = create_tmp_f()
    # Remove the temporary file even when writing or the assertion fails.
    try:
        with open(f_name, 'w') as f_out:
            CaspParser().write(f_out, contact_file)
        with open(f_name, 'r') as f_in:
            output = f_in.read().splitlines()
        self.assertEqual(content, output)
    finally:
        os.unlink(f_name)
def read(self, f_handle, f_id="comsat"):
    """Read a contact file

    Every non-blank line is split with ``RE_SPLIT`` into five fields:
    residue number and residue for both partners plus a trailing field
    that is discarded. Contacts are stored with a raw score of 0.0.

    Parameters
    ----------
    f_handle
       Open file handle [read permissions]
    f_id : str, optional
       Unique contact file identifier

    Returns
    -------
    :obj:`~conkit.core.contactfile.ContactFile`

    """
    hierarchy = ContactFile(f_id)
    _map = ContactMap("map_1")
    hierarchy.add(_map)

    for raw in f_handle:
        record = raw.rstrip()
        if record:
            res1_seq, res1, res2_seq, res2, _ = RE_SPLIT.split(record)
            entry = Contact(int(res1_seq), int(res2_seq), 0.0)
            entry.res1 = res1
            entry.res2 = res2
            _map.add(entry)

    hierarchy.method = "Contact map predicted using COMSAT"
    return hierarchy
def test_method_5(self):
    """Check that ``method`` still holds its single entry after a map is added."""
    hierarchy = ContactFile("test")
    hierarchy.method = "hello"
    hierarchy.add(ContactMap("foo"))
    expected = ["hello"]
    self.assertEqual(expected, hierarchy.method)
def read(self, f_handle, f_id="pcons"):
    """Read a contact file

    The file is scanned line by line. A line matching ``RE_GENERATED``
    is stored as a remark, ``RE_SEQUENCE_NAME`` provides the sequence
    identifier, the lines following ``RE_SEQUENCE`` are concatenated
    into the sequence, and lines matching ``RE_CONTACT`` are parsed as
    ``res1 res2 score``.

    Parameters
    ----------
    f_handle
       Open file handle [read permissions]
    f_id : str, optional
       Unique contact file identifier

    Returns
    -------
    :obj:`ContactFile <conkit.core.contactfile.ContactFile>`

    """
    contact_file = ContactFile(f_id)
    contact_map = ContactMap("1")
    contact_file.add(contact_map)

    # Blank lines are dropped up front; ``done`` marks iterator exhaustion.
    lines = iter([raw.rstrip() for raw in f_handle if raw.rstrip()])
    done = object()
    line = next(lines, done)

    seq_chunks = []
    seq_id = 'seq_1'

    while line is not done:
        if not line:
            pass

        elif RE_GENERATED.match(line):
            contact_file.remark = line

        elif RE_SEQUENCE_NAME.match(line):
            seq_id = RE_SEQUENCE_NAME.match(line).group(1)

        elif RE_SEQUENCE.match(line):
            # Consume sequence lines until the contact section (or EOF) starts.
            line = next(lines, done)
            while line is not done:
                if not line:
                    break
                elif RE_CONTACT_HEADER.match(line):
                    break
                elif RE_PRED_CONTACTS.match(line):
                    break
                elif RE_CONTACT.match(line):
                    break
                else:
                    seq_chunks.append(line)
                line = next(lines, done)

        # The sequence block may have run to the end of the file, in which
        # case ``line`` is the sentinel and must not be handed to the regex.
        if line is not done and RE_CONTACT.match(line):
            res1_seq, res2_seq, raw_score = line.split()
            contact = Contact(int(res1_seq), int(res2_seq), float(raw_score))
            contact_map.add(contact)

        line = next(lines, done)

    seq = ''.join(seq_chunks)
    if seq:
        contact_map.sequence = Sequence(seq_id, seq)

    contact_file.method = 'Contact map predicted using Pcons'
    return contact_file
def read(self, f_handle, f_id="casp"):
    """Read a contact file into a :obj:`conkit.core.contactfile.ContactFile` instance

    Header records (TARGET, AUTHOR, REMARK, METHOD) are copied onto the
    contact file. Each MODEL record opens a new contact map that is
    filled until a blank line, ENDMDL or END is met; sequence lines
    inside the model are joined into the map's sequence.

    Parameters
    ----------
    f_handle
       Open file handle [read permissions]
    f_id : str, optional
       Unique contact file identifier

    Returns
    -------
    :obj:`ContactFile <conkit.core.contactfile.ContactFile>`

    Raises
    ------
    ValueError
       A top-level line matches none of the known record types
    ValueError
       A residue entry splits into an unexpected number of parts

    """

    def _split_residue(entry):
        # Split in case we have chain in inter-molecular scenarios
        parts = RE_RES.split(entry)
        if len(parts) == 1:
            return '', parts[0]
        if len(parts) == 4:
            return parts[1], parts[2]
        # Fail loudly instead of reusing values from a previous line.
        raise ValueError('Unrecognized residue entry: {0}'.format(entry))

    lines = [raw.strip() for raw in f_handle.readlines()]
    contact_file = ContactFile(f_id)

    # One shared iterator: the MODEL branch consumes its own lines from it.
    it = iter(lines)
    for line in it:

        if RE_PRFMAT.match(line):
            continue

        elif RE_TARGET.match(line):
            # The TARGET record belongs in ``target``, not in the remarks.
            contact_file.target = RE_TARGET.match(line).group(1)

        elif RE_AUTHOR.match(line):
            contact_file.author = RE_AUTHOR.match(line).group(1)

        elif RE_REMARK.match(line):
            contact_file.remark = RE_REMARK.match(line).group(1)

        elif RE_METHOD.match(line):
            contact_file.method = RE_METHOD.match(line).group(1)

        elif RE_MODEL.match(line):
            contact_map = ContactMap(RE_MODEL.match(line).group(1))
            seq_chunks = []

            for line in it:
                if not line:
                    break
                if RE_ENDMDL.match(line) or RE_END.match(line):
                    break
                if RE_SEQ.match(line):
                    seq_chunks.append(line)
                    continue

                res1_entry, res2_entry, lb, ub, raw_score = RE_SPLIT.split(line)
                res1_chain, res1_seq = _split_residue(res1_entry)
                res2_chain, res2_seq = _split_residue(res2_entry)

                contact = Contact(
                    int(res1_seq),
                    int(res2_seq),
                    float(raw_score),
                    distance_bound=(float(lb), float(ub)),
                )
                contact.res1_chain = res1_chain
                contact.res2_chain = res2_chain
                contact.res1_altseq = int(res1_seq)
                contact.res2_altseq = int(res2_seq)
                contact_map.add(contact)

            if seq_chunks:
                seq = "".join(seq_chunks)
                sequence = Sequence('seq_{0}'.format(contact_map.id), seq)
                contact_map.sequence = sequence
                contact_map.assign_sequence_register()
            contact_file.add(contact_map)

        elif RE_END.match(line):
            break

        else:
            raise ValueError('Unrecognized line type. Please report this issue')

    return contact_file
def test_method_1(self):
    """Check that a single ``method`` assignment is exposed as a one-element list."""
    hierarchy = ContactFile("test")
    hierarchy.method = "Hello"
    expected = ["Hello"]
    self.assertEqual(expected, hierarchy.method)
def _read(self, structure, f_id, distance_cutoff, atom_type):
    """Read a contact file

    One contact file is built per model, with one contact map per
    ordered pair of chains (a chain paired with itself gives the
    intra-chain map). Only the contact file of the first model is
    returned.

    Parameters
    ----------
    structure
       A :obj:`Structure <Bio.PDB.Structure.Structure>` instance
    f_id : str
       Unique contact file identifier
    distance_cutoff : int
       Distance cutoff for which to determine contacts
    atom_type : str
       Atom type between which distances are calculated

    Returns
    -------
    :obj:`ContactFile <conkit.core.contactfile.ContactFile>`

    """
    contact_files = []
    for model in structure:
        contact_file = ContactFile(f_id + '_' + str(model.id))
        chains = list(model)

        for chain in chains:
            self._remove_hetatm(chain)
            self._remove_atom(chain, atom_type)

        for chain1, chain2 in itertools.product(chains, chains):
            if chain1.id == chain2.id:
                map_id = chain1.id  # intra
            else:
                map_id = chain1.id + chain2.id  # inter
            contact_map = ContactMap(map_id)

            for atom1, atom2, distance in self._chain_contacts(chain1, chain2):
                contact = Contact(
                    atom1.resseq,
                    atom2.resseq,
                    round(1.0 - (distance / 100), 6),
                    distance_bound=(0., float(distance_cutoff)),
                )
                contact.res1_altseq = atom1.resseq_alt
                contact.res2_altseq = atom2.resseq_alt
                contact.res1 = atom1.resname
                contact.res2 = atom2.resname
                contact.res1_chain = atom1.reschain
                contact.res2_chain = atom2.reschain

                # A cutoff of 0 keeps every pair; otherwise only closer ones.
                if distance_cutoff == 0 or distance < distance_cutoff:
                    contact.define_match()
                    contact_map.add(contact)

            # Chain pairs without any contact are not stored.
            if contact_map.empty:
                continue

            if len(contact_map.id) == 1:
                contact_map.sequence = self._build_sequence(chain1)
                assert len(contact_map.sequence.seq) == len(chain1)
            else:
                first_seq = self._build_sequence(chain1)
                contact_map.sequence = first_seq + self._build_sequence(chain2)
                assert len(contact_map.sequence.seq) == len(chain1) + len(chain2)
            contact_file.add(contact_map)

        contact_file.method = 'Contact map extracted from PDB ' + str(model.id)
        contact_file.remark = [
            'The model id is the chain identifier, i.e XY equates to chain X and chain Y.',
            'Residue numbers in column 1 are chain X, and numbers in column 2 are chain Y.'
        ]
        contact_files.append(contact_file)

    if len(contact_files) > 1:
        msg = "Super-level to contact file not yet implemented. " \
              "Parser returns hierarchy for top model only!"
        warnings.warn(msg, FutureWarning)
    return contact_files[0]