def test_get_probability_within_distance_2(self):
    # A single-bin distance attached to a 'pdb' distogram is expected to
    # report probability 1 for a cutoff of 8 and 0 for a cutoff of 5.
    pdb_distogram = Distogram('test')
    pdb_distogram.original_file_format = 'pdb'
    observed = 6.589181
    pdb_distance = Distance(36, 86, (1, ), ((observed, observed), ), 0.934108)
    pdb_distogram.add(pdb_distance)
    for expected, cutoff in ((1, 8), (0, 5)):
        self.assertEqual(expected, pdb_distance.get_probability_within_distance(cutoff))
def test_reshape_bins_2(self):
    # Reshaping the bins of a distance that belongs to a 'pdb' distogram
    # is expected to raise ValueError.
    pdb_distogram = Distogram('test')
    pdb_distogram.original_file_format = 'pdb'
    observed = 6.589181
    pdb_distance = Distance(36, 86, (1, ), ((observed, observed), ), 0.934108)
    pdb_distogram.add(pdb_distance)
    requested_bins = ((0, 1), (1, 10), (10, np.inf))
    with self.assertRaises(ValueError):
        pdb_distance.reshape_bins(requested_bins)
def dist_bins(self, dist_bins):
    """Store the distance bins, falling back to the default binning for ``None``

    Parameters
    ----------
    dist_bins : tuple or None
       A tuple of ``(lower, upper)`` tuples. When ``None``, ten default bins
       are stored: ``(0, 4)``, then 2-wide bins up to 20, then ``(20, inf)``.
       Any other value is passed to ``Distance._assert_valid_bins`` before
       being stored.

    """
    if dist_bins is not None:
        Distance._assert_valid_bins(dist_bins)
        self._dist_bins = dist_bins
        return
    # Consecutive edges of the default binning; neighbouring edges are paired up.
    default_edges = (0, 4, 6, 8, 10, 12, 14, 16, 18, 20, np.inf)
    self._dist_bins = tuple(zip(default_edges[:-1], default_edges[1:]))
def test_get_unique_distances_1(self):
    # Five distances are added, two of them being reversed-order repeats of
    # an earlier residue pair; after the in-place call three pairs remain.
    bins = ((0, 4), (4, 6), (6, 8), (8, np.inf))
    entries = (
        (1, 25, (0.25, 0.45, 0.25, 0.05)),
        (25, 1, (0.25, 0.45, 0.25, 0.05)),
        (7, 19, (0.15, 0.15, 0.60, 0.1)),
        (19, 7, (0.15, 0.15, 0.60, 0.1)),
        (1, 7, (0.1, 0.2, 0.55, 0.15)),
    )
    distogram = Distogram("test")
    for res1, res2, scores in entries:
        distogram.add(Distance(res1, res2, scores, bins))
    distogram.get_unique_distances(inplace=True)
    remaining_pairs = distogram.as_list()
    self.assertListEqual([[25, 1], [19, 7], [1, 7]], remaining_pairs)
def test_reshape_bins_1(self):
    # Reshape a five-bin distance into three bins and check the raw score,
    # the probability within 8, the stored bins and the stored scores.
    original_bins = ((0, 4), (4, 6), (6, 8), (8, 10), (10, np.inf))
    original_scores = (0.15, 0.45, 0.25, 0.05, 0.1)
    distance = Distance(1, 25, original_scores, original_bins)
    new_bins = ((0, 2), (2, 8), (8, np.inf))
    distance.reshape_bins(new_bins)
    self.assertEqual(distance.raw_score, 0.85)
    rounded_probability = round(distance.get_probability_within_distance(8), 2)
    self.assertEqual(rounded_probability, 0.85)
    self.assertTupleEqual(new_bins, distance.distance_bins)
    expected_scores = (0.075, 0.775, 0.15000000000000002)
    self.assertTupleEqual(expected_scores, distance.distance_scores)
def test_find_residues_within_1(self):
    # Query find_residues_within(3, 7) on a four-distance distogram with a
    # five-residue sequence and compare against the expected residue set.
    bins = ((0, 4), (4, 6), (6, 8), (8, 10), (10, np.inf))
    entries = (
        (1, 5, (0.25, 0.45, 0.05, 0.05, 0.2)),
        (2, 3, (0.15, 0.15, 0.60, 0.1, 0.0)),
        (1, 4, (0.05, 0.2, 0.0, 0.6, 0.15)),
        (3, 5, (0.4, 0.1, 0.35, 0.05, 0.1)),
    )
    distogram_1 = Distogram("test_1")
    for res1, res2, scores in entries:
        distogram_1.add(Distance(res1, res2, scores, bins))
    distogram_1.sequence = conkit.core.Sequence("test_seq", "AAAAA")
    found = distogram_1.find_residues_within(3, 7)
    self.assertSetEqual({2, 3, 5}, found)
def test_as_contactmap_1(self):
    # Convert a four-distance distogram into a contact map and check the
    # residue numbers and raw scores of the resulting contacts.
    bins = ((0, 4), (4, 6), (6, 8), (8, 10), (10, np.inf))
    entries = (
        (1, 5, (0.25, 0.45, 0.05, 0.05, 0.2)),
        (2, 3, (0.15, 0.15, 0.60, 0.1, 0.0)),
        (1, 4, (0.05, 0.2, 0.0, 0.6, 0.15)),
        (3, 5, (0.4, 0.1, 0.35, 0.05, 0.1)),
    )
    distogram = Distogram("test")
    for res1, res2, scores in entries:
        distogram.add(Distance(res1, res2, scores, bins))
    contactmap = distogram.as_contactmap()

    observed_res1 = [entry.res1_seq for entry in contactmap]
    self.assertListEqual(observed_res1, [1, 2, 3])
    observed_res2 = [entry.res2_seq for entry in contactmap]
    self.assertListEqual(observed_res2, [5, 3, 5])
    observed_raw_score = [entry.raw_score for entry in contactmap]
    self.assertListEqual(observed_raw_score, [0.75, 0.8999999999999999, 0.85])
def test_merge_arrays_1(self):
    # Merge the arrays of two distograms with different binning; infinities
    # are replaced by 99999 and NaNs by 0 before comparing nested lists.
    five_bins = ((0, 4), (4, 6), (6, 8), (8, 10), (10, np.inf))
    first_entries = (
        (1, 5, (0.25, 0.45, 0.05, 0.05, 0.2)),
        (2, 3, (0.15, 0.15, 0.60, 0.1, 0.0)),
        (1, 4, (0.05, 0.2, 0.0, 0.6, 0.15)),
        (3, 5, (0.4, 0.1, 0.35, 0.05, 0.1)),
    )
    distogram_1 = Distogram("test_1")
    for res1, res2, scores in first_entries:
        distogram_1.add(Distance(res1, res2, scores, five_bins))
    distogram_1.sequence = conkit.core.Sequence("test_seq", "AAAAA")

    four_bins = ((0, 4), (4, 6), (6, 8), (8, np.inf))
    second_entries = (
        (1, 5, (0.45, 0.05, 0.25, 0.25)),
        (2, 3, (0.1, 0.15, 0.15, 0.6)),
        (1, 4, (0.75, 0.20, 0.05, 0.0)),
        (3, 5, (0.05, 0.1, 0.35, 0.5)),
    )
    distogram_2 = Distogram("test_2")
    for res1, res2, scores in second_entries:
        distogram_2.add(Distance(res1, res2, scores, four_bins))
    distogram_2.sequence = conkit.core.Sequence("test_seq", "AAAAA")

    merged = Distogram.merge_arrays(distogram_1, distogram_2)
    merged[np.isinf(merged)] = 99999
    merged = np.nan_to_num(merged).tolist()
    expected = [
        [0.0, 0.0, 0.0, 9.0, 5.0],
        [0.0, 0.0, 7.0, 0.0, 0.0],
        [0.0, 99999, 0.0, 0.0, 2.0],
        [2.0, 0.0, 0.0, 0.0, 0.0],
        [2.0, 0.0, 99999, 0.0, 0.0],
    ]
    self.assertListEqual(merged, expected)
def read(self, f_handle, f_id="rosettanpz"):
    """Parse a distance prediction stored under the ``dist`` key of a numpy archive

    Every residue pair ``(i, j)`` with ``j >= i`` becomes one
    :obj:`~conkit.core.distance.Distance` (1-based residue numbers) inside a
    single distogram called ``distogram_1``.

    Parameters
    ----------
    f_handle
       Open file handle [read permissions]
    f_id : str, optional
       Unique contact file identifier

    Returns
    -------
    :obj:`~conkit.core.distancefile.DistanceFile`

    """
    distance_file = DistanceFile(f_id)
    distance_file.original_file_format = "ROSETTA_NPZ"
    distogram = Distogram("distogram_1")
    distance_file.add(distogram)

    # NOTE(review): allow_pickle=True lets numpy unpickle object arrays; only
    # use this reader on prediction files from a trusted source.
    archive = np.load(f_handle, allow_pickle=True)
    # Bin #0 corresponds with d>20A & bins #1 ~ #36 correspond with 2A<d<20A
    # in increments of 0.5A, so bin #0 is moved to the end of the last axis.
    bin_order = list(range(1, 37)) + [0]
    probabilities = archive['dist'][:, :, bin_order]

    n_residues = probabilities.shape[0]
    for row in range(n_residues):
        for col in range(row, n_residues):
            scores = tuple(probabilities[row, col, :].tolist())
            distogram.add(Distance(row + 1, col + 1, scores, DISTANCE_BINS))

    return distance_file
def test_reshape_bins_1(self):
    # Reshape every distance of a four-distance distogram into three bins and
    # check predicted distances, bins, probabilities within 8 and scores.
    old_bins = ((0, 4), (4, 6), (6, 8), (8, np.inf))
    entries = (
        (1, 5, (0.25, 0.45, 0.25, 0.05)),
        (2, 3, (0.15, 0.15, 0.60, 0.1)),
        (1, 4, (0.05, 0.25, 0.70, 0.0)),
        (3, 5, (0.5, 0.1, 0.35, 0.05)),
    )
    distogram = Distogram("test")
    for res1, res2, scores in entries:
        distogram.add(Distance(res1, res2, scores, old_bins))

    new_bins = ((0, 2), (2, 8), (8, np.inf))
    distogram.reshape_bins(new_bins)

    expected_raw_scores = [0.95, 0.8999999999999999, 1.0, 0.95]
    expected_distance_scores = [
        (0.125, 0.825, 0.050000000000000044),
        (0.075, 0.825, 0.09999999999999998),
        (0.025, 0.975, 0.0),
        (0.25, 0.7, 0.050000000000000044),
    ]
    expected_predicted_distances = [5.0, 5.0, 5.0, 5.0]

    predicted = [entry.predicted_distance for entry in distogram]
    self.assertListEqual(expected_predicted_distances, predicted)
    stored_bins = [entry.distance_bins for entry in distogram]
    self.assertListEqual(stored_bins, [new_bins for _ in distogram])
    within_8 = [entry.get_probability_within_distance(8) for entry in distogram]
    self.assertListEqual(within_8, expected_raw_scores)
    stored_scores = [entry.distance_scores for entry in distogram]
    self.assertListEqual(stored_scores, expected_distance_scores)
def read(self, f_handle, f_id="casp2"):
    """Parse a whitespace-delimited distance prediction file

    Only lines made of exactly 13 fields whose first two fields are digits
    are treated as records: residue 1, residue 2, raw score and ten
    per-bin scores. Every other line is skipped.

    Parameters
    ----------
    f_handle
       Open file handle [read permissions]
    f_id : str, optional
       Unique contact file identifier

    Returns
    -------
    :obj:`~conkit.core.distancefile.DistanceFile`

    """
    distance_file = DistanceFile(f_id)
    distance_file.original_file_format = "CASPRR_MODE_2"
    distogram = Distogram("distogram_1")
    distance_file.add(distogram)

    for raw_line in f_handle.readlines():
        fields = raw_line.strip().split()
        is_record = len(fields) == 13 and fields[0].isdigit() and fields[1].isdigit()
        if not is_record:
            continue

        first_residue = int(fields[0])
        second_residue = int(fields[1])
        raw = float(fields[2])
        bin_scores = tuple(map(float, fields[3:]))
        distogram.add(Distance(first_residue, second_residue, bin_scores, DISTANCE_BINS, raw_score=raw))

    return distance_file
def test_as_array_1(self):
    # Export a four-distance distogram as a 5x5 array and compare it, with
    # NaNs replaced by zeros, against the expected nested list.
    bins = ((0, 4), (4, 6), (6, 8), (8, np.inf))
    entries = (
        (1, 5, (0.25, 0.45, 0.25, 0.05)),
        (2, 3, (0.15, 0.15, 0.60, 0.1)),
        (1, 4, (0.05, 0.25, 0.70, 0.0)),
        (3, 5, (0.5, 0.1, 0.35, 0.05)),
    )
    distogram = Distogram("test")
    for res1, res2, scores in entries:
        distogram.add(Distance(res1, res2, scores, bins))

    raw_array = distogram.as_array(seq_len=5)
    as_lists = np.nan_to_num(raw_array).tolist()
    expected = [
        [0., 0., 0., 7.0, 5.0],
        [0., 0., 7.0, 0., 0.],
        [0., 7.0, 0., 0., 2.0],
        [7.0, 0., 0., 0., 0.],
        [5.0, 0., 2.0, 0., 0.],
    ]
    self.assertListEqual(as_lists, expected)
def test_write_1(self):
    # Writes a distogram with CaspMode2Parser().write() to a temporary file and
    # compares the lines read back against ``expected_output``.
    #
    # The distogram holds 15 residue pairs, each scored over 64 bins built from
    # the 63 values in ``bin_edges`` (one leading (0, first) bin, one trailing
    # (last, inf) bin). Scores are drawn from a Dirichlet distribution after
    # seeding numpy's global RNG with 41, so the expected text depends on that
    # seed and on the order of the draws.
    #
    # NOTE(review): the expected text is compared via split('\n'), but the
    # literal below shows its records separated by spaces rather than line
    # breaks - confirm the literal's line breaks against the writer's output.
    expected_output = """PFRMAT RR RMODE 2 1 6 0.199696 0.043889 0.085795 0.070011 0.071518 0.054028 0.213284 0.069087 0.097959 0.090083 0.204345 1 7 0.233644 0.049411 0.075135 0.109098 0.150810 0.096584 0.092398 0.096662 0.093350 0.123176 0.113375 1 8 0.246451 0.106886 0.039024 0.100540 0.082028 0.108344 0.078788 0.105980 0.130109 0.113708 0.134592 1 9 0.267139 0.072002 0.083053 0.112084 0.124356 0.128044 0.097491 0.132106 0.047198 0.110915 0.092751 1 10 0.351914 0.081445 0.069721 0.200748 0.099755 0.090368 0.117449 0.127677 0.050879 0.101965 0.059993 2 7 0.228459 0.085973 0.091366 0.051120 0.085890 0.070657 0.119253 0.082744 0.180051 0.097734 0.135213 2 8 0.256177 0.081094 0.077748 0.097335 0.060811 0.138077 0.130496 0.106911 0.101101 0.121346 0.085081 2 9 0.216631 0.046454 0.053018 0.117160 0.196036 0.144154 0.125199 0.090720 0.052621 0.098583 0.076055 2 10 0.284653 0.087567 0.125308 0.071778 0.071988 0.095966 0.099270 0.174715 0.109563 0.062611 0.101233 3 8 0.345583 0.117500 0.110134 0.117950 0.085312 0.098812 0.072826 0.079326 0.196758 0.059058 0.062325 3 9 0.203586 0.036574 0.050725 0.116287 0.174339 0.070881 0.116388 0.083683 0.060738 0.160257 0.130128 3 10 0.293849 0.059364 0.135117 0.099368 0.113124 0.135930 0.066876 0.075962 0.114771 0.127034 0.072454 4 9 0.234649 0.077170 0.048841 0.108638 0.107559 0.119732 0.116349 0.077063 0.111788 0.119497 0.113362 4 10 0.322930 0.090789 0.133412 0.098729 0.099123 0.084633 0.107534 0.137072 0.096560 0.042234 0.109913 5 10 0.279782 0.054314 0.114427 0.111042 0.069073 0.083048 0.105829 0.073806 0.119769 0.088666 0.180028"""
    distancefile = DistanceFile("test")
    distancefile.original_file_format = 'ALPHAFOLD2'
    distogram = Distogram("1")
    distancefile.add(distogram)
    list_res1 = [1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5]
    list_res2 = [6, 7, 8, 9, 10, 7, 8, 9, 10, 8, 9, 10, 9, 10, 10]
    bin_edges = (2.3125, 2.625, 2.9375, 3.25, 3.5625, 3.875, 4.1875, 4.5, 4.8125, 5.125, 5.4375, 5.75, 6.0625, 6.375, 6.6875,
                 6.9999995, 7.3125, 7.625, 7.9375, 8.25, 8.5625, 8.875, 9.1875, 9.5, 9.812499, 10.124999, 10.4375, 10.75, 11.0625, 11.375, 11.687499, 12., 12.3125, 12.625, 12.9375, 13.25, 13.5625, 13.874999, 14.187501, 14.499999, 14.812499, 15.124999, 15.437499, 15.75, 16.0625, 16.375, 16.687502, 16.999998, 17.312498, 17.624998, 17.937498, 18.25, 18.5625, 18.875, 19.1875, 19.5, 19.8125, 20.125, 20.437498, 20.75, 21.062498, 21.374998, 21.6875)
    # Turn the edges into (lower, upper) bins, open-ended on the right.
    distance_bins = [(0, bin_edges[0])]
    distance_bins += [(bin_edges[idx], bin_edges[idx + 1]) for idx in range(len(bin_edges) - 1)]
    distance_bins.append((bin_edges[-1], np.inf))
    distance_bins = tuple(distance_bins)
    # Fixed seed keeps the random scores, and hence the written file, reproducible.
    np.random.seed(41)
    for res_1, res_2 in zip(list_res1, list_res2):
        distance_scores = np.random.dirichlet(np.ones(64)).tolist()
        distance = Distance(res_1, res_2, distance_scores, distance_bins)
        distogram.add(distance)
    f_name = self.tempfile()
    with open(f_name, "w") as f_out:
        CaspMode2Parser().write(f_out, distogram)
    with open(f_name, "r") as f_in:
        output = f_in.read().splitlines()
    self.assertListEqual(expected_output.split('\n'), output)
def test_get_probability_within_distance_1(self):
    # Check the cumulative probability of a five-bin distance at several
    # cutoffs, and that a negative cutoff raises ValueError.
    bins = ((0, 4), (4, 6), (6, 8), (8, 10), (10, np.inf))
    distance = Distance(1, 25, (0.15, 0.45, 0.25, 0.05, 0.1), bins)
    self.assertEqual(distance.raw_score, 0.85)
    cutoff_expectations = (
        (5, 0.375),
        (8, 0.85),
        (10, 0.9),
        (25, 0.999999969409768),
        (np.inf, 1),
        (0, 0),
    )
    for cutoff, expected in cutoff_expectations:
        self.assertEqual(distance.get_probability_within_distance(cutoff), expected)
    with self.assertRaises(ValueError):
        distance.get_probability_within_distance(-5)
def test_calculate_rmsd_3(self):
    # The second distogram carries a distance to residue 6 while seq_len=5 is
    # requested; calculate_rmsd is expected to raise ValueError.
    five_bins = ((0, 4), (4, 6), (6, 8), (8, 10), (10, np.inf))
    first_entries = (
        (1, 5, (0.25, 0.45, 0.05, 0.05, 0.2)),
        (2, 3, (0.15, 0.15, 0.60, 0.1, 0.0)),
        (1, 4, (0.05, 0.2, 0.0, 0.6, 0.15)),
        (3, 5, (0.4, 0.1, 0.35, 0.05, 0.1)),
    )
    distogram_1 = Distogram("test_1")
    for res1, res2, scores in first_entries:
        distogram_1.add(Distance(res1, res2, scores, five_bins))
    distogram_1.sequence = conkit.core.Sequence("test_seq", "AAAAA")

    four_bins = ((0, 4), (4, 6), (6, 8), (8, np.inf))
    second_entries = (
        (1, 5, (0.45, 0.05, 0.25, 0.25)),
        (2, 3, (0.1, 0.15, 0.15, 0.6)),
        (1, 4, (0.75, 0.20, 0.05, 0.0)),
        (3, 5, (0.05, 0.1, 0.35, 0.5)),
        (1, 6, (0.5, 0.1, 0.35, 0.05)),
    )
    distogram_2 = Distogram("test_2")
    for res1, res2, scores in second_entries:
        distogram_2.add(Distance(res1, res2, scores, four_bins))
    distogram_2.sequence = conkit.core.Sequence("test_seq", "AAAAA")

    with self.assertRaises(ValueError):
        Distogram.calculate_rmsd(distogram_1, distogram_2, seq_len=5, calculate_wrmsd=True)
def reshape_bins(self, new_bins):
    """Re-bin every :obj:`~conkit.core.distance.Distance` held by this instance.

    After validation of ``new_bins``, each contained distance is asked to
    reshape itself, which updates its
    :attr:`~conkit.core.distance.Distance.distance_scores` and
    :attr:`~conkit.core.distance.Distance.distance_bins` to fit the new bins.

    Parameters
    ----------
    new_bins : tuple
       A tuple of tuples, where each element corresponds with the upper and lower edges of the intervals for
       the new distance bins

    Raises
    ------
    :exc:`ValueError`
       The instance was obtained from a PDB structure file
    :exc:`ValueError`
       The new distance bins are not valid

    """
    obtained_from_structure = self.original_file_format == 'pdb'
    if obtained_from_structure:
        raise ValueError('Cannot re-shape bins obtained from a PDB structure file')

    # Validate once up front so that no distance is modified when the bins are bad.
    Distance._assert_valid_bins(new_bins)
    for member in self:
        member._reshape_bins(new_bins)
def test_calculate_rmsd_2(self):
    # Compute the per-residue values returned by calculate_rmsd with
    # calculate_wrmsd=True for two distograms and compare them, rounded to
    # three decimals, against the expected list.
    five_bins = ((0, 4), (4, 6), (6, 8), (8, 10), (10, np.inf))
    first_entries = (
        (1, 5, (0.25, 0.45, 0.05, 0.05, 0.2)),
        (2, 3, (0.15, 0.15, 0.60, 0.1, 0.0)),
        (1, 4, (0.05, 0.2, 0.0, 0.6, 0.15)),
        (3, 5, (0.4, 0.1, 0.35, 0.05, 0.1)),
    )
    distogram_1 = Distogram("test_1")
    for res1, res2, scores in first_entries:
        distogram_1.add(Distance(res1, res2, scores, five_bins))
    distogram_1.sequence = conkit.core.Sequence("test_seq", "AAAAA")

    four_bins = ((0, 4), (4, 6), (6, 8), (8, np.inf))
    second_entries = (
        (1, 5, (0.45, 0.05, 0.25, 0.25)),
        (2, 3, (0.1, 0.15, 0.15, 0.6)),
        (1, 4, (0.75, 0.20, 0.05, 0.0)),
        (3, 5, (0.05, 0.1, 0.35, 0.5)),
    )
    distogram_2 = Distogram("test_2")
    for res1, res2, scores in second_entries:
        distogram_2.add(Distance(res1, res2, scores, four_bins))
    distogram_2.sequence = conkit.core.Sequence("test_seq", "AAAAA")

    rmsd_values = Distogram.calculate_rmsd(distogram_1, distogram_2, seq_len=5, calculate_wrmsd=True)
    expected = [4.09, 2.324, 3.937, 5.422, 3.85]
    rounded_values = [round(value, 3) for value in rmsd_values]
    self.assertListEqual(expected, rounded_values)
def test_write_1(self):
    # Writes a distogram with MapPredParser().write() to a temporary file and
    # compares the lines read back against ``expected_output``.
    #
    # The distogram holds six residue pairs, each scored over the 34 bins in
    # ``distance_bins``. Scores are drawn from a Dirichlet distribution after
    # seeding numpy's global RNG with 41, so the expected text depends on that
    # seed and on the order of the draws.
    #
    # NOTE(review): the expected text is compared via split("\n"), but the
    # literal below shows its header lines and records separated by spaces
    # rather than line breaks (apart from one break inside the last record) -
    # confirm the literal's line breaks against the writer's output.
    expected_output = """#REMARK MapPred 1.1 #REMARK idx_i, idx_j, distance distribution of 34 bins #REMARK 34 bins consist of 32 normal bins (4-20A with a step of 0.5A) and two boundary bins ( [0,4) and [20, inf) ), as follows: [0,4,4.5,5,5.5,6,6.5,7,7.5,8,8.5,9,9.5,10,10.5,11,11.5,12,12.5,13,13.5,14,14.5,15,15.5,16,16.5,17,17.5,18,18.5,19,19.5,20,inf] 5 10 0.013746 0.002245 0.053742 0.002115 0.005889 0.044058 0.010081 0.052535 0.118677 0.025818 0.019215 0.015831 0.009808 0.018148 0.031220 0.003428 0.058081 0.017978 0.065069 0.024163 0.044585 0.062025 0.026062 0.023824 0.012573 0.027729 0.022212 0.041685 0.005015 0.064340 0.004133 0.006420 0.018552 0.048998 1 35 0.187103 0.008180 0.021642 0.051089 0.038619 0.006100 0.010553 0.031697 0.010831 0.015310 0.006949 0.008237 0.043400 0.051436 0.003820 0.008148 0.018467 0.057307 0.022873 0.029184 0.008235 0.008025 0.004214 0.027027 0.070948 0.028355 0.049284 0.060124 0.041885 0.043900 0.000681 0.006836 0.007679 0.011862 43 85 0.024968 0.014838 0.021987 0.031265 0.019144 0.033038 0.018177 0.008716 0.017331 0.046459 0.051147 0.043912 0.004041 0.007990 0.027690 0.073997 0.001269 0.008161 0.067709 0.055700 0.028615 0.091884 0.021842 0.025949 0.025295 0.006136 0.031655 0.028990 0.082802 0.005069 0.002322 0.015611 0.039637 0.016654 85 43 0.015871 0.013765 0.006593 0.014670 0.029273 0.042705 0.058513 0.014858 0.050493 0.014216 0.010146 0.037020 0.018679 0.003142 0.031215 0.011736 0.008920 0.007325 0.144325 0.003512 0.018591 0.005043 0.001607 0.043659 0.068744 0.052532 0.050643 0.039295 0.003413 0.035119 0.102032 0.004150 0.005737 0.032456 50 50 0.000490 0.027392 0.001090 0.009625 0.011421 0.002011 0.015100 0.018622 0.008785 0.114531 0.044962 0.019562 0.022973 0.008111 0.042691 0.061367 0.001060 0.032753 0.073944 0.006790 0.002509 0.073759 0.025060 0.031361 0.039123 0.043318 0.032752 0.004280 0.044655 0.000556 0.000111 0.095043 0.028036 0.056157 18 50 0.002704 0.015000 0.024442 0.105520 0.014259 0.027628 
0.002832 0.035063 0.038354 0.055931 0.039683 0.035546 0.004621 0.019932 0.012316 0.087781 0.006637 0.043857 0.008459 0.053482 0.016937 0.083507 0.031733 0.000793 0.004304 0.066937 0.009968 0.006859 0.038950 0.064003 0.003185 0.008042 0.007331 0.023401"""
    distancefile = DistanceFile("test")
    distancefile.original_file_format = 'MAPPRED'
    distogram = Distogram("1")
    distancefile.add(distogram)
    # Residue pairs are deliberately unordered and include a reversed pair
    # (43, 85) / (85, 43) and a self pair (50, 50).
    list_res1 = [5, 1, 43, 85, 50, 18]
    list_res2 = [10, 35, 85, 43, 50, 50]
    distance_bins = ((0, 4), (4, 4.5), (4.5, 5), (5, 5.5), (5.5, 6), (6, 6.5), (6.5, 7), (7, 7.5), (7.5, 8), (8, 8.5), (8.5, 9), (9, 9.5), (9.5, 10), (10, 10.5), (10.5, 11), (11, 11.5), (11.5, 12), (12, 12.5), (12.5, 13), (13, 13.5), (13.5, 14), (14, 14.5), (14.5, 15), (15, 15.5), (15.5, 16), (16, 16.5), (16.5, 17), (17, 17.5), (17.5, 18), (18, 18.5), (18.5, 19), (19, 19.5), (19.5, 20), (20, np.inf))
    # Fixed seed keeps the random scores, and hence the written file, reproducible.
    np.random.seed(41)
    for res_1, res_2 in zip(list_res1, list_res2):
        distance_scores = np.random.dirichlet(np.ones(34)).tolist()
        distance = Distance(res_1, res_2, distance_scores, distance_bins)
        distogram.add(distance)
    f_name = self.tempfile()
    with open(f_name, "w") as f_out:
        MapPredParser().write(f_out, distogram)
    with open(f_name, "r") as f_in:
        output = f_in.read().splitlines()
    self.assertListEqual(expected_output.split("\n"), output)
def read(self, f_handle, f_id="alphafold2"):
    """Parse a distance prediction stored under the ``distogram`` key of a numpy-loadable file

    The ``logits`` entry is converted into probabilities with a softmax over
    the last axis, and the ``bin_edges`` entry is expanded into
    ``(lower, upper)`` bins with a leading ``(0, first_edge)`` bin and a
    trailing ``(last_edge, inf)`` bin. Every residue pair ``(i, j)`` with
    ``j >= i`` becomes one :obj:`~conkit.core.distance.Distance` (1-based
    residue numbers) inside a single distogram called ``distogram_1``.

    Parameters
    ----------
    f_handle
       Open file handle [read permissions]
    f_id : str, optional
       Unique contact file identifier

    Returns
    -------
    :obj:`~conkit.core.distancefile.DistanceFile`

    """
    distance_file = DistanceFile(f_id)
    # NOTE(review): this format tag is lower case - confirm whether consumers
    # expect an upper-case tag before changing it.
    distance_file.original_file_format = "alphafold2"
    distogram = Distogram("distogram_1")
    distance_file.add(distogram)

    # NOTE(review): allow_pickle=True lets numpy unpickle object arrays; only
    # use this reader on prediction files from a trusted source.
    loaded = np.load(f_handle, allow_pickle=True)
    raw_distogram = loaded['distogram']
    probabilities = softmax(raw_distogram['logits'], axis=-1)
    edges = raw_distogram['bin_edges']

    # Pair every lower edge with the next upper edge, open-ended on the right.
    lower_edges = [0] + list(edges)
    upper_edges = list(edges) + [np.inf]
    bins = tuple(zip(lower_edges, upper_edges))

    n_residues = probabilities.shape[0]
    for row in range(n_residues):
        for col in range(row, n_residues):
            scores = tuple(probabilities[row, col, :].tolist())
            distogram.add(Distance(row + 1, col + 1, scores, bins))

    return distance_file
def test_get_absent_residues_2(self):
    # With a seven-residue sequence and distances covering residues 1, 2, 3
    # and 5, residues 4, 6 and 7 are expected to be reported as absent.
    bins = ((0, 4), (4, 6), (6, 8), (8, np.inf))
    entries = (
        (1, 5, (0.25, 0.45, 0.25, 0.05)),
        (2, 3, (0.15, 0.15, 0.60, 0.1)),
    )
    distogram = Distogram("test")
    for res1, res2, scores in entries:
        distogram.add(Distance(res1, res2, scores, bins))
    distogram.sequence = Sequence('test', 'AAAAAAA')
    absent = distogram.get_absent_residues()
    self.assertListEqual([4, 6, 7], absent)
def test_predicted_distance_2(self):
    # A single-bin distance attached to a 'pdb' distogram is expected to
    # report the bin's value itself as the predicted distance.
    pdb_distogram = Distogram('test')
    pdb_distogram.original_file_format = 'pdb'
    single_bin = ((6.589181, 6.589181), )
    pdb_distance = Distance(36, 86, (1, ), single_bin, 0.934108)
    pdb_distogram.add(pdb_distance)
    self.assertEqual(6.589181, pdb_distance.predicted_distance)
def test_predicted_distance_3(self):
    # Two bins tie for the top score of 0.3; the (4, 6) bin is the one
    # expected as the predicted bin, giving a predicted distance of 5.
    bins = ((0, 4), (4, 6), (6, 8), (8, np.inf))
    tied_scores = (0.2, 0.3, 0.3, 0.2)
    distance = Distance(2, 3, tied_scores, bins)
    self.assertEqual(distance.max_score, 0.3)
    self.assertTupleEqual(distance.predicted_distance_bin, (4, 6))
    self.assertEqual(distance.predicted_distance, 5)
def test_ndistances_1(self):
    # Adding two distances is expected to give ndistances == 2.
    bins = ((0, 4), (4, 6), (6, 8), (8, np.inf))
    entries = (
        (1, 25, (0.25, 0.45, 0.25, 0.05)),
        (7, 19, (0.15, 0.15, 0.60, 0.1)),
    )
    distogram = Distogram("test")
    for res1, res2, scores in entries:
        distogram.add(Distance(res1, res2, scores, bins))
    self.assertEqual(2, distogram.ndistances)
def test_original_file_format_setter_2(self):
    # Assigning the format name 'mock_format' is expected to raise ValueError.
    bins = ((0, 4), (4, 6), (6, 8), (8, np.inf))
    only_distance = Distance(1, 25, (0.25, 0.45, 0.25, 0.05), bins)
    distogram = Distogram("test")
    distogram.add(only_distance)
    with self.assertRaises(ValueError):
        distogram.original_file_format = 'mock_format'
def test_predicted_distance_1(self):
    # The highest score (0.45) sits in the (4, 6) bin, so that bin and a
    # predicted distance of 5 are expected.
    bins = ((0, 4), (4, 6), (6, 8), (8, 10), (10, np.inf))
    scores = (0.15, 0.45, 0.25, 0.05, 0.1)
    distance = Distance(1, 25, scores, bins)
    self.assertEqual(distance.max_score, 0.45)
    self.assertTupleEqual(distance.predicted_distance_bin, (4, 6))
    self.assertEqual(distance.predicted_distance, 5)
def test_original_file_format_setter_1(self):
    # Assigning the format name 'mmcif' must simply not raise; there is no
    # further assertion in this test.
    bins = ((0, 4), (4, 6), (6, 8), (8, np.inf))
    only_distance = Distance(1, 25, (0.25, 0.45, 0.25, 0.05), bins)
    distogram = Distogram("test")
    distogram.add(only_distance)
    distogram.original_file_format = 'mmcif'
def test__assert_valid_bins_5(self):
    # The candidate bins leave gaps (20 -> 25 and 31 -> 45) between
    # consecutive intervals; validation is expected to raise ValueError.
    valid_bins = ((0, 4), (4, 6), (6, 8), (8, 10), (10, np.inf))
    distance = Distance(1, 25, (0.15, 0.45, 0.25, 0.05, 0.1), valid_bins)
    candidate_bins = ((0, 1), (1, 10), (10, 20), (25, 30), (30, 31), (45, np.inf))
    with self.assertRaises(ValueError):
        distance._assert_valid_bins(candidate_bins)
def _read(self, structure, f_id, distance_cutoff, atom_type):
    """Build a distance file from the models of a parsed structure

    One :obj:`~conkit.core.distancefile.DistanceFile` is assembled per model,
    holding one distogram for every ordered pair of chains that yields at
    least one distance. Only the hierarchy of the first model is returned.

    Parameters
    ----------
    structure
       A :obj:`~Bio.PDB.Structure.Structure` instance
    f_id : str
       Unique contact file identifier
    distance_cutoff : int
       Distance cutoff for which to determine contacts
    atom_type : str
       Atom type between which distances are calculated

    Returns
    -------
    :obj:`~conkit.core.distancefile.DistanceFile`

    Warns
    -----
    FutureWarning
       More than one model was processed; only the first one is returned

    """
    hierarchies = []
    # Handed to every Distance below as its last positional argument.
    distance_bound = (0.0, float(distance_cutoff))
    for model in structure:
        hierarchy = DistanceFile(f_id + "_" + str(model.id))
        hierarchy.original_file_format = "PDB"
        chains = list(chain for chain in model)
        # Chains are filtered in place before any distance is computed.
        # NOTE(review): presumably this drops hetero atoms and every atom that
        # is not of ``atom_type`` - confirm against the two helpers.
        for chain in chains:
            self._remove_hetatm(chain)
            self._remove_atom(chain, atom_type)
        # Every ordered pair is visited, so (A, B) and (B, A) are built separately.
        for chain1, chain2 in itertools.product(chains, chains):
            if chain1.id == chain2.id:  # intra
                distogram = Distogram(chain1.id)
            else:  # inter
                distogram = Distogram(chain1.id + chain2.id)
            for (atom1, atom2, distance) in self._chain_contacts(chain1, chain2):
                # Pairs closer than the cutoff score 1 - distance / 100 (6 decimals);
                # all other pairs score 0.
                if distance < distance_cutoff:
                    score = round(1.0 - (distance / 100), 6)
                else:
                    score = 0
                # The measured distance is stored as a single degenerate bin
                # (distance, distance) with probability 1.
                dist = Distance(atom1.resseq, atom2.resseq, (1, ), ((distance, distance), ), score, distance_bound)
                dist.res1_altseq = atom1.resseq_alt
                dist.res2_altseq = atom2.resseq_alt
                dist.res1 = atom1.resname
                dist.res2 = atom2.resname
                dist.res1_chain = atom1.reschain
                dist.res2_chain = atom2.reschain
                # A cutoff of 0 flags every pair as a true positive.
                if distance_cutoff == 0 or distance < distance_cutoff:
                    dist.true_positive = True
                distogram.add(dist)
            if distogram.empty:
                del distogram
            else:
                # NOTE(review): a one-character id is used to recognise the
                # intra-chain case; this assumes single-character chain ids -
                # confirm.
                if len(distogram.id) == 1:
                    distogram.sequence = self._build_sequence(chain1)
                    assert len(distogram.sequence.seq) == len(chain1)
                else:
                    distogram.sequence = self._build_sequence(
                        chain1) + self._build_sequence(chain2)
                    assert len(distogram.sequence.seq
                               ) == len(chain1) + len(chain2)
                hierarchy.add(distogram)
        hierarchy.method = "Distogram extracted from PDB " + str(model.id)
        hierarchy.remark = [
            "The model id is the chain identifier, i.e XY equates to chain X and chain Y.",
            "Residue numbers in column 1 are chain X, and numbers in column 2 are chain Y.",
        ]
        hierarchies.append(hierarchy)
    if len(hierarchies) > 1:
        # The two adjacent string literals are concatenated into one message.
        msg = "Super-level to contact file not yet implemented. " "Parser returns hierarchy for top model only!"
        warnings.warn(msg, FutureWarning)
    # NOTE(review): raises IndexError when ``structure`` yields no model.
    return hierarchies[0]
def Distance(*args, **kwargs):
    """:obj:`Distance <conkit.core.distance.Distance>` instance

    The class is imported at call time and all positional and keyword
    arguments are forwarded to its constructor unchanged.
    """
    from conkit.core.distance import Distance as _distance_cls

    instance = _distance_cls(*args, **kwargs)
    return instance