def test_fit_transform_transformer(self):
    """Check fit_transform with a Shell transformer against RBF_KERNEL."""
    kernel = AtomKernel(transformer=Shell(input_type="filename"))
    result = kernel.fit_transform(ALL)
    try:
        numpy.testing.assert_array_almost_equal(RBF_KERNEL, result)
    except AssertionError as err:
        # Report the array mismatch through the TestCase failure mechanism.
        self.fail(err)
def test_fit_features(self):
    """Check that fit stores numbers and features from (feature, numbers) pairs."""
    features = Shell(input_type="filename").fit_transform(ALL)
    kernel = AtomKernel()
    # No transformer is given, so the kernel is fed precomputed features
    # zipped with their corresponding entries of ALL_NUMS.
    kernel.fit(list(zip(features, ALL_NUMS)))
    self.assertEqual(ALL_NUMS, list(kernel._numbers))
    self.assertEqual(list(ALL_FEATURES), list(kernel._features))
def test_fit_transform_features(self):
    """Check fit_transform on (feature, numbers) pairs against RBF_KERNEL."""
    features = Shell(input_type="filename").fit_transform(ALL)
    kernel = AtomKernel()
    pairs = list(zip(features, ALL_NUMS))
    result = kernel.fit_transform(pairs)
    try:
        numpy.testing.assert_array_almost_equal(RBF_KERNEL, result)
    except AssertionError as err:
        # Report the array mismatch through the TestCase failure mechanism.
        self.fail(err)
def test_laplace_kernel(self):
    """Check the "laplace" kernel (gamma=1) against LAPLACE_KERNEL."""
    # depth=2 keeps the comparison from being trivial.
    shell = Shell(input_type="filename", depth=2)
    kernel = AtomKernel(transformer=shell, kernel="laplace", gamma=1.)
    kernel.fit(ALL)
    result = kernel.transform(ALL)
    try:
        numpy.testing.assert_array_almost_equal(LAPLACE_KERNEL, result)
    except AssertionError as err:
        # Report the array mismatch through the TestCase failure mechanism.
        self.fail(err)
def test_same_element(self):
    """Check the kernel produced with same_element=False against fixed values."""
    expected = numpy.array([
        [17.00000033, 14.58016505],
        [14.58016505, 32.76067832],
    ])
    # depth=2 keeps the comparison from being trivial.
    shell = Shell(input_type="filename", depth=2)
    # gamma=1 makes the differences more noticeable.
    kernel = AtomKernel(transformer=shell, same_element=False, gamma=1.)
    result = kernel.fit_transform(ALL)
    try:
        numpy.testing.assert_array_almost_equal(expected, result)
    except AssertionError as err:
        # Report the array mismatch through the TestCase failure mechanism.
        self.fail(err)
def test_custom_kernel(self):
    """Check that a user-supplied callable kernel reproduces LINEAR_KERNEL."""

    def linear(x, y):
        # Simple linear kernel: x . y^T
        return numpy.dot(x, numpy.transpose(y))

    # depth=2 keeps the comparison from being trivial.
    shell = Shell(input_type="filename", depth=2)
    kernel = AtomKernel(transformer=shell, kernel=linear)
    kernel.fit(ALL)
    result = kernel.transform(ALL)
    try:
        numpy.testing.assert_array_almost_equal(LINEAR_KERNEL, result)
    except AssertionError as err:
        # Report the array mismatch through the TestCase failure mechanism.
        self.fail(err)
# Coulomb matrix built from (elements, coords, connections) tuples.
feat = CoulombMatrix()
H2_conn = (H2_ELES, H2_COORDS, H2_CONNS)
HCN_conn = (HCN_ELES, HCN_COORDS, HCN_CONNS)
conn_result = feat.fit_transform([H2_conn, HCN_conn])
print(conn_result)
print()

# Example of generating the Coulomb matrix using a specified input_type
print("User specified input_type")
feat = CoulombMatrix(input_type=("coords", "numbers"))
H2_spec = (H2_COORDS, H2_NUMS)
HCN_spec = (HCN_COORDS, HCN_NUMS)
spec_result = feat.fit_transform([H2_spec, HCN_spec])
print(spec_result)
print()

# Example of generating the Local Coulomb matrix (atom-wise
# representation)
print("Atom feature")
feat = LocalCoulombMatrix()
atom_result = feat.fit_transform([H2, HCN])
print(atom_result)

# Example of generating AtomKernel
print("Atom Kernel")
feat = AtomKernel(transformer=LocalCoulombMatrix())
kernel_result = feat.fit_transform([H2, HCN])
print(kernel_result)


# Example of using an arbitrary function to load data.
# This example is useless, but it shows the possibility: the loader
# ignores its argument and always returns the HCN values.
def _always_hcn(x):
    return LazyValues(elements=HCN_ELES, coords=HCN_COORDS)


feat = CoulombMatrix(input_type=_always_hcn)
feat.fit_transform(list(range(10)))
def test_transform_before_fit(self):
    """Calling transform on an AtomKernel that was never fit raises ValueError."""
    unfitted = AtomKernel()
    self.assertRaises(ValueError, unfitted.transform, ALL)
def test_fit_transformer(self):
    """Check that fit with a Shell transformer stores numbers and features."""
    kernel = AtomKernel(transformer=Shell(input_type="filename"))
    kernel.fit(ALL)
    # Each stored entry is converted with .tolist() before comparing
    # against the plain ALL_NUMS fixture.
    stored_numbers = [nums.tolist() for nums in kernel._numbers]
    self.assertEqual(ALL_NUMS, stored_numbers)
    self.assertEqual(list(ALL_FEATURES), list(kernel._features))
def test_invalid_kernel(self):
    """A non-string, non-callable kernel (here ``1``) must raise ValueError.

    The Shell transformer is built outside the ``assertRaises`` context so
    that a ValueError coming from unrelated setup cannot make this test
    pass by accident; only the AtomKernel construction and use are expected
    to raise.
    """
    trans = Shell(input_type="filename")
    with self.assertRaises(ValueError):
        # Construction stays inside the context because the error may be
        # raised either when the kernel is created or when it is used.
        a = AtomKernel(kernel=1, transformer=trans)
        a.fit_transform(ALL)
from molml.kernel import AtomKernel
from utils import load_qm7


if __name__ == "__main__":
    # Boilerplate: load the train/test inputs and targets.
    Xin_train, Xin_test, y_train, y_test = load_qm7()

    # Only keep a few examples so the script runs quickly.
    n_train = 200
    n_test = 200
    Xin_train, y_train = Xin_train[:n_train], y_train[:n_train]
    Xin_test, y_test = Xin_test[:n_test], y_test[:n_test]

    gamma = 1e-7
    alpha = 1e-7

    # Build the atom-wise kernel matrices: fit on the training inputs,
    # then evaluate the test inputs against them.
    kern = AtomKernel(
        gamma=gamma,
        transformer=LocalEncodedBond(n_jobs=-1),
        n_jobs=-1,
    )
    K_train = kern.fit_transform(Xin_train)
    K_test = kern.transform(Xin_test)

    # The kernel matrices are passed in directly, hence "precomputed".
    clf = KernelRidge(alpha=alpha, kernel="precomputed")
    clf.fit(K_train, y_train)

    train_error = MAE(clf.predict(K_train), y_train)
    test_error = MAE(clf.predict(K_test), y_test)
    errors = (train_error, test_error)
    print("Train MAE: %.4f Test MAE: %.4f" % errors)
    print()