def test_compute_features_on_infinity_distance():
    """Test that WeaveModel correctly transforms WeaveMol objects into
    tensors when max_pair_distance is None (i.e. unlimited pair distance).
    """
    featurizer = dc.feat.WeaveFeaturizer(max_pair_distance=None)
    X = featurizer(["C", "CCC"])
    batch_size = 20
    model = WeaveModel(1,
                       batch_size=batch_size,
                       mode='classification',
                       fully_connected_layer_sizes=[2000, 1000],
                       batch_normalize=True,
                       batch_normalize_kwargs={
                           "fused": False,
                           "trainable": True,
                           "renorm": True
                       },
                       # Fixed: was misspelled "learning_rage", so the
                       # intended learning rate was never applied.
                       learning_rate=0.0005)
    atom_feat, pair_feat, pair_split, atom_split, atom_to_pair = model.compute_features_on_batch(
        X)

    # There are 4 atoms each of which have 75 atom features
    assert atom_feat.shape == (4, 75)
    # There are 10 pairs with infinity distance and 14 pair features
    assert pair_feat.shape == (10, 14)
    # 4 atoms in total
    assert atom_split.shape == (4,)
    assert np.all(atom_split == np.array([0, 1, 1, 1]))
    # 10 pairs in total
    assert pair_split.shape == (10,)
    assert np.all(pair_split == np.array([0, 1, 1, 1, 2, 2, 2, 3, 3, 3]))
    # 10 pairs in total each with start/finish
    assert atom_to_pair.shape == (10, 2)
    assert np.all(
        atom_to_pair == np.array([[0, 0], [1, 1], [1, 2], [1, 3], [2, 1],
                                  [2, 2], [2, 3], [3, 1], [3, 2], [3, 3]]))
def test_weave_regression_model(self):
    """Fit WeaveModel in regression mode and require MAE below 0.1 per task."""
    task_list, data, xforms, err_metric = self.get_dataset(
        'regression', 'Weave')
    regressor = WeaveModel(len(task_list), mode='regression')
    regressor.fit(data, nb_epoch=80)
    results = regressor.evaluate(data, [err_metric], xforms)
    for err in results['mean_absolute_error']:
        assert err < 0.1
def test_weave_fit_simple_distance_1():
    """Overfit a tiny two-molecule classification set featurized with
    max_pair_distance=1 and check that ROC-AUC reaches 0.9.
    """
    featurizer = dc.feat.WeaveFeaturizer(max_pair_distance=1)
    X = featurizer(["C", "CCC"])
    y = np.array([0, 1.])
    dataset = dc.data.NumpyDataset(X, y)
    batch_size = 20
    model = WeaveModel(1,
                       batch_size=batch_size,
                       mode='classification',
                       fully_connected_layer_sizes=[2000, 1000],
                       batch_normalize=True,
                       batch_normalize_kwargs={
                           "fused": False,
                           "trainable": True,
                           "renorm": True
                       },
                       # Fixed: was misspelled "learning_rage", so the
                       # intended learning rate was never applied.
                       learning_rate=0.0005)
    model.fit(dataset, nb_epoch=200)
    transformers = []
    metric = dc.metrics.Metric(dc.metrics.roc_auc_score,
                               np.mean,
                               mode="classification")
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.9
def test_weave_model(self):
    """Fit WeaveModel in classification mode and require ROC-AUC >= 0.9."""
    task_list, data, xforms, auc_metric = self.get_dataset(
        'classification', 'Weave')
    classifier = WeaveModel(len(task_list), mode='classification')
    classifier.fit(data, nb_epoch=50)
    results = classifier.evaluate(data, [auc_metric], xforms)
    assert results['mean-roc_auc_score'] >= 0.9
def test_weave_model():
    """Overfit a 10-point Weave classification set and require ROC-AUC >= 0.9."""
    task_list, data, xforms, auc_metric = get_dataset('classification',
                                                      'Weave',
                                                      data_points=10)
    classifier = WeaveModel(len(task_list),
                            batch_size=10,
                            mode='classification',
                            dropouts=0,
                            learning_rate=0.0001)
    classifier.fit(data, nb_epoch=250)
    results = classifier.evaluate(data, [auc_metric], xforms)
    assert results['mean-roc_auc_score'] >= 0.9
def test_weave_regression_model():
    """Overfit a small Weave regression set with fixed seeds; require MAE < 0.1."""
    import numpy as np
    import tensorflow as tf

    # Seed both frameworks so the fit is reproducible.
    tf.random.set_seed(123)
    np.random.seed(123)

    task_list, data, xforms, err_metric = get_dataset('regression', 'Weave')
    regressor = WeaveModel(len(task_list),
                           batch_size=10,
                           mode='regression',
                           batch_normalize=False,
                           fully_connected_layer_sizes=[],
                           dropouts=0,
                           learning_rate=0.0005)
    regressor.fit(data, nb_epoch=200)
    results = regressor.evaluate(data, [err_metric], xforms)
    assert results['mean_absolute_error'] < 0.1
def test_change_loss_function_weave(self):
    """Fit, save, reload without restoring weights, attach a new L2 submodel
    loss, then restore and fit the submodel for one epoch.
    """
    task_list, data, xforms, err_metric = self.get_dataset('regression',
                                                           'Weave',
                                                           num_tasks=1)
    model = WeaveModel(len(task_list),
                       batch_size=50,
                       mode='regression',
                       use_queue=False)
    model.fit(data, nb_epoch=1)
    model.save()

    # Reload the graph structure only; weights are restored later, after the
    # submodel (with its replacement loss) has been created.
    reloaded = TensorGraph.load_from_dir(model.model_dir, restore=False)
    label_layer = reloaded.labels[-1]
    output_layer = reloaded.outputs[-1]
    new_loss = ReduceSum(L2Loss(in_layers=[label_layer, output_layer]))
    submodel = reloaded.create_submodel(loss=new_loss)
    reloaded.restore()
    reloaded.fit(data, nb_epoch=1, submodel=submodel)
def test_weave_model():
    """Fit a batch-normalized Weave classifier and require ROC-AUC >= 0.9."""
    tasks, dataset, transformers, metric = get_dataset('classification',
                                                       'Weave')
    batch_size = 20
    model = WeaveModel(len(tasks),
                       batch_size=batch_size,
                       mode='classification',
                       fully_connected_layer_sizes=[2000, 1000],
                       batch_normalize=True,
                       batch_normalize_kwargs={
                           "fused": False,
                           "trainable": True,
                           "renorm": True
                       },
                       # Fixed: was misspelled "learning_rage", so the
                       # intended learning rate was never applied.
                       learning_rate=0.0005)
    model.fit(dataset, nb_epoch=200)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.9
def test_compute_features_on_distance_1():
    """Test that WeaveModel correctly transforms WeaveMol objects into tensors
    with finite max_pair_distance."""
    featurizer = dc.feat.WeaveFeaturizer(max_pair_distance=1)
    X = featurizer(["C", "CCC"])
    batch_size = 20
    model = WeaveModel(
        1,
        batch_size=batch_size,
        mode='classification',
        fully_connected_layer_sizes=[2000, 1000],
        batch_normalize=True,
        batch_normalize_kwargs={
            "fused": False,
            "trainable": True,
            "renorm": True
        },
        learning_rate=0.0005)
    atom_feat, pair_feat, pair_split, atom_split, atom_to_pair = model.compute_features_on_batch(
        X)

    # There are 4 atoms each of which have 75 atom features
    assert atom_feat.shape == (4, 75)
    # There are 8 pairs with distance 1 and 14 pair features. (To see why 8,
    # there's the self pair for "C". For "CCC" there are 7 pairs including self
    # connections and accounting for symmetry.)
    assert pair_feat.shape == (8, 14)
    # 4 atoms in total
    assert atom_split.shape == (4,)
    assert np.all(atom_split == np.array([0, 1, 1, 1]))
    # 8 pairs in total (comment fixed: previously said 10, shape asserts 8)
    assert pair_split.shape == (8,)
    # The center atom is self connected and to both neighbors so it appears
    # thrice. The canonical ranking used in MolecularFeaturizer means this
    # central atom is ranked last in ordering.
    assert np.all(pair_split == np.array([0, 1, 1, 2, 2, 3, 3, 3]))
    # 8 pairs in total, each with a (start, finish) atom index pair
    # (comment fixed: previously said 10)
    assert atom_to_pair.shape == (8, 2)
    assert np.all(atom_to_pair == np.array([[0, 0], [1, 1], [1, 3], [2, 2],
                                            [2, 3], [3, 1], [3, 2], [3, 3]]))
def test_weave_regression_model(self):
    """Fit a Weave regressor, then verify a save/reload round-trip produces
    identical evaluation scores.
    """
    task_list, data, xforms, err_metric = self.get_dataset(
        'regression', 'Weave')
    regressor = WeaveModel(len(task_list), mode='regression')
    regressor.fit(data, nb_epoch=80)
    before = regressor.evaluate(data, [err_metric], xforms)
    for err in before['mean_absolute_error']:
        assert err < 0.1
    regressor.save()
    # Reload from disk and confirm the scores are unchanged.
    regressor = TensorGraph.load_from_dir(regressor.model_dir)
    after = regressor.evaluate(data, [err_metric], xforms)
    assert np.allclose(before['mean_absolute_error'],
                       after['mean_absolute_error'])
def test_weave_model(self):
    """Fit a Weave classifier, then verify a save/reload round-trip produces
    identical evaluation scores.
    """
    task_list, data, xforms, auc_metric = self.get_dataset(
        'classification', 'Weave')
    classifier = WeaveModel(len(task_list), mode='classification')
    classifier.fit(data, nb_epoch=50)
    before = classifier.evaluate(data, [auc_metric], xforms)
    assert before['mean-roc_auc_score'] >= 0.9
    classifier.save()
    # Reload from disk and confirm the scores are unchanged.
    classifier = TensorGraph.load_from_dir(classifier.model_dir)
    after = classifier.evaluate(data, [auc_metric], xforms)
    assert np.allclose(before['mean-roc_auc_score'],
                       after['mean-roc_auc_score'])
def generate_weave_model():
    """Build a single-task, regression-mode WeaveModel (batch size 64)."""
    return WeaveModel(1,
                      batch_size=64,
                      learning_rate=1e-3,
                      use_queue=False,
                      mode='regression')
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean) # Do setup required for tf/keras models n_tasks = 1 #Only solubility to predict n_atom_feat = 75 n_pair_feat = 14 n_hidden = 10 batch_size = 64 n_graph_feat = 10 nb_epoch = 10 model = WeaveModel( n_tasks=n_tasks, n_atom_feat=n_atom_feat, n_pair_feat=n_pair_feat, n_hidden=n_hidden, n_graph_feat=n_graph_feat, mode="regression", batch_size=batch_size, model_dir= "/home/rod/Dropbox/Quimica/Analysis/ANalisis/Borradores/WeaveModel/" ) #To prevent overfitting # Fit trained model print("Fitting model") model.fit(train_dataset, nb_epoch=nb_epoch) model.save() print("Evaluating model") train_scores = model.evaluate(train_dataset, [metric], transformers) valid_scores = model.evaluate(valid_dataset, [metric], transformers) print("Train scores")