def test_r2_score(self):
  """Test that R^2 metric passes basic sanity tests."""
  verbosity = "high"
  np.random.seed(123)
  n_samples = 10
  y_true = np.random.rand(n_samples,)
  y_pred = np.random.rand(n_samples,)
  regression_metric = Metric(metrics.r2_score, verbosity=verbosity)
  assert np.isclose(
      metrics.r2_score(y_true, y_pred),
      regression_metric.compute_metric(y_true, y_pred))
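# Minimal standalone sanity sketch (not part of the original suite; plain
# scikit-learn, no DeepChem wrappers): R^2 is 1.0 for a perfect prediction
# and 0.0 when predicting the training mean, which is what the wrapper's
# agreement with metrics.r2_score above is implicitly relying on.
def sanity_check_r2_values():
  import numpy as np
  from sklearn.metrics import r2_score
  y = np.array([1.0, 2.0, 3.0])
  # Perfect prediction explains all variance.
  assert np.isclose(r2_score(y, y), 1.0)
  # Predicting the mean explains none of it.
  assert np.isclose(r2_score(y, np.full_like(y, y.mean())), 0.0)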
def test_singletask_sklearn_rf_ECFP_regression_API(self):
  """Test of singletask RF ECFP regression API."""
  splittype = "scaffold"
  featurizer = CircularFingerprint(size=1024)
  model_params = {}
  tasks = ["log-solubility"]
  task_type = "regression"
  task_types = {task: task_type for task in tasks}
  input_file = os.path.join(self.current_dir, "example.csv")
  loader = DataLoader(tasks=tasks,
                      smiles_field=self.smiles_field,
                      featurizer=featurizer,
                      verbosity="low")
  dataset = loader.featurize(input_file, self.data_dir)
  splitter = ScaffoldSplitter()
  train_dataset, test_dataset = splitter.train_test_split(
      dataset, self.train_dir, self.test_dir)
  input_transformers = []
  output_transformers = [
      NormalizationTransformer(transform_y=True, dataset=train_dataset)
  ]
  transformers = input_transformers + output_transformers
  model_params["data_shape"] = train_dataset.get_data_shape()
  regression_metrics = [
      Metric(metrics.r2_score),
      Metric(metrics.mean_squared_error),
      Metric(metrics.mean_absolute_error)
  ]
  model = SklearnModel(tasks, task_types, model_params, self.model_dir,
                       mode="regression",
                       model_instance=RandomForestRegressor())
  # Fit trained model
  model.fit(train_dataset)
  model.save()
  # Eval model on train
  evaluator = Evaluator(model, train_dataset, transformers, verbosity=True)
  _ = evaluator.compute_model_performance(regression_metrics)
  # Eval model on test
  evaluator = Evaluator(model, test_dataset, transformers, verbosity=True)
  _ = evaluator.compute_model_performance(regression_metrics)
def test_sklearn_classification_overfit(self):
  """Test that sklearn models can overfit simple classification datasets."""
  n_samples = 10
  n_features = 3
  n_tasks = 1
  # Generate dummy dataset
  np.random.seed(123)
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  y = np.random.randint(2, size=(n_samples, n_tasks))
  w = np.ones((n_samples, n_tasks))
  dataset = NumpyDataset(X, y, w, ids)
  verbosity = "high"
  classification_metric = Metric(metrics.roc_auc_score, verbosity=verbosity)
  sklearn_model = RandomForestClassifier()
  model = SklearnModel(sklearn_model, self.model_dir)
  # Fit trained model
  model.fit(dataset)
  model.save()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
  scores = evaluator.compute_model_performance([classification_metric])
  assert scores[classification_metric.name] > .9
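# Hedged sketch of the same overfit check with plain scikit-learn (an
# illustration, not DeepChem's API): with bootstrap disabled, fully grown
# trees memorize 10 distinct training points exactly, so training accuracy
# must be 1.0. This is the property the overfit tests above lean on.
def sanity_check_rf_overfit():
  import numpy as np
  from sklearn.ensemble import RandomForestClassifier
  np.random.seed(123)
  X = np.random.rand(10, 3)
  y = np.random.randint(2, size=10)
  clf = RandomForestClassifier(bootstrap=False, random_state=0).fit(X, y)
  assert clf.score(X, y) == 1.0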
def test_cgcnn_regression():
  # load datasets
  current_dir = path.dirname(path.abspath(__file__))
  config = {
      "reload": False,
      "featurizer": CGCNNFeaturizer(),
      # disable transformer
      "transformers": [],
      "data_dir": path.join(current_dir, "assets")
  }
  tasks, datasets, transformers = load_perovskite(**config)
  train, valid, test = datasets
  n_tasks = len(tasks)
  model = CGCNNModel(
      n_tasks=n_tasks, mode='regression', batch_size=4, learning_rate=0.001)
  # check train
  model.fit(train, nb_epoch=20)
  # check predict shape
  valid_preds = model.predict_on_batch(valid.X)
  assert valid_preds.shape == (2, n_tasks)
  test_preds = model.predict(test)
  assert test_preds.shape == (3, n_tasks)
  # check overfit
  regression_metric = Metric(mae_score, n_tasks=n_tasks)
  scores = model.evaluate(train, [regression_metric], transformers)
  assert scores[regression_metric.name] < 0.6
  if path.exists(path.join(current_dir, 'perovskite.json')):
    remove(path.join(current_dir, 'perovskite.json'))
def test_tf_skewed_classification_overfit(self):
  """Test tensorflow models can overfit 0/1 datasets with few actives."""
  tasks = ["task0"]
  task_types = {task: "classification" for task in tasks}
  n_samples = 100
  n_features = 3
  n_tasks = len(tasks)
  n_classes = 2
  # Generate dummy dataset
  np.random.seed(123)
  p = .05
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  y = np.random.binomial(1, p, size=(n_samples, n_tasks))
  w = np.ones((n_samples, n_tasks))
  dataset = Dataset.from_numpy(self.train_dir, X, y, w, ids, tasks)
  model_params = {
      "layer_sizes": [1500],
      "dropouts": [.0],
      "learning_rate": 0.003,
      "momentum": .9,
      "batch_size": n_samples,
      "num_classification_tasks": 1,
      "num_classes": n_classes,
      "num_features": n_features,
      "weight_init_stddevs": [1.],
      "bias_init_consts": [1.],
      "nb_epoch": 200,
      "penalty": 0.0,
      "optimizer": "adam",
      "data_shape": dataset.get_data_shape()
  }
  verbosity = "high"
  classification_metric = Metric(metrics.roc_auc_score, verbosity=verbosity)
  model = TensorflowModel(tasks, task_types, model_params, self.model_dir,
                          tf_class=TensorflowMultiTaskClassifier,
                          verbosity=verbosity)
  # Fit trained model
  model.fit(dataset)
  model.save()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
  scores = evaluator.compute_model_performance([classification_metric])
  assert scores[classification_metric.name] > .8
def test_tf_regression_overfit(self):
  """Test that TensorFlow models can overfit simple regression datasets."""
  n_samples = 10
  n_features = 3
  n_tasks = 1
  # Generate dummy dataset
  np.random.seed(123)
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  y = np.zeros((n_samples, n_tasks))
  w = np.ones((n_samples, n_tasks))
  dataset = NumpyDataset(X, y, w, ids)
  verbosity = "high"
  regression_metric = Metric(metrics.mean_squared_error, verbosity=verbosity)
  # TODO(rbharath): This breaks with optimizer="momentum". Why?
  tensorflow_model = TensorflowMultiTaskRegressor(
      n_tasks,
      n_features,
      self.model_dir,
      dropouts=[0.],
      learning_rate=0.003,
      weight_init_stddevs=[np.sqrt(6) / np.sqrt(1000)],
      batch_size=n_samples,
      verbosity=verbosity)
  model = TensorflowModel(tensorflow_model, self.model_dir)
  # Fit trained model
  model.fit(dataset, nb_epoch=100)
  model.save()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
  scores = evaluator.compute_model_performance([regression_metric])
  assert scores[regression_metric.name] < .1
def test_tf_classification_overfit(self):
  """Test that tensorflow models can overfit simple classification datasets."""
  n_samples = 10
  n_features = 3
  n_tasks = 1
  n_classes = 2
  # Generate dummy dataset
  np.random.seed(123)
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  y = np.zeros((n_samples, n_tasks))
  w = np.ones((n_samples, n_tasks))
  dataset = NumpyDataset(X, y, w, ids)
  verbosity = "high"
  classification_metric = Metric(metrics.accuracy_score, verbosity=verbosity)
  tensorflow_model = TensorflowMultiTaskClassifier(
      n_tasks,
      n_features,
      self.model_dir,
      dropouts=[0.],
      learning_rate=0.0003,
      weight_init_stddevs=[.1],
      batch_size=n_samples,
      verbosity=verbosity)
  model = TensorflowModel(tensorflow_model, self.model_dir)
  # Fit trained model
  model.fit(dataset, nb_epoch=100)
  model.save()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
  scores = evaluator.compute_model_performance([classification_metric])
  assert scores[classification_metric.name] > .9
def test_sklearn_multitask_regression_overfit(self):
  """Test SKLearn singletask-to-multitask overfits tiny regression data."""
  n_tasks = 2
  tasks = ["task%d" % task for task in range(n_tasks)]
  n_samples = 10
  n_features = 3
  # Generate dummy dataset
  np.random.seed(123)
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  y = np.random.rand(n_samples, n_tasks)
  w = np.ones((n_samples, n_tasks))
  dataset = DiskDataset.from_numpy(self.train_dir, X, y, w, ids)
  verbosity = "high"
  regression_metric = Metric(
      metrics.r2_score, verbosity=verbosity, task_averager=np.mean)

  def model_builder(model_dir):
    sklearn_model = RandomForestRegressor()
    return SklearnModel(sklearn_model, model_dir)

  model = SingletaskToMultitask(tasks, model_builder, self.model_dir)
  # Fit trained model
  model.fit(dataset)
  model.save()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
  scores = evaluator.compute_model_performance([regression_metric])
  assert scores[regression_metric.name] > .7
def test_keras_multitask_regression_overfit(self):
  """Test keras multitask overfits tiny data."""
  g = tf.Graph()
  sess = tf.Session(graph=g)
  K.set_session(sess)
  with g.as_default():
    n_tasks = 10
    n_samples = 10
    n_features = 3
    # Generate dummy dataset
    np.random.seed(123)
    ids = np.arange(n_samples)
    X = np.random.rand(n_samples, n_features)
    y = np.random.randint(2, size=(n_samples, n_tasks))
    w = np.ones((n_samples, n_tasks))
    dataset = NumpyDataset(X, y, w, ids)
    verbosity = "high"
    regression_metric = Metric(
        metrics.r2_score,
        verbosity=verbosity,
        task_averager=np.mean,
        mode="regression")
    keras_model = MultiTaskDNN(
        n_tasks,
        n_features,
        "regression",
        dropout=0.,
        learning_rate=.1,
        decay=1e-4)
    model = KerasModel(keras_model, self.model_dir, verbosity=verbosity)
    # Fit trained model
    model.fit(dataset, nb_epoch=100)
    model.save()
    # Eval model on train
    transformers = []
    evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
    scores = evaluator.compute_model_performance([regression_metric])
    assert scores[regression_metric.name] > .75
def test_tf_multitask_regression_overfit(self):
  """Test tf multitask overfits tiny data."""
  n_tasks = 10
  n_samples = 10
  n_features = 3
  n_classes = 2
  # Generate dummy dataset
  np.random.seed(123)
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  y = np.zeros((n_samples, n_tasks))
  w = np.ones((n_samples, n_tasks))
  dataset = NumpyDataset(X, y, w, ids)
  verbosity = "high"
  regression_metric = Metric(
      metrics.mean_squared_error,
      verbosity=verbosity,
      task_averager=np.mean,
      mode="regression")
  tensorflow_model = TensorflowMultiTaskRegressor(
      n_tasks,
      n_features,
      self.model_dir,
      dropouts=[0.],
      learning_rate=0.0003,
      weight_init_stddevs=[.1],
      batch_size=n_samples,
      verbosity=verbosity)
  model = TensorflowModel(tensorflow_model, self.model_dir)
  # Fit trained model
  model.fit(dataset, nb_epoch=50)
  model.save()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
  scores = evaluator.compute_model_performance([regression_metric])
  assert scores[regression_metric.name] < .1
def test_lcnn_reload():
  # needs change
  current_dir = tempfile.mkdtemp()
  download_url(url=URL, dest_dir=current_dir)
  untargz_file(
      path.join(current_dir, 'lcnn_data_feature.tar.gz'), current_dir)
  tasks, datasets, transformers = load_dataset_from_disk(
      path.join(current_dir, 'lcnn_data'))
  train, valid, test = datasets
  model_dir = tempfile.mkdtemp()
  model = LCNNModel(
      mode='regression',
      batch_size=8,
      learning_rate=0.001,
      model_dir=model_dir)
  model.fit(train, nb_epoch=10)
  # check predict shape
  valid_preds = model.predict_on_batch(valid.X)
  assert valid_preds.shape == (65, 1)
  test_preds = model.predict(test)
  assert test_preds.shape == (65, 1)
  # check overfit
  regression_metric = Metric(mae_score)
  scores = model.evaluate(test, [regression_metric], transformers)
  assert scores[regression_metric.name] < 0.6
  # reload
  reloaded_model = LCNNModel(
      mode='regression',
      batch_size=8,
      learning_rate=0.001,
      model_dir=model_dir)
  reloaded_model.restore()
  original_pred = model.predict(test)
  reload_pred = reloaded_model.predict(test)
  assert np.all(np.abs(original_pred - reload_pred) < 1e-7)
def test_singletask_sklearn_rf_RDKIT_descriptor_regression_API(self):
  """Test of singletask RF RDKIT-descriptor regression API."""
  splittype = "scaffold"
  featurizer = RDKitDescriptors()
  tasks = ["log-solubility"]
  task_type = "regression"
  task_types = {task: task_type for task in tasks}
  input_file = os.path.join(self.current_dir, "example.csv")
  loader = DataLoader(tasks=tasks,
                      smiles_field=self.smiles_field,
                      featurizer=featurizer,
                      verbosity="low")
  dataset = loader.featurize(input_file, self.data_dir)
  splitter = ScaffoldSplitter()
  train_dataset, test_dataset = splitter.train_test_split(
      dataset, self.train_dir, self.test_dir)
  input_transformers = [
      NormalizationTransformer(transform_X=True, dataset=train_dataset),
      ClippingTransformer(transform_X=True, dataset=train_dataset)
  ]
  output_transformers = [
      NormalizationTransformer(transform_y=True, dataset=train_dataset)
  ]
  transformers = input_transformers + output_transformers
  for dataset in [train_dataset, test_dataset]:
    for transformer in transformers:
      transformer.transform(dataset)
  regression_metrics = [
      Metric(metrics.r2_score),
      Metric(metrics.mean_squared_error),
      Metric(metrics.mean_absolute_error)
  ]
  sklearn_model = RandomForestRegressor()
  model = SklearnModel(sklearn_model, self.model_dir)
  # Fit trained model
  model.fit(train_dataset)
  model.save()
  # Eval model on train
  evaluator = Evaluator(model, train_dataset, transformers, verbosity=True)
  _ = evaluator.compute_model_performance(regression_metrics)
  # Eval model on test
  evaluator = Evaluator(model, test_dataset, transformers, verbosity=True)
  _ = evaluator.compute_model_performance(regression_metrics)
def test_tf_multitask_classification_overfit(self):
  """Test tf multitask overfits tiny data."""
  n_tasks = 10
  tasks = ["task%d" % task for task in range(n_tasks)]
  task_types = {task: "classification" for task in tasks}
  n_samples = 10
  n_features = 3
  n_classes = 2
  # Generate dummy dataset
  np.random.seed(123)
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  #y = np.random.randint(n_classes, size=(n_samples, n_tasks))
  y = np.zeros((n_samples, n_tasks))
  w = np.ones((n_samples, n_tasks))
  dataset = Dataset.from_numpy(self.train_dir, X, y, w, ids, tasks)
  model_params = {
      "layer_sizes": [1000],
      "dropouts": [.0],
      "learning_rate": 0.0003,
      "momentum": .9,
      "batch_size": n_samples,
      "num_classification_tasks": n_tasks,
      "num_classes": n_classes,
      "num_features": n_features,
      "weight_init_stddevs": [.1],
      "bias_init_consts": [1.],
      "nb_epoch": 100,
      "penalty": 0.0,
      "optimizer": "adam",
      "data_shape": dataset.get_data_shape()
  }
  verbosity = "high"
  classification_metric = Metric(metrics.accuracy_score, verbosity=verbosity)
  model = TensorflowModel(tasks, task_types, model_params, self.model_dir,
                          tf_class=TensorflowMultiTaskClassifier,
                          verbosity=verbosity)
  # Fit trained model
  model.fit(dataset)
  model.save()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
  scores = evaluator.compute_model_performance([classification_metric])
  assert scores[classification_metric.name] > .9
def test_sklearn_multitask_classification_overfit(self):
  """Test SKLearn singletask-to-multitask overfits tiny data."""
  n_tasks = 10
  tasks = ["task%d" % task for task in range(n_tasks)]
  task_types = {task: "classification" for task in tasks}
  n_samples = 10
  n_features = 3
  # Generate dummy dataset
  np.random.seed(123)
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  y = np.random.randint(2, size=(n_samples, n_tasks))
  w = np.ones((n_samples, n_tasks))
  dataset = Dataset.from_numpy(self.train_dir, X, y, w, ids, tasks)
  model_params = {
      "batch_size": None,
      "data_shape": dataset.get_data_shape()
  }
  verbosity = "high"
  classification_metric = Metric(metrics.roc_auc_score, verbosity=verbosity)

  def model_builder(tasks, task_types, model_params, model_dir,
                    verbosity=None):
    return SklearnModel(tasks, task_types, model_params, model_dir,
                        mode="classification",
                        model_instance=RandomForestClassifier(),
                        verbosity=verbosity)

  model = SingletaskToMultitask(tasks, task_types, model_params,
                                self.model_dir, model_builder,
                                verbosity=verbosity)
  # Fit trained model
  model.fit(dataset)
  model.save()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
  scores = evaluator.compute_model_performance([classification_metric])
  assert scores[classification_metric.name] > .9
def test_graph_conv_singletask_classification_overfit(self):
  """Test graph-conv singletask overfits tiny data."""
  g = tf.Graph()
  sess = tf.Session(graph=g)
  K.set_session(sess)
  with g.as_default():
    n_tasks = 1
    n_samples = 10
    n_features = 3
    n_classes = 2
    # Load mini log-solubility dataset.
    splittype = "scaffold"
    featurizer = ConvMolFeaturizer()
    tasks = ["outcome"]
    task_type = "classification"
    task_types = {task: task_type for task in tasks}
    input_file = os.path.join(self.current_dir,
                              "example_classification.csv")
    loader = DataLoader(tasks=tasks,
                        smiles_field=self.smiles_field,
                        featurizer=featurizer,
                        verbosity="low")
    dataset = loader.featurize(input_file, self.data_dir)
    verbosity = "high"
    classification_metric = Metric(metrics.accuracy_score,
                                   verbosity=verbosity)
    #n_atoms = 50
    n_feat = 71
    batch_size = 10
    graph_model = SequentialGraphModel(n_feat)
    graph_model.add(GraphConv(64, activation='relu'))
    graph_model.add(BatchNormalization(epsilon=1e-5, mode=1))
    graph_model.add(GraphPool())
    # Gather Projection
    graph_model.add(Dense(128, activation='relu'))
    graph_model.add(BatchNormalization(epsilon=1e-5, mode=1))
    graph_model.add(GraphGather(batch_size, activation="tanh"))
    with self.test_session() as sess:
      model = MultitaskGraphClassifier(
          sess,
          graph_model,
          n_tasks,
          self.model_dir,
          batch_size=batch_size,
          learning_rate=1e-3,
          learning_rate_decay_time=1000,
          optimizer_type="adam",
          beta1=.9,
          beta2=.999,
          verbosity="high")
      # Fit trained model
      model.fit(dataset, nb_epoch=20)
      model.save()
      # Eval model on train
      transformers = []
      evaluator = Evaluator(model, dataset, transformers,
                            verbosity=verbosity)
      scores = evaluator.compute_model_performance([classification_metric])
      assert scores[classification_metric.name] > .85
def test_sklearn_regression(self):
  """Test that sklearn models can learn on simple regression datasets."""
  np.random.seed(123)
  dataset = sklearn.datasets.load_diabetes()
  X, y = dataset.data, dataset.target
  frac_train = .7
  n_samples = len(X)
  # Slice indices must be integers.
  n_train = int(frac_train * n_samples)
  X_train, y_train = X[:n_train], y[:n_train]
  X_test, y_test = X[n_train:], y[n_train:]
  train_dataset = Dataset.from_numpy(self.train_dir, X_train, y_train)
  test_dataset = Dataset.from_numpy(self.test_dir, X_test, y_test)
  tasks = train_dataset.get_task_names()
  task_types = {task: "regression" for task in tasks}
  model_params = {
      "batch_size": None,
      "data_shape": train_dataset.get_data_shape()
  }
  verbosity = "high"
  regression_metric = Metric(metrics.r2_score, verbosity=verbosity)
  model = SklearnModel(tasks, task_types, model_params, self.model_dir,
                       mode="regression",
                       model_instance=LinearRegression())
  # Fit trained model
  model.fit(train_dataset)
  model.save()
  # Eval model on train
  transformers = []
  train_evaluator = Evaluator(model, train_dataset, transformers,
                              verbosity=verbosity)
  train_scores = train_evaluator.compute_model_performance(
      [regression_metric])
  print("train_scores")
  print(train_scores)
  # Eval model on test
  evaluator = Evaluator(model, test_dataset, transformers,
                        verbosity=verbosity)
  scores = evaluator.compute_model_performance([regression_metric])
  print("scores")
  print(scores)
  assert scores[regression_metric.name] > .5
def test_multitask_tf_mlp_ECFP_classification_hyperparam_opt(self):
  """Straightforward test of Tensorflow multitask deepchem classification API."""
  splittype = "scaffold"
  task_type = "classification"
  input_file = os.path.join(self.current_dir, "multitask_example.csv")
  tasks = ["task0", "task1", "task2", "task3", "task4", "task5", "task6",
           "task7", "task8", "task9", "task10", "task11", "task12",
           "task13", "task14", "task15", "task16"]
  task_types = {task: task_type for task in tasks}
  featurizer = CircularFingerprint(size=1024)
  loader = DataLoader(tasks=tasks,
                      smiles_field=self.smiles_field,
                      featurizer=featurizer,
                      verbosity="low")
  dataset = loader.featurize(input_file, self.data_dir)
  splitter = ScaffoldSplitter()
  train_dataset, valid_dataset, test_dataset = splitter.train_valid_test_split(
      dataset, self.train_dir, self.valid_dir, self.test_dir)
  transformers = []
  metric = Metric(metrics.matthews_corrcoef, np.mean, mode="classification")
  params_dict = {
      "activation": ["relu"],
      "momentum": [.9],
      "batch_size": [50],
      "init": ["glorot_uniform"],
      "data_shape": [train_dataset.get_data_shape()],
      "learning_rate": [1e-3],
      "decay": [1e-6],
      "nb_hidden": [1000],
      "nb_epoch": [1],
      "nesterov": [False],
      "dropouts": [(.5,)],
      "nb_layers": [1],
      "batchnorm": [False],
      "layer_sizes": [(1000,)],
      "weight_init_stddevs": [(.1,)],
      "bias_init_consts": [(1.,)],
      "num_classes": [2],
      "penalty": [0.],
      "optimizer": ["sgd"],
      "num_classification_tasks": [len(task_types)]
  }

  def model_builder(tasks, task_types, params_dict, logdir, verbosity=None):
    return TensorflowModel(tasks, task_types, params_dict, logdir,
                           tf_class=TensorflowMultiTaskClassifier,
                           verbosity=verbosity)

  optimizer = HyperparamOpt(model_builder, tasks, task_types,
                            verbosity="low")
  best_model, best_hyperparams, all_results = optimizer.hyperparam_search(
      params_dict, train_dataset, valid_dataset, transformers, metric,
      logdir=None)
def test_singletask_tf_mlp_ECFP_classification_API(self):
  """Straightforward test of Tensorflow singletask deepchem classification API."""
  n_features = 1024
  featurizer = CircularFingerprint(size=n_features)
  tasks = ["outcome"]
  input_file = os.path.join(self.current_dir, "example_classification.csv")
  loader = DataLoader(tasks=tasks,
                      smiles_field=self.smiles_field,
                      featurizer=featurizer,
                      verbosity="low")
  dataset = loader.featurize(input_file, self.data_dir)
  splitter = ScaffoldSplitter()
  train_dataset, test_dataset = splitter.train_test_split(
      dataset, self.train_dir, self.test_dir)
  transformers = [
      NormalizationTransformer(transform_y=True, dataset=train_dataset)
  ]
  for dataset in [train_dataset, test_dataset]:
    for transformer in transformers:
      transformer.transform(dataset)
  classification_metrics = [
      Metric(metrics.roc_auc_score),
      Metric(metrics.matthews_corrcoef),
      Metric(metrics.recall_score),
      Metric(metrics.accuracy_score)
  ]
  tensorflow_model = TensorflowMultiTaskClassifier(
      len(tasks), n_features, self.model_dir)
  model = TensorflowModel(tensorflow_model, self.model_dir)
  # Fit trained model
  model.fit(train_dataset)
  model.save()
  # Eval model on train
  evaluator = Evaluator(model, train_dataset, transformers, verbosity=True)
  _ = evaluator.compute_model_performance(classification_metrics)
  # Eval model on test
  evaluator = Evaluator(model, test_dataset, transformers, verbosity=True)
  _ = evaluator.compute_model_performance(classification_metrics)
def test_multitask_keras_mlp_ECFP_classification_API(self):
  """Straightforward test of Keras multitask deepchem classification API."""
  g = tf.Graph()
  sess = tf.Session(graph=g)
  K.set_session(sess)
  with g.as_default():
    task_type = "classification"
    input_file = os.path.join(self.current_dir, "multitask_example.csv")
    tasks = ["task0", "task1", "task2", "task3", "task4", "task5", "task6",
             "task7", "task8", "task9", "task10", "task11", "task12",
             "task13", "task14", "task15", "task16"]
    n_features = 1024
    featurizer = CircularFingerprint(size=n_features)
    loader = DataLoader(tasks=tasks,
                        smiles_field=self.smiles_field,
                        featurizer=featurizer,
                        verbosity="low")
    dataset = loader.featurize(input_file, self.data_dir)
    splitter = ScaffoldSplitter()
    train_dataset, test_dataset = splitter.train_test_split(
        dataset, self.train_dir, self.test_dir)
    transformers = []
    classification_metrics = [
        Metric(metrics.roc_auc_score),
        Metric(metrics.matthews_corrcoef),
        Metric(metrics.recall_score),
        Metric(metrics.accuracy_score)
    ]
    keras_model = MultiTaskDNN(len(tasks), n_features, "classification",
                               dropout=0.)
    model = KerasModel(keras_model, self.model_dir)
    # Fit trained model
    model.fit(train_dataset)
    model.save()
    # Eval model on train
    evaluator = Evaluator(model, train_dataset, transformers,
                          verbosity=True)
    _ = evaluator.compute_model_performance(classification_metrics)
    # Eval model on test
    evaluator = Evaluator(model, test_dataset, transformers,
                          verbosity=True)
    _ = evaluator.compute_model_performance(classification_metrics)
def test_tf_regression_overfit(self):
  """Test that TensorFlow models can overfit simple regression datasets."""
  tasks = ["task0"]
  task_types = {task: "regression" for task in tasks}
  n_samples = 10
  n_features = 3
  n_tasks = len(tasks)
  # Generate dummy dataset
  np.random.seed(123)
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  y = np.zeros((n_samples, n_tasks))
  w = np.ones((n_samples, n_tasks))
  dataset = Dataset.from_numpy(self.train_dir, X, y, w, ids, tasks)
  model_params = {
      "layer_sizes": [1000],
      "dropouts": [.0],
      "learning_rate": 0.003,
      "momentum": .9,
      "batch_size": n_samples,
      "num_regression_tasks": 1,
      "num_features": n_features,
      "weight_init_stddevs": [np.sqrt(6) / np.sqrt(1000)],
      "bias_init_consts": [1.],
      "nb_epoch": 100,
      "penalty": 0.0,
      "optimizer": "momentum",
      "data_shape": dataset.get_data_shape()
  }
  verbosity = "high"
  regression_metric = Metric(metrics.mean_squared_error, verbosity=verbosity)
  model = TensorflowModel(tasks, task_types, model_params, self.model_dir,
                          tf_class=TensorflowMultiTaskRegressor,
                          verbosity=verbosity)
  # Fit trained model
  model.fit(dataset)
  model.save()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
  scores = evaluator.compute_model_performance([regression_metric])
  assert scores[regression_metric.name] < .1
def test_keras_skewed_classification_overfit(self):
  """Test keras models can overfit 0/1 datasets with few actives."""
  tasks = ["task0"]
  task_types = {task: "classification" for task in tasks}
  n_samples = 100
  n_features = 3
  n_tasks = len(tasks)
  # Generate dummy dataset
  np.random.seed(123)
  p = .05
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  y = np.random.binomial(1, p, size=(n_samples, n_tasks))
  w = np.ones((n_samples, n_tasks))
  dataset = Dataset.from_numpy(self.train_dir, X, y, w, ids, tasks)
  model_params = {
      "nb_hidden": 1000,
      "activation": "relu",
      "dropout": .0,
      "learning_rate": .15,
      "momentum": .9,
      "nesterov": False,
      "decay": 1e-4,
      "batch_size": n_samples,
      "nb_epoch": 200,
      "init": "glorot_uniform",
      "nb_layers": 1,
      "batchnorm": False,
      "data_shape": dataset.get_data_shape()
  }
  verbosity = "high"
  classification_metric = Metric(metrics.roc_auc_score, verbosity=verbosity)
  model = MultiTaskDNN(tasks, task_types, model_params, self.model_dir,
                       verbosity=verbosity)
  # Fit trained model
  model.fit(dataset)
  model.save()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
  scores = evaluator.compute_model_performance([classification_metric])
  assert scores[classification_metric.name] > .9
def test_cgcnn_reload():
  # load datasets
  current_dir = path.dirname(path.abspath(__file__))
  config = {
      "reload": False,
      "featurizer": CGCNNFeaturizer(),
      # disable transformer
      "transformers": [],
      "data_dir": path.join(current_dir, "assets")
  }
  tasks, datasets, transformers = load_mp_metallicity(**config)
  train, valid, test = datasets
  n_tasks = len(tasks)
  n_classes = 2
  model_dir = tempfile.mkdtemp()
  model = CGCNNModel(
      n_tasks=n_tasks,
      n_classes=n_classes,
      mode='classification',
      model_dir=model_dir,
      batch_size=4,
      learning_rate=0.001)
  # check train
  model.fit(train, nb_epoch=20)
  # check predict shape
  valid_preds = model.predict_on_batch(valid.X)
  assert valid_preds.shape == (2, n_classes)
  test_preds = model.predict(test)
  assert test_preds.shape == (3, n_classes)
  # check overfit
  classification_metric = Metric(roc_auc_score, n_tasks=n_tasks)
  scores = model.evaluate(
      train, [classification_metric], transformers, n_classes=n_classes)
  assert scores[classification_metric.name] > 0.8
  # reload
  reloaded_model = CGCNNModel(
      n_tasks=n_tasks,
      n_classes=n_classes,
      mode='classification',
      model_dir=model_dir,
      batch_size=4,
      learning_rate=0.001)
  reloaded_model.restore()
  original_pred = model.predict(test)
  reload_pred = reloaded_model.predict(test)
  assert np.all(original_pred == reload_pred)
  if path.exists(path.join(current_dir, 'mp_is_metal.json')):
    remove(path.join(current_dir, 'mp_is_metal.json'))
def test_keras_multitask_regression_overfit(self):
  """Test keras multitask overfits tiny data."""
  n_tasks = 10
  tasks = ["task%d" % task for task in range(n_tasks)]
  task_types = {task: "regression" for task in tasks}
  n_samples = 10
  n_features = 3
  # Generate dummy dataset
  np.random.seed(123)
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  y = np.random.randint(2, size=(n_samples, n_tasks))
  w = np.ones((n_samples, n_tasks))
  dataset = Dataset.from_numpy(self.train_dir, X, y, w, ids, tasks)
  model_params = {
      "nb_hidden": 1000,
      "activation": "relu",
      "dropout": .0,
      "learning_rate": .15,
      "momentum": .9,
      "nesterov": False,
      "decay": 1e-4,
      "batch_size": n_samples,
      "nb_epoch": 200,
      "init": "glorot_uniform",
      "nb_layers": 1,
      "batchnorm": False,
      "data_shape": dataset.get_data_shape()
  }
  verbosity = "high"
  regression_metric = Metric(metrics.r2_score, verbosity=verbosity)
  model = MultiTaskDNN(tasks, task_types, model_params, self.model_dir,
                       verbosity=verbosity)
  # Fit trained model
  model.fit(dataset)
  model.save()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
  scores = evaluator.compute_model_performance([regression_metric])
  assert scores[regression_metric.name] > .9
def test_sklearn_reload(self):
  """Test that trained model can be reloaded correctly."""
  tasks = ["task0"]
  task_types = {task: "classification" for task in tasks}
  n_samples = 10
  n_features = 3
  n_tasks = len(tasks)
  # Generate dummy dataset
  np.random.seed(123)
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  y = np.random.randint(2, size=(n_samples, n_tasks))
  w = np.ones((n_samples, n_tasks))
  dataset = Dataset.from_numpy(self.train_dir, X, y, w, ids, tasks)
  model_params = {
      "batch_size": None,
      "data_shape": dataset.get_data_shape()
  }
  verbosity = "high"
  classification_metric = Metric(metrics.roc_auc_score, verbosity=verbosity)
  model = SklearnModel(tasks, task_types, model_params, self.model_dir,
                       mode="classification",
                       model_instance=RandomForestClassifier())
  # Fit trained model
  model.fit(dataset)
  model.save()
  # Load trained model
  reloaded_model = SklearnModel(tasks, task_types, model_params,
                                self.model_dir, mode="classification")
  reloaded_model.reload()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(reloaded_model, dataset, transformers,
                        verbosity=verbosity)
  scores = evaluator.compute_model_performance([classification_metric])
  assert scores[classification_metric.name] > .9
def test_singletask_to_multitask_sklearn_hyperparam_opt(self):
  """Test of hyperparam_opt with singletask_to_multitask."""
  tasks = ["task0", "task1", "task2", "task3", "task4", "task5", "task6",
           "task7", "task8", "task9", "task10", "task11", "task12",
           "task13", "task14", "task15", "task16"]
  input_file = "multitask_example.csv"
  n_features = 10
  n_tasks = len(tasks)
  # Define train dataset
  n_train = 100
  X_train = np.random.rand(n_train, n_features)
  y_train = np.random.randint(2, size=(n_train, n_tasks))
  w_train = np.ones_like(y_train)
  ids_train = ["C"] * n_train
  train_dataset = DiskDataset.from_numpy(self.train_dir, X_train, y_train,
                                         w_train, ids_train, tasks)
  # Define validation dataset
  n_valid = 10
  X_valid = np.random.rand(n_valid, n_features)
  y_valid = np.random.randint(2, size=(n_valid, n_tasks))
  w_valid = np.ones_like(y_valid)
  ids_valid = ["C"] * n_valid
  valid_dataset = DiskDataset.from_numpy(self.valid_dir, X_valid, y_valid,
                                         w_valid, ids_valid, tasks)
  transformers = []
  classification_metric = Metric(metrics.matthews_corrcoef, np.mean,
                                 mode="classification")
  params_dict = {"n_estimators": [1, 10]}

  def multitask_model_builder(model_params, model_dir):

    def model_builder(model_dir):
      sklearn_model = RandomForestClassifier(**model_params)
      return SklearnModel(sklearn_model, model_dir)

    return SingletaskToMultitask(tasks, model_builder, model_dir)

  optimizer = HyperparamOpt(multitask_model_builder, verbosity="low")
  best_model, best_hyperparams, all_results = optimizer.hyperparam_search(
      params_dict, train_dataset, valid_dataset, transformers,
      classification_metric, logdir=None)
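# Hedged illustration of the grid expansion a hyperparameter search over
# params_dict implies (an assumption about HyperparamOpt's behavior, not its
# actual implementation): enumerate every combination of parameter values
# and fit one model per combination, keeping the best validation score.
def sanity_check_param_grid():
  from itertools import product
  params_dict = {"n_estimators": [1, 10], "max_depth": [2]}
  keys = sorted(params_dict)
  combos = [dict(zip(keys, values))
            for values in product(*(params_dict[key] for key in keys))]
  assert combos == [{"max_depth": 2, "n_estimators": 1},
                    {"max_depth": 2, "n_estimators": 10}]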
def test_keras_reload(self):
  """Test that trained keras models can be reloaded correctly."""
  g = tf.Graph()
  sess = tf.Session(graph=g)
  K.set_session(sess)
  with g.as_default():
    tasks = ["task0"]
    task_types = {task: "classification" for task in tasks}
    n_samples = 10
    n_features = 3
    n_tasks = len(tasks)
    # Generate dummy dataset
    np.random.seed(123)
    ids = np.arange(n_samples)
    X = np.random.rand(n_samples, n_features)
    y = np.random.randint(2, size=(n_samples, n_tasks))
    w = np.ones((n_samples, n_tasks))
    dataset = NumpyDataset(X, y, w, ids)
    verbosity = "high"
    classification_metric = Metric(metrics.roc_auc_score,
                                   verbosity=verbosity)
    keras_model = MultiTaskDNN(n_tasks, n_features, "classification",
                               dropout=0.)
    model = KerasModel(keras_model, self.model_dir)
    # Fit trained model
    model.fit(dataset)
    model.save()
    # Load trained model
    reloaded_keras_model = MultiTaskDNN(n_tasks, n_features,
                                        "classification", dropout=0.)
    reloaded_model = KerasModel(reloaded_keras_model, self.model_dir)
    reloaded_model.reload(custom_objects={"MultiTaskDNN": MultiTaskDNN})
    # Eval model on train
    transformers = []
    evaluator = Evaluator(reloaded_model, dataset, transformers,
                          verbosity=verbosity)
    scores = evaluator.compute_model_performance([classification_metric])
    assert scores[classification_metric.name] > .6
def test_sklearn_multitask_classification(self):
  """Test that sklearn models can learn on simple multitask classification."""
  np.random.seed(123)
  n_tasks = 4
  tasks = range(n_tasks)
  dataset = sklearn.datasets.load_digits(n_class=2)
  X, y = dataset.data, dataset.target
  y = np.reshape(y, (len(y), 1))
  y = np.hstack([y] * n_tasks)
  frac_train = .7
  n_samples = len(X)
  n_train = int(frac_train * n_samples)
  X_train, y_train = X[:n_train], y[:n_train]
  X_test, y_test = X[n_train:], y[n_train:]
  train_dataset = DiskDataset.from_numpy(self.train_dir, X_train, y_train)
  test_dataset = DiskDataset.from_numpy(self.test_dir, X_test, y_test)
  verbosity = "high"
  classification_metric = Metric(metrics.roc_auc_score, verbosity=verbosity)

  def model_builder(model_dir):
    sklearn_model = LogisticRegression()
    return SklearnModel(sklearn_model, model_dir)

  model = SingletaskToMultitask(tasks, model_builder, self.model_dir)
  # Fit trained model
  model.fit(train_dataset)
  model.save()
  # Eval model on train
  transformers = []
  train_evaluator = Evaluator(model, train_dataset, transformers,
                              verbosity=verbosity)
  train_scores = train_evaluator.compute_model_performance(
      [classification_metric])
  # Eval model on test
  evaluator = Evaluator(model, test_dataset, transformers,
                        verbosity=verbosity)
  scores = evaluator.compute_model_performance([classification_metric])
  for score in scores[classification_metric.name]:
    assert score > .5
def test_tf_reload(self):
  """Test that trained tensorflow models can be reloaded correctly."""
  n_samples = 10
  n_features = 3
  n_tasks = 1
  n_classes = 2
  # Generate dummy dataset
  np.random.seed(123)
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  y = np.random.randint(n_classes, size=(n_samples, n_tasks))
  w = np.ones((n_samples, n_tasks))
  dataset = NumpyDataset(X, y, w, ids)
  verbosity = "high"
  classification_metric = Metric(metrics.accuracy_score, verbosity=verbosity)
  tensorflow_model = TensorflowMultiTaskClassifier(
      n_tasks, n_features, self.model_dir, dropouts=[0.],
      verbosity=verbosity)
  model = TensorflowModel(tensorflow_model, self.model_dir)
  # Fit trained model
  model.fit(dataset)
  model.save()
  # Load trained model
  reloaded_tensorflow_model = TensorflowMultiTaskClassifier(
      n_tasks, n_features, self.model_dir, dropouts=[0.],
      verbosity=verbosity)
  reloaded_model = TensorflowModel(reloaded_tensorflow_model,
                                   self.model_dir)
  reloaded_model.reload()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(reloaded_model, dataset, transformers,
                        verbosity=verbosity)
  scores = evaluator.compute_model_performance([classification_metric])
  assert scores[classification_metric.name] > .6
def test_singletask_to_multitask_sklearn_hyperparam_opt(self):
  """Test of hyperparam_opt with singletask_to_multitask."""
  splittype = "scaffold"
  output_transformers = []
  tasks = ["task0", "task1", "task2", "task3", "task4", "task5", "task6",
           "task7", "task8", "task9", "task10", "task11", "task12",
           "task13", "task14", "task15", "task16"]
  task_types = {task: "classification" for task in tasks}
  input_file = "multitask_example.csv"
  n_features = 10
  n_tasks = len(tasks)
  # Define train dataset
  n_train = 100
  X_train = np.random.rand(n_train, n_features)
  y_train = np.random.randint(2, size=(n_train, n_tasks))
  w_train = np.ones_like(y_train)
  ids_train = ["C"] * n_train
  train_dataset = Dataset.from_numpy(self.train_dir, X_train, y_train,
                                     w_train, ids_train, tasks)
  # Define validation dataset
  n_valid = 10
  X_valid = np.random.rand(n_valid, n_features)
  y_valid = np.random.randint(2, size=(n_valid, n_tasks))
  w_valid = np.ones_like(y_valid)
  ids_valid = ["C"] * n_valid
  valid_dataset = Dataset.from_numpy(self.valid_dir, X_valid, y_valid,
                                     w_valid, ids_valid, tasks)
  params_dict = {
      "batch_size": [32],
      "data_shape": [train_dataset.get_data_shape()],
  }
  classification_metric = Metric(metrics.matthews_corrcoef, np.mean,
                                 mode="classification")

  def model_builder(tasks, task_types, model_params, task_model_dir,
                    verbosity=None):
    return SklearnModel(tasks, task_types, model_params, task_model_dir,
                        model_instance=LogisticRegression())

  def multitask_model_builder(tasks, task_types, params_dict, logdir=None,
                              verbosity=None):
    return SingletaskToMultitask(tasks, task_types, params_dict,
                                 self.model_dir, model_builder)

  optimizer = HyperparamOpt(multitask_model_builder, tasks, task_types,
                            verbosity="low")
  best_model, best_hyperparams, all_results = optimizer.hyperparam_search(
      params_dict, train_dataset, valid_dataset, output_transformers,
      classification_metric, logdir=None)
def test_sklearn_transformed_regression(self):
  """Test that sklearn models can learn on simple transformed regression datasets."""
  np.random.seed(123)
  dataset = sklearn.datasets.load_diabetes()
  X, y = dataset.data, dataset.target
  frac_train = .7
  n_samples = len(X)
  n_train = int(frac_train * n_samples)
  X_train, y_train = X[:n_train], y[:n_train]
  X_test, y_test = X[n_train:], y[n_train:]
  train_dataset = DiskDataset.from_numpy(self.train_dir, X_train, y_train)
  test_dataset = DiskDataset.from_numpy(self.test_dir, X_test, y_test)
  # Transform both splits with statistics fit on train
  transformers = [
      NormalizationTransformer(transform_X=True, dataset=train_dataset),
      ClippingTransformer(transform_X=True, dataset=train_dataset),
      NormalizationTransformer(transform_y=True, dataset=train_dataset)
  ]
  for data in [train_dataset, test_dataset]:
    for transformer in transformers:
      transformer.transform(data)
  verbosity = "high"
  regression_metric = Metric(metrics.r2_score, verbosity=verbosity)
  sklearn_model = LinearRegression()
  model = SklearnModel(sklearn_model, self.model_dir)
  # Fit trained model
  model.fit(train_dataset)
  model.save()
  # Eval model on train
  train_evaluator = Evaluator(model, train_dataset, transformers,
                              verbosity=verbosity)
  train_scores = train_evaluator.compute_model_performance(
      [regression_metric])
  assert train_scores[regression_metric.name] > .5
  # Eval model on test
  evaluator = Evaluator(model, test_dataset, transformers,
                        verbosity=verbosity)
  scores = evaluator.compute_model_performance([regression_metric])
  assert scores[regression_metric.name] > .5
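# Hedged sketch of the z-scaling the NormalizationTransformer steps above are
# assumed to perform (an illustration, not the DeepChem implementation):
# statistics are fit on the training split only, then reused on test so no
# test-set information leaks into the transform.
def sanity_check_normalization():
  import numpy as np
  rng = np.random.RandomState(0)
  X_train, X_test = rng.rand(70, 3), rng.rand(30, 3)
  mu, sigma = X_train.mean(axis=0), X_train.std(axis=0)
  X_train_t = (X_train - mu) / sigma
  X_test_t = (X_test - mu) / sigma  # reuse train statistics; no leakage
  # Train split is exactly standardized; test split is only approximately so.
  assert np.allclose(X_train_t.mean(axis=0), 0.0, atol=1e-10)
  assert np.allclose(X_train_t.std(axis=0), 1.0, atol=1e-10)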
model_dir = tempfile.mkdtemp()
# 4-fold splits
K = 4
# 10 positive/negative ligands
n_pos = 10
n_neg = 10
# 10 trials on test-set
n_trials = 10
# Sample supports without replacement (all pos/neg should be different)
replace = False
tox21_tasks, dataset, transformers = load_tox21_ecfp()
# Define metric
metric = Metric(
    dc.metrics.roc_auc_score, verbosity="high", mode="classification")
task_splitter = TaskSplitter()
fold_datasets = task_splitter.k_fold_split(dataset, K)
all_scores = {}
for fold in range(K):
  train_inds = list(set(range(K)) - set([fold]))
  train_folds = [fold_datasets[ind] for ind in train_inds]
  train_dataset = merge_fold_datasets(train_folds)
  test_dataset = fold_datasets[fold]
  fold_tasks = range(fold * len(test_dataset.get_task_names()),
                     (fold + 1) * len(test_dataset.get_task_names()))
  # Get supports on test-set
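# Minimal sketch of the leave-one-fold-out bookkeeping in the loop above,
# independent of DeepChem: each pass holds out exactly one fold and trains
# on the remaining K - 1 folds.
def sanity_check_fold_indices():
  K = 4
  for fold in range(K):
    train_inds = sorted(set(range(K)) - {fold})
    assert len(train_inds) == K - 1
    assert fold not in train_inds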