def test_tf_multitask_regression_overfit(self):
  """Test tf multitask overfits tiny data."""
  n_tasks = 10
  n_samples = 10
  n_features = 3
  n_classes = 2
  # Generate dummy dataset
  np.random.seed(123)
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  y = np.zeros((n_samples, n_tasks))
  w = np.ones((n_samples, n_tasks))
  dataset = NumpyDataset(X, y, w, ids)
  verbosity = "high"
  regression_metric = Metric(metrics.mean_squared_error, verbosity=verbosity,
                             task_averager=np.mean, mode="regression")
  tensorflow_model = TensorflowMultiTaskRegressor(
      n_tasks, n_features, self.model_dir, dropouts=[0.],
      learning_rate=0.0003, weight_init_stddevs=[.1],
      batch_size=n_samples, verbosity=verbosity)
  model = TensorflowModel(tensorflow_model, self.model_dir)
  # Fit trained model
  model.fit(dataset, nb_epoch=50)
  model.save()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
  scores = evaluator.compute_model_performance([regression_metric])
  assert scores[regression_metric.name] < .1
def test_sklearn_multitask_regression_overfit(self):
  """Test SKLearn singletask-to-multitask overfits tiny regression data."""
  n_tasks = 2
  tasks = ["task%d" % task for task in range(n_tasks)]
  n_samples = 10
  n_features = 3
  # Generate dummy dataset
  np.random.seed(123)
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  y = np.random.rand(n_samples, n_tasks)
  w = np.ones((n_samples, n_tasks))
  dataset = DiskDataset.from_numpy(self.train_dir, X, y, w, ids)
  verbosity = "high"
  regression_metric = Metric(metrics.r2_score, verbosity=verbosity,
                             task_averager=np.mean)

  def model_builder(model_dir):
    sklearn_model = RandomForestRegressor()
    return SklearnModel(sklearn_model, model_dir)

  model = SingletaskToMultitask(tasks, model_builder, self.model_dir)
  # Fit trained model
  model.fit(dataset)
  model.save()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
  scores = evaluator.compute_model_performance([regression_metric])
  assert scores[regression_metric.name] > .7
def test_tf_classification_overfit(self):
  """Test that tensorflow models can overfit simple classification datasets."""
  n_samples = 10
  n_features = 3
  n_tasks = 1
  n_classes = 2
  # Generate dummy dataset
  np.random.seed(123)
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  y = np.zeros((n_samples, n_tasks))
  w = np.ones((n_samples, n_tasks))
  dataset = NumpyDataset(X, y, w, ids)
  verbosity = "high"
  classification_metric = Metric(metrics.accuracy_score, verbosity=verbosity)
  tensorflow_model = TensorflowMultiTaskClassifier(
      n_tasks, n_features, self.model_dir, dropouts=[0.],
      learning_rate=0.0003, weight_init_stddevs=[.1],
      batch_size=n_samples, verbosity=verbosity)
  model = TensorflowModel(tensorflow_model, self.model_dir)
  # Fit trained model
  model.fit(dataset, nb_epoch=100)
  model.save()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
  scores = evaluator.compute_model_performance([classification_metric])
  assert scores[classification_metric.name] > .9
def test_sklearn_classification_overfit(self):
  """Test that sklearn models can overfit simple classification datasets."""
  n_samples = 10
  n_features = 3
  n_tasks = 1
  # Generate dummy dataset
  np.random.seed(123)
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  y = np.random.randint(2, size=(n_samples, n_tasks))
  w = np.ones((n_samples, n_tasks))
  dataset = NumpyDataset(X, y, w, ids)
  verbosity = "high"
  classification_metric = Metric(metrics.roc_auc_score, verbosity=verbosity)
  sklearn_model = RandomForestClassifier()
  model = SklearnModel(sklearn_model, self.model_dir)
  # Fit trained model
  model.fit(dataset)
  model.save()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
  scores = evaluator.compute_model_performance([classification_metric])
  assert scores[classification_metric.name] > .9
def test_tf_regression_overfit(self):
  """Test that TensorFlow models can overfit simple regression datasets."""
  n_samples = 10
  n_features = 3
  n_tasks = 1
  # Generate dummy dataset
  np.random.seed(123)
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  y = np.zeros((n_samples, n_tasks))
  w = np.ones((n_samples, n_tasks))
  dataset = NumpyDataset(X, y, w, ids)
  verbosity = "high"
  regression_metric = Metric(metrics.mean_squared_error, verbosity=verbosity)
  # TODO(rbharath): This breaks with optimizer="momentum". Why?
  tensorflow_model = TensorflowMultiTaskRegressor(
      n_tasks, n_features, self.model_dir, dropouts=[0.],
      learning_rate=0.003, weight_init_stddevs=[np.sqrt(6) / np.sqrt(1000)],
      batch_size=n_samples, verbosity=verbosity)
  model = TensorflowModel(tensorflow_model, self.model_dir)
  # Fit trained model
  model.fit(dataset, nb_epoch=100)
  model.save()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
  scores = evaluator.compute_model_performance([regression_metric])
  assert scores[regression_metric.name] < .1
def evaluate(self, dataset, metrics, transformers=[], per_task_metrics=False):
  """
  Evaluates the performance of this model on specified dataset.

  Parameters
  ----------
  dataset: dc.data.Dataset
    Dataset object.
  metrics: list of deepchem.metrics.Metric
    Evaluation metrics.
  transformers: list
    List of deepchem.transformers.Transformer
  per_task_metrics: bool
    If True, return per-task scores.

  Returns
  -------
  dict
    Maps tasks to scores under metric.
  """
  evaluator = Evaluator(self, dataset, transformers)
  if not per_task_metrics:
    scores = evaluator.compute_model_performance(metrics)
    return scores
  else:
    scores, per_task_scores = evaluator.compute_model_performance(
        metrics, per_task_metrics=per_task_metrics)
    return scores, per_task_scores
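# The snippet below is a hedged usage sketch of the evaluate() API defined
# above; it is not taken from the original source. The names `model`,
# `valid_dataset`, and `transformers` are placeholders assumed to exist, and
# Metric/metrics are assumed to be imported as elsewhere in this file.
def _example_evaluate_usage(model, valid_dataset, transformers):
  """Illustrative only: aggregate and per-task scores from Model.evaluate."""
  metric = Metric(metrics.roc_auc_score)
  # Aggregate score averaged over tasks.
  scores = model.evaluate(valid_dataset, [metric], transformers)
  # Aggregate plus per-task scores.
  scores, per_task_scores = model.evaluate(
      valid_dataset, [metric], transformers, per_task_metrics=True)
  return scores, per_task_scores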
def test_keras_multitask_regression_overfit(self):
  """Test keras multitask overfits tiny data."""
  g = tf.Graph()
  sess = tf.Session(graph=g)
  K.set_session(sess)
  with g.as_default():
    n_tasks = 10
    n_samples = 10
    n_features = 3
    # Generate dummy dataset
    np.random.seed(123)
    ids = np.arange(n_samples)
    X = np.random.rand(n_samples, n_features)
    y = np.random.randint(2, size=(n_samples, n_tasks))
    w = np.ones((n_samples, n_tasks))
    dataset = NumpyDataset(X, y, w, ids)
    verbosity = "high"
    regression_metric = Metric(metrics.r2_score, verbosity=verbosity,
                               task_averager=np.mean, mode="regression")
    keras_model = MultiTaskDNN(n_tasks, n_features, "regression",
                               dropout=0., learning_rate=.1, decay=1e-4)
    model = KerasModel(keras_model, self.model_dir, verbosity=verbosity)
    # Fit trained model
    model.fit(dataset, nb_epoch=100)
    model.save()
    # Eval model on train
    transformers = []
    evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
    scores = evaluator.compute_model_performance([regression_metric])
    assert scores[regression_metric.name] > .75
def test_graph_conv_singletask_classification_overfit(self):
  """Test graph-conv multitask overfits tiny data."""
  g = tf.Graph()
  sess = tf.Session(graph=g)
  K.set_session(sess)
  with g.as_default():
    n_tasks = 1
    n_samples = 10
    n_features = 3
    n_classes = 2
    # Load mini log-solubility dataset.
    splittype = "scaffold"
    featurizer = ConvMolFeaturizer()
    tasks = ["outcome"]
    task_type = "classification"
    task_types = {task: task_type for task in tasks}
    input_file = os.path.join(self.current_dir, "example_classification.csv")
    loader = DataLoader(tasks=tasks, smiles_field=self.smiles_field,
                        featurizer=featurizer, verbosity="low")
    dataset = loader.featurize(input_file, self.data_dir)
    verbosity = "high"
    classification_metric = Metric(metrics.accuracy_score,
                                   verbosity=verbosity)
    #n_atoms = 50
    n_feat = 71
    batch_size = 10
    graph_model = SequentialGraphModel(n_feat)
    graph_model.add(GraphConv(64, activation='relu'))
    graph_model.add(BatchNormalization(epsilon=1e-5, mode=1))
    graph_model.add(GraphPool())
    # Gather Projection
    graph_model.add(Dense(128, activation='relu'))
    graph_model.add(BatchNormalization(epsilon=1e-5, mode=1))
    graph_model.add(GraphGather(batch_size, activation="tanh"))
    with self.test_session() as sess:
      model = MultitaskGraphClassifier(
          sess, graph_model, n_tasks, self.model_dir, batch_size=batch_size,
          learning_rate=1e-3, learning_rate_decay_time=1000,
          optimizer_type="adam", beta1=.9, beta2=.999, verbosity="high")
      # Fit trained model
      model.fit(dataset, nb_epoch=20)
      model.save()
      # Eval model on train
      transformers = []
      evaluator = Evaluator(model, dataset, transformers,
                            verbosity=verbosity)
      scores = evaluator.compute_model_performance([classification_metric])
      assert scores[classification_metric.name] > .85
def test_tf_multitask_classification_overfit(self):
  """Test tf multitask overfits tiny data."""
  n_tasks = 10
  tasks = ["task%d" % task for task in range(n_tasks)]
  task_types = {task: "classification" for task in tasks}
  n_samples = 10
  n_features = 3
  n_classes = 2
  # Generate dummy dataset
  np.random.seed(123)
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  #y = np.random.randint(n_classes, size=(n_samples, n_tasks))
  y = np.zeros((n_samples, n_tasks))
  w = np.ones((n_samples, n_tasks))
  dataset = Dataset.from_numpy(self.train_dir, X, y, w, ids, tasks)
  model_params = {
      "layer_sizes": [1000],
      "dropouts": [.0],
      "learning_rate": 0.0003,
      "momentum": .9,
      "batch_size": n_samples,
      "num_classification_tasks": n_tasks,
      "num_classes": n_classes,
      "num_features": n_features,
      "weight_init_stddevs": [.1],
      "bias_init_consts": [1.],
      "nb_epoch": 100,
      "penalty": 0.0,
      "optimizer": "adam",
      "data_shape": dataset.get_data_shape()
  }
  verbosity = "high"
  classification_metric = Metric(metrics.accuracy_score, verbosity=verbosity)
  model = TensorflowModel(tasks, task_types, model_params, self.model_dir,
                          tf_class=TensorflowMultiTaskClassifier,
                          verbosity=verbosity)
  # Fit trained model
  model.fit(dataset)
  model.save()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
  scores = evaluator.compute_model_performance([classification_metric])
  assert scores[classification_metric.name] > .9
def test_sklearn_multitask_classification_overfit(self):
  """Test SKLearn singletask-to-multitask overfits tiny data."""
  n_tasks = 10
  tasks = ["task%d" % task for task in range(n_tasks)]
  task_types = {task: "classification" for task in tasks}
  n_samples = 10
  n_features = 3
  # Generate dummy dataset
  np.random.seed(123)
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  y = np.random.randint(2, size=(n_samples, n_tasks))
  w = np.ones((n_samples, n_tasks))
  dataset = Dataset.from_numpy(self.train_dir, X, y, w, ids, tasks)
  model_params = {
      "batch_size": None,
      "data_shape": dataset.get_data_shape()
  }
  verbosity = "high"
  classification_metric = Metric(metrics.roc_auc_score, verbosity=verbosity)

  def model_builder(tasks, task_types, model_params, model_dir,
                    verbosity=None):
    return SklearnModel(tasks, task_types, model_params, model_dir,
                        mode="classification",
                        model_instance=RandomForestClassifier(),
                        verbosity=verbosity)

  model = SingletaskToMultitask(tasks, task_types, model_params,
                                self.model_dir, model_builder,
                                verbosity=verbosity)
  # Fit trained model
  model.fit(dataset)
  model.save()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
  scores = evaluator.compute_model_performance([classification_metric])
  assert scores[classification_metric.name] > .9
def test_tf_reload(self):
  """Test that trained tensorflow models can be reloaded correctly."""
  tasks = ["task0"]
  task_types = {task: "classification" for task in tasks}
  n_samples = 10
  n_features = 3
  n_tasks = len(tasks)
  n_classes = 2
  # Generate dummy dataset
  np.random.seed(123)
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  y = np.random.randint(n_classes, size=(n_samples, n_tasks))
  w = np.ones((n_samples, n_tasks))
  dataset = Dataset.from_numpy(self.train_dir, X, y, w, ids, tasks)
  model_params = {
      "layer_sizes": [1000],
      "dropouts": [0.0],
      "learning_rate": 0.003,
      "momentum": 0.9,
      "batch_size": n_samples,
      "num_classification_tasks": 1,
      "num_classes": n_classes,
      "num_features": n_features,
      "weight_init_stddevs": [1.0],
      "bias_init_consts": [1.0],
      "nb_epoch": 100,
      "penalty": 0.0,
      "optimizer": "adam",
      "data_shape": dataset.get_data_shape(),
  }
  verbosity = "high"
  classification_metric = Metric(metrics.accuracy_score, verbosity=verbosity)
  model = TensorflowModel(tasks, task_types, model_params, self.model_dir,
                          tf_class=TensorflowMultiTaskClassifier,
                          verbosity=verbosity)
  # Fit trained model
  model.fit(dataset)
  model.save()
  # Load trained model
  reloaded_model = TensorflowModel(tasks, task_types, model_params,
                                   self.model_dir,
                                   tf_class=TensorflowMultiTaskClassifier,
                                   verbosity=verbosity)
  reloaded_model.reload()
  assert reloaded_model.eval_model._restored_model
  # Eval model on train
  transformers = []
  evaluator = Evaluator(reloaded_model, dataset, transformers,
                        verbosity=verbosity)
  scores = evaluator.compute_model_performance([classification_metric])
  assert scores[classification_metric.name] > 0.9
def test_singletask_to_multitask_classification(self):
  splittype = "scaffold"
  compound_featurizers = [CircularFingerprint(size=1024)]
  complex_featurizers = []
  output_transformers = []
  tasks = ["task0", "task1", "task2", "task3", "task4", "task5", "task6",
           "task7", "task8", "task9", "task10", "task11", "task12",
           "task13", "task14", "task15", "task16"]
  task_types = {task: "classification" for task in tasks}
  input_file = "multitask_example.csv"
  n_features = 10
  n_tasks = len(tasks)
  # Define train dataset
  n_train = 100
  X_train = np.random.rand(n_train, n_features)
  y_train = np.random.randint(2, size=(n_train, n_tasks))
  w_train = np.ones_like(y_train)
  ids_train = ["C"] * n_train
  train_dataset = Dataset.from_numpy(self.train_dir, X_train, y_train,
                                     w_train, ids_train, tasks)
  # Define test dataset
  n_test = 10
  X_test = np.random.rand(n_test, n_features)
  y_test = np.random.randint(2, size=(n_test, n_tasks))
  w_test = np.ones_like(y_test)
  ids_test = ["C"] * n_test
  test_dataset = Dataset.from_numpy(self.test_dir, X_test, y_test,
                                    w_test, ids_test, tasks)
  params_dict = {
      "batch_size": 32,
      "data_shape": train_dataset.get_data_shape()
  }
  classification_metrics = [Metric(metrics.roc_auc_score)]

  def model_builder(tasks, task_types, model_params, model_dir,
                    verbosity=None):
    return SklearnModel(tasks, task_types, model_params, model_dir,
                        model_instance=LogisticRegression())

  multitask_model = SingletaskToMultitask(tasks, task_types, params_dict,
                                          self.model_dir, model_builder)
  # Fit trained model
  multitask_model.fit(train_dataset)
  multitask_model.save()
  # Eval multitask_model on train
  evaluator = Evaluator(multitask_model, train_dataset, output_transformers,
                        verbosity=True)
  _ = evaluator.compute_model_performance(classification_metrics)
  # Eval multitask_model on test
  evaluator = Evaluator(multitask_model, test_dataset, output_transformers,
                        verbosity=True)
  _ = evaluator.compute_model_performance(classification_metrics)
def _evaluate(self, model, metrics, output_transformers, *datasets):
  """Compute performance of the model on each of the given datasets."""
  scores = []
  for dataset in datasets:
    evaluator = Evaluator(model, dataset, output_transformers)
    score = evaluator.compute_model_performance(metrics)
    scores.append(score)
  return scores
def eval_trained_model(model_type, model_dir, data_dir, csv_out, stats_out):
  """Evaluates a trained model on specified data."""
  model = Model.load(model_type, model_dir)
  data = Dataset(data_dir)
  evaluator = Evaluator(model, data, verbose=True)
  _, perf_df = evaluator.compute_model_performance(csv_out, stats_out)
  print("Model Performance.")
  print(perf_df)
def test_sklearn_multitask_classification(self):
  """Test that sklearn models can learn on simple multitask classification."""
  np.random.seed(123)
  n_tasks = 4
  dataset = sklearn.datasets.load_digits(n_class=2)
  X, y = dataset.data, dataset.target
  y = np.reshape(y, (len(y), 1))
  y = np.hstack([y] * n_tasks)
  frac_train = .7
  n_samples = len(X)
  n_train = int(frac_train * n_samples)
  X_train, y_train = X[:n_train], y[:n_train]
  X_test, y_test = X[n_train:], y[n_train:]
  train_dataset = Dataset.from_numpy(self.train_dir, X_train, y_train)
  test_dataset = Dataset.from_numpy(self.test_dir, X_test, y_test)
  tasks = train_dataset.get_task_names()
  task_types = {task: "classification" for task in tasks}
  model_params = {
      "batch_size": None,
      "data_shape": train_dataset.get_data_shape()
  }
  verbosity = "high"
  classification_metric = Metric(metrics.roc_auc_score, verbosity=verbosity)

  def model_builder(tasks, task_types, model_params, model_dir,
                    verbosity=None):
    return SklearnModel(tasks, task_types, model_params, model_dir,
                        mode="classification",
                        model_instance=LogisticRegression(),
                        verbosity=verbosity)

  model = SingletaskToMultitask(tasks, task_types, model_params,
                                self.model_dir, model_builder,
                                verbosity=verbosity)
  # Fit trained model
  model.fit(train_dataset)
  model.save()
  # Eval model on train
  transformers = []
  train_evaluator = Evaluator(model, train_dataset, transformers,
                              verbosity=verbosity)
  train_scores = train_evaluator.compute_model_performance(
      [classification_metric])
  print("train_scores")
  print(train_scores)
  # Eval model on test
  transformers = []
  evaluator = Evaluator(model, test_dataset, transformers,
                        verbosity=verbosity)
  scores = evaluator.compute_model_performance([classification_metric])
  print("scores")
  print(scores)
  for score in scores[classification_metric.name]:
    assert score > .5
def test_tf_regression_overfit(self):
  """Test that TensorFlow models can overfit simple regression datasets."""
  tasks = ["task0"]
  task_types = {task: "regression" for task in tasks}
  n_samples = 10
  n_features = 3
  n_tasks = len(tasks)
  # Generate dummy dataset
  np.random.seed(123)
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  y = np.zeros((n_samples, n_tasks))
  w = np.ones((n_samples, n_tasks))
  dataset = Dataset.from_numpy(self.train_dir, X, y, w, ids, tasks)
  model_params = {
      "layer_sizes": [1000],
      "dropouts": [.0],
      "learning_rate": 0.003,
      "momentum": .9,
      "batch_size": n_samples,
      "num_regression_tasks": 1,
      "num_features": n_features,
      "weight_init_stddevs": [np.sqrt(6) / np.sqrt(1000)],
      "bias_init_consts": [1.],
      "nb_epoch": 100,
      "penalty": 0.0,
      "optimizer": "momentum",
      "data_shape": dataset.get_data_shape()
  }
  verbosity = "high"
  regression_metric = Metric(metrics.mean_squared_error, verbosity=verbosity)
  model = TensorflowModel(tasks, task_types, model_params, self.model_dir,
                          tf_class=TensorflowMultiTaskRegressor,
                          verbosity=verbosity)
  # Fit trained model
  model.fit(dataset)
  model.save()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
  scores = evaluator.compute_model_performance([regression_metric])
  assert scores[regression_metric.name] < .1
def test_keras_skewed_classification_overfit(self):
  """Test keras models can overfit 0/1 datasets with few actives."""
  tasks = ["task0"]
  task_types = {task: "classification" for task in tasks}
  n_samples = 100
  n_features = 3
  n_tasks = len(tasks)
  # Generate dummy dataset
  np.random.seed(123)
  p = .05
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  y = np.random.binomial(1, p, size=(n_samples, n_tasks))
  w = np.ones((n_samples, n_tasks))
  dataset = Dataset.from_numpy(self.train_dir, X, y, w, ids, tasks)
  model_params = {
      "nb_hidden": 1000,
      "activation": "relu",
      "dropout": .0,
      "learning_rate": .15,
      "momentum": .9,
      "nesterov": False,
      "decay": 1e-4,
      "batch_size": n_samples,
      "nb_epoch": 200,
      "init": "glorot_uniform",
      "nb_layers": 1,
      "batchnorm": False,
      "data_shape": dataset.get_data_shape()
  }
  verbosity = "high"
  classification_metric = Metric(metrics.roc_auc_score, verbosity=verbosity)
  model = MultiTaskDNN(tasks, task_types, model_params, self.model_dir,
                       verbosity=verbosity)
  # Fit trained model
  model.fit(dataset)
  model.save()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
  scores = evaluator.compute_model_performance([classification_metric])
  assert scores[classification_metric.name] > .9
def test_keras_reload(self):
  """Test that trained keras models can be reloaded correctly."""
  g = tf.Graph()
  sess = tf.Session(graph=g)
  K.set_session(sess)
  with g.as_default():
    tasks = ["task0"]
    task_types = {task: "classification" for task in tasks}
    n_samples = 10
    n_features = 3
    n_tasks = len(tasks)
    # Generate dummy dataset
    np.random.seed(123)
    ids = np.arange(n_samples)
    X = np.random.rand(n_samples, n_features)
    y = np.random.randint(2, size=(n_samples, n_tasks))
    w = np.ones((n_samples, n_tasks))
    dataset = Dataset.from_numpy(self.train_dir, X, y, w, ids, tasks)
    model_params = {
        "nb_hidden": 1000,
        "activation": "relu",
        "dropout": 0.0,
        "learning_rate": 0.15,
        "momentum": 0.9,
        "nesterov": False,
        "decay": 1e-4,
        "batch_size": n_samples,
        "nb_epoch": 200,
        "init": "glorot_uniform",
        "nb_layers": 1,
        "batchnorm": False,
        "data_shape": dataset.get_data_shape(),
    }
    verbosity = "high"
    classification_metric = Metric(metrics.roc_auc_score,
                                   verbosity=verbosity)
    model = MultiTaskDNN(tasks, task_types, model_params, self.model_dir,
                         verbosity=verbosity)
    # Fit trained model
    model.fit(dataset)
    model.save()
    # Load trained model
    reloaded_model = MultiTaskDNN(tasks, task_types, model_params,
                                  self.model_dir, verbosity=verbosity)
    reloaded_model.reload()
    # Eval model on train
    transformers = []
    evaluator = Evaluator(reloaded_model, dataset, transformers,
                          verbosity=verbosity)
    scores = evaluator.compute_model_performance([classification_metric])
    assert scores[classification_metric.name] > 0.9
def test_tf_skewed_classification_overfit(self):
  """Test tensorflow models can overfit 0/1 datasets with few actives."""
  tasks = ["task0"]
  task_types = {task: "classification" for task in tasks}
  n_samples = 100
  n_features = 3
  n_tasks = len(tasks)
  n_classes = 2
  # Generate dummy dataset
  np.random.seed(123)
  p = .05
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  y = np.random.binomial(1, p, size=(n_samples, n_tasks))
  w = np.ones((n_samples, n_tasks))
  dataset = Dataset.from_numpy(self.train_dir, X, y, w, ids, tasks)
  model_params = {
      "layer_sizes": [1500],
      "dropouts": [.0],
      "learning_rate": 0.003,
      "momentum": .9,
      "batch_size": n_samples,
      "num_classification_tasks": 1,
      "num_classes": n_classes,
      "num_features": n_features,
      "weight_init_stddevs": [1.],
      "bias_init_consts": [1.],
      "nb_epoch": 200,
      "penalty": 0.0,
      "optimizer": "adam",
      "data_shape": dataset.get_data_shape()
  }
  verbosity = "high"
  classification_metric = Metric(metrics.roc_auc_score, verbosity=verbosity)
  model = TensorflowModel(tasks, task_types, model_params, self.model_dir,
                          tf_class=TensorflowMultiTaskClassifier,
                          verbosity=verbosity)
  # Fit trained model
  model.fit(dataset)
  model.save()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
  scores = evaluator.compute_model_performance([classification_metric])
  assert scores[classification_metric.name] > .8
def test_multitask_keras_mlp_ECFP_classification_API(self):
  """Straightforward test of Keras multitask deepchem classification API."""
  g = tf.Graph()
  sess = tf.Session(graph=g)
  K.set_session(sess)
  with g.as_default():
    task_type = "classification"
    # TODO(rbharath): There should be some automatic check to ensure that all
    # required model_params are specified.
    # TODO(rbharath): Turning off dropout to make tests behave.
    model_params = {"nb_hidden": 10, "activation": "relu",
                    "dropout": .0, "learning_rate": .01,
                    "momentum": .9, "nesterov": False,
                    "decay": 1e-4, "batch_size": 5,
                    "nb_epoch": 2, "init": "glorot_uniform",
                    "nb_layers": 1, "batchnorm": False}
    input_file = os.path.join(self.current_dir, "multitask_example.csv")
    tasks = ["task0", "task1", "task2", "task3", "task4", "task5", "task6",
             "task7", "task8", "task9", "task10", "task11", "task12",
             "task13", "task14", "task15", "task16"]
    task_types = {task: task_type for task in tasks}
    featurizer = CircularFingerprint(size=1024)
    loader = DataLoader(tasks=tasks, smiles_field=self.smiles_field,
                        featurizer=featurizer, verbosity="low")
    dataset = loader.featurize(input_file, self.data_dir)
    splitter = ScaffoldSplitter()
    train_dataset, test_dataset = splitter.train_test_split(
        dataset, self.train_dir, self.test_dir)
    transformers = []
    model_params["data_shape"] = train_dataset.get_data_shape()
    classification_metrics = [Metric(metrics.roc_auc_score),
                              Metric(metrics.matthews_corrcoef),
                              Metric(metrics.recall_score),
                              Metric(metrics.accuracy_score)]
    model = MultiTaskDNN(tasks, task_types, model_params, self.model_dir)
    # Fit trained model
    model.fit(train_dataset)
    model.save()
    # Eval model on train
    evaluator = Evaluator(model, train_dataset, transformers, verbosity=True)
    _ = evaluator.compute_model_performance(classification_metrics)
    # Eval model on test
    evaluator = Evaluator(model, test_dataset, transformers, verbosity=True)
    _ = evaluator.compute_model_performance(classification_metrics)
def test_multiclass_classification_singletask():
  """Test multiclass classification evaluation."""
  X = np.random.rand(100, 5)
  y = np.random.randint(5, size=(100,))
  dataset = dc.data.NumpyDataset(X, y)
  model = dc.models.MultitaskClassifier(1, 5, n_classes=5)
  evaluator = Evaluator(model, dataset, [])
  multitask_scores = evaluator.compute_model_performance(
      dc.metrics.roc_auc_score, n_classes=5)
  assert len(multitask_scores) == 1
  assert multitask_scores["metric-1"] >= 0
def test_keras_multitask_regression_overfit(self):
  """Test keras multitask overfits tiny data."""
  n_tasks = 10
  tasks = ["task%d" % task for task in range(n_tasks)]
  task_types = {task: "regression" for task in tasks}
  n_samples = 10
  n_features = 3
  # Generate dummy dataset
  np.random.seed(123)
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  y = np.random.randint(2, size=(n_samples, n_tasks))
  w = np.ones((n_samples, n_tasks))
  dataset = Dataset.from_numpy(self.train_dir, X, y, w, ids, tasks)
  model_params = {
      "nb_hidden": 1000,
      "activation": "relu",
      "dropout": .0,
      "learning_rate": .15,
      "momentum": .9,
      "nesterov": False,
      "decay": 1e-4,
      "batch_size": n_samples,
      "nb_epoch": 200,
      "init": "glorot_uniform",
      "nb_layers": 1,
      "batchnorm": False,
      "data_shape": dataset.get_data_shape()
  }
  verbosity = "high"
  regression_metric = Metric(metrics.r2_score, verbosity=verbosity)
  model = MultiTaskDNN(tasks, task_types, model_params, self.model_dir,
                       verbosity=verbosity)
  # Fit trained model
  model.fit(dataset)
  model.save()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
  scores = evaluator.compute_model_performance([regression_metric])
  assert scores[regression_metric.name] > .9
def test_keras_reload(self):
  """Test that trained keras models can be reloaded correctly."""
  g = tf.Graph()
  sess = tf.Session(graph=g)
  K.set_session(sess)
  with g.as_default():
    tasks = ["task0"]
    task_types = {task: "classification" for task in tasks}
    n_samples = 10
    n_features = 3
    n_tasks = len(tasks)
    # Generate dummy dataset
    np.random.seed(123)
    ids = np.arange(n_samples)
    X = np.random.rand(n_samples, n_features)
    y = np.random.randint(2, size=(n_samples, n_tasks))
    w = np.ones((n_samples, n_tasks))
    dataset = NumpyDataset(X, y, w, ids)
    verbosity = "high"
    classification_metric = Metric(metrics.roc_auc_score,
                                   verbosity=verbosity)
    keras_model = MultiTaskDNN(n_tasks, n_features, "classification",
                               dropout=0.)
    model = KerasModel(keras_model, self.model_dir)
    # Fit trained model
    model.fit(dataset)
    model.save()
    # Load trained model
    reloaded_keras_model = MultiTaskDNN(n_tasks, n_features,
                                        "classification", dropout=0.)
    reloaded_model = KerasModel(reloaded_keras_model, self.model_dir)
    reloaded_model.reload(custom_objects={"MultiTaskDNN": MultiTaskDNN})
    # Eval model on train
    transformers = []
    evaluator = Evaluator(reloaded_model, dataset, transformers,
                          verbosity=verbosity)
    scores = evaluator.compute_model_performance([classification_metric])
    assert scores[classification_metric.name] > .6
def test_sklearn_reload(self):
  """Test that trained model can be reloaded correctly."""
  tasks = ["task0"]
  task_types = {task: "classification" for task in tasks}
  n_samples = 10
  n_features = 3
  n_tasks = len(tasks)
  # Generate dummy dataset
  np.random.seed(123)
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  y = np.random.randint(2, size=(n_samples, n_tasks))
  w = np.ones((n_samples, n_tasks))
  dataset = Dataset.from_numpy(self.train_dir, X, y, w, ids, tasks)
  model_params = {
      "batch_size": None,
      "data_shape": dataset.get_data_shape()
  }
  verbosity = "high"
  classification_metric = Metric(metrics.roc_auc_score, verbosity=verbosity)
  model = SklearnModel(tasks, task_types, model_params, self.model_dir,
                       mode="classification",
                       model_instance=RandomForestClassifier())
  # Fit trained model
  model.fit(dataset)
  model.save()
  # Load trained model
  reloaded_model = SklearnModel(tasks, task_types, model_params,
                                self.model_dir, mode="classification")
  reloaded_model.reload()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(reloaded_model, dataset, transformers,
                        verbosity=verbosity)
  scores = evaluator.compute_model_performance([classification_metric])
  assert scores[classification_metric.name] > .9
def test_tf_multitask_regression_overfit(self):
  """Test tf multitask overfits tiny data."""
  n_tasks = 10
  tasks = ["task%d" % task for task in range(n_tasks)]
  task_types = {task: "regression" for task in tasks}
  n_samples = 10
  n_features = 3
  n_classes = 2
  # Generate dummy dataset
  np.random.seed(123)
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  #y = np.random.randint(n_classes, size=(n_samples, n_tasks))
  y = np.zeros((n_samples, n_tasks))
  w = np.ones((n_samples, n_tasks))
  dataset = Dataset.from_numpy(self.train_dir, X, y, w, ids, tasks)
  model_params = {
      "layer_sizes": [1000],
      "dropouts": [.0],
      "learning_rate": 0.0003,
      "momentum": .9,
      "batch_size": n_samples,
      "num_regression_tasks": n_tasks,
      "num_classes": n_classes,
      "num_features": n_features,
      "weight_init_stddevs": [.1],
      "bias_init_consts": [1.],
      "nb_epoch": 100,
      "penalty": 0.0,
      "optimizer": "adam",
      "data_shape": dataset.get_data_shape()
  }
  verbosity = "high"
  regression_metric = Metric(metrics.r2_score, verbosity=verbosity)
  model = TensorflowModel(tasks, task_types, model_params, self.model_dir,
                          tf_class=TensorflowMultiTaskRegressor,
                          verbosity=verbosity)
  # Fit trained model
  model.fit(dataset)
  model.save()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
  scores = evaluator.compute_model_performance([regression_metric])
  assert scores[regression_metric.name] > .9
def test_evaluator_dc_metric():
  """Test an evaluator on a dataset."""
  X = np.random.rand(10, 5)
  y = np.random.rand(10, 1)
  dataset = dc.data.NumpyDataset(X, y)
  model = dc.models.MultitaskRegressor(1, 5)
  evaluator = Evaluator(model, dataset, [])
  metric = dc.metrics.Metric(dc.metrics.mae_score)
  multitask_scores = evaluator.compute_model_performance(metric)
  assert isinstance(multitask_scores, dict)
  assert len(multitask_scores) == 1
  assert multitask_scores['mae_score'] > 0
def test_keras_multitask_regression_overfit(self):
  """Test keras multitask overfits tiny data."""
  g = tf.Graph()
  sess = tf.Session(graph=g)
  K.set_session(sess)
  with g.as_default():
    n_tasks = 10
    tasks = ["task%d" % task for task in range(n_tasks)]
    task_types = {task: "regression" for task in tasks}
    n_samples = 10
    n_features = 3
    # Generate dummy dataset
    np.random.seed(123)
    ids = np.arange(n_samples)
    X = np.random.rand(n_samples, n_features)
    y = np.random.randint(2, size=(n_samples, n_tasks))
    w = np.ones((n_samples, n_tasks))
    dataset = Dataset.from_numpy(self.train_dir, X, y, w, ids, tasks)
    model_params = {
        "nb_hidden": 1000,
        "activation": "relu",
        "dropout": .0,
        "learning_rate": .15,
        "momentum": .9,
        "nesterov": False,
        "decay": 1e-4,
        "batch_size": n_samples,
        "nb_epoch": 200,
        "init": "glorot_uniform",
        "nb_layers": 1,
        "batchnorm": False,
        "data_shape": dataset.get_data_shape()
    }
    verbosity = "high"
    regression_metric = Metric(metrics.r2_score, verbosity=verbosity)
    model = MultiTaskDNN(tasks, task_types, model_params, self.model_dir,
                         verbosity=verbosity)
    # Fit trained model
    model.fit(dataset)
    model.save()
    # Eval model on train
    transformers = []
    evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
    scores = evaluator.compute_model_performance([regression_metric])
    assert scores[regression_metric.name] > .9
def test_sklearn_multiclass_classification_singletask():
  """Test multiclass classification evaluation."""
  X = np.random.rand(100, 5)
  y = np.random.randint(5, size=(100,))
  dataset = dc.data.NumpyDataset(X, y)
  rf = sklearn.ensemble.RandomForestClassifier(50)
  model = dc.models.SklearnModel(rf)
  model.fit(dataset)
  evaluator = Evaluator(model, dataset, [])
  multitask_scores = evaluator.compute_model_performance(
      dc.metrics.roc_auc_score, n_classes=5)
  assert len(multitask_scores) == 1
  assert multitask_scores["metric-1"] >= 0
def test_tf_reload(self):
  """Test that trained tensorflow models can be reloaded correctly."""
  n_samples = 10
  n_features = 3
  n_tasks = 1
  n_classes = 2
  # Generate dummy dataset
  np.random.seed(123)
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  y = np.random.randint(n_classes, size=(n_samples, n_tasks))
  w = np.ones((n_samples, n_tasks))
  dataset = NumpyDataset(X, y, w, ids)
  verbosity = "high"
  classification_metric = Metric(metrics.accuracy_score, verbosity=verbosity)
  tensorflow_model = TensorflowMultiTaskClassifier(
      n_tasks, n_features, self.model_dir, dropouts=[0.],
      verbosity=verbosity)
  model = TensorflowModel(tensorflow_model, self.model_dir)
  # Fit trained model
  model.fit(dataset)
  model.save()
  # Load trained model
  reloaded_tensorflow_model = TensorflowMultiTaskClassifier(
      n_tasks, n_features, self.model_dir, dropouts=[0.],
      verbosity=verbosity)
  reloaded_model = TensorflowModel(reloaded_tensorflow_model, self.model_dir)
  reloaded_model.reload()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(reloaded_model, dataset, transformers,
                        verbosity=verbosity)
  scores = evaluator.compute_model_performance([classification_metric])
  assert scores[classification_metric.name] > .6
def test_sklearn_classification(self):
  """Test that sklearn models can learn on simple classification datasets."""
  np.random.seed(123)
  dataset = sklearn.datasets.load_digits(n_class=2)
  X, y = dataset.data, dataset.target
  frac_train = .7
  n_samples = len(X)
  n_train = int(frac_train * n_samples)
  X_train, y_train = X[:n_train], y[:n_train]
  X_test, y_test = X[n_train:], y[n_train:]
  print("X_train.shape, y_train.shape, X_test.shape, y_test.shape")
  print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
  train_dataset = Dataset.from_numpy(self.train_dir, X_train, y_train)
  test_dataset = Dataset.from_numpy(self.test_dir, X_test, y_test)
  tasks = train_dataset.get_task_names()
  task_types = {task: "classification" for task in tasks}
  model_params = {
      "batch_size": None,
      "data_shape": train_dataset.get_data_shape()
  }
  verbosity = "high"
  classification_metric = Metric(metrics.roc_auc_score, verbosity=verbosity)
  model = SklearnModel(tasks, task_types, model_params, self.model_dir,
                       mode="classification",
                       model_instance=LogisticRegression())
  # Fit trained model
  model.fit(train_dataset)
  model.save()
  # Eval model on train
  transformers = []
  train_evaluator = Evaluator(model, train_dataset, transformers,
                              verbosity=verbosity)
  train_scores = train_evaluator.compute_model_performance(
      [classification_metric])
  print("train_scores")
  print(train_scores)
  # Eval model on test
  transformers = []
  evaluator = Evaluator(model, test_dataset, transformers,
                        verbosity=verbosity)
  scores = evaluator.compute_model_performance([classification_metric])
  print("scores")
  print(scores)
  assert scores[classification_metric.name] > .5
def test_singletask_sklearn_rf_RDKIT_descriptor_regression_API(self):
  """Test of singletask RF RDKIT-descriptor regression API."""
  splittype = "scaffold"
  featurizer = RDKitDescriptors()
  tasks = ["log-solubility"]
  task_type = "regression"
  task_types = {task: task_type for task in tasks}
  model_params = {}
  input_file = os.path.join(self.current_dir, "example.csv")
  loader = DataLoader(tasks=tasks, smiles_field=self.smiles_field,
                      featurizer=featurizer, verbosity="low")
  dataset = loader.featurize(input_file, self.data_dir)
  splitter = ScaffoldSplitter()
  train_dataset, test_dataset = splitter.train_test_split(
      dataset, self.train_dir, self.test_dir)
  input_transformers = [
      NormalizationTransformer(transform_X=True, dataset=train_dataset),
      ClippingTransformer(transform_X=True, dataset=train_dataset)]
  output_transformers = [
      NormalizationTransformer(transform_y=True, dataset=train_dataset)]
  transformers = input_transformers + output_transformers
  for dataset in [train_dataset, test_dataset]:
    for transformer in transformers:
      transformer.transform(dataset)
  model_params["data_shape"] = train_dataset.get_data_shape()
  regression_metrics = [Metric(metrics.r2_score),
                        Metric(metrics.mean_squared_error),
                        Metric(metrics.mean_absolute_error)]
  model = SklearnModel(tasks, task_types, model_params, self.model_dir,
                       mode="regression",
                       model_instance=RandomForestRegressor())
  # Fit trained model
  model.fit(train_dataset)
  model.save()
  # Eval model on train
  evaluator = Evaluator(model, train_dataset, transformers, verbosity=True)
  _ = evaluator.compute_model_performance(regression_metrics)
  # Eval model on test
  evaluator = Evaluator(model, test_dataset, transformers, verbosity=True)
  _ = evaluator.compute_model_performance(regression_metrics)
def test_gc_multiclass_classification():
  """Test multiclass classification evaluation."""
  np.random.seed(1234)
  smiles = ["C", "CC"]
  featurizer = dc.feat.ConvMolFeaturizer()
  X = featurizer.featurize(smiles)
  y = np.random.randint(5, size=(len(smiles),))
  dataset = dc.data.NumpyDataset(X, y)
  model = dc.models.GraphConvModel(1, mode="classification", n_classes=5)
  evaluator = Evaluator(model, dataset, [])
  multitask_scores = evaluator.compute_model_performance(
      dc.metrics.accuracy_score, n_classes=5)
  assert len(multitask_scores) == 1
  assert multitask_scores["metric-1"] >= 0
def test_evaluator_sklearn_metric():
  """Test an evaluator on a dataset."""
  X = np.random.rand(10, 5)
  y = np.random.rand(10, 1)
  dataset = dc.data.NumpyDataset(X, y)
  model = dc.models.MultitaskRegressor(1, 5)
  evaluator = Evaluator(model, dataset, [])
  multitask_scores = evaluator.compute_model_performance(
      dc.metrics.mean_absolute_error)
  assert isinstance(multitask_scores, dict)
  assert len(multitask_scores) == 1
  # Note that since no name was provided, metrics are indexed by the order
  # in which they were given.
  assert multitask_scores['metric-1'] > 0
def test_multitask_model_evaluate_sklearn():
  """Test evaluation of a multitask metric."""
  X = np.random.rand(10, 5)
  y = np.random.rand(10, 2)
  dataset = dc.data.NumpyDataset(X, y)
  model = dc.models.MultitaskRegressor(2, 5)
  evaluator = Evaluator(model, dataset, [])
  multitask_scores, all_task_scores = evaluator.compute_model_performance(
      dc.metrics.mean_absolute_error, per_task_metrics=True)
  assert isinstance(multitask_scores, dict)
  assert len(multitask_scores) == 1
  assert multitask_scores['metric-1'] > 0
  assert isinstance(all_task_scores, dict)
  assert len(all_task_scores) == 1
def test_gc_binary_classification():
  """Test binary classification evaluation."""
  smiles = ["C", "CC"]
  featurizer = dc.feat.ConvMolFeaturizer()
  X = featurizer.featurize(smiles)
  y = np.random.randint(2, size=(len(smiles),))
  dataset = dc.data.NumpyDataset(X, y)
  model = dc.models.GraphConvModel(1, mode="classification")
  # TODO: Fix this case with correct thresholding
  evaluator = Evaluator(model, dataset, [])
  multitask_scores = evaluator.compute_model_performance(
      dc.metrics.accuracy_score, n_classes=2)
  assert len(multitask_scores) == 1
  assert multitask_scores["metric-1"] >= 0
def test_singletask_sklearn_rf_ECFP_regression_sharded_API(self):
  """Test of singletask RF ECFP regression API: sharded edition."""
  splittype = "scaffold"
  featurizer = CircularFingerprint(size=1024)
  model_params = {}
  tasks = ["label"]
  task_type = "regression"
  task_types = {task: task_type for task in tasks}
  input_file = os.path.join(self.current_dir,
                            "../../../datasets/pdbbind_core_df.pkl.gz")
  loader = DataLoader(tasks=tasks, smiles_field=self.smiles_field,
                      featurizer=featurizer, verbosity="low")
  dataset = loader.featurize(input_file, self.data_dir)
  splitter = ScaffoldSplitter()
  train_dataset, test_dataset = splitter.train_test_split(
      dataset, self.train_dir, self.test_dir)
  input_transformers = []
  output_transformers = [
      NormalizationTransformer(transform_y=True, dataset=train_dataset)]
  transformers = input_transformers + output_transformers
  for dataset in [train_dataset, test_dataset]:
    for transformer in transformers:
      transformer.transform(dataset)
  # We set shard size above to force the creation of multiple shards of the
  # data. pdbbind_core has ~200 examples.
  model_params["data_shape"] = train_dataset.get_data_shape()
  regression_metrics = [Metric(metrics.r2_score),
                        Metric(metrics.mean_squared_error),
                        Metric(metrics.mean_absolute_error)]
  model = SklearnModel(tasks, task_types, model_params, self.model_dir,
                       mode="regression",
                       model_instance=RandomForestRegressor())
  # Fit trained model
  model.fit(train_dataset)
  model.save()
  # Eval model on train
  evaluator = Evaluator(model, train_dataset, transformers, verbosity=True)
  _ = evaluator.compute_model_performance(regression_metrics)
  # Eval model on test
  evaluator = Evaluator(model, test_dataset, transformers, verbosity=True)
  _ = evaluator.compute_model_performance(regression_metrics)
def test_API(self):
  """Straightforward test of multitask deepchem classification API."""
  splittype = "scaffold"
  feature_types = ["ECFP"]
  output_transforms = []
  input_transforms = []
  task_type = "classification"
  # TODO(rbharath): There should be some automatic check to ensure that all
  # required model_params are specified.
  model_params = {"nb_hidden": 10, "activation": "relu",
                  "dropout": .5, "learning_rate": .01,
                  "momentum": .9, "nesterov": False,
                  "decay": 1e-4, "batch_size": 5,
                  "nb_epoch": 2}
  model_name = "multitask_deep_classifier"
  # Featurize input
  featurizer = DataFeaturizer(tasks=self.tasks,
                              smiles_field=self.smiles_field,
                              verbose=True)
  feature_files = featurizer.featurize(self.input_file, feature_types,
                                       self.feature_dir)
  # Transform data into arrays for ML
  samples = FeaturizedSamples(self.samplesdir, feature_files,
                              reload_data=False)
  # Split into train/test
  train_samples, test_samples = samples.train_test_split(
      splittype, self.train_dir, self.test_dir)
  train_dataset = Dataset(self.train_dir, train_samples, feature_types)
  test_dataset = Dataset(self.test_dir, test_samples, feature_types)
  # Transforming train/test data
  train_dataset.transform(input_transforms, output_transforms)
  test_dataset.transform(input_transforms, output_transforms)
  # Fit model
  task_types = {task: task_type for task in self.tasks}
  model_params["data_shape"] = train_dataset.get_data_shape()
  model = Model.model_builder(model_name, task_types, model_params)
  model.fit(train_dataset)
  model.save(self.model_dir)
  # Eval model on test
  evaluator = Evaluator(model, test_dataset, verbose=True)
  with tempfile.NamedTemporaryFile() as test_csv_out:
    with tempfile.NamedTemporaryFile() as test_stats_out:
      evaluator.compute_model_performance(test_csv_out, test_stats_out)
def test_multitask_evaluator():
  """Test evaluation of a multitask metric."""
  n_tasks = 2
  X = np.random.rand(10, 5)
  y = np.random.rand(10, 2, 1)
  dataset = dc.data.NumpyDataset(X, y)
  model = dc.models.MultitaskRegressor(2, 5)
  evaluator = Evaluator(model, dataset, [])
  metric = dc.metrics.Metric(dc.metrics.mae_score)
  multitask_scores, all_task_scores = evaluator.compute_model_performance(
      metric, per_task_metrics=True)
  assert isinstance(multitask_scores, dict)
  assert len(multitask_scores) == 1
  assert multitask_scores['mae_score'] > 0
  assert isinstance(all_task_scores, dict)
  assert len(all_task_scores) == 1
def test_tf_skewed_missing_classification_overfit(self):
  """TF, skewed data, few actives

  Test tensorflow models overfit 0/1 datasets with missing data and few
  actives. This is intended to be as close to singletask MUV datasets as
  possible.
  """
  n_samples = 5120
  n_features = 6
  n_tasks = 1
  n_classes = 2
  # Generate dummy dataset
  np.random.seed(123)
  p = .002
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  y = np.random.binomial(1, p, size=(n_samples, n_tasks))
  w = np.ones((n_samples, n_tasks))
  y_flat, w_flat = np.squeeze(y), np.squeeze(w)
  y_nonzero = y_flat[w_flat != 0]
  num_nonzero = np.count_nonzero(y_nonzero)
  weight_nonzero = len(y_nonzero)/num_nonzero
  w_flat[y_flat != 0] = weight_nonzero
  w = np.reshape(w_flat, (n_samples, n_tasks))
  dataset = NumpyDataset(X, y, w, ids)
  verbosity = "high"
  classification_metric = Metric(metrics.roc_auc_score, verbosity=verbosity)
  tensorflow_model = TensorflowMultiTaskClassifier(
      n_tasks, n_features, self.model_dir, dropouts=[0.],
      learning_rate=0.003, weight_init_stddevs=[1.],
      batch_size=n_samples, verbosity=verbosity)
  model = TensorflowModel(tensorflow_model, self.model_dir)
  # Fit trained model
  model.fit(dataset, nb_epoch=50)
  model.save()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
  scores = evaluator.compute_model_performance([classification_metric])
  assert scores[classification_metric.name] > .8
def test_sklearn_regression_overfit(self):
  """Test that sklearn models can overfit simple regression datasets."""
  tasks = ["task0"]
  task_types = {task: "regression" for task in tasks}
  n_samples = 10
  n_features = 3
  n_tasks = len(tasks)
  # Generate dummy dataset
  np.random.seed(123)
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  y = np.random.rand(n_samples, n_tasks)
  w = np.ones((n_samples, n_tasks))
  dataset = Dataset.from_numpy(self.train_dir, X, y, w, ids, tasks)
  model_params = {
      "batch_size": None,
      "data_shape": dataset.get_data_shape()
  }
  verbosity = "high"
  regression_metric = Metric(metrics.r2_score, verbosity=verbosity)
  model = SklearnModel(tasks, task_types, model_params, self.model_dir,
                       mode="regression",
                       model_instance=RandomForestRegressor())
  # Fit trained model
  model.fit(dataset)
  model.save()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
  scores = evaluator.compute_model_performance([regression_metric])
  assert scores[regression_metric.name] > .7
def test_sklearn_reload(self):
  """Test that trained model can be reloaded correctly."""
  tasks = ["task0"]
  task_types = {task: "classification" for task in tasks}
  n_samples = 10
  n_features = 3
  n_tasks = len(tasks)
  # Generate dummy dataset
  np.random.seed(123)
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  y = np.random.randint(2, size=(n_samples, n_tasks))
  w = np.ones((n_samples, n_tasks))
  dataset = Dataset.from_numpy(self.train_dir, X, y, w, ids, tasks)
  model_params = {
      "batch_size": None,
      "data_shape": dataset.get_data_shape()
  }
  verbosity = "high"
  classification_metric = Metric(metrics.roc_auc_score, verbosity=verbosity)
  model = SklearnModel(tasks, task_types, model_params, self.model_dir,
                       mode="classification",
                       model_instance=RandomForestClassifier())
  # Fit trained model
  model.fit(dataset)
  model.save()
  # Load trained model
  reloaded_model = SklearnModel(tasks, task_types, model_params,
                                self.model_dir, mode="classification")
  reloaded_model.reload()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(reloaded_model, dataset, transformers,
                        verbosity=verbosity)
  scores = evaluator.compute_model_performance([classification_metric])
  assert scores[classification_metric.name] > 0.9
def test_sklearn_multitask_regression_overfit(self):
  """Test SKLearn singletask-to-multitask overfits tiny regression data."""
  n_tasks = 2
  tasks = ["task%d" % task for task in range(n_tasks)]
  task_types = {task: "regression" for task in tasks}
  n_samples = 10
  n_features = 3
  # Generate dummy dataset
  np.random.seed(123)
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  y = np.random.rand(n_samples, n_tasks)
  w = np.ones((n_samples, n_tasks))
  dataset = Dataset.from_numpy(self.train_dir, X, y, w, ids, tasks)
  model_params = {
      "batch_size": None,
      "data_shape": dataset.get_data_shape()
  }
  verbosity = "high"
  regression_metric = Metric(metrics.r2_score, verbosity=verbosity)

  def model_builder(tasks, task_types, model_params, model_dir,
                    verbosity=None):
    return SklearnModel(tasks, task_types, model_params, model_dir,
                        mode="regression",
                        model_instance=RandomForestRegressor(),
                        verbosity=verbosity)

  model = SingletaskToMultitask(tasks, task_types, model_params,
                                self.model_dir, model_builder,
                                verbosity=verbosity)
  # Fit trained model
  model.fit(dataset)
  model.save()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
  scores = evaluator.compute_model_performance([regression_metric])
  assert scores[regression_metric.name] > .7
def evaluate(self, dataset, metrics, transformers=[]):
  """
  Evaluates the performance of this model on specified dataset.

  Parameters
  ----------
  dataset: dc.data.Dataset
    Dataset object.
  metrics: list of deepchem.metrics.Metric
    Evaluation metrics.
  transformers: list
    List of deepchem.transformers.Transformer

  Returns
  -------
  dict
    Maps tasks to scores under metric.
  """
  evaluator = Evaluator(self, dataset, transformers)
  scores = evaluator.compute_model_performance(metrics)
  return scores
def _create_model(self, train_dataset, test_dataset, model, transformers,
                  metrics):
  """Helper method to create model for test."""
  # Fit trained model
  model.fit(train_dataset)
  model.save(self.model_dir)
  # Eval model on train
  evaluator = Evaluator(model, train_dataset, transformers, verbose=True)
  with tempfile.NamedTemporaryFile() as train_csv_out:
    with tempfile.NamedTemporaryFile() as train_stats_out:
      _, _, _ = evaluator.compute_model_performance(
          metrics, train_csv_out, train_stats_out)
  # Eval model on test
  evaluator = Evaluator(model, test_dataset, transformers, verbose=True)
  with tempfile.NamedTemporaryFile() as test_csv_out:
    with tempfile.NamedTemporaryFile() as test_stats_out:
      _, _, _ = evaluator.compute_model_performance(
          metrics, test_csv_out, test_stats_out)
def _create_model(self, splittype, feature_types, input_transforms,
                  output_transforms, task_type, model_params, model_name,
                  input_file, tasks, protein_pdb_field=None,
                  ligand_pdb_field=None):
  """Helper method to create model for test."""
  # Featurize input
  input_file = os.path.join(self.current_dir, input_file)
  featurizer = DataFeaturizer(tasks=tasks,
                              smiles_field=self.smiles_field,
                              protein_pdb_field=protein_pdb_field,
                              ligand_pdb_field=ligand_pdb_field,
                              verbose=True)
  feature_files = featurizer.featurize(input_file, feature_types,
                                       self.feature_dir)
  # Transform data into arrays for ML
  samples = FeaturizedSamples(self.samplesdir, feature_files,
                              reload_data=False)
  # Split into train/test
  train_samples, test_samples = samples.train_test_split(
      splittype, self.train_dir, self.test_dir)
  train_dataset = Dataset(self.train_dir, train_samples, feature_types)
  test_dataset = Dataset(self.test_dir, test_samples, feature_types)
  # Transforming train/test data
  train_dataset.transform(input_transforms, output_transforms)
  test_dataset.transform(input_transforms, output_transforms)
  # Fit model
  task_types = {task: task_type for task in tasks}
  model_params["data_shape"] = train_dataset.get_data_shape()
  model = Model.model_builder(model_name, task_types, model_params)
  model.fit(train_dataset)
  model.save(self.model_dir)
  # Eval model on test
  evaluator = Evaluator(model, test_dataset, verbose=True)
  with tempfile.NamedTemporaryFile() as test_csv_out:
    with tempfile.NamedTemporaryFile() as test_stats_out:
      _, _ = evaluator.compute_model_performance(
          test_csv_out, test_stats_out)
def create_and_eval_model(train_dataset, test_dataset, task_type,
                          model_params, model_name, model_dir, tasks):
  """Helper method to create model for test."""
  # Fit model
  task_types = {task: task_type for task in tasks}
  model_params["data_shape"] = train_dataset.get_data_shape()
  print("Creating Model object.")
  import deepchem.models.deep
  model = Model.model_builder(model_name, task_types, model_params)
  print("About to fit model")
  model.fit(train_dataset)
  print("Done fitting, about to save...")
  model.save(model_dir)
  # Eval model on train
  evaluator = Evaluator(model, train_dataset, verbose=True)
  with tempfile.NamedTemporaryFile() as train_csv_out:
    with tempfile.NamedTemporaryFile() as train_stats_out:
      _, performance_df = evaluator.compute_model_performance(
          train_csv_out, train_stats_out)
  print("train_performance_df")
  print(performance_df)
  evaluator = Evaluator(model, test_dataset, verbose=True)
  with tempfile.NamedTemporaryFile() as test_csv_out:
    with tempfile.NamedTemporaryFile() as test_stats_out:
      _, performance_df = evaluator.compute_model_performance(
          test_csv_out, test_stats_out)
  print("test_performance_df")
  print(performance_df)
  return performance_df.iterrows().next()[1]["r2_score"]
def test_sklearn_skewed_classification_overfit(self):
  """Test sklearn models can overfit 0/1 datasets with few actives."""
  tasks = ["task0"]
  task_types = {task: "classification" for task in tasks}
  n_samples = 100
  n_features = 3
  n_tasks = len(tasks)
  # Generate dummy dataset
  np.random.seed(123)
  p = .05
  ids = np.arange(n_samples)
  X = np.random.rand(n_samples, n_features)
  y = np.random.binomial(1, p, size=(n_samples, n_tasks))
  w = np.ones((n_samples, n_tasks))
  dataset = Dataset.from_numpy(self.train_dir, X, y, w, ids, tasks)
  model_params = {
      "batch_size": None,
      "data_shape": dataset.get_data_shape()
  }
  verbosity = "high"
  classification_metric = Metric(metrics.roc_auc_score, verbosity=verbosity)
  model = SklearnModel(tasks, task_types, model_params, self.model_dir,
                       mode="classification",
                       model_instance=RandomForestClassifier())
  # Fit trained model
  model.fit(dataset)
  model.save()
  # Eval model on train
  transformers = []
  evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
  scores = evaluator.compute_model_performance([classification_metric])
  assert scores[classification_metric.name] > .9
def f(l00=0, l01=0, l02=0, l03=0, l04=0, l05=0, l06=0, l07=0, l08=0, l09=0,
      l10=0, l11=0, l12=0, l13=0, l14=0, l15=0, l16=0, l17=0, l18=0, l19=0):
  """Optimizing function.

  Takes in hyperparameter values and returns the validation set performance.

  Parameters
  ----------
  l00~l19: int or float
    Placeholders for the hyperparameters being optimized; the
    hyper_parameters dict is rebuilt from the input values of these
    placeholders.

  Returns
  -------
  valid_scores: float
    Validation set performance.
  """
  args = locals()
  # Input hyper parameters
  i = 0
  for hp in hp_list_single:
    hyper_parameters[hp] = float(args[param_name[i]])
    if param_range[i][0] == 'int':
      hyper_parameters[hp] = int(hyper_parameters[hp])
    i = i + 1
  for hp in hp_list_multiple:
    hyper_parameters[hp[0]] = [
        float(args[param_name[j]]) for j in range(i, i + hp[1])
    ]
    if param_range[i][0] == 'int':
      hyper_parameters[hp[0]] = map(int, hyper_parameters[hp[0]])
    i = i + hp[1]
  logger.info(hyper_parameters)
  # Run benchmark
  with open(log_file, 'a') as f:
    # Record hyperparameters
    f.write(str(hyper_parameters))
    f.write('\n')
  if isinstance(self.model_class, str) or isinstance(self.model_class,
                                                     unicode):
    try:
      train_scores, valid_scores, _ = benchmark_classification(
          train_dataset, valid_dataset, valid_dataset,
          ['task_placeholder'] * n_tasks, output_transformers,
          n_features, metric, self.model_class,
          hyper_parameters=hyper_parameters)
    except AssertionError:
      train_scores, valid_scores, _ = benchmark_regression(
          train_dataset, valid_dataset, valid_dataset,
          ['task_placeholder'] * n_tasks, output_transformers,
          n_features, metric, self.model_class,
          hyper_parameters=hyper_parameters)
    score = valid_scores[self.model_class][metric[0].name]
  else:
    model_dir = tempfile.mkdtemp()
    model = self.model_class(hyper_parameters, model_dir)
    model.fit(train_dataset, **hyper_parameters)
    model.save()
    evaluator = Evaluator(model, valid_dataset, output_transformers)
    multitask_scores = evaluator.compute_model_performance(metric)
    score = multitask_scores[metric[0].name]
  with open(log_file, 'a') as f:
    # Record performances
    f.write(str(score))
    f.write('\n')
  # GPGO maximizes performance by default; set performance to its negative
  # value for minimization.
  if direction:
    return score
  else:
    return -score
def hyperparam_search(self,
                      params_dict,
                      train_dataset,
                      valid_dataset,
                      output_transformers,
                      metric,
                      use_max=True,
                      logdir=None):
  """Perform hyperparameter search according to params_dict.

  Each key in params_dict is a model_param. The values should be a list of
  potential values for that hyperparameter.

  TODO(rbharath): This shouldn't be stored in a temporary directory.
  """
  # Assumes `reduce` (functools) and `mul` (operator) are imported at module level.
  hyperparams = list(params_dict.keys())
  hyperparam_vals = list(params_dict.values())
  for hyperparam_list in hyperparam_vals:
    assert isinstance(hyperparam_list, collections.abc.Iterable)

  number_combinations = reduce(mul, [len(vals) for vals in hyperparam_vals])

  valid_csv_out = tempfile.NamedTemporaryFile()
  valid_stats_out = tempfile.NamedTemporaryFile()
  if use_max:
    best_validation_score = -np.inf
  else:
    best_validation_score = np.inf
  best_hyperparams = None
  best_model, best_model_dir = None, None
  all_scores = {}

  for ind, hyperparameter_tuple in enumerate(
      itertools.product(*hyperparam_vals)):
    model_params = {}
    log("Fitting model %d/%d" % (ind + 1, number_combinations), self.verbose)
    for hyperparam, hyperparam_val in zip(hyperparams, hyperparameter_tuple):
      model_params[hyperparam] = hyperparam_val
    log("hyperparameters: %s" % str(model_params), self.verbose)

    if logdir is not None:
      model_dir = os.path.join(logdir, str(ind))
      log("model_dir is %s" % model_dir, self.verbose)
      try:
        os.makedirs(model_dir)
      except OSError:
        if not os.path.isdir(model_dir):
          log("Error creating model_dir, using tempfile directory",
              self.verbose)
          model_dir = tempfile.mkdtemp()
    else:
      model_dir = tempfile.mkdtemp()

    model = self.model_class(model_params, model_dir)
    model.fit(train_dataset, **model_params)
    model.save()

    evaluator = Evaluator(model, valid_dataset, output_transformers)
    multitask_scores = evaluator.compute_model_performance(
        [metric], valid_csv_out.name, valid_stats_out.name)
    valid_score = multitask_scores[metric.name]
    all_scores[str(hyperparameter_tuple)] = valid_score

    if (use_max and valid_score >= best_validation_score) or (
        not use_max and valid_score <= best_validation_score):
      best_validation_score = valid_score
      best_hyperparams = hyperparameter_tuple
      if best_model_dir is not None:
        shutil.rmtree(best_model_dir)
      best_model_dir = model_dir
      best_model = model
    else:
      shutil.rmtree(model_dir)

    log("Model %d/%d, Metric %s, Validation set %s: %f" %
        (ind + 1, number_combinations, metric.name, ind, valid_score),
        self.verbose)
    log("\tbest_validation_score so far: %f" % best_validation_score,
        self.verbose)

  if best_model is None:
    log("No models trained correctly.", self.verbose)
    # arbitrarily return last model
    best_model, best_hyperparams = model, hyperparameter_tuple
    return best_model, best_hyperparams, all_scores

  train_csv_out = tempfile.NamedTemporaryFile()
  train_stats_out = tempfile.NamedTemporaryFile()
  train_evaluator = Evaluator(best_model, train_dataset, output_transformers)
  multitask_scores = train_evaluator.compute_model_performance(
      [metric], train_csv_out.name, train_stats_out.name)
  train_score = multitask_scores[metric.name]
  log("Best hyperparameters: %s" % str(best_hyperparams), self.verbose)
  log("train_score: %f" % train_score, self.verbose)
  log("validation_score: %f" % best_validation_score, self.verbose)
  return best_model, best_hyperparams, all_scores
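# A minimal usage sketch for the grid search above (illustrative, not part of
# the original file). The RandomForest builder and the dataset/transformer/
# metric names are placeholders patterned on the bace_rf_model example later
# in this file; model_class is expected to accept (model_params, model_dir).
def rf_model_builder(model_params, model_dir):
  sklearn_model = RandomForestRegressor(**model_params)
  return SklearnModel(sklearn_model, model_dir)

params_dict = {
    "n_estimators": [10, 100],
    "max_features": ["sqrt", "log2"],
}
optimizer = HyperparamOpt(rf_model_builder, verbosity="low")
# Trains one model per combination in the Cartesian product (4 here) and
# keeps the best one by validation score.
best_model, best_hyperparams, all_scores = optimizer.hyperparam_search(
    params_dict, train_dataset, valid_dataset, output_transformers,
    metric=regression_metric)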
def hyperparam_search(self,
                      params_dict,
                      train_dataset,
                      valid_dataset,
                      output_transformers,
                      metric,
                      use_max=True,
                      logdir=None):
  """Perform hyperparameter search according to params_dict.

  Each key in params_dict is a model_param. The values should be a list of
  potential values for that hyperparameter.
  """
  hyperparams = list(params_dict.keys())
  hyperparam_vals = list(params_dict.values())
  for hyperparam_list in hyperparam_vals:
    assert isinstance(hyperparam_list, collections.abc.Iterable)

  number_combinations = reduce(mul, [len(vals) for vals in hyperparam_vals])

  valid_csv_out = tempfile.NamedTemporaryFile()
  valid_stats_out = tempfile.NamedTemporaryFile()
  if use_max:
    best_validation_score = -np.inf
  else:
    best_validation_score = np.inf
  best_hyperparams = None
  best_model, best_model_dir = None, None
  all_scores = {}

  for ind, hyperparameter_tuple in enumerate(
      itertools.product(*hyperparam_vals)):
    model_params = {}
    for hyperparam, hyperparam_val in zip(hyperparams, hyperparameter_tuple):
      model_params[hyperparam] = hyperparam_val

    if logdir is not None:
      model_dir = logdir
    else:
      model_dir = tempfile.mkdtemp()

    if logdir is not None:
      # TODO(JG) Fit transformers for TF models
      model = self.model_class(self.task_types, model_params, model_dir,
                               verbosity=self.verbosity)
    else:
      if self.fit_transformers:
        model = self.model_class(self.task_types, model_params,
                                 fit_transformers=self.fit_transformers,
                                 verbosity=self.verbosity)
      else:
        model = self.model_class(self.task_types, model_params,
                                 verbosity=self.verbosity)
    model.fit(train_dataset)
    model.save(model_dir)

    evaluator = Evaluator(model, valid_dataset, output_transformers)
    df, scores_df, multitask_scores = evaluator.compute_model_performance(
        [metric], valid_csv_out, valid_stats_out)
    if not metric.is_multitask:
      valid_score = scores_df.iloc[0][metric.name]
    else:
      valid_score = multitask_scores[metric.name]
    all_scores[hyperparameter_tuple] = valid_score

    if (use_max and valid_score >= best_validation_score) or (
        not use_max and valid_score <= best_validation_score):
      best_validation_score = valid_score
      best_hyperparams = hyperparameter_tuple
      if best_model_dir is not None:
        shutil.rmtree(best_model_dir)
      best_model_dir = model_dir
      best_model = model
    else:
      shutil.rmtree(model_dir)

    log("Model %d/%d, Metric %s, Validation set %s: %f" %
        (ind + 1, number_combinations, metric.name, ind, valid_score),
        self.verbosity)
    log("\tbest_validation_score so far: %f" % best_validation_score,
        self.verbosity)

  if best_model is None:
    log("No models trained correctly.", self.verbosity)
    return best_model, best_hyperparams, all_scores

  train_csv_out = tempfile.NamedTemporaryFile()
  train_stats_out = tempfile.NamedTemporaryFile()
  train_evaluator = Evaluator(best_model, train_dataset, output_transformers)
  train_df, train_scores_df, multitask_scores = (
      train_evaluator.compute_model_performance(
          [metric], train_csv_out, train_stats_out))
  if not metric.is_multitask:
    train_score = train_scores_df.iloc[0][metric.name]
  else:
    train_score = multitask_scores[metric.name]
  log("Best hyperparameters: %s"
      % str(list(zip(hyperparams, best_hyperparams))), self.verbosity)
  log("train_score: %f" % train_score, self.verbosity)
  log("validation_score: %f" % best_validation_score, self.verbosity)
  return best_model, best_hyperparams, all_scores
np.random.seed(123)

# This is a script excerpt: n_features, model_dir, and verbosity are assumed
# to be defined earlier in the script.
pcba_tasks, pcba_datasets, transformers = load_pcba()
(train_dataset, valid_dataset) = pcba_datasets

metric = Metric(metrics.roc_auc_score, np.mean, mode="classification")

model = TensorflowMultiTaskClassifier(
    len(pcba_tasks),
    n_features,
    model_dir,
    dropouts=[.25],
    learning_rate=0.001,
    weight_init_stddevs=[.1],
    batch_size=64,
    verbosity="high")

# Fit trained model
model.fit(train_dataset)
model.save()

train_evaluator = Evaluator(model, train_dataset, transformers,
                            verbosity=verbosity)
train_scores = train_evaluator.compute_model_performance([metric])
print("Train scores")
print(train_scores)

valid_evaluator = Evaluator(model, valid_dataset, transformers,
                            verbosity=verbosity)
valid_scores = valid_evaluator.compute_model_performance([metric])
print("Validation scores")
print(valid_scores)
def test_singletask_tf_mlp_ECFP_classification_API(self):
  """Straightforward test of Tensorflow singletask deepchem classification API."""
  splittype = "scaffold"
  task_type = "classification"
  featurizer = CircularFingerprint(size=1024)
  tasks = ["outcome"]
  task_types = {task: task_type for task in tasks}
  input_file = os.path.join(self.current_dir, "example_classification.csv")
  loader = DataLoader(tasks=tasks,
                      smiles_field=self.smiles_field,
                      featurizer=featurizer,
                      verbosity="low")
  dataset = loader.featurize(input_file, self.data_dir)

  splitter = ScaffoldSplitter()
  train_dataset, test_dataset = splitter.train_test_split(
      dataset, self.train_dir, self.test_dir)

  input_transformers = []
  output_transformers = [
      NormalizationTransformer(transform_y=True, dataset=train_dataset)]
  transformers = input_transformers + output_transformers
  for dataset in [train_dataset, test_dataset]:
    for transformer in transformers:
      transformer.transform(dataset)

  model_params = {
      "batch_size": 2,
      "num_classification_tasks": 1,
      "num_features": 1024,
      "layer_sizes": [1024],
      "weight_init_stddevs": [1.],
      "bias_init_consts": [0.],
      "dropouts": [.5],
      "num_classes": 2,
      "nb_epoch": 1,
      "penalty": 0.0,
      "optimizer": "adam",
      "learning_rate": .001,
      "data_shape": train_dataset.get_data_shape()
  }

  classification_metrics = [Metric(metrics.roc_auc_score),
                            Metric(metrics.matthews_corrcoef),
                            Metric(metrics.recall_score),
                            Metric(metrics.accuracy_score)]

  model = TensorflowModel(tasks, task_types, model_params, self.model_dir,
                          tf_class=TensorflowMultiTaskClassifier)

  # Fit trained model
  model.fit(train_dataset)
  model.save()

  # Eval model on train
  evaluator = Evaluator(model, train_dataset, transformers, verbosity=True)
  _ = evaluator.compute_model_performance(classification_metrics)

  # Eval model on test
  evaluator = Evaluator(model, test_dataset, transformers, verbosity=True)
  _ = evaluator.compute_model_performance(classification_metrics)
def test_sklearn_transformed_regression(self):
  """Test that sklearn models can learn on simple transformed regression datasets."""
  np.random.seed(123)
  dataset = sklearn.datasets.load_diabetes()
  X, y = dataset.data, dataset.target
  frac_train = .7
  n_samples = len(X)
  n_train = int(frac_train * n_samples)
  X_train, y_train = X[:n_train], y[:n_train]
  X_test, y_test = X[n_train:], y[n_train:]
  train_dataset = Dataset.from_numpy(self.train_dir, X_train, y_train)
  test_dataset = Dataset.from_numpy(self.test_dir, X_test, y_test)

  # Transform train and test datasets
  input_transformers = [
      NormalizationTransformer(transform_X=True, dataset=train_dataset),
      ClippingTransformer(transform_X=True, dataset=train_dataset)]
  output_transformers = [
      NormalizationTransformer(transform_y=True, dataset=train_dataset)]
  transformers = input_transformers + output_transformers
  for transformer in transformers:
    transformer.transform(train_dataset)
  for transformer in transformers:
    transformer.transform(test_dataset)

  tasks = train_dataset.get_task_names()
  task_types = {task: "regression" for task in tasks}
  model_params = {
      "batch_size": None,
      "data_shape": train_dataset.get_data_shape()
  }
  verbosity = "high"
  regression_metric = Metric(metrics.r2_score, verbosity=verbosity)
  model = SklearnModel(tasks, task_types, model_params, self.model_dir,
                       mode="regression",
                       model_instance=LinearRegression())

  # Fit trained model
  model.fit(train_dataset)
  model.save()

  # Eval model on train
  train_evaluator = Evaluator(model, train_dataset, transformers,
                              verbosity=verbosity)
  train_scores = train_evaluator.compute_model_performance(
      [regression_metric])
  print("train_scores")
  print(train_scores)
  assert train_scores[regression_metric.name] > .5

  # Eval model on test
  transformers = []
  evaluator = Evaluator(model, test_dataset, transformers,
                        verbosity=verbosity)
  scores = evaluator.compute_model_performance([regression_metric])
  print("scores")
  print(scores)
  assert scores[regression_metric.name] > .5
print("About to perform train/valid/test split.") splitter = RandomSplitter(verbosity=verbosity) print("Performing new split.") train_dataset, valid_dataset, test_dataset = splitter.train_valid_test_split( nci_dataset, train_dir, valid_dir, test_dir) classification_metric = Metric(metrics.roc_auc_score, np.mean, verbosity=verbosity, mode="classification") def model_builder(model_dir): sklearn_model = RandomForestRegressor(n_estimators=500) return SklearnModel(sklearn_model, model_dir) model = SingletaskToMultitask(nci_tasks, model_builder, model_dir) # Fit trained model model.fit(train_dataset) model.save() train_evaluator = Evaluator(model, train_dataset, transformers, verbosity=verbosity) train_scores = train_evaluator.compute_model_performance([classification_metric]) print("Train scores") print(train_scores) valid_evaluator = Evaluator(model, valid_dataset, transformers, verbosity=verbosity) valid_scores = valid_evaluator.compute_model_performance([classification_metric]) print("Validation scores") print(valid_scores)
def bace_rf_model(mode="classification", verbosity="high", split="20-80"):
  """Train random forests on BACE dataset."""
  (bace_tasks, train_dataset, valid_dataset, test_dataset, crystal_dataset,
   transformers) = load_bace(mode=mode, transform=False, split=split)

  if mode == "regression":
    r2_metric = Metric(metrics.r2_score, verbosity=verbosity)
    rms_metric = Metric(metrics.rms_score, verbosity=verbosity)
    mae_metric = Metric(metrics.mae_score, verbosity=verbosity)
    all_metrics = [r2_metric, rms_metric, mae_metric]
    metric = r2_metric
    model_class = RandomForestRegressor

    def rf_model_builder(model_params, model_dir):
      sklearn_model = RandomForestRegressor(**model_params)
      return SklearnModel(sklearn_model, model_dir)
  elif mode == "classification":
    roc_auc_metric = Metric(metrics.roc_auc_score, verbosity=verbosity)
    accuracy_metric = Metric(metrics.accuracy_score, verbosity=verbosity)
    mcc_metric = Metric(metrics.matthews_corrcoef, verbosity=verbosity)
    # Note sensitivity = recall
    recall_metric = Metric(metrics.recall_score, verbosity=verbosity)
    model_class = RandomForestClassifier
    all_metrics = [accuracy_metric, mcc_metric, recall_metric, roc_auc_metric]
    metric = roc_auc_metric

    def rf_model_builder(model_params, model_dir):
      sklearn_model = RandomForestClassifier(**model_params)
      return SklearnModel(sklearn_model, model_dir)
  else:
    raise ValueError("Invalid mode %s" % mode)

  params_dict = {
      "n_estimators": [10, 100],
      "max_features": ["auto", "sqrt", "log2", None],
  }
  optimizer = HyperparamOpt(rf_model_builder, verbosity="low")
  best_rf, best_rf_hyperparams, all_rf_results = optimizer.hyperparam_search(
      params_dict, train_dataset, valid_dataset, transformers, metric=metric)

  if len(train_dataset) > 0:
    rf_train_evaluator = Evaluator(best_rf, train_dataset, transformers,
                                   verbosity=verbosity)
    csv_out = "rf_%s_%s_train.csv" % (mode, split)
    stats_out = "rf_%s_%s_train_stats.txt" % (mode, split)
    rf_train_score = rf_train_evaluator.compute_model_performance(
        all_metrics, csv_out=csv_out, stats_out=stats_out)
    print("RF Train set scores: %s" % (str(rf_train_score)))

  if len(valid_dataset) > 0:
    rf_valid_evaluator = Evaluator(best_rf, valid_dataset, transformers,
                                   verbosity=verbosity)
    csv_out = "rf_%s_%s_valid.csv" % (mode, split)
    stats_out = "rf_%s_%s_valid_stats.txt" % (mode, split)
    rf_valid_score = rf_valid_evaluator.compute_model_performance(
        all_metrics, csv_out=csv_out, stats_out=stats_out)
    print("RF Valid set scores: %s" % (str(rf_valid_score)))

  if len(test_dataset) > 0:
    rf_test_evaluator = Evaluator(best_rf, test_dataset, transformers,
                                  verbosity=verbosity)
    csv_out = "rf_%s_%s_test.csv" % (mode, split)
    stats_out = "rf_%s_%s_test_stats.txt" % (mode, split)
    rf_test_score = rf_test_evaluator.compute_model_performance(
        all_metrics, csv_out=csv_out, stats_out=stats_out)
    print("RF Test set: %s" % (str(rf_test_score)))

  if len(crystal_dataset) > 0:
    rf_crystal_evaluator = Evaluator(best_rf, crystal_dataset, transformers,
                                     verbosity=verbosity)
    csv_out = "rf_%s_%s_crystal.csv" % (mode, split)
    stats_out = "rf_%s_%s_crystal_stats.txt" % (mode, split)
    rf_crystal_score = rf_crystal_evaluator.compute_model_performance(
        all_metrics, csv_out=csv_out, stats_out=stats_out)
    print("RF Crystal set: %s" % (str(rf_crystal_score)))
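# Example invocation (illustrative, not from the original file): the mode,
# verbosity, and split values below are simply the defaults from the
# signature above.
if __name__ == "__main__":
  bace_rf_model(mode="classification", verbosity="high", split="20-80")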