Example #1
0
    def test_keras_reload(self):
        """A trained Keras model must produce the same quality after save/reload."""
        graph = tf.Graph()
        session = tf.Session(graph=graph)
        K.set_session(session)
        with graph.as_default():
            tasks = ["task0"]
            task_types = {task: "classification" for task in tasks}
            n_samples, n_features = 10, 3
            n_tasks = len(tasks)

            # Small seeded random classification dataset.
            np.random.seed(123)
            dataset = NumpyDataset(
                np.random.rand(n_samples, n_features),
                np.random.randint(2, size=(n_samples, n_tasks)),
                np.ones((n_samples, n_tasks)),
                np.arange(n_samples))

            verbosity = "high"
            classification_metric = Metric(metrics.roc_auc_score,
                                           verbosity=verbosity)

            # Train and persist the original model.
            model = KerasModel(
                MultiTaskDNN(n_tasks, n_features, "classification",
                             dropout=0.),
                self.model_dir)
            model.fit(dataset)
            model.save()

            # Rebuild an identical architecture and load the saved weights
            # back into it.
            reloaded_model = KerasModel(
                MultiTaskDNN(n_tasks, n_features, "classification",
                             dropout=0.),
                self.model_dir)
            reloaded_model.reload(
                custom_objects={"MultiTaskDNN": MultiTaskDNN})

            # The reloaded model should still score well on its training data.
            evaluator = Evaluator(reloaded_model,
                                  dataset,
                                  [],
                                  verbosity=verbosity)
            scores = evaluator.compute_model_performance(
                [classification_metric])
            assert scores[classification_metric.name] > .6
Example #2
0
 def model_builder(model_params, model_dir):
     """Build a KerasModel around a zero-dropout MultiTaskDNN.

     `tasks`, `n_features` and `task_type` are captured from the
     enclosing scope; `model_params` supplies extra constructor kwargs.
     """
     dnn = MultiTaskDNN(
         len(tasks), n_features, task_type, dropout=0., **model_params)
     return KerasModel(dnn, model_dir)
Example #3
0
 def model_builder(model_params, model_dir):
     """Build a classification KerasModel for the BACE tasks.

     `bace_tasks` and `n_features` come from the enclosing scope;
     `model_params` supplies extra constructor kwargs.
     """
     dnn = MultiTaskDNN(
         len(bace_tasks), n_features, "classification", dropout=.5,
         **model_params)
     return KerasModel(dnn, model_dir)
Example #4
0
  def test_keras_multitask_regression_overfit(self):
    """A multitask Keras regressor should overfit a tiny random dataset."""
    graph = tf.Graph()
    session = tf.Session(graph=graph)
    K.set_session(session)
    with graph.as_default():
      n_tasks, n_samples, n_features = 10, 10, 3

      # Tiny seeded synthetic dataset (0/1 targets treated as regression).
      np.random.seed(123)
      dataset = NumpyDataset(
          np.random.rand(n_samples, n_features),
          np.random.randint(2, size=(n_samples, n_tasks)),
          np.ones((n_samples, n_tasks)),
          np.arange(n_samples))

      verbosity = "high"
      regression_metric = Metric(metrics.r2_score, verbosity=verbosity,
                                 task_averager=np.mean, mode="regression")
      model = KerasModel(
          MultiTaskDNN(n_tasks, n_features, "regression",
                       dropout=0., learning_rate=.1, decay=1e-4),
          self.model_dir, verbosity=verbosity)

      # Train long enough to memorize the ten samples.
      model.fit(dataset, nb_epoch=100)
      model.save()

      # Evaluate on the training set itself and expect a high R^2.
      evaluator = Evaluator(model, dataset, [], verbosity=verbosity)
      scores = evaluator.compute_model_performance([regression_metric])
      assert scores[regression_metric.name] > .75
Example #5
0
  def test_multitask_keras_mlp_ECFP_classification_API(self):
    """Straightforward test of Keras multitask deepchem classification API."""
    graph = tf.Graph()
    session = tf.Session(graph=graph)
    K.set_session(session)
    with graph.as_default():
      task_type = "classification"
      input_file = os.path.join(self.current_dir, "multitask_example.csv")
      # Seventeen tasks named task0 ... task16.
      tasks = ["task%d" % i for i in range(17)]

      # Featurize with 1024-bit circular (ECFP) fingerprints.
      n_features = 1024
      loader = DataLoader(tasks=tasks,
                          smiles_field=self.smiles_field,
                          featurizer=CircularFingerprint(size=n_features),
                          verbosity="low")
      dataset = loader.featurize(input_file, self.data_dir)

      # Scaffold-based train/test split.
      train_dataset, test_dataset = ScaffoldSplitter().train_test_split(
          dataset, self.train_dir, self.test_dir)

      transformers = []
      classification_metrics = [
          Metric(fn) for fn in (metrics.roc_auc_score,
                                metrics.matthews_corrcoef,
                                metrics.recall_score,
                                metrics.accuracy_score)
      ]

      model = KerasModel(
          MultiTaskDNN(len(tasks), n_features, "classification", dropout=0.),
          self.model_dir)

      # Fit trained model
      model.fit(train_dataset)
      model.save()

      # Smoke-check evaluation on both splits; scores are discarded,
      # the test only asserts the API runs end to end.
      for split in (train_dataset, test_dataset):
        evaluator = Evaluator(model, split, transformers, verbosity=True)
        _ = evaluator.compute_model_performance(classification_metrics)
Example #6
0
  def test_keras_skewed_classification_overfit(self):
    """Test keras models can overfit 0/1 datasets with few actives."""
    graph = tf.Graph()
    session = tf.Session(graph=graph)
    K.set_session(session)
    with graph.as_default():
      n_samples, n_features, n_tasks = 100, 3, 1

      # Heavily skewed labels: ~5% positives.
      np.random.seed(123)
      active_prob = .05
      dataset = NumpyDataset(
          np.random.rand(n_samples, n_features),
          np.random.binomial(1, active_prob, size=(n_samples, n_tasks)),
          np.ones((n_samples, n_tasks)),
          np.arange(n_samples))

      verbosity = "high"
      classification_metric = Metric(metrics.roc_auc_score,
                                     verbosity=verbosity)
      model = KerasModel(
          MultiTaskDNN(n_tasks, n_features, "classification",
                       dropout=0., learning_rate=.15, decay=1e-4),
          self.model_dir)

      # Full-batch training for many epochs to force memorization.
      model.fit(dataset, batch_size=n_samples, nb_epoch=200)
      model.save()

      # Evaluate on the training data; ROC-AUC should be near-perfect.
      evaluator = Evaluator(model, dataset, [], verbosity=verbosity)
      scores = evaluator.compute_model_performance([classification_metric])
      assert scores[classification_metric.name] > .9
Example #7
0
# Unpack the precomputed MUV splits.
# NOTE(review): `muv_datasets`, `muv_tasks`, `verbosity`, `transformers`
# and `self` are all defined outside this snippet — confirm upstream.
train_dataset, valid_dataset = muv_datasets
n_features = 1024

# Build model
# Mean ROC-AUC across all MUV tasks.
classification_metric = Metric(metrics.roc_auc_score,
                               np.mean,
                               verbosity=verbosity,
                               mode="classification")

keras_model = MultiTaskDNN(len(muv_tasks),
                           n_features,
                           "classification",
                           dropout=.25,
                           learning_rate=.001,
                           decay=1e-4)
model = KerasModel(keras_model, self.model_dir, verbosity=verbosity)

# Fit trained model
model.fit(train_dataset)
model.save()

# Score the fitted model on its own training split.
train_evaluator = Evaluator(model,
                            train_dataset,
                            transformers,
                            verbosity=verbosity)
train_scores = train_evaluator.compute_model_performance(
    [classification_metric])

print("Train scores")
print(train_scores)
Example #8
0
# Mean ROC-AUC over tasks, used for both train and validation scoring.
classification_metric = Metric(metrics.roc_auc_score, np.mean,
                               verbosity=verbosity,
                               mode="classification")

# Hyperparameter grid to sweep.
learning_rates = [0.0003, 0.001, 0.003]
hidden_units = [1000, 500]
dropouts = [.5, .25]
num_hidden_layers = [1, 2]

# hyperparameter sweep here
for learning_rate in learning_rates:
  for hidden_unit in hidden_units:
    for dropout in dropouts:
      # BUG FIX: the original hard-coded dropout=.25 and learning_rate=.001,
      # so every grid point trained an identical model; forward the loop
      # variables instead.
      # NOTE(review): `hidden_unit` and `num_hidden_layers` remain unused —
      # MultiTaskDNN's constructor signature is not visible here, so they are
      # not forwarded; confirm the expected keyword names before wiring them.
      keras_model = MultiTaskDNN(len(sider_tasks), n_features,
                                 "classification",
                                 dropout=dropout,
                                 learning_rate=learning_rate,
                                 decay=1e-4)
      model = KerasModel(keras_model, self.model_dir, verbosity=verbosity)

      # Fit trained model
      model.fit(train_dataset)
      model.save()

      train_evaluator = Evaluator(model, train_dataset, transformers, verbosity=verbosity)
      train_scores = train_evaluator.compute_model_performance([classification_metric])

      print("Train scores")
      print(train_scores)

      valid_evaluator = Evaluator(model, valid_dataset, transformers, verbosity=verbosity)
      valid_scores = valid_evaluator.compute_model_performance([classification_metric])

      print("Validation scores")