Exemplo n.º 1
0
  def test_multitask_keras_mlp_ECFP_classification_API(self):
    """Straightforward test of Keras multitask deepchem classification API."""
    graph = tf.Graph()
    session = tf.Session(graph=graph)
    K.set_session(session)
    with graph.as_default():
      task_type = "classification"
      # TODO(rbharath): There should be some automatic check to ensure that all
      # required model_params are specified.
      # TODO(rbharath): Turning off dropout to make tests behave.
      model_params = {
          "nb_hidden": 10,
          "activation": "relu",
          "dropout": .0,
          "learning_rate": .01,
          "momentum": .9,
          "nesterov": False,
          "decay": 1e-4,
          "batch_size": 5,
          "nb_epoch": 2,
          "init": "glorot_uniform",
          "nb_layers": 1,
          "batchnorm": False,
      }

      input_file = os.path.join(self.current_dir, "multitask_example.csv")
      # Seventeen synthetic tasks: "task0" .. "task16".
      tasks = ["task%d" % index for index in range(17)]
      task_types = {task: task_type for task in tasks}

      featurizer = CircularFingerprint(size=1024)

      loader = DataLoader(tasks=tasks,
                          smiles_field=self.smiles_field,
                          featurizer=featurizer,
                          verbosity="low")
      dataset = loader.featurize(input_file, self.data_dir)
      splitter = ScaffoldSplitter()
      train_dataset, test_dataset = splitter.train_test_split(
          dataset, self.train_dir, self.test_dir)

      transformers = []
      model_params["data_shape"] = train_dataset.get_data_shape()
      classification_metrics = [
          Metric(metrics.roc_auc_score),
          Metric(metrics.matthews_corrcoef),
          Metric(metrics.recall_score),
          Metric(metrics.accuracy_score),
      ]

      model = MultiTaskDNN(tasks, task_types, model_params, self.model_dir)

      # Train the model, then persist it to disk.
      model.fit(train_dataset)
      model.save()

      # Score the trained model on the training split.
      train_evaluator = Evaluator(model, train_dataset, transformers,
                                  verbosity=True)
      _ = train_evaluator.compute_model_performance(classification_metrics)

      # Score the trained model on the held-out test split.
      test_evaluator = Evaluator(model, test_dataset, transformers,
                                 verbosity=True)
      _ = test_evaluator.compute_model_performance(classification_metrics)
Exemplo n.º 2
0
    def test_keras_reload(self):
        """Test that trained keras models can be reloaded correctly.

        Trains a single-task classifier on a tiny random dataset, saves it,
        reloads the weights into a freshly constructed model object, and
        checks the reloaded model still scores well on the training data.
        """
        g = tf.Graph()
        sess = tf.Session(graph=g)
        K.set_session(sess)
        with g.as_default():
            # NOTE(review): removed unused local `task_types`; this code path
            # passes n_tasks/n_features/mode to MultiTaskDNN directly.
            tasks = ["task0"]
            n_samples = 10
            n_features = 3
            n_tasks = len(tasks)

            # Generate dummy dataset (fixed seed for reproducibility).
            np.random.seed(123)
            ids = np.arange(n_samples)
            X = np.random.rand(n_samples, n_features)
            y = np.random.randint(2, size=(n_samples, n_tasks))
            w = np.ones((n_samples, n_tasks))

            dataset = NumpyDataset(X, y, w, ids)

            verbosity = "high"
            classification_metric = Metric(metrics.roc_auc_score,
                                           verbosity=verbosity)
            # Dropout disabled so the tiny model fits the data reliably
            # enough for the score check below.
            keras_model = MultiTaskDNN(n_tasks,
                                       n_features,
                                       "classification",
                                       dropout=0.)
            model = KerasModel(keras_model, self.model_dir)

            # Fit trained model and persist it.
            model.fit(dataset)
            model.save()

            # Load trained model into a brand-new model object.
            reloaded_keras_model = MultiTaskDNN(n_tasks,
                                                n_features,
                                                "classification",
                                                dropout=0.)
            reloaded_model = KerasModel(reloaded_keras_model, self.model_dir)
            reloaded_model.reload(
                custom_objects={"MultiTaskDNN": MultiTaskDNN})

            # Eval reloaded model on train data.
            transformers = []
            evaluator = Evaluator(reloaded_model,
                                  dataset,
                                  transformers,
                                  verbosity=verbosity)
            scores = evaluator.compute_model_performance(
                [classification_metric])

            assert scores[classification_metric.name] > .6
Exemplo n.º 3
0
  def test_keras_skewed_classification_overfit(self):
    """Test keras models can overfit 0/1 datasets with few actives."""
    graph = tf.Graph()
    session = tf.Session(graph=graph)
    K.set_session(session)
    with graph.as_default():
      tasks = ["task0"]
      task_types = {task: "classification" for task in tasks}
      n_samples = 100
      n_features = 3
      n_tasks = len(tasks)

      # Random features with sparse (~5% positive) binary labels.
      np.random.seed(123)
      p = .05
      ids = np.arange(n_samples)
      X = np.random.rand(n_samples, n_features)
      y = np.random.binomial(1, p, size=(n_samples, n_tasks))
      w = np.ones((n_samples, n_tasks))

      dataset = Dataset.from_numpy(self.train_dir, X, y, w, ids, tasks)

      model_params = {
          "nb_hidden": 1000,
          "activation": "relu",
          "dropout": .0,
          "learning_rate": .15,
          "momentum": .9,
          "nesterov": False,
          "decay": 1e-4,
          "batch_size": n_samples,
          "nb_epoch": 200,
          "init": "glorot_uniform",
          "nb_layers": 1,
          "batchnorm": False,
          "data_shape": dataset.get_data_shape(),
      }

      verbosity = "high"
      classification_metric = Metric(metrics.roc_auc_score, verbosity=verbosity)
      model = MultiTaskDNN(tasks, task_types, model_params, self.model_dir,
                           verbosity=verbosity)

      # Train on the whole dataset; overfitting is the point here.
      model.fit(dataset)
      model.save()

      # Training-set AUC should be near-perfect after overfitting.
      evaluator = Evaluator(model, dataset, [], verbosity=verbosity)
      scores = evaluator.compute_model_performance([classification_metric])

      assert scores[classification_metric.name] > .9
Exemplo n.º 4
0
  def test_keras_multitask_regression_overfit(self):
    """Test keras multitask overfits tiny data."""
    graph = tf.Graph()
    session = tf.Session(graph=graph)
    K.set_session(session)
    with graph.as_default():
      n_tasks, n_samples, n_features = 10, 10, 3

      # Tiny random dataset; 0/1 integer labels treated as regression targets.
      np.random.seed(123)
      ids = np.arange(n_samples)
      X = np.random.rand(n_samples, n_features)
      y = np.random.randint(2, size=(n_samples, n_tasks))
      w = np.ones((n_samples, n_tasks))
      dataset = NumpyDataset(X, y, w, ids)

      verbosity = "high"
      regression_metric = Metric(metrics.r2_score, verbosity=verbosity,
                                 task_averager=np.mean, mode="regression")
      network = MultiTaskDNN(n_tasks, n_features, "regression",
                             dropout=0., learning_rate=.1, decay=1e-4)
      model = KerasModel(network, self.model_dir, verbosity=verbosity)

      # Train long enough to memorize the ten samples, then persist.
      model.fit(dataset, nb_epoch=100)
      model.save()

      # R^2 on the training data should reflect heavy overfitting.
      evaluator = Evaluator(model, dataset, [], verbosity=verbosity)
      scores = evaluator.compute_model_performance([regression_metric])

      assert scores[regression_metric.name] > .75
Exemplo n.º 5
0
 def model_builder(model_params, model_dir):
     """Construct a KerasModel wrapping a MultiTaskDNN for these tasks."""
     # tasks, n_features, and task_type are closed over from the caller.
     network = MultiTaskDNN(len(tasks), n_features, task_type,
                            dropout=0., **model_params)
     return KerasModel(network, model_dir)
Exemplo n.º 6
0
 def model_builder(model_params, model_dir):
     """Build a KerasModel classifier for the BACE tasks."""
     # bace_tasks and n_features come from the enclosing scope.
     return KerasModel(
         MultiTaskDNN(len(bace_tasks), n_features, "classification",
                      dropout=.5, **model_params),
         model_dir)
Exemplo n.º 7
0
    def test_keras_skewed_classification_overfit(self):
        """Test keras models can overfit 0/1 datasets with few actives."""
        tasks = ["task0"]
        task_types = {name: "classification" for name in tasks}
        n_samples, n_features, n_tasks = 100, 3, len(tasks)

        # Random features with sparse (~5% positive) binary labels.
        np.random.seed(123)
        active_fraction = .05
        ids = np.arange(n_samples)
        X = np.random.rand(n_samples, n_features)
        y = np.random.binomial(1, active_fraction, size=(n_samples, n_tasks))
        w = np.ones((n_samples, n_tasks))

        dataset = Dataset.from_numpy(self.train_dir, X, y, w, ids, tasks)

        model_params = dict(
            nb_hidden=1000,
            activation="relu",
            dropout=.0,
            learning_rate=.15,
            momentum=.9,
            nesterov=False,
            decay=1e-4,
            batch_size=n_samples,
            nb_epoch=200,
            init="glorot_uniform",
            nb_layers=1,
            batchnorm=False,
            data_shape=dataset.get_data_shape(),
        )

        verbosity = "high"
        classification_metric = Metric(metrics.roc_auc_score,
                                       verbosity=verbosity)
        model = MultiTaskDNN(tasks,
                             task_types,
                             model_params,
                             self.model_dir,
                             verbosity=verbosity)

        # Fit on the whole dataset, then persist; overfitting is intended.
        model.fit(dataset)
        model.save()

        # Training-set AUC should be near-perfect after overfitting.
        evaluator = Evaluator(model, dataset, [], verbosity=verbosity)
        scores = evaluator.compute_model_performance([classification_metric])

        assert scores[classification_metric.name] > .9
Exemplo n.º 8
0
    def test_keras_multitask_regression_overfit(self):
        """Test keras multitask overfits tiny data."""
        n_tasks = 10
        tasks = ["task%d" % index for index in range(n_tasks)]
        task_types = {name: "regression" for name in tasks}
        n_samples = 10
        n_features = 3

        # Tiny random dataset; 0/1 integer labels used as regression targets.
        np.random.seed(123)
        ids = np.arange(n_samples)
        X = np.random.rand(n_samples, n_features)
        y = np.random.randint(2, size=(n_samples, n_tasks))
        w = np.ones((n_samples, n_tasks))

        dataset = Dataset.from_numpy(self.train_dir, X, y, w, ids, tasks)

        model_params = dict(
            nb_hidden=1000,
            activation="relu",
            dropout=.0,
            learning_rate=.15,
            momentum=.9,
            nesterov=False,
            decay=1e-4,
            batch_size=n_samples,
            nb_epoch=200,
            init="glorot_uniform",
            nb_layers=1,
            batchnorm=False,
            data_shape=dataset.get_data_shape(),
        )

        verbosity = "high"
        regression_metric = Metric(metrics.r2_score, verbosity=verbosity)
        model = MultiTaskDNN(tasks,
                             task_types,
                             model_params,
                             self.model_dir,
                             verbosity=verbosity)

        # Overfit the ten training samples, then persist the weights.
        model.fit(dataset)
        model.save()

        # R^2 on the training set should show heavy overfitting.
        evaluator = Evaluator(model, dataset, [], verbosity=verbosity)
        scores = evaluator.compute_model_performance([regression_metric])

        assert scores[regression_metric.name] > .9
Exemplo n.º 9
0
  def test_multitask_keras_mlp_ECFP_classification_API(self):
    """Straightforward test of Keras multitask deepchem classification API.

    Featurizes a multitask CSV with circular fingerprints, trains a
    multitask DNN classifier, and evaluates it on both splits.
    """
    g = tf.Graph()
    sess = tf.Session(graph=g)
    K.set_session(sess)
    with g.as_default():
      # NOTE(review): removed unused local `task_type`; the mode string is
      # passed to MultiTaskDNN directly below.
      input_file = os.path.join(self.current_dir, "multitask_example.csv")
      # Seventeen synthetic tasks: "task0" .. "task16".
      tasks = ["task%d" % i for i in range(17)]

      n_features = 1024
      featurizer = CircularFingerprint(size=n_features)
      loader = DataLoader(tasks=tasks,
                          smiles_field=self.smiles_field,
                          featurizer=featurizer,
                          verbosity="low")
      dataset = loader.featurize(input_file, self.data_dir)
      splitter = ScaffoldSplitter()
      train_dataset, test_dataset = splitter.train_test_split(
          dataset, self.train_dir, self.test_dir)

      transformers = []
      classification_metrics = [Metric(metrics.roc_auc_score),
                                Metric(metrics.matthews_corrcoef),
                                Metric(metrics.recall_score),
                                Metric(metrics.accuracy_score)]

      # Dropout disabled to keep this smoke test deterministic.
      keras_model = MultiTaskDNN(len(tasks), n_features, "classification",
                                 dropout=0.)
      model = KerasModel(keras_model, self.model_dir)

      # Fit trained model and persist it.
      model.fit(train_dataset)
      model.save()

      # Eval model on train split.
      evaluator = Evaluator(model, train_dataset, transformers, verbosity=True)
      _ = evaluator.compute_model_performance(classification_metrics)

      # Eval model on held-out test split.
      evaluator = Evaluator(model, test_dataset, transformers, verbosity=True)
      _ = evaluator.compute_model_performance(classification_metrics)
Exemplo n.º 10
0
  def test_keras_skewed_classification_overfit(self):
    """Test keras models can overfit 0/1 datasets with few actives."""
    graph = tf.Graph()
    session = tf.Session(graph=graph)
    K.set_session(session)
    with graph.as_default():
      n_samples, n_features, n_tasks = 100, 3, 1

      # Random features with roughly 5% positive labels.
      np.random.seed(123)
      p = .05
      ids = np.arange(n_samples)
      X = np.random.rand(n_samples, n_features)
      y = np.random.binomial(1, p, size=(n_samples, n_tasks))
      w = np.ones((n_samples, n_tasks))

      dataset = NumpyDataset(X, y, w, ids)

      verbosity = "high"
      classification_metric = Metric(metrics.roc_auc_score, verbosity=verbosity)
      network = MultiTaskDNN(n_tasks, n_features, "classification",
                             dropout=0., learning_rate=.15, decay=1e-4)
      model = KerasModel(network, self.model_dir)

      # Train full-batch for many epochs to force overfitting, then persist.
      model.fit(dataset, batch_size=n_samples, nb_epoch=200)
      model.save()

      # Training-set AUC should be near-perfect after overfitting.
      evaluator = Evaluator(model, dataset, [], verbosity=verbosity)
      scores = evaluator.compute_model_performance([classification_metric])

      assert scores[classification_metric.name] > .9
Exemplo n.º 11
0
# Script fragment: train a multitask Keras DNN on the MUV dataset and
# score it on the training split. `base_dir`, `reload`, and `verbosity`
# are expected to be defined earlier in the script (not visible here).
os.makedirs(base_dir)

# Load MUV data
muv_tasks, muv_datasets, transformers = load_muv(base_dir, reload=reload)
train_dataset, valid_dataset = muv_datasets
n_features = 1024

# Build model
# Metric averages ROC-AUC across tasks via np.mean.
classification_metric = Metric(metrics.roc_auc_score,
                               np.mean,
                               verbosity=verbosity,
                               mode="classification")

keras_model = MultiTaskDNN(len(muv_tasks),
                           n_features,
                           "classification",
                           dropout=.25,
                           learning_rate=.001,
                           decay=1e-4)
# NOTE(review): `self.model_dir` in top-level script code looks like a
# copy/paste from a test method — confirm the intended model directory.
model = KerasModel(keras_model, self.model_dir, verbosity=verbosity)

# Fit trained model
model.fit(train_dataset)
model.save()

# Score the fitted model on its own training split.
train_evaluator = Evaluator(model,
                            train_dataset,
                            transformers,
                            verbosity=verbosity)
train_scores = train_evaluator.compute_model_performance(
    [classification_metric])
Exemplo n.º 12
0
    "momentum": .9,
    "nesterov": False,
    "decay": 1e-4,
    "batch_size": 64,
    "nb_epoch": 10,
    "init": "glorot_uniform",
    "nb_layers": 1,
    "batchnorm": False,
    "data_shape": train_dataset.get_data_shape()
}

# Start from a clean model directory so stale checkpoints are not reloaded.
if os.path.exists(model_dir):
  shutil.rmtree(model_dir)
os.makedirs(model_dir)

# muv_tasks, muv_task_types, params_dict, and the datasets/transformers are
# defined earlier in the script (not visible in this chunk).
model = MultiTaskDNN(muv_tasks, muv_task_types, params_dict, model_dir,
                    verbosity=verbosity)

# Fit trained model
model.fit(train_dataset)
model.save()

# Evaluate on the training split.
train_evaluator = Evaluator(model, train_dataset, transformers, verbosity=verbosity)
train_scores = train_evaluator.compute_model_performance([classification_metric])

print("Train scores")
print(train_scores)

# Evaluate on the validation split.
valid_evaluator = Evaluator(model, valid_dataset, transformers, verbosity=verbosity)
valid_scores = valid_evaluator.compute_model_performance([classification_metric])

print("Validation scores")
Exemplo n.º 13
0
    def test_multitask_keras_mlp_ECFP_classification_API(self):
        """Straightforward test of Keras multitask deepchem classification API."""
        from deepchem.models.keras_models.fcnet import MultiTaskDNN
        task_type = "classification"
        # TODO(rbharath): There should be some automatic check to ensure that all
        # required model_params are specified.
        model_params = dict(nb_hidden=10,
                            activation="relu",
                            dropout=.5,
                            learning_rate=.01,
                            momentum=.9,
                            nesterov=False,
                            decay=1e-4,
                            batch_size=5,
                            nb_epoch=2,
                            init="glorot_uniform",
                            nb_layers=1,
                            batchnorm=False)

        input_file = os.path.join(self.current_dir, "multitask_example.csv")
        # Seventeen synthetic tasks: "task0" .. "task16".
        tasks = ["task%d" % index for index in range(17)]
        task_types = {name: task_type for name in tasks}

        featurizer = CircularFingerprint(size=1024)

        loader = DataLoader(tasks=tasks,
                            smiles_field=self.smiles_field,
                            featurizer=featurizer,
                            verbosity="low")
        dataset = loader.featurize(input_file, self.data_dir)
        splitter = ScaffoldSplitter()
        train_dataset, test_dataset = splitter.train_test_split(
            dataset, self.train_dir, self.test_dir)

        transformers = []
        model_params["data_shape"] = train_dataset.get_data_shape()
        classification_metrics = [
            Metric(metric)
            for metric in (metrics.roc_auc_score, metrics.matthews_corrcoef,
                           metrics.recall_score, metrics.accuracy_score)
        ]

        model = MultiTaskDNN(tasks, task_types, model_params, self.model_dir)

        # Train the model, then persist it.
        model.fit(train_dataset)
        model.save()

        # Score the model on the training split.
        train_evaluator = Evaluator(model, train_dataset, transformers,
                                    verbosity=True)
        _ = train_evaluator.compute_model_performance(classification_metrics)

        # Score the model on the held-out test split.
        test_evaluator = Evaluator(model, test_dataset, transformers,
                                   verbosity=True)
        _ = test_evaluator.compute_model_performance(classification_metrics)