Exemplo n.º 1
0
    def test_graph_conv_atom_features(self):
        """Train a GraphConvModel on custom per-atom features and check a
        model reloaded from disk reproduces its predictions."""
        tasks, dataset, transformers, metric = self.get_dataset('regression',
                                                                'Raw',
                                                                num_tasks=1)

        # Attach a random per-atom property to each molecule; the label is
        # the sum of (independently drawn) random per-atom values.
        atom_feature_name = 'feature'
        y = []
        for mol in dataset.X:
            atom_features = []
            for atom in mol.GetAtoms():
                val = np.random.normal()
                mol.SetProp(
                    "atom %08d %s" % (atom.GetIdx(), atom_feature_name),
                    str(val))
                atom_features.append(np.random.normal())
            y.append(np.sum(atom_features))

        featurizer = ConvMolFeaturizer(atom_properties=[atom_feature_name])
        X = featurizer.featurize(dataset.X)
        dataset = deepchem.data.NumpyDataset(X, np.array(y))
        batch_size = 50
        model = GraphConvModel(
            len(tasks),
            number_atom_features=featurizer.feature_length(),
            batch_size=batch_size,
            mode='regression')

        model.fit(dataset, nb_epoch=1)
        y_pred1 = model.predict(dataset)
        model.save()

        model2 = TensorGraph.load_from_dir(model.model_dir)
        y_pred2 = model2.predict(dataset)
        # Bug fix: use allclose instead of exact equality — floats
        # round-tripped through save/restore need not be bit-identical.
        self.assertTrue(np.allclose(y_pred1, y_pred2))
Exemplo n.º 2
0
def test_graph_conv_atom_features():
    """Smoke-test GraphConvModel training on custom per-atom features."""
    tasks, dataset, transformers, metric = get_dataset('regression',
                                                       'Raw',
                                                       num_tasks=1)

    # Write one random property per atom onto each molecule; the label is
    # the sum of independently drawn random per-atom values.
    atom_feature_name = 'feature'
    y = []
    for mol in dataset.X:
        per_atom = []
        for atom in mol.GetAtoms():
            val = np.random.normal()
            prop_key = "atom %08d %s" % (atom.GetIdx(), atom_feature_name)
            mol.SetProp(prop_key, str(val))
            per_atom.append(np.random.normal())
        y.append([np.sum(per_atom)])

    featurizer = ConvMolFeaturizer(atom_properties=[atom_feature_name])
    conv_mols = featurizer.featurize(dataset.X)
    dataset = dc.data.NumpyDataset(conv_mols, np.array(y))
    model = GraphConvModel(len(tasks),
                           number_atom_features=featurizer.feature_length(),
                           batch_size=50,
                           mode='regression')

    model.fit(dataset, nb_epoch=1)
    y_pred1 = model.predict(dataset)
Exemplo n.º 3
0
  def test_graph_conv_atom_features(self):
    """Custom atom features round-trip: train, save, reload, compare."""
    tasks, dataset, transformers, metric = self.get_dataset(
        'regression', 'Raw', num_tasks=1)

    # Each molecule gets a random property per atom; labels are sums of
    # (separately drawn) random per-atom values.
    atom_feature_name = 'feature'
    labels = []
    for mol in dataset.X:
      feats = []
      for atom in mol.GetAtoms():
        val = np.random.normal()
        mol.SetProp("atom %08d %s" % (atom.GetIdx(), atom_feature_name),
                    str(val))
        feats.append(np.random.normal())
      labels.append([np.sum(feats)])

    featurizer = ConvMolFeaturizer(atom_properties=[atom_feature_name])
    conv_mols = featurizer.featurize(dataset.X)
    dataset = dc.data.NumpyDataset(conv_mols, np.array(labels))
    model = GraphConvModel(
        len(tasks),
        number_atom_features=featurizer.feature_length(),
        batch_size=50,
        mode='regression')

    model.fit(dataset, nb_epoch=1)
    y_pred1 = model.predict(dataset)
    model.save()

    restored = TensorGraph.load_from_dir(model.model_dir)
    y_pred2 = restored.predict(dataset)
    self.assertTrue(np.allclose(y_pred1, y_pred2))
Exemplo n.º 4
0
def generate_graph_conv_model():
    """Train a graph-convolution classifier on the DUD-E ACE dataset.

    Reads ``dude_ace.csv`` from the working directory, fits a single-task
    classification GraphConvModel on the full dataset, prints its
    Matthews-correlation score, and returns the fitted model.
    """
    batch_size = 128
    model = GraphConvModel(1,
                           batch_size=batch_size,
                           mode="classification",
                           model_dir="/tmp/mk01/model_dir")

    dataset_file = "dude_ace.csv"
    tasks = ["is_active"]
    featurizer = dc.feat.ConvMolFeaturizer()
    loader = dc.data.CSVLoader(tasks=tasks,
                               smiles_field="SMILES",
                               featurizer=featurizer)
    dataset = loader.featurize(dataset_file, shard_size=8192)

    metrics = [
        dc.metrics.Metric(dc.metrics.matthews_corrcoef,
                          np.mean,
                          mode="classification")
    ]

    # NOTE(review): the original also built an unused RandomSplitter and
    # empty score/transformer lists; those dead locals are removed.
    model.fit(dataset)
    print(model.evaluate(dataset, metrics))
    return model
Exemplo n.º 5
0
    def test_graph_conv_regression_model(self):
        """Regression GraphConvModel fits the toy dataset to MAE < 0.1."""
        tasks, dataset, transformers, metric = self.get_dataset(
            'regression', 'GraphConv')

        model = GraphConvModel(len(tasks),
                               batch_size=50,
                               mode='regression')
        model.fit(dataset, nb_epoch=100)
        scores = model.evaluate(dataset, [metric], transformers)
        # Every per-task mean absolute error must be small.
        for err in scores['mean_absolute_error']:
            assert err < 0.1
Exemplo n.º 6
0
    def test_graph_conv_model(self):
        """Classification GraphConvModel reaches ROC-AUC >= 0.9."""
        tasks, dataset, transformers, metric = self.get_dataset(
            'classification', 'GraphConv')

        model = GraphConvModel(len(tasks),
                               batch_size=50,
                               mode='classification')
        model.fit(dataset, nb_epoch=10)
        results = model.evaluate(dataset, [metric], transformers)
        assert results['mean-roc_auc_score'] >= 0.9
Exemplo n.º 7
0
def test_graph_conv_regression_model():
    """Small-batch, non-normalized regression model drives MAE below 0.1."""
    tasks, dataset, transformers, metric = get_dataset('regression',
                                                       'GraphConv')

    model = GraphConvModel(len(tasks),
                           batch_size=10,
                           batch_normalize=False,
                           mode='regression')
    model.fit(dataset, nb_epoch=100)
    results = model.evaluate(dataset, [metric], transformers)
    assert results['mean_absolute_error'] < 0.1
Exemplo n.º 8
0
  def test_graph_conv_error_bars(self):
    """bayesian_predict returns mean/std arrays of shape (n_samples, n_tasks)."""
    tasks, dataset, transformers, metric = self.get_dataset(
        'regression', 'GraphConv', num_tasks=1)

    model = GraphConvModel(len(tasks), batch_size=50, mode='regression')
    model.fit(dataset, nb_epoch=1)

    mu, sigma = model.bayesian_predict(
        dataset, transformers, untransform=True, n_passes=24)
    expected_shape = (len(dataset), len(tasks))
    assert mu.shape == expected_shape
    assert sigma.shape == expected_shape
Exemplo n.º 9
0
  def test_graph_conv_regression_model(self):
    """Fit a regression GraphConvModel, then verify a model restored from
    disk evaluates to (numerically) the same scores."""
    tasks, dataset, transformers, metric = self.get_dataset(
        'regression', 'GraphConv')

    batch_size = 50
    model = GraphConvModel(len(tasks), batch_size=batch_size, mode='regression')

    model.fit(dataset, nb_epoch=1)
    scores = model.evaluate(dataset, [metric], transformers)

    model.save()
    model = TensorGraph.load_from_dir(model.model_dir)
    scores2 = model.evaluate(dataset, [metric], transformers)
    # Bug fix: the original computed both evaluations but never compared
    # them (the first `scores` was silently overwritten), so a broken
    # save/restore path would pass. Mirrors the sibling regression test.
    assert np.allclose(scores['mean_absolute_error'],
                       scores2['mean_absolute_error'])
Exemplo n.º 10
0
def test_neural_fingerprint_retrieval():
    """predict_embedding yields fingerprints of width 2 * dense_layer_size."""
    tasks, dataset, transformers, metric = get_dataset('classification',
                                                       'GraphConv')

    fp_size = 3
    model = GraphConvModel(len(tasks),
                           batch_size=50,
                           dense_layer_size=fp_size,
                           mode='classification')
    model.fit(dataset, nb_epoch=1)

    # Trim padding rows added by batching before checking the shape.
    fingerprints = np.array(model.predict_embedding(dataset))[:len(dataset)]
    assert fingerprints.shape == (len(dataset), fp_size * 2)
Exemplo n.º 11
0
  def test_graph_conv_regression_model(self):
    """Fit to low MAE, then check save/restore reproduces the scores."""
    tasks, dataset, transformers, metric = self.get_dataset(
        'regression', 'GraphConv')

    model = GraphConvModel(len(tasks), batch_size=50, mode='regression')
    model.fit(dataset, nb_epoch=100)
    scores = model.evaluate(dataset, [metric], transformers)
    for err in scores['mean_absolute_error']:
      assert err < 0.1

    model.save()
    model = TensorGraph.load_from_dir(model.model_dir)
    scores2 = model.evaluate(dataset, [metric], transformers)
    # Reloaded model must score the same as the one just trained.
    assert np.allclose(scores['mean_absolute_error'],
                       scores2['mean_absolute_error'])
Exemplo n.º 12
0
  def test_change_loss_function(self):
    """A model reloaded without restoring accepts a new L2-loss submodel."""
    tasks, dataset, transformers, metric = self.get_dataset(
        'regression', 'GraphConv', num_tasks=1)

    model = GraphConvModel(len(tasks), batch_size=50, mode='regression')
    model.fit(dataset, nb_epoch=1)
    model.save()

    # Reload the graph structure only, attach a fresh loss, then restore
    # the weights and fine-tune through the new submodel.
    reloaded = TensorGraph.load_from_dir(model.model_dir, restore=False)
    label_layer = reloaded.labels[-1]
    output_layer = reloaded.outputs[-1]
    new_loss = ReduceSum(L2Loss(in_layers=[label_layer, output_layer]))
    submodel = reloaded.create_submodel(loss=new_loss)
    reloaded.restore()
    reloaded.fit(dataset, nb_epoch=1, submodel=submodel)
Exemplo n.º 13
0
  def test_change_loss_function(self):
    """Swapping in a summed-L2 loss on a reloaded model still trains."""
    tasks, dataset, transformers, metric = self.get_dataset(
        'regression', 'GraphConv', num_tasks=1)

    batch_size = 50
    model = GraphConvModel(len(tasks), batch_size=batch_size, mode='regression')
    model.fit(dataset, nb_epoch=1)
    model.save()

    restored = TensorGraph.load_from_dir(model.model_dir, restore=False)
    l2 = L2Loss(in_layers=[restored.labels[-1], restored.outputs[-1]])
    submodel = restored.create_submodel(loss=ReduceSum(l2))
    restored.restore()
    restored.fit(dataset, nb_epoch=1, submodel=submodel)
Exemplo n.º 14
0
  def test_graph_conv_model(self):
    """Classifier trains to high ROC-AUC and survives save/restore."""
    tasks, dataset, transformers, metric = self.get_dataset(
        'classification', 'GraphConv')

    model = GraphConvModel(
        len(tasks), batch_size=50, mode='classification')
    model.fit(dataset, nb_epoch=10)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.9

    model.save()
    model = TensorGraph.load_from_dir(model.model_dir)
    reloaded_scores = model.evaluate(dataset, [metric], transformers)
    assert np.allclose(scores['mean-roc_auc_score'],
                       reloaded_scores['mean-roc_auc_score'])
Exemplo n.º 15
0
  def test_graph_conv_model(self):
    """ROC-AUC >= 0.9 after 10 epochs; reload reproduces the score."""
    tasks, dataset, transformers, metric = self.get_dataset(
        'classification', 'GraphConv')

    batch_size = 50
    model = GraphConvModel(len(tasks),
                           batch_size=batch_size,
                           mode='classification')
    model.fit(dataset, nb_epoch=10)
    before = model.evaluate(dataset, [metric], transformers)
    assert before['mean-roc_auc_score'] >= 0.9

    model.save()
    model = TensorGraph.load_from_dir(model.model_dir)
    after = model.evaluate(dataset, [metric], transformers)
    assert np.allclose(before['mean-roc_auc_score'],
                       after['mean-roc_auc_score'])
Exemplo n.º 16
0
def test_graph_conv_model_no_task():
    """A trained classifier can predict on a dataset that carries no labels."""
    tasks, dataset, _, __ = get_dataset('classification', 'GraphConv')
    model = GraphConvModel(len(tasks),
                           batch_size=10,
                           batch_normalize=False,
                           mode='classification')
    model.fit(dataset, nb_epoch=20)

    # Download BACE and featurize with an empty task list so the resulting
    # dataset has no y values; predict() must still work.
    bace_url = "https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/bace.csv"
    dc.utils.data_utils.download_url(url=bace_url, name="bace_tmp.csv")
    loader = dc.data.CSVLoader(tasks=[],
                               smiles_field='mol',
                               featurizer=dc.feat.ConvMolFeaturizer())
    unlabeled = loader.featurize(
        os.path.join(dc.utils.data_utils.get_data_dir(), "bace_tmp.csv"))
    model.predict(unlabeled)
Exemplo n.º 17
0
  def test_neural_fingerprint_retrieval(self):
    """Fingerprint output has width 2 * dense_layer_size."""
    tasks, dataset, transformers, metric = self.get_dataset(
        'classification', 'GraphConv')

    fp_size = 3
    model = GraphConvModel(
        len(tasks),
        batch_size=50,
        dense_layer_size=3,
        mode='classification')
    model.fit(dataset, nb_epoch=1)

    # Drop batch-padding rows before checking the shape.
    fingerprints = model.predict(dataset, outputs=model.neural_fingerprint)
    fingerprints = np.array(fingerprints)[:len(dataset)]
    self.assertEqual((len(dataset), fp_size * 2), fingerprints.shape)
Exemplo n.º 18
0
    def test_neural_fingerprint_retrieval(self):
        """Embedding width equals twice the dense layer size."""
        tasks, dataset, transformers, metric = self.get_dataset(
            'classification', 'GraphConv')

        fp_size = 3
        model = GraphConvModel(len(tasks),
                               batch_size=50,
                               dense_layer_size=3,
                               mode='classification')
        model.fit(dataset, nb_epoch=1)

        embeddings = model.predict(dataset,
                                   outputs=model.neural_fingerprint)
        embeddings = np.array(embeddings)[:len(dataset)]
        self.assertEqual((len(dataset), fp_size * 2), embeddings.shape)
Exemplo n.º 19
0
  def test_graph_conv_regression_model(self):
    """Fit to low MAE; reloaded scores agree to within rtol=1e-4."""
    tasks, dataset, transformers, metric = self.get_dataset(
        'regression', 'GraphConv')

    model = GraphConvModel(len(tasks), batch_size=50, mode='regression')
    model.fit(dataset, nb_epoch=100)
    scores = model.evaluate(dataset, [metric], transformers)
    for err in scores['mean_absolute_error']:
      assert err < 0.1

    model.save()
    model = TensorGraph.load_from_dir(model.model_dir)
    scores2 = model.evaluate(dataset, [metric], transformers)
    assert np.allclose(
        scores['mean_absolute_error'],
        scores2['mean_absolute_error'],
        rtol=1e-4)
Exemplo n.º 20
0
def test_graph_conv_model():
    """Train a covid main-protease hit classifier and report MCC / ROC-AUC.

    Reads ``covid_mpro_combined_data_sources.csv`` from the working
    directory, splits it randomly, fits on the training split, plots the
    validation class probabilities, and prints train/test metrics.
    """
    model = GraphConvModel(1,
                           batch_size=2000,
                           mode="classification",
                           model_dir="/tmp/covid/model_dir")

    # Load and featurize the combined Mpro dataset.
    dataset_file = "covid_mpro_combined_data_sources.csv"
    tasks = ["isHit"]
    loader = dc.data.CSVLoader(tasks=tasks,
                               smiles_field="SMILES",
                               featurizer=dc.feat.ConvMolFeaturizer())
    dataset = loader.featurize(dataset_file, shard_size=8192)

    mcc_metrics = [
        dc.metrics.Metric(dc.metrics.matthews_corrcoef,
                          np.mean,
                          mode="classification")
    ]

    splitter = dc.splits.RandomSplitter()
    train_dataset, valid_dataset, test_dataset = splitter.train_valid_test_split(
        dataset)

    model.fit(train_dataset)

    # Tabulate per-molecule class probabilities for the validation split.
    pred = [x.flatten() for x in model.predict(valid_dataset)]
    pred_df = pd.DataFrame(pred, columns=["neg", "pos"])
    pred_df["active"] = [int(x) for x in valid_dataset.y]
    pred_df["SMILES"] = valid_dataset.ids

    sns.boxplot(pred_df.active, pred_df.pos)

    print(model.evaluate(train_dataset, mcc_metrics))
    print(model.evaluate(test_dataset, mcc_metrics))

    auc_metrics = [
        dc.metrics.Metric(dc.metrics.roc_auc_score,
                          np.mean,
                          mode="classification")
    ]
    print(model.evaluate(train_dataset, auc_metrics))
    print(model.evaluate(test_dataset, auc_metrics))
Exemplo n.º 21
0
    def test_graph_conv_regression_uncertainty(self):
        """Dropout-based uncertainty estimates are sane: prediction error
        is small relative to the signal, and the predicted std is neither
        negligible nor larger than the signal itself."""
        tasks, dataset, transformers, metric = self.get_dataset(
            'regression', 'GraphConv')

        model = GraphConvModel(len(tasks),
                               batch_size=50,
                               mode='regression',
                               dropout=0.1,
                               uncertainty=True)
        model.fit(dataset, nb_epoch=100)

        pred, std = model.predict_uncertainty(dataset)
        mae = np.mean(np.abs(dataset.y - pred))
        signal = np.mean(np.abs(dataset.y))
        spread = np.mean(std)
        assert mae < 0.5 * signal
        assert spread > 0.5 * mae
        assert spread < signal
Exemplo n.º 22
0
  def test_graph_conv_regression_uncertainty(self):
    """predict_uncertainty returns plausible mean error / std magnitudes."""
    tasks, dataset, transformers, metric = self.get_dataset(
        'regression', 'GraphConv')

    model = GraphConvModel(
        len(tasks),
        batch_size=50,
        mode='regression',
        dropout=0.1,
        uncertainty=True)
    model.fit(dataset, nb_epoch=100)

    # Predicted std should be commensurate with the actual error: larger
    # than half the mean error, but smaller than the mean signal.
    pred, std = model.predict_uncertainty(dataset)
    mean_error = np.mean(np.abs(dataset.y - pred))
    mean_value = np.mean(np.abs(dataset.y))
    mean_std = np.mean(std)
    assert mean_error < 0.5 * mean_value
    assert mean_std > 0.5 * mean_error
    assert mean_std < mean_value
Exemplo n.º 23
0
from deepchem.molnet import load_delaney

# Load Delaney dataset (aqueous-solubility regression benchmark); the
# 'index' split partitions rows in file order.
delaney_tasks, delaney_datasets, transformers = load_delaney(
    featurizer='GraphConv', split='index')
train_dataset, valid_dataset, test_dataset = delaney_datasets

# Fit models
# Pearson R^2, averaged across tasks, is the evaluation metric.
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)

# Do setup required for tf/keras models
# Number of features on conv-mols
n_feat = 75
# Batch size of models
batch_size = 128
# NOTE(review): n_feat is defined but never passed to the model below —
# presumably GraphConvModel's default atom-feature width matches; confirm.
model = GraphConvModel(
    len(delaney_tasks), batch_size=batch_size, mode='regression')

# Fit trained model
model.fit(train_dataset, nb_epoch=20)

print("Evaluating model")
train_scores = model.evaluate(train_dataset, [metric], transformers)
valid_scores = model.evaluate(valid_dataset, [metric], transformers)

print("Train scores")
print(train_scores)

print("Validation scores")
print(valid_scores)
Exemplo n.º 24
0
# Train the model: single-task hit/no-hit classifier on the combined
# covid Mpro dataset read from the working directory.
batch_size = 2000
model = GraphConvModel(1,
                       batch_size=batch_size,
                       mode="classification",
                       model_dir="/tmp/covid/model_dir")
dataset_file = "covid_mpro_combined_data_sources.csv"
tasks = ["isHit"]
featurizer = dc.feat.ConvMolFeaturizer()
loader = dc.data.CSVLoader(tasks=tasks,
                           smiles_field="SMILES",
                           featurizer=featurizer)
dataset = loader.featurize(dataset_file, shard_size=8192)

model.fit(dataset)

#model = GraphConvModel(1, batch_size=128,mode="classification",model_dir="/tmp/mk01/model_dir")
#model.restore()
# Make predictions: prepare the eMolecules input file for featurization.
featurizer = dc.feat.ConvMolFeaturizer()
df = pd.read_csv("emol_10M.csv", sep=",")
#print('num rows in file',df.size)
#df.columns=["SMILES","Name"]

rows, cols = df.shape
df["Val"] = [
    0
] * rows  # add a dummy label column to keep the featurizer happy
infile_name = "emol_10M_withVal.csv"
df.to_csv(infile_name, index=False)
# Number of features
n_feat = 75

# Batch size of models
batch_size = 128

# NOTE(review): this scraped fragment switches context here —
# `delaney_tasks`, `train`, `valid`, `metric` and `transformers` below
# come from an earlier notebook cell not shown in full; verify they are
# in scope before running.
model = GraphConvModel(len(delaney_tasks),
                       batch_size=batch_size,
                       mode='regression',
                       dropout=0.2)

# In[5]:

# Fit trained model
model.fit(train, nb_epoch=100)

print("Evaluating model")
train_scores = model.evaluate(train, [metric], transformers)
valid_scores = model.evaluate(valid, [metric], transformers)

print("Train scores")
print(train_scores)

print("Validation scores")
print(valid_scores)

# In[6]:

# Predictability test for trained model
Exemplo n.º 26
0
                       model_dir=model_dir,
                       random_seed=0)

# NOTE(review): `model`, `model_dir`, `train_dataset` and `valid_dataset`
# are defined in a truncated fragment above this loop; confirm they are in
# scope before running.
metric = dc.metrics.Metric(dc.metrics.r2_score, mode='regression')

# TF checkpointing: keep at most the 20 most recent checkpoints.
ckpt = tf.train.Checkpoint(step=tf.Variable(1))
manager = tf.train.CheckpointManager(ckpt, model_dir, max_to_keep=20)

start_time = time.time()

# Train one epoch at a time (deterministic order) so each epoch can be
# checkpointed and scored on both splits.
num_epochs = 100
losses_train = []
score_valid = []
score_train = []
for i in range(num_epochs):
    loss_train = model.fit(train_dataset, nb_epoch=1, deterministic=True)
    ckpt.step.assign_add(1)
    save_path = manager.save()
    print("Saved checkpoint for step {}: {} ".format(int(ckpt.step),
                                                     save_path))
    # Also save via DeepChem's own checkpoint mechanism.
    model.save_checkpoint(max_checkpoints_to_keep=20, model_dir=save_path)
    #model.restore()
    R2_train = model.evaluate(train_dataset, [metric])['r2_score']
    R2_valid = model.evaluate(valid_dataset, [metric])['r2_score']
    print("Epoch %d loss_train: %f R2_train %f R2_valid: %f  " %
          (i, loss_train, R2_train, R2_valid))

    # Accumulate per-epoch curves for later inspection/plotting.
    losses_train.append(loss_train)
    score_valid.append(R2_valid)
    score_train.append(R2_train)