def test_gat_classification():
    """Exercise ``GATModel`` in classification mode.

    First fits the model on the small dataset returned by ``get_dataset`` and
    requires a mean ROC-AUC of at least 0.85 on that same data, then runs a
    single training epoch of a deliberately tiny model on the BACE
    classification training split.
    """
    # load datasets
    graph_featurizer = MolGraphConvFeaturizer()
    task_names, small_dataset, small_transformers, auc_metric = get_dataset(
        'classification', featurizer=graph_featurizer)

    # initialize models
    overfit_kwargs = dict(
        mode='classification',
        n_tasks=len(task_names),
        number_atom_features=30,
        batch_size=10,
        learning_rate=0.001,
    )
    overfit_model = GATModel(**overfit_kwargs)

    # overfit test: evaluate on the very data the model was fitted on
    overfit_model.fit(small_dataset, nb_epoch=100)
    results = overfit_model.evaluate(small_dataset, [auc_metric],
                                     small_transformers)
    assert results['mean-roc_auc_score'] >= 0.85

    # test on a small MoleculeNet dataset
    from deepchem.molnet import load_bace_classification
    bace_tasks, bace_splits, _ = load_bace_classification(
        featurizer=graph_featurizer)
    bace_train, _, _ = bace_splits
    tiny_model = dc.models.GATModel(
        mode='classification',
        n_tasks=len(bace_tasks),
        graph_attention_layers=[2],
        n_attention_heads=1,
        residual=False,
        predictor_hidden_feats=2,
    )
    tiny_model.fit(bace_train, nb_epoch=1)
def test_mpnn_classification():
    """Exercise ``MPNNModel`` in classification mode.

    Fits on the small dataset from ``get_dataset`` (edge features enabled)
    and requires a mean ROC-AUC of at least 0.85 on the training data, then
    trains a minimal model for one epoch on the BACE classification
    training split.
    """
    # load datasets
    edge_featurizer = MolGraphConvFeaturizer(use_edges=True)
    loaded = get_dataset('classification', featurizer=edge_featurizer)
    task_names, small_dataset, small_transformers, auc_metric = loaded

    # initialize models
    task_count = len(task_names)
    overfit_model = MPNNModel(
        mode='classification', n_tasks=task_count, learning_rate=0.0005)

    # overfit test
    overfit_model.fit(small_dataset, nb_epoch=200)
    eval_scores = overfit_model.evaluate(
        small_dataset, [auc_metric], small_transformers)
    assert eval_scores['mean-roc_auc_score'] >= 0.85

    # test on a small MoleculeNet dataset
    from deepchem.molnet import load_bace_classification
    bace_tasks, bace_splits, _ = load_bace_classification(
        featurizer=edge_featurizer)
    bace_train, _, _ = bace_splits
    minimal_model = MPNNModel(
        mode='classification',
        n_tasks=len(bace_tasks),
        node_out_feats=2,
        edge_hidden_feats=2,
        num_step_message_passing=1,
        num_step_set2set=1,
        num_layer_set2set=1,
    )
    minimal_model.fit(bace_train, nb_epoch=1)
def get_dataset(mode='classification', featurizer='GraphConv', num_tasks=2):
    """Build a 20-sample dataset with random labels for model tests.

    Features and ids come from the first 20 entries of the training split
    returned by ``load_bace_classification`` (classification mode) or
    ``load_delaney`` (any other mode). Labels are drawn at random and all
    weights are one.

    Parameters
    ----------
    mode: str
        ``'classification'`` selects BACE and 0/1 integer labels with a
        mean ROC-AUC metric; any other value selects Delaney, normally
        distributed labels and a mean-absolute-error metric.
    featurizer:
        Forwarded as the first positional argument to the loader.
    num_tasks: int
        ``num_tasks - 1`` entries named ``"random_task"`` are appended to the
        task list returned by the loader.

    Returns
    -------
    tuple
        ``(tasks, dataset, transformers, metric)``.
    """
    data_points = 20
    is_classification = mode == 'classification'

    if is_classification:
        loaded = load_bace_classification(featurizer, reload=False)
    else:
        loaded = load_delaney(featurizer, reload=False)
    tasks, all_dataset, transformers = loaded
    train, _, _ = all_dataset

    # Pad the task list in place so labels/weights get one column per task.
    tasks.extend("random_task" for _ in range(1, num_tasks))

    label_shape = (data_points, len(tasks))
    w = np.ones(shape=label_shape)

    if is_classification:
        y = np.random.randint(0, 2, size=label_shape)
        metric = dc.metrics.Metric(
            dc.metrics.roc_auc_score, np.mean, mode="classification")
    else:
        y = np.random.normal(size=label_shape)
        metric = dc.metrics.Metric(
            dc.metrics.mean_absolute_error, mode="regression")

    features = train.X[:data_points]
    identifiers = train.ids[:data_points]
    ds = NumpyDataset(features, y, w, identifiers)
    return tasks, ds, transformers, metric
def get_dataset(self,
                mode='classification',
                featurizer='GraphConv',
                num_tasks=2):
    """Return a 10-sample, randomly labelled dataset plus a matching metric.

    The loader is ``load_bace_classification`` when ``mode`` is
    ``'classification'`` and ``load_delaney`` otherwise; ``featurizer`` is
    passed to it positionally. Only the first 10 feature rows and ids of the
    training split are kept. Labels are random (0/1 integers for
    classification, normal samples otherwise) and weights are all ones.
    ``num_tasks - 1`` ``"random_task"`` names are appended to the loader's
    task list.

    Returns
    -------
    tuple
        ``(tasks, dataset, transformers, metric)``.
    """
    data_points = 10
    classification = (mode == 'classification')

    # Pick the loader first, then call it once.
    if classification:
        loader = load_bace_classification
    else:
        loader = load_delaney
    tasks, all_dataset, transformers = loader(featurizer)
    train, _valid, _test = all_dataset

    extra_task = 1
    while extra_task < num_tasks:
        tasks.append("random_task")
        extra_task += 1

    n_columns = len(tasks)
    w = np.ones(shape=(data_points, n_columns))

    if not classification:
        y = np.random.normal(size=(data_points, n_columns))
        metric = dc.metrics.Metric(
            dc.metrics.mean_absolute_error, mode="regression")
    else:
        y = np.random.randint(0, 2, size=(data_points, n_columns))
        metric = dc.metrics.Metric(
            dc.metrics.roc_auc_score, np.mean, mode="classification")

    ds = NumpyDataset(train.X[:data_points], y, w, train.ids[:data_points])
    return tasks, ds, transformers, metric
def test_attentivefp_classification():
    """Exercise ``AttentiveFPModel`` in classification mode.

    Fits on the small dataset from ``get_dataset`` (edge features enabled)
    and requires a mean ROC-AUC of at least 0.85 on that data, then trains a
    minimal model for a single epoch on the BACE classification training
    split.
    """
    # load datasets
    edge_featurizer = MolGraphConvFeaturizer(use_edges=True)
    task_names, small_dataset, small_transformers, auc_metric = get_dataset(
        'classification', featurizer=edge_featurizer)

    # initialize models
    overfit_model = AttentiveFPModel(
        mode='classification',
        n_tasks=len(task_names),
        batch_size=10,
        learning_rate=0.001,
    )

    # overfit test
    overfit_model.fit(small_dataset, nb_epoch=100)
    metrics_to_report = [auc_metric]
    eval_scores = overfit_model.evaluate(small_dataset, metrics_to_report,
                                         small_transformers)
    assert eval_scores['mean-roc_auc_score'] >= 0.85

    # test on a small MoleculeNet dataset
    from deepchem.molnet import load_bace_classification
    bace_tasks, bace_splits, _ = load_bace_classification(
        featurizer=edge_featurizer)
    bace_train, _, _ = bace_splits
    minimal_kwargs = dict(
        mode='classification',
        n_tasks=len(bace_tasks),
        num_layers=1,
        num_timesteps=1,
        graph_feat_size=2,
    )
    minimal_model = AttentiveFPModel(**minimal_kwargs)
    minimal_model.fit(bace_train, nb_epoch=1)
def load_dataset(args):
    """Load the MoleculeNet dataset selected by ``args``.

    Parameters
    ----------
    args: dict
        Must contain ``'featurizer'`` (``'ECFP'`` or ``'GC'``) and
        ``'dataset'`` (one of ``'BACE_classification'``, ``'BBBP'``,
        ``'BACE_regression'``, ``'ClinTox'``, ``'Delaney'``, ``'HOPV'``,
        ``'SIDER'``, ``'Lipo'``).

    Returns
    -------
    tuple
        ``(args, tasks, all_dataset, transformers)``; ``args`` is returned
        unchanged and the other three are whatever the loader returned.

    Raises
    ------
    ValueError
        If the featurizer name or the dataset name is not recognised.
    """
    splitter = 'scaffold'

    featurizer_name = args['featurizer']
    if featurizer_name == 'ECFP':
        featurizer = 'ECFP'
    elif featurizer_name == 'GC':
        from deepchem.feat import MolGraphConvFeaturizer
        featurizer = MolGraphConvFeaturizer()
    else:
        # Without this branch an unknown name left `featurizer` unbound and
        # the function failed later with an UnboundLocalError.
        raise ValueError('Unexpected featurizer: {}'.format(featurizer_name))

    # Dataset name -> name of the loader function in deepchem.molnet.
    # Every loader here is called with the same keyword arguments.
    loader_names = {
        'BACE_classification': 'load_bace_classification',
        'BBBP': 'load_bbbp',
        'BACE_regression': 'load_bace_regression',
        'ClinTox': 'load_clintox',
        'Delaney': 'load_delaney',
        'HOPV': 'load_hopv',
        'SIDER': 'load_sider',
        'Lipo': 'load_lipo',
    }
    dataset_name = args['dataset']
    if dataset_name not in loader_names:
        raise ValueError('Unexpected dataset: {}'.format(args['dataset']))

    # Imported lazily so argument validation does not require deepchem.
    from deepchem import molnet
    loader = getattr(molnet, loader_names[dataset_name])
    tasks, all_dataset, transformers = loader(
        featurizer=featurizer, splitter=splitter, reload=False)

    return args, tasks, all_dataset, transformers
def test_pagtn_classification():
    """Exercise ``PagtnModel`` in classification mode.

    Fits on the small dataset from ``get_dataset`` featurized with
    ``PagtnMolGraphFeaturizer(max_length=5)`` and requires a mean ROC-AUC of
    at least 0.85 on that data, then trains a fresh model for one epoch on
    the BACE classification training split.
    """
    # load datasets
    pagtn_featurizer = PagtnMolGraphFeaturizer(max_length=5)
    loaded = get_dataset('classification', featurizer=pagtn_featurizer)
    task_names, small_dataset, small_transformers, auc_metric = loaded

    # initialize models
    task_count = len(task_names)
    overfit_model = PagtnModel(
        mode='classification', n_tasks=task_count, batch_size=16)

    # overfit test
    overfit_model.fit(small_dataset, nb_epoch=100)
    eval_scores = overfit_model.evaluate(
        small_dataset, [auc_metric], small_transformers)
    assert eval_scores['mean-roc_auc_score'] >= 0.85

    # test on a small MoleculeNet dataset
    from deepchem.molnet import load_bace_classification
    bace_tasks, bace_splits, _ = load_bace_classification(
        featurizer=pagtn_featurizer)
    bace_train, _, _ = bace_splits
    bace_model = PagtnModel(
        mode='classification', n_tasks=len(bace_tasks), batch_size=16)
    bace_model.fit(bace_train, nb_epoch=1)
def get_molecules():
    """Return the ``X`` array of the first BACE classification split.

    The dataset is loaded with ``featurizer="Raw"``; only the first element
    of the returned dataset tuple is used.
    """
    _tasks, splits, _transformers = load_bace_classification(featurizer="Raw")
    first_split = splits[0]
    return first_split.X
def load_dataset(args):
    """Load the MoleculeNet dataset selected by ``args``.

    Parameters
    ----------
    args: dict
        Must contain ``'featurizer'`` (``'ECFP'``, ``'GC'`` or ``'AC'``) and
        ``'dataset'`` (one of ``'BACE_classification'``, ``'BBBP'``,
        ``'BACE_regression'``, ``'ClinTox'``, ``'Delaney'``, ``'HOPV'``,
        ``'SIDER'``, ``'Lipo'``, ``'PDBbind'``).

    Returns
    -------
    tuple
        ``(args, tasks, all_dataset, transformers)``; ``args`` is returned
        unchanged and the other three are whatever the loader returned.

    Raises
    ------
    ValueError
        If the featurizer name or the dataset name is not recognised.
    """
    splitter = 'scaffold'

    featurizer_name = args['featurizer']
    if featurizer_name == 'ECFP':
        featurizer = 'ECFP'
    elif featurizer_name == 'GC':
        from deepchem.feat import MolGraphConvFeaturizer
        featurizer = MolGraphConvFeaturizer()
    elif featurizer_name == 'AC':
        from deepchem.feat import AtomicConvFeaturizer
        featurizer = AtomicConvFeaturizer(frag1_num_atoms=100,
                                          frag2_num_atoms=1000,
                                          complex_num_atoms=1100,
                                          max_num_neighbors=12,
                                          neighbor_cutoff=4)
    else:
        # Without this branch an unknown name left `featurizer` unbound and
        # the function failed later with an UnboundLocalError.
        raise ValueError('Unexpected featurizer: {}'.format(featurizer_name))

    # Dataset name -> name of the loader function in deepchem.molnet for all
    # loaders that share the same call signature. PDBbind is handled
    # separately because it takes different keyword arguments.
    loader_names = {
        'BACE_classification': 'load_bace_classification',
        'BBBP': 'load_bbbp',
        'BACE_regression': 'load_bace_regression',
        'ClinTox': 'load_clintox',
        'Delaney': 'load_delaney',
        'HOPV': 'load_hopv',
        'SIDER': 'load_sider',
        'Lipo': 'load_lipo',
    }

    dataset_name = args['dataset']
    if dataset_name == 'PDBbind':
        from deepchem.molnet import load_pdbbind
        # PDBbind ignores the module-wide scaffold splitter and always uses
        # a random split, reading and writing in the current directory.
        tasks, all_dataset, transformers = load_pdbbind(
            featurizer=featurizer,
            save_dir='.',
            data_dir='.',
            splitter='random',
            pocket=True,
            set_name='core',  # refined
            reload=False)
    elif dataset_name in loader_names:
        # Imported lazily so argument validation does not require deepchem.
        from deepchem import molnet
        loader = getattr(molnet, loader_names[dataset_name])
        tasks, all_dataset, transformers = loader(featurizer=featurizer,
                                                  splitter=splitter,
                                                  reload=False)
    else:
        raise ValueError('Unexpected dataset: {}'.format(args['dataset']))

    return args, tasks, all_dataset, transformers