def test_compute_charges_forward_batched(dgl_carboxylate):
    """Charges computed over a batch conserve total charge and give
    identical values to symmetry-equivalent (resonance-averaged) atoms."""
    batch = DGLMoleculeBatch(
        dgl_carboxylate, DGLMolecule.from_smiles("[H]Cl", [], [])
    )

    # One (electronegativity, hardness)-style row per atom per resonance form.
    rows = [
        # [H]C(=O)O- form 1
        [30.0, 80.0],
        [35.0, 75.0],
        [40.0, 70.0],
        [50.0, 65.0],
        # [H]C(=O)O- form 2
        [30.0, 80.0],
        [35.0, 75.0],
        [50.0, 65.0],
        [40.0, 70.0],
        # [H]Cl
        [55.0, 60.0],
        [60.0, 55.0],
    ]
    charges = ComputePartialCharges().forward(batch, torch.tensor(rows))

    # 4 carboxylate atoms (forms averaged) + 2 HCl atoms = 6 predictions.
    assert charges.shape == (6, 1)
    assert numpy.isclose(charges.sum(), -1.0)
    # The carboxylate oxygen charges should be identical.
    assert numpy.allclose(charges[2], charges[3])
def test_compute_charges_forward(dgl_methane):
    """Methane charges should sum to zero with four equivalent hydrogens."""
    hydrogen_row = [27.4, 73.9]
    features = torch.tensor([[30.8, 78.4]] + [hydrogen_row] * 4)

    charges = ComputePartialCharges().forward(dgl_methane, features)

    assert numpy.isclose(charges.sum(), 0.0)
    # All four hydrogen charges must match the first hydrogen's charge.
    assert numpy.allclose(charges[1:], charges[1])
def mock_atom_model() -> DGLMoleculeLightningModel:
    """Build a minimal lightning model with a single atom readout head."""
    atom_readout = ReadoutModule(
        pooling_layer=PoolAtomFeatures(),
        readout_layers=SequentialLayers(in_feats=4, hidden_feats=[2]),
        postprocess_layer=ComputePartialCharges(),
    )
    return DGLMoleculeLightningModel(
        convolution_module=ConvolutionModule(
            "SAGEConv", in_feats=4, hidden_feats=[4]
        ),
        readout_modules={"atom": atom_readout},
        learning_rate=0.01,
    )
def test_forward(self, dgl_methane):
    """A forward pass should yield one (n_atoms, 1) tensor per readout key."""
    readouts = {
        "atom": ReadoutModule(
            pooling_layer=PoolAtomFeatures(),
            readout_layers=SequentialLayers(in_feats=4, hidden_feats=[2]),
            postprocess_layer=ComputePartialCharges(),
        ),
    }
    model = MoleculeGCNModel(
        convolution_module=ConvolutionModule(
            "SAGEConv", in_feats=4, hidden_feats=[4]
        ),
        readout_modules=readouts,
    )

    output = model.forward(dgl_methane)

    assert "atom" in output
    # Methane has 5 atoms, each with a single predicted value.
    assert output["atom"].shape == (5, 1)
def test_init(self):
    """The constructor should wire up the convolution and readout modules."""
    atom_readout = ReadoutModule(
        pooling_layer=PoolAtomFeatures(),
        readout_layers=SequentialLayers(
            in_feats=2, hidden_feats=[2], activation=["Identity"]
        ),
        postprocess_layer=ComputePartialCharges(),
    )
    bond_readout = ReadoutModule(
        pooling_layer=PoolBondFeatures(
            layers=SequentialLayers(in_feats=4, hidden_feats=[4])
        ),
        readout_layers=SequentialLayers(in_feats=4, hidden_feats=[8]),
    )
    model = DGLMoleculeLightningModel(
        convolution_module=ConvolutionModule(
            "SAGEConv", in_feats=1, hidden_feats=[2, 2]
        ),
        readout_modules={"atom": atom_readout, "bond": bond_readout},
        learning_rate=0.01,
    )

    # The convolution stack should hold one GCN layer per hidden width.
    assert model.convolution_module is not None
    assert isinstance(model.convolution_module, ConvolutionModule)
    assert isinstance(model.convolution_module.gcn_layers, GCNStack)
    assert len(model.convolution_module.gcn_layers) == 2

    # Both readout heads should be registered with their pooling layers.
    assert all(key in model.readout_modules for key in ["atom", "bond"])
    assert isinstance(
        model.readout_modules["atom"].pooling_layer, PoolAtomFeatures
    )
    assert isinstance(
        model.readout_modules["bond"].pooling_layer, PoolBondFeatures
    )

    assert numpy.isclose(model.learning_rate, 0.01)
def main():
    """Train a small partial-charge model on four alcohols and test on nonanol."""
    print(torch.seed())

    # Define the atom / bond features of interest.
    atom_features = [AtomicElement(["C", "O", "H"]), AtomConnectivity()]
    bond_features = [BondOrder()]

    # Compute the total length of the input atomic feature vector
    n_atom_features = sum(len(feature) for feature in atom_features)

    # Load in the training and test data
    training_smiles = ["CO", "CCO", "CCCO", "CCCCO"]
    training_loader = DGLMoleculeDataLoader(
        DGLMoleculeDataset.from_smiles(
            training_smiles, atom_features, bond_features, label_function
        ),
        batch_size=len(training_smiles),
        shuffle=False,
    )

    test_smiles = ["CCCCCCCCCO"]
    test_loader = DGLMoleculeDataLoader(
        DGLMoleculeDataset.from_smiles(
            test_smiles, atom_features, bond_features, label_function
        ),
        batch_size=len(test_smiles),
        shuffle=False,
    )

    # Model hyper-parameters.
    n_gcn_layers = 5
    n_gcn_hidden_features = 128
    n_am1_layers = 2
    n_am1_hidden_features = 64
    learning_rate = 0.001

    model = DGLMoleculeLightningModel(
        convolution_module=ConvolutionModule(
            architecture="SAGEConv",
            in_feats=n_atom_features,
            hidden_feats=[n_gcn_hidden_features] * n_gcn_layers,
        ),
        readout_modules={
            # The keys of the readout modules should correspond to keys in the
            # label dictionary.
            "am1-charges": ReadoutModule(
                pooling_layer=PoolAtomFeatures(),
                readout_layers=SequentialLayers(
                    in_feats=n_gcn_hidden_features,
                    hidden_feats=[n_am1_hidden_features] * n_am1_layers + [2],
                    activation=["ReLU"] * n_am1_layers + ["Identity"],
                ),
                postprocess_layer=ComputePartialCharges(),
            )
        },
        learning_rate=learning_rate,
    )
    print(model)

    # Train the model
    n_epochs = 100
    n_gpus = 0 if not torch.cuda.is_available() else 1
    print(f"Using {n_gpus} GPUs")

    trainer = pl.Trainer(gpus=n_gpus, min_epochs=n_epochs, max_epochs=n_epochs)
    trainer.fit(model, train_dataloaders=training_loader)
    trainer.test(model, test_dataloaders=test_loader)
def test_init(self):
    """Positional construction should populate all three sub-layers."""
    module = ReadoutModule(
        PoolAtomFeatures(),
        SequentialLayers(1, [1]),
        ComputePartialCharges(),
    )

    assert isinstance(module.pooling_layer, PoolAtomFeatures)
    assert isinstance(module.readout_layers, SequentialLayers)
    assert isinstance(module.postprocess_layer, ComputePartialCharges)
def main(
    train_set_path,
    train_batch_size,
    val_set_path,
    test_set_path,
    n_gcn_layers,
    n_gcn_hidden_features,
    n_am1_layers,
    n_am1_hidden_features,
    learning_rate,
    n_epochs,
):
    """Train and test a GCN partial-charge model on pre-processed data sets.

    Args:
        train_set_path: Path to the pre-processed training molecules.
        train_batch_size: Number of molecules per training batch.
        val_set_path: Path to the pre-processed validation molecules.
        test_set_path: Path to the pre-processed test molecules.
        n_gcn_layers: Number of graph convolution layers.
        n_gcn_hidden_features: Hidden feature width of each GCN layer.
        n_am1_layers: Number of hidden layers in the charge readout.
        n_am1_hidden_features: Hidden feature width of the charge readout.
        learning_rate: Optimizer learning rate.
        n_epochs: Exact number of epochs to train for (min == max).
    """
    pprint(locals())

    # pl.seed_everything(3992210414)  # h-parameter sweep v1

    # Define the features of interest.
    atom_features = [
        AtomicElement(["C", "O", "H", "N", "S", "F", "Br", "Cl", "I", "P"]),
        AtomConnectivity(),
        AtomAverageFormalCharge(),
    ]
    bond_features = [
        # BondIsInRing(),
        # BondOrder()
    ]

    # Load in the pre-processed training and test molecules and store them in
    # featurized graphs.
    data_module = DGLMoleculeDataModule(
        atom_features,
        bond_features,
        partial_charge_method="am1",
        bond_order_method=None,
        train_set_path=train_set_path,
        train_batch_size=train_batch_size,
        val_set_path=val_set_path,
        val_batch_size=None,
        test_set_path=test_set_path,
        test_batch_size=None,
        use_cached_data=True,
    )
    n_atom_features = data_module.n_atom_features

    # Define the model.
    model = DGLMoleculeLightningModel(
        convolution_module=ConvolutionModule(
            architecture="SAGEConv",
            in_feats=n_atom_features,
            hidden_feats=[n_gcn_hidden_features] * n_gcn_layers,
        ),
        readout_modules={
            # The keys of the readout modules should correspond to keys in the
            # label dictionary.
            "am1-charges": ReadoutModule(
                pooling_layer=PoolAtomFeatures(),
                readout_layers=SequentialLayers(
                    in_feats=n_gcn_hidden_features,
                    hidden_feats=[n_am1_hidden_features] * n_am1_layers + [2],
                    activation=["ReLU"] * n_am1_layers + ["Identity"],
                ),
                postprocess_layer=ComputePartialCharges(),
            )
        },
        learning_rate=learning_rate,
    )
    print(model)

    # Train the model
    n_gpus = 0 if not torch.cuda.is_available() else 1
    print(f"Using {n_gpus} GPUs")

    # Tag the run with its hyper-parameters so sweep results are easy to find.
    logger = TensorBoardLogger(
        "lightning-logs",
        version=(
            f"{train_batch_size}-"
            f"{n_gcn_layers}-"
            f"{n_gcn_hidden_features}-"
            f"{n_am1_layers}-"
            f"{n_am1_hidden_features}-"
            f"{learning_rate}"
        ),
    )

    trainer = pl.Trainer(
        gpus=n_gpus, min_epochs=n_epochs, max_epochs=n_epochs, logger=logger
    )
    trainer.fit(model, datamodule=data_module)
    # Pass the datamodule by keyword: the second positional argument of
    # ``Trainer.test`` is the dataloader(s), not the datamodule, so the
    # previous positional call routed it to the wrong parameter. This also
    # matches the ``fit`` call above.
    trainer.test(model, datamodule=data_module)