示例#1
0
def test_atomic_element(openff_methane: Molecule):
    """Check the encoding ``AtomicElement`` produces for the methane fixture.

    The feature is built from the element list ``["H", "C"]`` and must report
    a length of two. Applying it to ``openff_methane`` must give a ``(5, 2)``
    array whose first row is ``[0, 1]`` and whose remaining rows are
    ``[1, 0]`` (presumably a one-hot encoding with the carbon stored as
    atom 0 -- confirm against the fixture).
    """
    element_feature = AtomicElement(["H", "C"])
    assert len(element_feature) == 2

    encoding = element_feature(openff_methane).numpy()
    assert encoding.shape == (5, 2)

    # Split the rows so the two groups of expectations read independently.
    first_row = encoding[0]
    remaining_rows = encoding[1:]

    # Every row after the first is flagged in column 0 only.
    assert numpy.allclose(remaining_rows[:, 0], 1.0)
    assert numpy.allclose(remaining_rows[:, 1], 0.0)

    # The first row is flagged in column 1 only.
    assert numpy.isclose(first_row[0], 0.0)
    assert numpy.isclose(first_row[1], 1.0)
示例#2
0
    def test_setup(self, tmpdir, mock_data_store):
        """Check the state of the data module after ``prepare_data`` + ``setup``.

        Only a training set path is supplied, so after setup the first
        training dataset must be a ``DGLMoleculeDataset`` while the validation
        and test data must both remain ``None``.
        """
        # Write the cache inside the per-test temporary directory.
        cache_path = os.path.join(tmpdir, "tmp.pkl")

        module = DGLMoleculeDataModule(
            atom_features=[AtomicElement(["Cl", "H"])],
            bond_features=[BondOrder()],
            partial_charge_method="am1bcc",
            bond_order_method="am1",
            train_set_path=mock_data_store,
            train_batch_size=None,
            output_path=cache_path,
            use_cached_data=False,
        )
        module.prepare_data()
        module.setup()

        first_train_set = module._train_data.datasets[0]
        assert isinstance(first_train_set, DGLMoleculeDataset)

        # No validation / test paths were given, so nothing should be loaded.
        assert module._val_data is None
        assert module._test_data is None
示例#3
0
    def test_prepare_cache(self, use_cached_data, expected_raises, tmpdir,
                           mock_data_store):
        """Check how ``prepare_data`` reacts to an already existing cache file.

        A placeholder pickle is written to the module's output path before
        ``prepare_data`` is called, and the call is made inside the
        ``expected_raises`` context manager.

        ``use_cached_data`` and ``expected_raises`` are presumably supplied by
        a parametrize decorator that is outside this view -- TODO confirm.
        """
        cache_path = os.path.join(tmpdir, "tmp.pkl")

        module = DGLMoleculeDataModule(
            atom_features=[AtomicElement(["Cl", "H"])],
            bond_features=[BondOrder()],
            partial_charge_method="am1bcc",
            bond_order_method="am1",
            train_set_path=mock_data_store,
            train_batch_size=None,
            output_path=cache_path,
            use_cached_data=use_cached_data,
        )

        # Pre-populate the output location with a dummy three-tuple so that a
        # cache file is already present when ``prepare_data`` runs.
        placeholder = (None, None, None)

        with open(module._output_path, "wb") as cache_file:
            pickle.dump(placeholder, cache_file)

        with expected_raises:
            module.prepare_data()
示例#4
0
    def mock_data_module(self) -> DGLMoleculeDataModule:
        """Build a ``DGLMoleculeDataModule`` wired to ``mock_molecule_to_dgl``.

        The module is configured with train / validation / test paths and
        batch sizes of 1, 2 and 3 respectively, cached data enabled, and the
        class-level ``mock_molecule_to_dgl`` in place of the default
        molecule-to-graph conversion.

        Returns:
            The configured data module.
        """
        atom_features = [
            AtomicElement(["C", "H", "Cl"]),
            AtomFormalCharge([0, 1]),
        ]
        bond_features = [BondOrder()]

        module = DGLMoleculeDataModule(
            atom_features=atom_features,
            bond_features=bond_features,
            partial_charge_method="am1bcc",
            bond_order_method="am1",
            train_set_path="train.sqlite",
            train_batch_size=1,
            val_set_path="val.sqlite",
            val_batch_size=2,
            test_set_path="test.sqlite",
            test_batch_size=3,
            output_path="tmp.pkl",
            use_cached_data=True,
            molecule_to_dgl=TestDGLMoleculeDataModule.mock_molecule_to_dgl,
        )
        return module
示例#5
0
    def test_prepare(self, tmpdir, mock_data_store):
        """Check that ``prepare_data`` writes the expected pickle to disk.

        The same mock store is used for the train, validation and test sets.
        After preparation the output file must exist, and every object in the
        unpickled collection must be a ``ConcatDataset`` whose first dataset
        reports ``n_features == 2``.
        """
        cache_path = os.path.join(tmpdir, "tmp.pkl")

        module = DGLMoleculeDataModule(
            atom_features=[AtomicElement(["Cl", "H"])],
            bond_features=[BondOrder()],
            partial_charge_method="am1bcc",
            bond_order_method="am1",
            train_set_path=mock_data_store,
            train_batch_size=None,
            val_set_path=mock_data_store,
            test_set_path=mock_data_store,
            output_path=cache_path,
        )
        module.prepare_data()

        assert os.path.isfile(module._output_path)

        # Read back what was cached; the file was produced by this test.
        with open(module._output_path, "rb") as cache_file:
            cached_sets = pickle.load(cache_file)

        assert all(isinstance(entry, ConcatDataset) for entry in cached_sets)
        assert all(entry.datasets[0].n_features == 2 for entry in cached_sets)
def main():
    """Train and test a small graph model on a handful of alcohol SMILES.

    The script featurizes four training molecules and one test molecule,
    builds a ``DGLMoleculeLightningModel`` with a ``SAGEConv`` convolution
    module and a single ``"am1-charges"`` readout, trains it for a fixed
    number of epochs and then evaluates it on the test loader.
    """

    # ``torch.seed()`` re-seeds the RNG with a non-deterministic value and
    # returns that value; printing it records the seed used for this run.
    print(torch.seed())

    # Define the atom / bond features of interest.
    atom_features = [
        AtomicElement(["C", "O", "H"]),
        AtomConnectivity(),
    ]
    bond_features = [
        BondOrder(),
    ]

    # Compute the total length of the input atomic feature vector
    n_atom_features = sum(len(feature) for feature in atom_features)

    # Load in the training and test data
    training_smiles = ["CO", "CCO", "CCCO", "CCCCO"]
    training_data = DGLMoleculeDataset.from_smiles(
        training_smiles,
        atom_features,
        bond_features,
        label_function,
    )
    # Each loader serves its whole data set as a single, unshuffled batch.
    training_loader = DGLMoleculeDataLoader(
        training_data,
        batch_size=len(training_smiles),
        shuffle=False,
    )

    test_smiles = [
        "CCCCCCCCCO",
    ]
    test_loader = DGLMoleculeDataLoader(
        DGLMoleculeDataset.from_smiles(
            test_smiles,
            atom_features,
            bond_features,
            label_function,
        ),
        batch_size=len(test_smiles),
        shuffle=False,
    )

    # Define the model.
    n_gcn_layers = 5
    n_gcn_hidden_features = 128

    n_am1_layers = 2
    n_am1_hidden_features = 64

    learning_rate = 0.001

    model = DGLMoleculeLightningModel(
        convolution_module=ConvolutionModule(
            architecture="SAGEConv",
            in_feats=n_atom_features,
            hidden_feats=[n_gcn_hidden_features] * n_gcn_layers,
        ),
        readout_modules={
            # The keys of the readout modules should correspond to keys in the
            # label dictionary.
            "am1-charges": ReadoutModule(
                pooling_layer=PoolAtomFeatures(),
                readout_layers=SequentialLayers(
                    in_feats=n_gcn_hidden_features,
                    # The hidden layers are followed by a final layer of
                    # width 2 that uses the identity activation.
                    hidden_feats=[n_am1_hidden_features] * n_am1_layers + [2],
                    activation=["ReLU"] * n_am1_layers + ["Identity"],
                ),
                postprocess_layer=ComputePartialCharges(),
            )
        },
        learning_rate=learning_rate,
    )

    print(model)

    # Train the model
    n_epochs = 100

    # Use a single GPU when CUDA is available, otherwise fall back to the CPU.
    n_gpus = 1 if torch.cuda.is_available() else 0
    print(f"Using {n_gpus} GPUs")

    # Setting min and max epochs to the same value forces exactly
    # ``n_epochs`` epochs of training.
    trainer = pl.Trainer(gpus=n_gpus, min_epochs=n_epochs, max_epochs=n_epochs)

    trainer.fit(model, train_dataloaders=training_loader)
    # ``dataloaders`` is the keyword that pairs with ``train_dataloaders``
    # above; the older ``test_dataloaders`` spelling was deprecated in the
    # same Lightning release and later removed.
    trainer.test(model, dataloaders=test_loader)
示例#7
0
def main(
    train_set_path,
    train_batch_size,
    val_set_path,
    test_set_path,
    n_gcn_layers,
    n_gcn_hidden_features,
    n_am1_layers,
    n_am1_hidden_features,
    learning_rate,
    n_epochs,
):
    """Train and test a graph model using data served by a data module.

    Args:
        train_set_path: Path passed to the data module as the training set.
        train_batch_size: Batch size used for the training set.
        val_set_path: Path passed to the data module as the validation set.
        test_set_path: Path passed to the data module as the test set.
        n_gcn_layers: Number of hidden layers in the convolution module.
        n_gcn_hidden_features: Width of each convolution hidden layer.
        n_am1_layers: Number of hidden layers in the ``"am1-charges"``
            readout, not counting the final width-2 layer.
        n_am1_hidden_features: Width of each readout hidden layer.
        learning_rate: Learning rate handed to the lightning model.
        n_epochs: Used as both the minimum and maximum number of epochs.
    """

    # Must stay the first statement: at this point ``locals()`` holds only
    # the arguments, so this echoes the run configuration.
    pprint(locals())

    # pl.seed_everything(3992210414)  # h-parameter sweep v1

    # Define the features of interest.
    atom_features = [
        AtomicElement(["C", "O", "H", "N", "S", "F", "Br", "Cl", "I", "P"]),
        AtomConnectivity(),
        AtomAverageFormalCharge(),
    ]
    bond_features = [
        # BondIsInRing(),
        # BondOrder()
    ]

    # Load in the pre-processed training and test molecules and store them in
    # featurized graphs.
    data_module = DGLMoleculeDataModule(
        atom_features,
        bond_features,
        partial_charge_method="am1",
        bond_order_method=None,
        train_set_path=train_set_path,
        train_batch_size=train_batch_size,
        val_set_path=val_set_path,
        val_batch_size=None,
        test_set_path=test_set_path,
        test_batch_size=None,
        use_cached_data=True,
    )
    n_atom_features = data_module.n_atom_features

    # Define the model.
    model = DGLMoleculeLightningModel(
        convolution_module=ConvolutionModule(
            architecture="SAGEConv",
            in_feats=n_atom_features,
            hidden_feats=[n_gcn_hidden_features] * n_gcn_layers,
        ),
        readout_modules={
            "am1-charges": ReadoutModule(
                pooling_layer=PoolAtomFeatures(),
                readout_layers=SequentialLayers(
                    in_feats=n_gcn_hidden_features,
                    # The hidden layers are followed by a final layer of
                    # width 2 that uses the identity activation.
                    hidden_feats=[n_am1_hidden_features] * n_am1_layers + [2],
                    activation=["ReLU"] * n_am1_layers + ["Identity"],
                ),
                postprocess_layer=ComputePartialCharges(),
            )
        },
        learning_rate=learning_rate,
    )
    print(model)

    # Train the model
    # Use a single GPU when CUDA is available, otherwise fall back to the CPU.
    n_gpus = 1 if torch.cuda.is_available() else 0
    print(f"Using {n_gpus} GPUs")

    # Encode the swept hyper-parameters in the log version so that each
    # configuration is written to its own directory.
    logger = TensorBoardLogger(
        "lightning-logs",
        version=(
            f"{train_batch_size}-"
            f"{n_gcn_layers}-"
            f"{n_gcn_hidden_features}-"
            f"{n_am1_layers}-"
            f"{n_am1_hidden_features}-"
            f"{learning_rate}"
        ),
    )

    trainer = pl.Trainer(
        gpus=n_gpus, min_epochs=n_epochs, max_epochs=n_epochs, logger=logger
    )

    trainer.fit(model, datamodule=data_module)
    # Pass the data module by keyword, as in ``fit`` above: the second
    # positional parameter of ``Trainer.test`` is the dataloaders argument,
    # which only some Lightning versions accept a data module for.
    trainer.test(model, datamodule=data_module)