Example #1
def test_linear_regression():
    # Comparison of CGnet with sklearn linear regression for linear force

    # Notes
    # -----
    # This test is quite forgiving in comparing the sklearn/CGnet results
    # for learning a linear force field/quadratic potential because the
    # comparison is made to only one decimal place. The tolerance could be
    # tightened, but the test might then occasionally fail for stochastic
    # reasons associated with the dataset and the limited training routine.
    #
    # For this reason, we use np.testing.assert_almost_equal instead of
    # np.testing.assert_allclose

    # First, we instantiate a CGnet model two hidden layers deep and 15 nodes wide
    layers = LinearLayer(1, 15, activation=nn.Softplus(), bias=True)
    layers += LinearLayer(15, 15, activation=nn.Softplus(), bias=True)
    layers += LinearLayer(15, 1, activation=nn.Softplus(), bias=True)
    model = CGnet(layers, ForceLoss())

    # Next, we define the optimizer and train for 35 epochs on the test linear
    # regression data defined in the preamble
    optimizer = torch.optim.Adam(model.parameters(), lr=0.05, weight_decay=0)
    epochs = 35
    for i in range(epochs):
        optimizer.zero_grad()
        energy, force = model.forward(x0)
        loss = model.criterion(force, y0)
        loss.backward()
        optimizer.step()
    loss = loss.detach().numpy()

    # We produce numpy versions of the training data
    x = x0.detach().numpy()
    y = y0.numpy()

    # Here, we instantiate an sklearn linear regression model for comparison
    # to CGnet
    lrg = LinearRegression()
    reg = lrg.fit(x, y)
    y_pred = reg.predict(x)

    # Here, we test to see whether the MSE losses agree to within the tolerance.
    np.testing.assert_almost_equal(mse(y, y_pred), loss, decimal=1)
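
For reference, the ForceLoss criterion compared above is a mean squared error between predicted and reference forces (which is why it can be checked directly against sklearn's MSE), with CGnet obtaining predicted forces by differentiating its scalar energy output with respect to the input coordinates. A minimal standalone sketch of that energy-to-force pattern, in plain PyTorch with illustrative data rather than cgnet's API:

import torch
import torch.nn as nn

# Hypothetical sketch (plain PyTorch, not cgnet's internals): a scalar energy
# network whose predicted force is the negative gradient of the energy with
# respect to the input coordinates
net = nn.Sequential(nn.Linear(1, 15), nn.Softplus(),
                    nn.Linear(15, 15), nn.Softplus(),
                    nn.Linear(15, 1))

x = torch.linspace(-1.0, 1.0, 100).reshape(-1, 1).requires_grad_(True)
energy = net(x)
force = -torch.autograd.grad(energy.sum(), x, create_graph=True)[0]

target = -2.0 * x.detach()                # linear force from a quadratic potential
loss = torch.mean((force - target) ** 2)  # force-matching MSE, as in ForceLoss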
Example #2
def test_cgnet_simulation():
    # Tests that a simulation from a CGnet built with the GeometryFeature
    # layer produces coordinate, force, and potential outputs of the proper shapes

    # First, we set up a bond harmonic prior and a GeometryFeature layer
    bonds_idx = geom_stats.return_indices('Bonds')
    bonds_interactions, _ = geom_stats.get_prior_statistics(features='Bonds',
                                                            as_list=True)
    harmonic_potential = HarmonicLayer(bonds_idx, bonds_interactions)
    feature_layer = GeometryFeature(feature_tuples='all_backbone',
                                    n_beads=beads)
    num_feats = feature_layer(coords).size()[1]

    # Next, we create a four-layer hidden architecture with a random width
    # and a scalar output
    rand = np.random.randint(1, 10)
    arch = (LinearLayer(num_feats, rand, bias=True, activation=nn.Tanh()) +
            LinearLayer(rand, rand, bias=True, activation=nn.Tanh()) +
            LinearLayer(rand, rand, bias=True, activation=nn.Tanh()) +
            LinearLayer(rand, rand, bias=True, activation=nn.Tanh()) +
            LinearLayer(rand, 1, bias=True, activation=None))

    # Next, we instantiate a CGnet model using the above objects,
    # with force matching as the loss criterion
    model = CGnet(arch,
                  ForceLoss(),
                  feature=feature_layer,
                  priors=[harmonic_potential])
    model.eval()

    # Here, we produce mock target protein force data
    forces = torch.randn((frames, beads, 3), requires_grad=False)

    # Here, we create an optimizer for training the model,
    # and we train it for one epoch
    optimizer = torch.optim.Adam(model.parameters(), lr=0.05, weight_decay=0)
    optimizer.zero_grad()
    energy, pred_forces = model.forward(coords)
    loss = model.criterion(pred_forces, forces)
    loss.backward()
    optimizer.step()

    # Here, we define a random simulation length (4 or 8 steps)
    # and randomly choose to save every 2 or 4 frames
    length = np.random.choice([2, 4]) * 2
    save = np.random.choice([2, 4])

    # Here, we instantiate the Simulation class and produce a CG trajectory
    my_sim = Simulation(model,
                        coords,
                        beta=geom_stats.beta,
                        length=length,
                        save_interval=save,
                        save_forces=True,
                        save_potential=True)

    traj = my_sim.simulate()

    # We test to see if the trajectory has the proper shape based on the above
    # choices of simulation length and frame saving
    assert traj.shape == (frames, length // save, beads, dims)
    assert my_sim.simulated_forces.shape == (frames, length // save, beads,
                                             dims)
    assert my_sim.simulated_potential.shape == (frames, length // save, 1)
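
The shape assertions above follow from the saving scheme: a simulation of length steps recorded every save steps yields length // save saved frames for each of the frames initial configurations. A quick check of that arithmetic, with hypothetical values standing in for the preamble's constants:

# Hypothetical values standing in for the preamble's frames, beads, and dims
frames, beads, dims = 10, 5, 3
length, save = 8, 4                    # one possible draw from the choices above
n_saved = length // save               # 2 frames recorded per trajectory
assert (frames, n_saved, beads, dims) == (10, 2, 5, 3)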
Example #3
def test_dataset_loss_with_optimizer_and_regularization():
    # Test manual batch processing vs. dataset_loss during regularized training.
    # We make a simple model and test that a manual on-the-fly loss calculation
    # approximately matches the one returned by dataset_loss when the latter is
    # given an optimizer and a regularization function

    # Set the number of training epochs
    num_epochs = 5

    # Empty lists to be compared after training
    epochal_train_losses_manual = []
    epochal_train_losses_dataset = []

    # We require two models and two optimizers to keep things separate.
    # The architectures MUST be deep copied; otherwise the two models
    # would share (and jointly update) the same underlying parameters
    model_manual = CGnet(copy.deepcopy(arch), ForceLoss()).float()
    model_dataset = CGnet(copy.deepcopy(arch), ForceLoss()).float()

    optimizer_manual = torch.optim.Adam(model_manual.parameters(), lr=1e-5)
    optimizer_dataset = torch.optim.Adam(model_dataset.parameters(), lr=1e-5)

    # We want a nonrandom loader so we can compare the losses at the end
    nonrandom_loader = DataLoader(dataset, batch_size=batch_size)

    for epoch in range(1, num_epochs + 1):
        train_loss_manual = 0.0
        train_loss_dataset = 0.0

        # This is the manual part
        effective_batch_num = 0

        for batch_num, batch_data in enumerate(nonrandom_loader):
            optimizer_manual.zero_grad()
            coord, force, embedding_property = batch_data

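            # Weight each batch by its size relative to the first (full)
            # batch, so a smaller final batch contributes proportionally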
            if batch_num == 0:
                ref_batch_size = coord.numel()

            batch_weight = coord.numel() / ref_batch_size

            energy, pred_force = model_manual.forward(coord,
                                                      embedding_property)

            batch_loss = model_manual.criterion(pred_force, force)
            batch_loss.backward()
            optimizer_manual.step()

            lipschitz_projection(model_manual, strength=lipschitz_strength)

            train_loss_manual += batch_loss.detach().cpu() * batch_weight
            effective_batch_num += batch_weight

        train_loss_manual = train_loss_manual / effective_batch_num
        epochal_train_losses_manual.append(train_loss_manual.numpy())

        # This is the dataset loss part
        train_loss_dataset = dataset_loss(model_dataset, nonrandom_loader,
                                          optimizer_dataset,
                                          _regularization_function)
        epochal_train_losses_dataset.append(train_loss_dataset)

    np.testing.assert_allclose(epochal_train_losses_manual,
                               epochal_train_losses_dataset,
                               rtol=1e-4)
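
The size-weighted accumulation in the manual loop keeps the comparison fair when the final batch is smaller than the others: weighting each batch loss by its size relative to the first batch reproduces a per-example average over the epoch, which is what dataset_loss reports. A standalone sketch with illustrative numbers:

# Size-weighted epoch averaging, mirroring the manual loop above
batch_sizes = [32, 32, 20]             # final batch is smaller (illustrative)
batch_losses = [0.50, 0.40, 0.30]      # per-batch mean losses (illustrative)

ref = batch_sizes[0]
total, effective = 0.0, 0.0
for size, mean_loss in zip(batch_sizes, batch_losses):
    weight = size / ref                # weight relative to the first batch
    total += mean_loss * weight
    effective += weight

epoch_loss = total / effective
# epoch_loss == (0.50*32 + 0.40*32 + 0.30*20) / (32 + 32 + 20) ~= 0.414,
# i.e. the per-example average over the whole epoch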