def test_linear_regression():
    # Comparison of CGnet with sklearn linear regression for a linear force
    # Notes
    # -----
    # This test is quite forgiving in comparing the sklearn/CGnet results
    # for learning a linear force field/quadratic potential because the
    # decimal accuracy is set to one decimal point. It could be lower, but
    # the test might then occasionally fail due to stochastic reasons
    # associated with the dataset and the limited training routine.
    #
    # For this reason, we use np.testing.assert_almost_equal instead of
    # np.testing.assert_allclose

    # First, we instantiate a CGnet model with two hidden layers,
    # each 15 nodes wide
    layers = LinearLayer(1, 15, activation=nn.Softplus(), bias=True)
    layers += LinearLayer(15, 15, activation=nn.Softplus(), bias=True)
    layers += LinearLayer(15, 1, activation=nn.Softplus(), bias=True)
    model = CGnet(layers, ForceLoss())

    # Next, we define the optimizer and train for 35 epochs on the test
    # linear regression data defined in the preamble
    optimizer = torch.optim.Adam(model.parameters(), lr=0.05, weight_decay=0)
    epochs = 35
    for i in range(epochs):
        optimizer.zero_grad()
        energy, force = model.forward(x0)
        loss = model.criterion(force, y0)
        loss.backward()
        optimizer.step()
    loss = loss.detach().numpy()

    # We produce numpy versions of the training data
    x = x0.detach().numpy()
    y = y0.numpy()

    # Here, we instantiate an sklearn linear regression model for comparison
    # to CGnet
    lrg = LinearRegression()
    reg = lrg.fit(x, y)
    y_pred = reg.predict(x)

    # Here, we test to see if the MSE losses are close up to a tolerance
    np.testing.assert_almost_equal(mse(y, y_pred), loss, decimal=1)
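
# ---------------------------------------------------------------------------
# Hedged sketch, not part of the original test module: test_linear_regression
# relies on fixtures x0, y0, and mse from the (elided) preamble. The helper
# below shows one plausible way such fixtures could be built; its name, the
# dataset size, the slope, and the noise scale are illustrative assumptions.
# The alias `mse` would come from something like
# `from sklearn.metrics import mean_squared_error as mse`.


def _example_linear_fixtures(n_points=50, slope=2.0, noise_scale=0.1):
    # Coordinates require gradients so that ForceLoss can compute the
    # predicted force as the negative gradient of the predicted energy
    x0 = torch.rand((n_points, 1), requires_grad=True)
    # Noisy linear force targets; a linear force corresponds to a quadratic
    # potential, which the network should be able to recover
    y0 = slope * x0.detach() + noise_scale * torch.randn((n_points, 1))
    return x0, y0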
def test_cgnet_simulation():
    # Tests a simulation from a CGnet built with a GeometryFeature layer
    # for the shapes of its coordinate, force, and potential outputs

    # First, we set up a harmonic bond prior and a GeometryFeature layer
    bonds_idx = geom_stats.return_indices('Bonds')
    bonds_interactions, _ = geom_stats.get_prior_statistics(features='Bonds',
                                                            as_list=True)
    harmonic_potential = HarmonicLayer(bonds_idx, bonds_interactions)
    feature_layer = GeometryFeature(feature_tuples='all_backbone',
                                    n_beads=beads)
    num_feats = feature_layer(coords).size()[1]

    # Next, we create a hidden architecture four layers deep with a random
    # width and a scalar output
    rand = np.random.randint(1, 10)
    arch = (LinearLayer(num_feats, rand, bias=True, activation=nn.Tanh()) +
            LinearLayer(rand, rand, bias=True, activation=nn.Tanh()) +
            LinearLayer(rand, rand, bias=True, activation=nn.Tanh()) +
            LinearLayer(rand, rand, bias=True, activation=nn.Tanh()) +
            LinearLayer(rand, 1, bias=True, activation=None))

    # Next, we instantiate a CGnet model from the above objects,
    # with force matching as the loss criterion
    model = CGnet(arch, ForceLoss(), feature=feature_layer,
                  priors=[harmonic_potential])
    model.eval()

    # Here, we produce mock target protein force data
    forces = torch.randn((frames, beads, 3), requires_grad=False)

    # Here, we create an optimizer for training the model,
    # and we train it for one epoch
    optimizer = torch.optim.Adam(model.parameters(), lr=0.05, weight_decay=0)
    optimizer.zero_grad()
    energy, pred_forces = model.forward(coords)
    loss = model.criterion(pred_forces, forces)
    loss.backward()
    optimizer.step()

    # Here, we choose a random simulation length (4 or 8 steps)
    # and randomly choose to save every 2 or 4 frames
    length = np.random.choice([2, 4]) * 2
    save = np.random.choice([2, 4])

    # Here, we instantiate a Simulation and produce a CG trajectory
    my_sim = Simulation(model, coords, beta=geom_stats.beta, length=length,
                        save_interval=save, save_forces=True,
                        save_potential=True)
    traj = my_sim.simulate()

    # We test to see if the trajectory has the proper shape based on the
    # above choices for simulation length and frame saving
    assert traj.shape == (frames, length // save, beads, dims)
    assert my_sim.simulated_forces.shape == (frames, length // save,
                                             beads, dims)
    assert my_sim.simulated_potential.shape == (frames, length // save, 1)
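
# ---------------------------------------------------------------------------
# Hedged sketch, not part of the original test module: test_cgnet_simulation
# depends on preamble fixtures frames, beads, dims, coords, and geom_stats.
# One plausible construction is sketched below; the sizes are illustrative,
# and the GeometryStatistics arguments are an assumption about the cgnet API
# rather than a copy of the original preamble, so the sketch is left
# commented out.
#
#     frames, beads, dims = 10, 5, 3
#     coords_numpy = np.random.randn(frames, beads, dims).astype('float32')
#     coords = torch.tensor(coords_numpy, requires_grad=True)
#     geom_stats = GeometryStatistics(coords_numpy, backbone_inds='all',
#                                     get_all_distances=True,
#                                     get_backbone_angles=True,
#                                     get_backbone_dihedrals=True)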
def test_dataset_loss_with_optimizer_and_regularization():
    # Test manual batch processing vs. dataset_loss during regularized
    # training: make a simple model and test that a manual on-the-fly loss
    # calculation approximately matches the one from dataset_loss when given
    # an optimizer and a regularization function

    # Set up the number of training epochs
    num_epochs = 5

    # Empty lists to be compared after training
    epochal_train_losses_manual = []
    epochal_train_losses_dataset = []

    # We require two models and two optimizers to keep things separate
    # The architectures MUST be deep copied or else they are tethered
    # to each other
    model_manual = CGnet(copy.deepcopy(arch), ForceLoss()).float()
    model_dataset = CGnet(copy.deepcopy(arch), ForceLoss()).float()

    optimizer_manual = torch.optim.Adam(model_manual.parameters(), lr=1e-5)
    optimizer_dataset = torch.optim.Adam(model_dataset.parameters(), lr=1e-5)

    # We want a nonrandom loader so we can compare the losses at the end
    nonrandom_loader = DataLoader(dataset, batch_size=batch_size)

    for epoch in range(1, num_epochs + 1):
        train_loss_manual = 0.0
        train_loss_dataset = 0.0

        # This is the manual part
        effective_batch_num = 0

        for batch_num, batch_data in enumerate(nonrandom_loader):
            optimizer_manual.zero_grad()
            coord, force, embedding_property = batch_data

            # Batches are weighted by their size relative to the first
            # (full-sized) batch, since the last batch may be smaller
            if batch_num == 0:
                ref_batch_size = coord.numel()
            batch_weight = coord.numel() / ref_batch_size

            energy, pred_force = model_manual.forward(coord,
                                                      embedding_property)
            batch_loss = model_manual.criterion(pred_force, force)
            batch_loss.backward()
            optimizer_manual.step()

            lipschitz_projection(model_manual, strength=lipschitz_strength)

            train_loss_manual += batch_loss.detach().cpu() * batch_weight
            effective_batch_num += batch_weight

        train_loss_manual = train_loss_manual / effective_batch_num
        epochal_train_losses_manual.append(train_loss_manual.numpy())

        # This is the dataset_loss part
        train_loss_dataset = dataset_loss(model_dataset, nonrandom_loader,
                                          optimizer_dataset,
                                          _regularization_function)
        epochal_train_losses_dataset.append(train_loss_dataset)

    np.testing.assert_allclose(epochal_train_losses_manual,
                               epochal_train_losses_dataset,
                               rtol=1e-4)
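
# ---------------------------------------------------------------------------
# Hedged sketch, not part of the original test module:
# _regularization_function is defined in the (elided) preamble. For the
# manual loop and dataset_loss to agree, it presumably applies the same
# Lipschitz projection, at the same strength, that the manual loop applies
# after each optimizer step. A plausible stand-in (hypothetical name) is:


def _example_regularization_function(model, strength=lipschitz_strength):
    # Project the model's weight matrices onto a Lipschitz constraint of the
    # given strength, mirroring the lipschitz_projection call in the manual
    # training loop above
    lipschitz_projection(model, strength=strength)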