def test_equality_with_cgnet_distances():
    # The CA-CA distances produced by GeometryFeature must agree with the
    # ones mdtraj computes on the equivalent trajectory.
    geom_feature = GeometryFeature(feature_tuples='all_backbone',
                                   n_beads=beads)
    # The forward pass populates geom_feature.distances
    _ = geom_feature.forward(data_tensor)

    molecule = CGMolecule(names=names, resseq=resseq, resmap=resmap)
    traj = molecule.make_trajectory(data)

    # Every unordered pair of CA beads, in ascending index order; these
    # double as feature tuples
    ca_indices = [idx for idx, name in enumerate(names) if name == 'CA']
    ca_pairs = [(first, second)
                for pos, first in enumerate(ca_indices[:-1])
                for second in ca_indices[pos + 1:]]

    mdtraj_ca_dists = md.compute_distances(traj, ca_pairs)

    # Column of each CA pair within the GeometryFeature distance output
    column_of_pair = {}
    for column, pair in enumerate(geom_feature.descriptions['Distances']):
        if pair in ca_pairs:
            column_of_pair[pair] = column

    # Pull out only the CA columns, ordered like ca_pairs
    ca_columns = [column_of_pair[pair] for pair in ca_pairs]
    cgnet_ca_dists = geom_feature.distances.numpy()[:, ca_columns]

    np.testing.assert_allclose(mdtraj_ca_dists, cgnet_ca_dists, rtol=1e-6)
def test_distance_features():
    # Pairwise distance features must match a scipy reference calculation
    geom_feature = GeometryFeature(feature_tuples='all_backbone',
                                   n_beads=beads)
    # The forward pass computes the features (distances, angles,
    # dihedrals) and exposes them as attributes
    _ = geom_feature.forward(data_tensor)

    # Bead-index pairs, in the order GeometryFeature reports distances
    pair_descriptions = geom_feature.descriptions['Distances']

    # Check one frame at a time
    for frame_ind in range(frames):
        condensed = scipy.spatial.distance.pdist(data[frame_ind])
        full_matrix = scipy.spatial.distance.squareform(condensed)

        feature_distances = list(geom_feature.distances[frame_ind].numpy())

        # Read the scipy distances in GeometryFeature order
        scipy_distances = [full_matrix[pair] for pair in pair_descriptions]

        np.testing.assert_allclose(feature_distances, scipy_distances,
                                   rtol=1e-6)
def test_dihedral_features():
    # Backbone dihedral features must match a manual calculation.
    #
    # For spatial coordinates a, b, c, d, the dihedral \alpha describing
    # a-b-c-d (i.e., the angle between the planes a-b-c and b-c-d) is
    # computed below as:
    #
    # \overline{ba} = b - a
    # \overline{cb} = c - b
    # \overline{dc} = d - c
    #
    # % normal vectors of the first and second planes, respectively
    # n_1 = \overline{ba} \times \overline{cb}
    # n_2 = \overline{cb} \times \overline{dc}
    #
    # m_1 = n_2 \times n_1
    #
    # \sin(\alpha) \propto \frac{m_1 \cdot \overline{cb}}
    #                          {\sqrt{\overline{cb} \cdot \overline{cb}}}
    # \cos(\alpha) \propto n_2 \cdot n_1
    # \alpha = \arctan2(\sin(\alpha), \cos(\alpha))
    geom_feature = GeometryFeature(feature_tuples='all_backbone',
                                   n_beads=beads)
    # The forward pass computes the features (distances, angles,
    # dihedrals) and exposes them as attributes
    _ = geom_feature.forward(data_tensor)

    # Manual dihedrals, frame by frame, over consecutive bead quadruples
    n_quads = data.shape[1] - 3
    diheds = []
    for frame_data in data:
        frame_diheds = []
        for start in range(n_quads):
            a, b, c, d = frame_data[start:start + 4]

            ba = b - a
            cb = c - b
            dc = d - c

            n1 = np.cross(ba, cb)
            n2 = np.cross(cb, dc)
            m1 = np.cross(n2, n1)

            sin_term = np.dot(m1, cb) / np.sqrt(np.dot(cb, cb))
            cos_term = np.dot(n2, n1)
            frame_diheds.append(np.arctan2(sin_term, cos_term))
        diheds.append(frame_diheds)

    # Compare arctans rather than the sines and cosines themselves
    feature_diheds = [
        np.arctan2(sines.numpy(), cosines.numpy())
        for sines, cosines in zip(geom_feature.dihedral_sines,
                                  geom_feature.dihedral_cosines)
    ]

    # Only the magnitudes of the dihedrals are compared
    np.testing.assert_allclose(np.abs(feature_diheds), np.abs(diheds),
                               rtol=1e-4)
def test_zscore_layer():
    # ZscoreLayer must normalize features exactly like the reference
    # calculation (feat - mean) / std.
    #
    # Notes
    # -----
    # rescaled_feat_truth is in principle equal to:
    #
    # from sklearn.preprocessing import StandardScaler
    # scalar = StandardScaler()
    # rescaled_feat_truth = scalar.fit_transform(feat)
    #
    # However, the equality is only preserved with precision >= 1e-4.

    # Statistics for every feature
    full_prior_stats = geom_stats.get_prior_statistics()

    # Row 0 holds the means, row 1 the standard deviations
    zscores = torch.zeros((2, len(full_prior_stats)))
    for column, stat in enumerate(full_prior_stats.values()):
        zscores[0, column] = stat['mean']
        zscores[1, column] = stat['std']

    # Featurize the test coordinates and build the reference result
    geom_feat = GeometryFeature(feature_tuples='all_backbone', n_beads=beads)
    feat = geom_feat(coords)
    means, stds = zscores[0, :], zscores[1, :]
    rescaled_feat_truth = (feat - means) / stds

    # The layer's forward pass has to reproduce the reference exactly
    zlayer = ZscoreLayer(zscores)
    rescaled_feat = zlayer(feat)

    np.testing.assert_array_equal(rescaled_feat.detach().numpy(),
                                  rescaled_feat_truth.detach().numpy())
def test_equality_with_cgnet_angles():
    # CA backbone angles calculated by GeometryFeature must be consistent
    # with mdtraj. This test lives here because it needs mdtraj.
    molecule = CGMolecule(names=names, resseq=resseq, resmap=resmap)
    traj = molecule.make_trajectory(data)

    # Consecutive CA triples define the backbone angles handed to mdtraj
    ca_indices = [idx for idx, name in enumerate(names) if name == 'CA']
    backbone_angles = list(zip(ca_indices, ca_indices[1:], ca_indices[2:]))
    mdtraj_angles = md.compute_angles(traj, backbone_angles)

    # Build a GeometryFeature for just these CA angle triples
    geom_feature = GeometryFeature(feature_tuples=backbone_angles)
    _ = geom_feature.forward(data_tensor)
    cgnet_angles = geom_feature.angles

    np.testing.assert_allclose(mdtraj_angles, cgnet_angles, rtol=1e-4)
def test_distance_index_shuffling():
    # Distances requested in a shuffled order must come back in that order

    # One frame, 10 beads, 3 dimensions
    data_to_shuffle = np.random.randn(1, 10, 3)
    data_to_shuffle_tensor = torch.Tensor(data_to_shuffle)

    y_dist_inds, _ = g.get_distance_indices(10)

    # Reference: distances in the default order; the forward pass exposes
    # them as an attribute
    geom_feature = GeometryFeature(feature_tuples=y_dist_inds)
    _ = geom_feature.forward(data_to_shuffle_tensor)

    # Random permutation of the distance tuples
    permutation = np.arange(len(y_dist_inds))
    np.random.shuffle(permutation)
    shuffled_tuples = [
        tuple(pair) for pair in np.array(y_dist_inds)[permutation]
    ]

    geom_feature_shuffle = GeometryFeature(feature_tuples=shuffled_tuples)
    _ = geom_feature_shuffle.forward(data_to_shuffle_tensor)

    # Permuting the reference distances must reproduce the shuffled
    # feature output
    reference = geom_feature.distances[0][permutation]
    np.testing.assert_array_equal(geom_feature_shuffle.distances[0],
                                  reference)
def test_equality_with_cgnet_dihedrals():
    # Dihedral sines/cosines from GeometryFeature must agree with mdtraj
    geom_feature = GeometryFeature(feature_tuples='all_backbone',
                                   n_beads=beads)
    _ = geom_feature.forward(data_tensor)

    molecule = CGMolecule(names=names, resseq=resseq, resmap=resmap)
    traj = molecule.make_trajectory(data)

    # Element [1] of the mdtraj result is used as the array of angles
    mdtraj_phis = md.compute_phi(traj)[1]
    mdtraj_psis = md.compute_psi(traj)[1]

    # To get phi's and psi's out of cgnet, we need to specify which
    # indices they correspond to along the backbone
    # ['N', 'CA', 'C', 'N'] dihedrals
    phi_inds = [3 * res for res in range(residues)]
    # ['CA', 'C', 'N', 'CA'] dihedrals
    psi_inds = [3 * res + 1 for res in range(residues)]

    cgnet_cosines = geom_feature.dihedral_cosines.numpy()
    cgnet_sines = geom_feature.dihedral_sines.numpy()

    np.testing.assert_allclose(np.cos(mdtraj_phis),
                               cgnet_cosines[:, phi_inds], rtol=1e-4)
    np.testing.assert_allclose(np.sin(mdtraj_phis),
                               cgnet_sines[:, phi_inds], rtol=1e-4)
    np.testing.assert_allclose(np.cos(mdtraj_psis),
                               cgnet_cosines[:, psi_inds], rtol=1e-4)
    np.testing.assert_allclose(np.sin(mdtraj_psis),
                               cgnet_sines[:, psi_inds], rtol=1e-4)
def test_dihedral_index_shuffling():
    # Dihedrals requested in a shuffled order must come back in that order

    # One frame, 100 beads, 3 dimensions
    bead_count = 100
    data_to_shuffle = np.random.randn(1, bead_count, 3)
    data_to_shuffle_tensor = torch.Tensor(data_to_shuffle)

    # Every run of four consecutive beads defines one dihedral
    n_diheds = bead_count - 3
    y_dihed_inds = [(s, s + 1, s + 2, s + 3) for s in range(n_diheds)]

    # Reference: dihedrals in the default order; the forward pass exposes
    # the sines and cosines as attributes
    geom_feature = GeometryFeature(feature_tuples=y_dihed_inds)
    _ = geom_feature.forward(data_to_shuffle_tensor)

    # Random permutation over all possible dihedral start indices
    permutation = np.arange(n_diheds)
    np.random.shuffle(permutation)
    shuffled_tuples = [
        tuple(quad) for quad in np.array(y_dihed_inds)[permutation]
    ]

    geom_feature_shuffle = GeometryFeature(feature_tuples=shuffled_tuples)
    _ = geom_feature_shuffle.forward(data_to_shuffle_tensor)

    # Permuting the reference sines/cosines must reproduce the shuffled
    # feature output
    np.testing.assert_allclose(
        geom_feature_shuffle.dihedral_cosines[0],
        geom_feature.dihedral_cosines[0][permutation],
        rtol=1e-5)
    np.testing.assert_allclose(
        geom_feature_shuffle.dihedral_sines[0],
        geom_feature.dihedral_sines[0][permutation],
        rtol=1e-5)
def test_backbone_angle_features():
    # Backbone angle features must match a manual calculation.
    #
    # For spatial coordinates a, b, c, the angle \theta describing a-b-c
    # is calculated using the following formula:
    #
    # \overline{ba} = a - b
    # \overline{cb} = c - b
    # \cos(\theta) = \frac{\overline{ba} \cdot \overline{cb}}
    #                     {||\overline{ba}|| ||\overline{cb}||}
    # \theta = \arccos(\cos(\theta))
    geom_feature = GeometryFeature(feature_tuples='all_backbone',
                                   n_beads=beads)
    # The forward pass computes the features (distances, angles,
    # dihedrals) and exposes them as attributes
    _ = geom_feature.forward(data_tensor)

    # Manual angles, frame by frame, over consecutive bead triples
    n_triples = data.shape[1] - 2
    angles = []
    for frame_data in data:
        frame_angles = []
        for start in range(n_triples):
            a, b, c = frame_data[start:start + 3]

            ba = a - b
            cb = c - b

            norm_product = np.linalg.norm(ba) * np.linalg.norm(cb)
            cos_angle = np.dot(ba, cb) / norm_product
            frame_angles.append(np.arccos(cos_angle))
        angles.append(frame_angles)

    np.testing.assert_allclose(geom_feature.angles, angles, rtol=1e-4)
def test_cgnet():
    # Checks the CGnet criterion attribute, the architecture size, the
    # network output sizes, and that the prior is attached to the model.

    # Bond harmonic prior plus a GeometryFeature layer
    bonds_idx = geom_stats.return_indices('Bonds')
    bonds_interactions, _ = geom_stats.get_prior_statistics(
        features='Bonds', as_list=True)
    harmonic_potential = HarmonicLayer(bonds_idx, bonds_interactions)
    feature_layer = GeometryFeature(feature_tuples='all_backbone',
                                    n_beads=beads)
    num_feats = feature_layer(coords).size()[1]

    # Four hidden layers of a random width, then a scalar output layer
    rand = np.random.randint(1, 10)
    arch = LinearLayer(num_feats, rand, bias=True, activation=nn.Tanh())
    for _ in range(3):
        arch = arch + LinearLayer(rand, rand, bias=True,
                                  activation=nn.Tanh())
    arch = arch + LinearLayer(rand, 1, bias=True, activation=None)

    # CGnet model with force matching as the loss criterion
    model = CGnet(arch, ForceLoss(), feature=feature_layer,
                  priors=[harmonic_potential])

    # The prior is embedded
    assert model.priors is not None
    # The hidden architecture has the correct length
    assert len(arch) == len(model.arch)
    # The criterion is embedded correctly
    assert isinstance(model.criterion, ForceLoss)

    # Forward the test protein data through the model
    energy, force = model.forward(coords)

    # One scalar energy per frame; forces shaped like the input coordinates
    n_frames_in = coords.size()[0]
    np.testing.assert_array_equal(energy.size(), (n_frames_in, 1))
    np.testing.assert_array_equal(force.size(), coords.size())
def test_combiner_shape_with_geometry_propagation():
    # Output shapes of a network with schnet features in which the
    # geometry features are also propagated through the neural network

    # All pairwise distances plus backbone angles and dihedrals
    full_geometry_feature = GeometryFeature(feature_tuples='all_backbone',
                                            n_beads=n_beads)
    schnet_feature, embedding_property, feature_size = (
        _get_random_schnet_feature(calculate_geometry=False))
    layer_list = [full_geometry_feature, schnet_feature]

    # Distance indices, with propagate_geometry switched on
    dist_idx = geom_stats.return_indices('Distances')
    feature_combiner = FeatureCombiner(layer_list,
                                       distance_indices=dist_idx,
                                       propagate_geometry=True)

    # One entry per feature tuple, with each four-body dihedral counted
    # twice (cosine and sine)
    feature_tuples = full_geometry_feature.feature_tuples
    n_dihedrals = len([f for f in feature_tuples if len(f) == 4])
    geom_feature_length = len(feature_tuples) + n_dihedrals

    # Input size of the first linear layer: the flattened schnet features
    # concatenated with the geometry features
    total_size = feature_size*n_beads + geom_feature_length

    # Terminal network of a random fully-connected width
    width = np.random.randint(5, high=10)
    arch = LinearLayer(total_size, width, activation=nn.Tanh())
    for _ in range(2):
        arch += LinearLayer(width, width, activation=nn.Tanh())
    arch += LinearLayer(width, 1, activation=None)

    model = CGnet(arch, ForceLoss(), feature=feature_combiner,
                  priors=[bond_potential])

    # Forward the random protein data through the model
    energy, forces = model.forward(coords_torch,
                                   embedding_property=embedding_property)

    # The CGnet output must have the correct size
    np.testing.assert_array_equal(energy.size(), (n_frames, 1))
    np.testing.assert_array_equal(forces.size(), (n_frames, n_beads, 3))
def test_repulsion_layer():
    # Checks the RepulsionLayer output size, its callback indices, and its
    # energy against a manual calculation on the feature layer output.

    # Distances whose bead indices differ by more than 2
    repul_distances = [
        pair for pair in geom_stats.descriptions['Distances']
        if abs(pair[0] - pair[1]) > 2
    ]
    repul_idx = geom_stats.return_indices(repul_distances)
    n_repulsions = len(repul_distances)

    # Random excluded volumes and random interaction exponents
    ex_vols = np.random.uniform(2, 8, n_repulsions)
    exps = np.random.randint(1, 6, n_repulsions)

    # One interaction dictionary per repulsion distance
    repul_list = []
    for ex_vol, exp in zip(ex_vols, exps):
        repul_list.append({'ex_vol': ex_vol, "exp": exp})

    repulsion_potential = RepulsionLayer(repul_idx, repul_list)
    geom_feat = GeometryFeature(feature_tuples='all_backbone', n_beads=beads)
    output_features = geom_feat(coords)
    energy = repulsion_potential(
        output_features[:, repulsion_potential.callback_indices])

    # One scalar energy per frame
    np.testing.assert_array_equal(energy.size(), (frames, 1))
    # The callback indices are stored unchanged
    assert repul_idx == repulsion_potential.callback_indices

    # Manual energy: sum over distances of (ex_vol / distance)**exp,
    # halved
    p1 = torch.tensor(ex_vols).float()
    p2 = torch.tensor(exps).float()
    repulsion_terms = (p1 / output_features[:, repul_idx])**p2
    energy_check = torch.sum(repulsion_terms, 1).reshape(
        len(output_features), 1) / 2

    np.testing.assert_array_equal(energy.detach().numpy(),
                                  energy_check.detach().numpy())
def test_harmonic_layer():
    # Checks the HarmonicLayer output size, its callback indices, and its
    # energy against a manual calculation on the feature layer output.

    # Bond indices (integers) and the list of bond interaction
    # dictionaries used to assemble the prior
    bonds_idx = geom_stats.return_indices('Bonds')
    bonds_interactions, _ = geom_stats.get_prior_statistics(
        features='Bonds', as_list=True)

    # Evaluate the layer on the bond columns of the feature output
    harmonic_potential = HarmonicLayer(bonds_idx, bonds_interactions)
    geom_feat = GeometryFeature(feature_tuples='all_backbone', n_beads=beads)
    output_features = geom_feat(coords)
    energy = harmonic_potential(
        output_features[:, harmonic_potential.callback_indices])

    # One scalar energy per frame
    np.testing.assert_array_equal(energy.size(), (frames, 1))
    # The callback indices are stored unchanged
    assert bonds_idx == harmonic_potential.callback_indices

    # Collect the parameters column by column: row 0 holds the 'k'
    # values, row 1 the 'mean' values
    feature_stats = geom_stats.get_prior_statistics('Bonds')
    harmonic_parameters = torch.tensor([])
    for stat in feature_stats.values():
        column = torch.tensor([[stat['k']], [stat['mean']]])
        harmonic_parameters = torch.cat((harmonic_parameters, column),
                                        dim=1)

    # Manual energy: sum over bonds of k * (x - mean)**2, halved
    k_values = harmonic_parameters[0, :]
    means = harmonic_parameters[1, :]
    energy_check = torch.sum(
        k_values * (output_features[:, bonds_idx] - means)**2,
        1).reshape(len(output_features), 1) / 2

    np.testing.assert_array_equal(energy.detach().numpy(),
                                  energy_check.detach().numpy())
def test_prior_callback_order_2():
    # The order of callback indices should not change the final
    # HarmonicLayer energy, so the energy of a HarmonicLayer built from
    # shuffled bonds is compared with a manual calculation that uses the
    # default bond order.
    geom_feat = GeometryFeature(feature_tuples='all_backbone', n_beads=beads)
    output_features = geom_feat(coords)

    # Two-body tuples between adjacent beads, in a random order
    bonds_tuples = [
        pair for pair in geom_stats.master_description_tuples
        if len(pair) == 2 and abs(pair[0] - pair[1]) == 1
    ]
    np.random.shuffle(bonds_tuples)

    bonds_idx = geom_stats.return_indices(bonds_tuples)
    bonds_interactions, _ = geom_stats.get_prior_statistics(
        features=list(bonds_tuples), as_list=True)
    harmonic_potential = HarmonicLayer(bonds_idx, bonds_interactions)

    # Energy from the shuffled layer
    energy = harmonic_potential(
        output_features[:, harmonic_potential.callback_indices])

    # Manual reference in the default GeometryStatistics bond order:
    # row 0 holds the 'k' values, row 1 the 'mean' values
    feature_stats = geom_stats.get_prior_statistics('Bonds')
    feature_idx = geom_stats.return_indices('Bonds')
    harmonic_parameters = torch.tensor([])
    for stat in feature_stats.values():
        column = torch.tensor([[stat['k']], [stat['mean']]])
        harmonic_parameters = torch.cat((harmonic_parameters, column),
                                        dim=1)

    k_values = harmonic_parameters[0, :]
    means = harmonic_parameters[1, :]
    energy_check = torch.sum(
        k_values * (output_features[:, feature_idx] - means)**2,
        1).reshape(len(output_features), 1) / 2

    # Compare the energies summed over all frames
    np.testing.assert_allclose(np.sum(energy.detach().numpy()),
                               np.sum(energy_check.detach().numpy()),
                               rtol=1e-4)
def test_feature_combiner_shapes():
    # Output shapes of a FeatureCombiner holding a GeometryFeature and a
    # schnet feature
    full_geometry_feature = GeometryFeature(feature_tuples='all_backbone',
                                            n_beads=n_beads)
    schnet_feature, embedding_property, feature_size = (
        _get_random_schnet_feature(calculate_geometry=False))
    layer_list = [full_geometry_feature, schnet_feature]

    # Distance indices, with propagate_geometry switched on
    dist_idx = geom_stats.return_indices('Distances')
    feature_combiner = FeatureCombiner(layer_list,
                                       distance_indices=dist_idx,
                                       propagate_geometry=True)

    # One entry per feature tuple, with each four-body dihedral counted
    # twice (cosine and sine)
    feature_tuples = full_geometry_feature.feature_tuples
    n_dihedrals = len([f for f in feature_tuples if len(f) == 4])
    geom_feature_length = len(feature_tuples) + n_dihedrals

    # Size a first linear layer would need: the flattened schnet features
    # concatenated with the geometry features
    total_size = feature_size*n_beads + geom_feature_length

    # forward returns the object to be propagated to the NN together with
    # the geometry features
    feature_output, geometry_features = feature_combiner.forward(
        coords_torch, embedding_property)

    np.testing.assert_array_equal(feature_output.shape,
                                  [n_frames, n_beads, feature_size])
    np.testing.assert_array_equal(geometry_features.shape,
                                  [n_frames, geom_feature_length])
def test_lipschitz_full_model_random_mask():
    # Test lipschitz mask functionality for a random binary schnet mask
    # and a random binary terminal network mask, for a model that contains
    # both SchnetFeatures and a terminal network, using a strong Lipschitz
    # projection ( lambda_ << 1 ).
    # If the mask element is True, a strong Lipschitz projection
    # should occur - else, the weights should remain unchanged.

    # Here we create a CGSchNet model with a GeometryFeature layer,
    # 10 interaction blocks, a feature size, embedding, and cutoff taken
    # from the module-level setup, and a terminal network of 10 layers
    # with a random width
    width = np.random.randint(10, high=20)
    test_arch = LinearLayer(feature_size, width, activation=nn.Tanh())
    for _ in range(9):
        test_arch += LinearLayer(width, width, activation=nn.Tanh())
    test_arch += LinearLayer(width, 1, activation=None)
    schnet_feature = SchnetFeature(feature_size=feature_size,
                                   embedding_layer=embedding_layer,
                                   rbf_layer=rbf_layer,
                                   n_interaction_blocks=10,
                                   n_beads=beads,
                                   neighbor_cutoff=neighbor_cutoff,
                                   calculate_geometry=False)
    feature_list = FeatureCombiner([
        GeometryFeature(feature_tuples='all_backbone', n_beads=beads),
        schnet_feature
    ], distance_indices=dist_idx)
    full_test_model = CGnet(test_arch, ForceLoss(), feature=feature_list)

    # The pre_projection weights are the terminal network weights followed
    # by the SchnetFeature weights. They are cloned so that the snapshot
    # cannot alias the live parameters: without the copy, an in-place
    # projection would make the "unchanged" comparisons below pass
    # vacuously.
    lambda_ = float(1e-12)
    pre_projection_terminal_network_weights = [
        layer.weight.data.clone() for layer in full_test_model.arch
        if isinstance(layer, nn.Linear)
    ]
    pre_projection_schnet_weights = [
        weight.clone() for weight in _schnet_feature_linear_extractor(
            full_test_model.feature.layer_list[-1],
            return_weight_data_only=True)
    ]
    full_pre_projection_weights = (pre_projection_terminal_network_weights +
                                   pre_projection_schnet_weights)

    # Next, we assemble the masks for both the terminal network and the
    # SchnetFeature weights. There are 5 instances of nn.Linear for each
    # interaction block in the SchnetFeature
    n_terminal_linear = len([
        layer for layer in full_test_model.arch
        if isinstance(layer, nn.Linear)
    ])
    network_lip_mask = [
        np.random.randint(2, dtype=bool) for _ in range(n_terminal_linear)
    ]
    schnet_lip_mask = [
        np.random.randint(2, dtype=bool)
        for _ in range(5 * len(schnet_feature.interaction_blocks))
    ]
    full_lip_mask = network_lip_mask + schnet_lip_mask

    # Here we make the lipschitz projection
    lipschitz_projection(full_test_model, lambda_,
                         network_mask=network_lip_mask,
                         schnet_mask=schnet_lip_mask)
    post_projection_terminal_network_weights = [
        layer.weight.data for layer in full_test_model.arch
        if isinstance(layer, nn.Linear)
    ]
    post_projection_schnet_weights = _schnet_feature_linear_extractor(
        full_test_model.feature.layer_list[-1],
        return_weight_data_only=True)
    full_post_projection_weights = (post_projection_terminal_network_weights
                                    + post_projection_schnet_weights)

    # Here we verify each layer against its mask element
    for mask_element, pre, post in zip(full_lip_mask,
                                       full_pre_projection_weights,
                                       full_post_projection_weights):
        if mask_element:
            # The weights must have changed, and their norm must have
            # been reduced by the lipschitz projection
            np.testing.assert_raises(AssertionError,
                                     np.testing.assert_array_equal,
                                     pre.numpy(), post.numpy())
            assert np.linalg.norm(pre.numpy()) > np.linalg.norm(post.numpy())
        else:
            # The weights must be unaffected
            np.testing.assert_array_equal(pre.numpy(), post.numpy())
get_backbone_angles=False, get_backbone_dihedrals=False, get_redundant_distance_mapping=True) bonds_list, _ = geom_stats.get_prior_statistics('Bonds', as_list=True) bonds_idx = geom_stats.return_indices('Bonds') # Here we use the bond statistics to create a HarmonicLayer bond_potential = HarmonicLayer(bonds_idx, bonds_list) # Next, we produce the zscore statistics and create a ZscoreLayer zscores, _ = geom_stats.get_zscore_array() zscore_layer = ZscoreLayer(zscores) # Next, we create a GeometryFeature layer for the subsequent tests # We only want it to calculate the distances, so we specify that # the feature tuples are the ones we calculated in GeometryStatistics geometry_feature = GeometryFeature( feature_tuples=geom_stats.master_description_tuples) def test_combiner_geometry_feature(): # Tests FeatureCombiner for just single GeometryFeature # In this case, geometry output should be None. # First, we instantiate a FeatureCombiner layer_list = [geometry_feature] feature_combiner = FeatureCombiner(layer_list, save_geometry=False) # If there is simply a GeometryFeature, then feature_combiner.forward() # should return feature_ouput, geometry_output, with geometry_features # equal to None feature_output, geometry_output = feature_combiner(coords_torch) assert feature_combiner.interfeature_transforms == [None]
def test_lipschitz_full_model_all_mask():
    # Test lipschitz mask functionality when the 'all' option is passed as
    # both the schnet mask and the terminal network mask, for a model that
    # contains both SchnetFeatures and a terminal network, using a strong
    # Lipschitz projection ( lambda_ << 1 ).
    # In this case, we expect all weight layers to remain unchanged.

    # Here we create a CGSchNet model with a GeometryFeature layer,
    # 10 interaction blocks, a feature size, embedding, and cutoff taken
    # from the module-level setup, and a terminal network of 10 layers
    # with a random width
    width = np.random.randint(10, high=20)
    test_arch = LinearLayer(feature_size, width, activation=nn.Tanh())
    for _ in range(9):
        test_arch += LinearLayer(width, width, activation=nn.Tanh())
    test_arch += LinearLayer(width, 1, activation=None)
    schnet_feature = SchnetFeature(feature_size=feature_size,
                                   embedding_layer=embedding_layer,
                                   rbf_layer=rbf_layer,
                                   n_interaction_blocks=10,
                                   n_beads=beads,
                                   neighbor_cutoff=neighbor_cutoff,
                                   calculate_geometry=False)
    feature_list = FeatureCombiner([
        GeometryFeature(feature_tuples='all_backbone', n_beads=beads),
        schnet_feature
    ], distance_indices=dist_idx)
    full_test_model = CGnet(test_arch, ForceLoss(), feature=feature_list)

    # The pre_projection weights are the terminal network weights followed
    # by the SchnetFeature weights. They are cloned so that the snapshot
    # cannot alias the live parameters: every assertion below is an
    # "unchanged" check, so without the copy an in-place projection would
    # make the whole test pass vacuously.
    lambda_ = float(1e-12)
    pre_projection_terminal_network_weights = [
        layer.weight.data.clone() for layer in full_test_model.arch
        if isinstance(layer, nn.Linear)
    ]
    pre_projection_schnet_weights = [
        weight.clone() for weight in _schnet_feature_linear_extractor(
            full_test_model.feature.layer_list[-1],
            return_weight_data_only=True)
    ]
    full_pre_projection_weights = (pre_projection_terminal_network_weights +
                                   pre_projection_schnet_weights)

    # Here we make the lipschitz projection, specifying the 'all' option
    # for both the terminal network mask and the schnet mask
    lipschitz_projection(full_test_model, lambda_, network_mask='all',
                         schnet_mask='all')
    post_projection_terminal_network_weights = [
        layer.weight.data for layer in full_test_model.arch
        if isinstance(layer, nn.Linear)
    ]
    post_projection_schnet_weights = _schnet_feature_linear_extractor(
        full_test_model.feature.layer_list[-1],
        return_weight_data_only=True)
    full_post_projection_weights = (post_projection_terminal_network_weights
                                    + post_projection_schnet_weights)

    # Here we verify that all weight layers remain unaffected by the
    # strong Lipschitz projection
    for pre, post in zip(full_pre_projection_weights,
                         full_post_projection_weights):
        np.testing.assert_array_equal(pre.numpy(), post.numpy())
def test_cgnet_simulation():
    # Runs a simulation from a CGnet built with the GeometryFeature and
    # checks the shapes of its coordinate, force, and potential outputs.

    # Bond harmonic prior plus a GeometryFeature layer
    bonds_idx = geom_stats.return_indices('Bonds')
    bonds_interactions, _ = geom_stats.get_prior_statistics(
        features='Bonds', as_list=True)
    harmonic_potential = HarmonicLayer(bonds_idx, bonds_interactions)
    feature_layer = GeometryFeature(feature_tuples='all_backbone',
                                    n_beads=beads)
    num_feats = feature_layer(coords).size()[1]

    # Four hidden layers of a random width, then a scalar output layer
    rand = np.random.randint(1, 10)
    arch = LinearLayer(num_feats, rand, bias=True, activation=nn.Tanh())
    for _ in range(3):
        arch = arch + LinearLayer(rand, rand, bias=True,
                                  activation=nn.Tanh())
    arch = arch + LinearLayer(rand, 1, bias=True, activation=None)

    # CGnet model with force matching as the loss criterion
    model = CGnet(arch, ForceLoss(), feature=feature_layer,
                  priors=[harmonic_potential])
    model.eval()

    # Mock target protein force data
    forces = torch.randn((frames, beads, 3), requires_grad=False)

    # A single optimization step on the model
    optimizer = torch.optim.Adam(model.parameters(), lr=0.05,
                                 weight_decay=0)
    optimizer.zero_grad()
    energy, pred_forces = model.forward(coords)
    loss = model.criterion(pred_forces, forces)
    loss.backward()
    optimizer.step()

    # Random simulation length (4 or 8), saving every 2 or 4 frames
    length = np.random.choice([2, 4]) * 2
    save = np.random.choice([2, 4])

    # Instance a simulation and produce a CG trajectory
    my_sim = Simulation(model, coords, beta=geom_stats.beta, length=length,
                        save_interval=save, save_forces=True,
                        save_potential=True)
    traj = my_sim.simulate()

    # The saved outputs must have shapes matching the choices above
    n_saved = length // save
    coordinate_shape = (frames, n_saved, beads, dims)
    assert traj.shape == coordinate_shape
    assert my_sim.simulated_forces.shape == coordinate_shape
    assert my_sim.simulated_potential.shape == (frames, n_saved, 1)
# Number of frames frames = np.random.randint(1, 10) # Number of coarse-grained beads. We need at least 8 so we can do # dihedrals in the backbone tests (where every other atom is designated # as a backbone atom) beads = np.random.randint(8, 20) # Number of dimensions; for now geometry only handles 3 dims = 3 # Create a pseudo simulation dataset data = np.random.randn(frames, beads, dims) data_tensor = torch.Tensor(data) geom_feature = GeometryFeature(feature_tuples='all_backbone', n_beads=beads) _ = geom_feature.forward(data_tensor) stats = GeometryStatistics(data_tensor, backbone_inds='all', get_all_distances=True, get_backbone_angles=True, get_backbone_dihedrals=True, get_redundant_distance_mapping=True) def test_feature_tuples(): # Tests to see if the feature_tuples attribute is assembled correctly unique_tuples = [] for desc in stats.order: # for each type of feature
def test_combiner_output_with_geometry_propagation():
    # CGnet with a FeatureCombiner that propagates geometries must match a
    # manual concatenate-and-forward calculation of energies and forces.

    # All pairwise distances plus backbone angles and dihedrals
    full_geometry_feature = GeometryFeature(feature_tuples='all_backbone',
                                            n_beads=n_beads)
    # Random schnet feature that does not calculate geometry itself
    schnet_feature, embedding_property, feature_size = (
        _get_random_schnet_feature(calculate_geometry=False))
    # Distance indices
    dist_idx = geom_stats.return_indices('Distances')

    # Post-schnet fully connected network used by both calculations.
    # The geometry feature contributes one entry per feature tuple, with
    # each four-body dihedral counted twice (cosine and sine).
    feature_tuples = full_geometry_feature.feature_tuples
    n_dihedrals = len([f for f in feature_tuples if len(f) == 4])
    geom_feature_length = len(feature_tuples) + n_dihedrals
    total_size = feature_size*n_beads + geom_feature_length

    width = np.random.randint(5, high=10)  # random fully-connected width
    arch = LinearLayer(total_size, width, activation=nn.Tanh())
    for _ in range(2):
        arch += LinearLayer(width, width, activation=nn.Tanh())
    arch += LinearLayer(width, 1, activation=None)

    # --- Manual calculation ---
    # The distances forwarded through the schnet feature are reindexed
    # with the redundant distance mapping
    geometry_output = full_geometry_feature(coords_torch)
    distances = geometry_output[:, geom_stats.redundant_distance_mapping]
    schnet_output = schnet_feature(distances, embedding_property)

    # Flatten the schnet output per frame, then concatenate it with the
    # geometry output
    n_frames = coords_torch.shape[0]
    schnet_output = schnet_output.reshape(n_frames, -1)
    concatenated_features = torch.cat((schnet_output, geometry_output),
                                      dim=1)

    # Feed the concatenated features through the terminal network
    terminal_network = nn.Sequential(*arch)
    manual_energy = terminal_network(concatenated_features)
    # Add in the bond potential contribution
    manual_energy += bond_potential(
        geometry_output[:, bond_potential.callback_indices])
    # Forces are the negative gradient of the summed energy
    manual_forces = torch.autograd.grad(-torch.sum(manual_energy),
                                        coords_torch)[0]

    # --- Same calculation through CGnet with a FeatureCombiner ---
    layer_list = [full_geometry_feature, schnet_feature]
    feature_combiner = FeatureCombiner(layer_list,
                                       distance_indices=dist_idx,
                                       propagate_geometry=True)
    model = CGnet(arch, ForceLoss(), feature=feature_combiner,
                  priors=[bond_potential])

    # Forward the random protein data through the model
    energy, forces = model.forward(coords_torch,
                                   embedding_property=embedding_property)

    # The manual and CGnet calculations must match numerically
    np.testing.assert_array_equal(energy.detach().numpy(),
                                  manual_energy.detach().numpy())
    np.testing.assert_array_equal(forces.detach().numpy(),
                                  manual_forces.detach().numpy())