Пример #1
0
def test_equality_with_cgnet_distances():
    # Cross-check the CA-CA distances stored on a GeometryFeature against
    # the distances mdtraj computes for the same coordinates.

    # The forward pass makes the distances available as an attribute; its
    # return value is not needed here.
    geom_feature = GeometryFeature(feature_tuples='all_backbone',
                                   n_beads=beads)
    geom_feature.forward(data_tensor)

    # Build a trajectory from the same data for the reference calculation
    molecule = CGMolecule(names=names, resseq=resseq, resmap=resmap)
    traj = molecule.make_trajectory(data)

    # Every unordered pair of CA beads (lower index first); these double
    # as feature tuples
    CA_inds = [idx for idx, name in enumerate(names) if name == 'CA']
    CA_pairs = [(first, second)
                for pos, first in enumerate(CA_inds[:-1])
                for second in CA_inds[pos + 1:]]
    mdtraj_CA_dists = md.compute_distances(traj, CA_pairs)

    # Look up the column that each CA pair occupies among the distance
    # features
    distance_descriptions = geom_feature.descriptions['Distances']
    CA_feature_tuple_dict = {}
    for column, pair in enumerate(distance_descriptions):
        if pair in CA_pairs:
            CA_feature_tuple_dict[pair] = column

    # Select the CA columns, in the same order as CA_pairs
    CA_columns = [CA_feature_tuple_dict[pair] for pair in CA_pairs]
    all_cgnet_dists = geom_feature.distances.numpy()
    cgnet_CA_dists = all_cgnet_dists[:, CA_columns]

    np.testing.assert_allclose(mdtraj_CA_dists, cgnet_CA_dists, rtol=1e-6)
Пример #2
0
def test_distance_features():
    # Compare the pairwise distance features with distances computed by
    # scipy, one frame at a time.

    geom_feature = GeometryFeature(feature_tuples='all_backbone',
                                   n_beads=beads)
    # The forward pass computes the features (distances, angles, dihedrals)
    # and exposes them as attributes; the returned tensor is not used.
    geom_feature.forward(data_tensor)

    # Bead-index pair described by each distance feature column
    pair_descriptions = geom_feature.descriptions['Distances']

    for frame_ind in range(frames):
        # Full square distance matrix for this frame
        condensed = scipy.spatial.distance.pdist(data[frame_ind])
        square_dists = scipy.spatial.distance.squareform(condensed)

        # Pick the scipy distances in the order used by GeometryFeature
        expected = [square_dists[pair] for pair in pair_descriptions]
        observed = list(geom_feature.distances[frame_ind].numpy())

        np.testing.assert_allclose(observed, expected, rtol=1e-6)
Пример #3
0
def test_dihedral_features():
    # Compare the backbone dihedral features with a manual calculation.

    # For four consecutive positions a, b, c, d the dihedral \alpha of
    # a-b-c-d (the angle between the planes a-b-c and b-c-d) is computed
    # below as:
    #
    # \overline{ba} = b - a
    # \overline{cb} = c - b
    # \overline{dc} = d - c
    #
    # % normals of the first and second plane, respectively
    # n_1 = \overline{ba} \times \overline{cb}
    # n_2 = \overline{cb} \times \overline{dc}
    #
    # m_1 = n_2 \times n_1
    #
    # \sin(\alpha) = \frac{m_1 \cdot \overline{cb}}
    #                     {\sqrt{\overline{cb} \cdot \overline{cb}}}
    # \cos(\alpha) = n_2 \cdot n_1
    # \alpha = \arctan2(\sin(\alpha), \cos(\alpha))

    geom_feature = GeometryFeature(feature_tuples='all_backbone',
                                   n_beads=beads)
    # The forward pass computes the features (distances, angles, dihedrals)
    # and exposes them as attributes; the returned tensor is not used.
    geom_feature.forward(data_tensor)

    # Reference dihedrals: one list per frame, one entry per window of
    # four consecutive beads
    beads_per_frame = data.shape[1]
    diheds = []
    for frame_data in data:
        frame_diheds = []
        for start in range(beads_per_frame - 3):
            a, b, c, d = frame_data[start:start + 4]

            ba = b - a
            cb = c - b
            dc = d - c

            n1 = np.cross(ba, cb)
            n2 = np.cross(cb, dc)
            m1 = np.cross(n2, n1)

            sin_term = np.dot(m1, cb) / np.sqrt(np.dot(cb, cb))
            cos_term = np.dot(n2, n1)
            frame_diheds.append(np.arctan2(sin_term, cos_term))
        diheds.append(frame_diheds)

    # Recover the angles from the stored sines and cosines rather than
    # comparing the two components separately
    feature_diheds = [
        np.arctan2(frame_sines.numpy(), frame_cosines.numpy())
        for frame_sines, frame_cosines in zip(geom_feature.dihedral_sines,
                                              geom_feature.dihedral_cosines)
    ]

    # Only magnitudes are compared, so the sign convention is not checked
    np.testing.assert_allclose(np.abs(feature_diheds),
                               np.abs(diheds),
                               rtol=1e-4)
Пример #4
0
def test_zscore_layer():
    # Check that ZscoreLayer normalizes features the same way as an
    # explicit (feature - mean) / std calculation.

    # Notes
    # -----
    # rescaled_feat_truth is in principle equal to:
    #
    # from sklearn.preprocessing import StandardScaler
    # scalar = StandardScaler()
    # rescaled_feat_truth = scalar.fit_transform(feat)
    #
    # However, the equality is only preserved with precision >= 1e-4.

    # Statistics for every feature
    full_prior_stats = geom_stats.get_prior_statistics()

    # Row 0 holds the means and row 1 the standard deviations, one column
    # per feature
    zscores = torch.zeros((2, len(full_prior_stats)))
    for column, feature_stats in enumerate(full_prior_stats.values()):
        zscores[0, column] = feature_stats['mean']
        zscores[1, column] = feature_stats['std']

    # Featurize the test coordinates and rescale them by hand
    geom_feat = GeometryFeature(feature_tuples='all_backbone', n_beads=beads)
    feat = geom_feat(coords)
    means = zscores[0, :]
    stds = zscores[1, :]
    rescaled_feat_truth = (feat - means) / stds

    # The layer's output must match the reference exactly
    zlayer = ZscoreLayer(zscores)
    rescaled_feat = zlayer(feat)

    observed = rescaled_feat.detach().numpy()
    expected = rescaled_feat_truth.detach().numpy()
    np.testing.assert_array_equal(observed, expected)
Пример #5
0
def test_equality_with_cgnet_angles():
    # Make sure the CA backbone angles calculated by GeometryFeature are
    # consistent with the ones mdtraj computes.
    # This test appears here because it requires an mdtraj dependency.
    molecule = CGMolecule(names=names, resseq=resseq, resmap=resmap)
    traj = molecule.make_trajectory(data)

    # Backbone angles are the triples of consecutive CA beads
    CA_inds = [idx for idx, name in enumerate(names) if name == 'CA']
    backbone_angles = list(zip(CA_inds, CA_inds[1:], CA_inds[2:]))
    mdtraj_angles = md.compute_angles(traj, backbone_angles)

    # Build a GeometryFeature restricted to those angle triples; the
    # forward pass makes the angles available as an attribute
    geom_feature = GeometryFeature(feature_tuples=backbone_angles)
    geom_feature.forward(data_tensor)

    np.testing.assert_allclose(mdtraj_angles, geom_feature.angles, rtol=1e-4)
Пример #6
0
def test_distance_index_shuffling():
    # Distances must follow the order of the feature tuples: shuffling the
    # tuples should permute the computed distances the same way.

    # One random frame of 10 beads in 3 dimensions
    num_beads = 10
    data_to_shuffle = np.random.randn(1, num_beads, 3)
    data_to_shuffle_tensor = torch.Tensor(data_to_shuffle)

    y_dist_inds, _ = g.get_distance_indices(num_beads)

    # Reference feature using the distance tuples in their original order.
    # The forward pass exposes the distances as an attribute.
    geom_feature = GeometryFeature(feature_tuples=y_dist_inds)
    geom_feature.forward(data_to_shuffle_tensor)

    # Random permutation of the distance tuples
    inds = np.arange(len(y_dist_inds))
    np.random.shuffle(inds)
    shuffled_inds = list(map(tuple, np.array(y_dist_inds)[inds]))

    geom_feature_shuffle = GeometryFeature(feature_tuples=shuffled_inds)
    geom_feature_shuffle.forward(data_to_shuffle_tensor)

    # Indexing the unshuffled distances with the permutation must
    # reproduce the shuffled feature's distances
    reordered_reference = geom_feature.distances[0][inds]
    np.testing.assert_array_equal(geom_feature_shuffle.distances[0],
                                  reordered_reference)
Пример #7
0
def test_equality_with_cgnet_dihedrals():
    # Compare the dihedral sines and cosines from GeometryFeature with the
    # phi and psi angles computed by mdtraj.

    # The forward pass exposes the dihedral sines/cosines as attributes
    geom_feature = GeometryFeature(feature_tuples='all_backbone',
                                   n_beads=beads)
    geom_feature.forward(data_tensor)

    molecule = CGMolecule(names=names, resseq=resseq, resmap=resmap)
    traj = molecule.make_trajectory(data)

    # Element [1] of each mdtraj result holds the angle values
    mdtraj_phis = md.compute_phi(traj)[1]
    mdtraj_psis = md.compute_psi(traj)[1]

    # To get phi's and psi's out of cgnet, we need to specify which
    # indices they correspond to along the backbone
    # ['N', 'CA', 'C', 'N'] dihedrals
    phi_inds = [3 * res for res in range(residues)]
    # ['CA', 'C', 'N', 'CA'] dihedrals
    psi_inds = [3 * res + 1 for res in range(residues)]

    cgnet_cosines = geom_feature.dihedral_cosines.numpy()
    cgnet_sines = geom_feature.dihedral_sines.numpy()

    # Check cosines then sines, first for phi and then for psi
    for mdtraj_angles, columns in ((mdtraj_phis, phi_inds),
                                   (mdtraj_psis, psi_inds)):
        np.testing.assert_allclose(np.cos(mdtraj_angles),
                                   cgnet_cosines[:, columns],
                                   rtol=1e-4)
        np.testing.assert_allclose(np.sin(mdtraj_angles),
                                   cgnet_sines[:, columns],
                                   rtol=1e-4)
Пример #8
0
def test_dihedral_index_shuffling():
    # Dihedrals must follow the order of the feature tuples: shuffling the
    # tuples should permute the computed sines and cosines the same way.

    # One random frame of 100 beads in 3 dimensions
    num_beads = 100
    data_to_shuffle = np.random.randn(1, num_beads, 3)
    data_to_shuffle_tensor = torch.Tensor(data_to_shuffle)

    # Every window of four consecutive beads defines one dihedral
    num_diheds = num_beads - 3
    y_dihed_inds = [tuple(range(start, start + 4))
                    for start in range(num_diheds)]

    # Reference feature with the dihedrals in their natural order.
    # The forward pass exposes the sines/cosines as attributes.
    geom_feature = GeometryFeature(feature_tuples=y_dihed_inds)
    geom_feature.forward(data_to_shuffle_tensor)

    # Random permutation of the dihedral tuples
    inds = np.arange(num_diheds)
    np.random.shuffle(inds)
    shuffled_inds = list(map(tuple, np.array(y_dihed_inds)[inds]))

    geom_feature_shuffle = GeometryFeature(feature_tuples=shuffled_inds)
    geom_feature_shuffle.forward(data_to_shuffle_tensor)

    # Indexing the unshuffled values with the permutation must reproduce
    # the shuffled feature's values
    reference_cosines = geom_feature.dihedral_cosines[0][inds]
    np.testing.assert_allclose(geom_feature_shuffle.dihedral_cosines[0],
                               reference_cosines,
                               rtol=1e-5)

    reference_sines = geom_feature.dihedral_sines[0][inds]
    np.testing.assert_allclose(geom_feature_shuffle.dihedral_sines[0],
                               reference_sines,
                               rtol=1e-5)
Пример #9
0
def test_backbone_angle_features():
    # Compare the backbone angle features with a manual calculation.

    # For three consecutive positions a, b, c the angle \theta at b is
    # computed below as:
    #
    # \overline{ba} = a - b
    # \overline{cb} = c - b
    # \cos(\theta) = \frac{\overline{ba} \cdot \overline{cb}}
    #                     {||\overline{ba}|| ||\overline{cb}||}
    # \theta = \arccos(\cos(\theta))

    geom_feature = GeometryFeature(feature_tuples='all_backbone',
                                   n_beads=beads)
    # The forward pass computes the features (distances, angles, dihedrals)
    # and exposes them as attributes; the returned tensor is not used.
    geom_feature.forward(data_tensor)

    # Reference angles: one list per frame, one entry per window of three
    # consecutive beads
    beads_per_frame = data.shape[1]
    angles = []
    for frame_data in data:
        frame_angles = []
        for start in range(beads_per_frame - 2):
            a, b, c = frame_data[start:start + 3]

            ba = a - b
            cb = c - b

            norm_product = np.linalg.norm(ba) * np.linalg.norm(cb)
            cos_angle = np.dot(ba, cb) / norm_product
            frame_angles.append(np.arccos(cos_angle))
        angles.append(frame_angles)

    np.testing.assert_allclose(geom_feature.angles, angles, rtol=1e-4)
Пример #10
0
def test_cgnet():
    # Checks that a CGnet model keeps its criterion, architecture and
    # priors, and that its forward pass returns an energy and forces with
    # the expected shapes.

    # Harmonic bond prior built from the bond statistics
    bonds_idx = geom_stats.return_indices('Bonds')
    bonds_interactions, _ = geom_stats.get_prior_statistics(features='Bonds',
                                                            as_list=True)
    harmonic_potential = HarmonicLayer(bonds_idx, bonds_interactions)

    # Feature layer; the number of features sets the network input width
    feature_layer = GeometryFeature(feature_tuples='all_backbone',
                                    n_beads=beads)
    num_feats = feature_layer(coords).size()[1]

    # Four Tanh layers of a random width followed by a linear layer with a
    # scalar output
    hidden_width = np.random.randint(1, 10)
    arch = LinearLayer(num_feats, hidden_width, bias=True,
                       activation=nn.Tanh())
    for _ in range(3):
        arch += LinearLayer(hidden_width, hidden_width, bias=True,
                            activation=nn.Tanh())
    arch += LinearLayer(hidden_width, 1, bias=True, activation=None)

    # Model with force matching as the loss criterion
    model = CGnet(arch,
                  ForceLoss(),
                  feature=feature_layer,
                  priors=[harmonic_potential])

    # The prior must be embedded
    assert model.priors is not None

    # The stored architecture must have the same length as the one passed in
    assert len(arch) == len(model.arch)

    # The criterion must be embedded correctly
    assert isinstance(model.criterion, ForceLoss)

    # Forward the test coordinates through the model
    energy, force = model.forward(coords)

    # One scalar energy per frame, and forces shaped like the input
    num_frames = coords.size()[0]
    np.testing.assert_array_equal(energy.size(), (num_frames, 1))
    np.testing.assert_array_equal(force.size(), coords.size())
Пример #11
0
def test_combiner_shape_with_geometry_propagation():
    # Tests a network with schnet features in which the geometry features
    # are also propagated through the neural network.

    # All pairwise distances plus backbone angles and dihedrals
    full_geometry_feature = GeometryFeature(feature_tuples='all_backbone',
                                            n_beads=n_beads)

    (schnet_feature,
     embedding_property,
     feature_size) = _get_random_schnet_feature(calculate_geometry=False)
    layer_list = [full_geometry_feature, schnet_feature]

    # Distance indices needed by the combiner
    dist_idx = geom_stats.return_indices('Distances')

    # propagate_geometry=True is the behavior under test
    feature_combiner = FeatureCombiner(layer_list, distance_indices=dist_idx,
                                       propagate_geometry=True)

    # Each feature tuple contributes one value, except four-body dihedrals,
    # which are counted twice to account for cosines and sines
    feature_tuples = full_geometry_feature.feature_tuples
    num_dihedrals = sum(1 for feat in feature_tuples if len(feat) == 4)
    geom_feature_length = len(feature_tuples) + num_dihedrals

    # Input width of the first linear layer: the flattened schnet features
    # concatenated with the geometry features
    total_size = feature_size*n_beads + geom_feature_length

    # Fully-connected network with a random width and a scalar output
    width = np.random.randint(5, high=10)
    arch = LinearLayer(total_size, width, activation=nn.Tanh())
    for _ in range(2):
        arch += LinearLayer(width, width, activation=nn.Tanh())
    arch += LinearLayer(width, 1, activation=None)
    model = CGnet(arch, ForceLoss(), feature=feature_combiner,
                  priors=[bond_potential])

    # Forward the random protein data through the model
    energy, forces = model.forward(coords_torch,
                                   embedding_property=embedding_property)

    # The CGnet output must have the correct sizes
    np.testing.assert_array_equal(energy.size(), (n_frames, 1))
    np.testing.assert_array_equal(forces.size(), (n_frames, n_beads, 3))
Пример #12
0
def test_repulsion_layer():
    # Tests the RepulsionLayer class for its energy calculation and output
    # size by comparing its forward pass with a manual calculation.

    # Distances between beads whose indices differ by more than 2
    repul_distances = [
        pair for pair in geom_stats.descriptions['Distances']
        if abs(pair[0] - pair[1]) > 2
    ]
    repul_idx = geom_stats.return_indices(repul_distances)
    num_interactions = len(repul_distances)

    # Random excluded volumes and random integer exponents, one of each
    # per interaction
    ex_vols = np.random.uniform(2, 8, num_interactions)
    exps = np.random.randint(1, 6, num_interactions)

    # One parameter dictionary per interaction
    repul_list = []
    for ex_vol, exp in zip(ex_vols, exps):
        repul_list.append({'ex_vol': ex_vol, "exp": exp})

    # Evaluate the layer on the features it asks for
    repulsion_potential = RepulsionLayer(repul_idx, repul_list)
    geom_feat = GeometryFeature(feature_tuples='all_backbone', n_beads=beads)
    output_features = geom_feat(coords)
    layer_inputs = output_features[:, repulsion_potential.callback_indices]
    energy = repulsion_potential(layer_inputs)

    # One scalar energy per frame
    np.testing.assert_array_equal(energy.size(), (frames, 1))
    # The layer must keep the indices it was given
    assert repul_idx == repulsion_potential.callback_indices

    # Manual energy: sum over interactions of (ex_vol / distance)**exp,
    # halved, as one column per frame
    p1 = torch.tensor(ex_vols).float()
    p2 = torch.tensor(exps).float()
    per_interaction = (p1 / output_features[:, repul_idx])**p2
    per_frame = torch.sum(per_interaction, 1)
    energy_check = per_frame.reshape(len(output_features), 1) / 2

    np.testing.assert_array_equal(energy.detach().numpy(),
                                  energy_check.detach().numpy())
Пример #13
0
def test_harmonic_layer():
    # Tests the HarmonicLayer class for its energy calculation and output
    # size by comparing its forward pass with a manual calculation.

    # Bond feature indices (integers) and the list of bond interaction
    # dictionaries used to assemble the prior
    bonds_idx = geom_stats.return_indices('Bonds')
    bonds_interactions, _ = geom_stats.get_prior_statistics(features='Bonds',
                                                            as_list=True)

    # Evaluate the layer on the features it asks for
    harmonic_potential = HarmonicLayer(bonds_idx, bonds_interactions)
    geom_feat = GeometryFeature(feature_tuples='all_backbone', n_beads=beads)
    output_features = geom_feat(coords)
    layer_inputs = output_features[:, harmonic_potential.callback_indices]
    energy = harmonic_potential(layer_inputs)

    # One scalar energy per frame
    np.testing.assert_array_equal(energy.size(), (frames, 1))
    # The layer must keep the indices it was given
    assert bonds_idx == harmonic_potential.callback_indices

    # Collect the harmonic parameters column by column: row 0 holds the
    # 'k' values and row 1 the 'mean' values
    feature_stats = geom_stats.get_prior_statistics('Bonds')
    harmonic_parameters = torch.tensor([])
    for stat in feature_stats.values():
        bond_column = torch.tensor([[stat['k']], [stat['mean']]])
        harmonic_parameters = torch.cat((harmonic_parameters, bond_column),
                                        dim=1)
    constants = harmonic_parameters[0, :]
    means = harmonic_parameters[1, :]

    # Manual energy: sum over bonds of k * (x - mean)**2, halved, as one
    # column per frame
    per_bond = constants * (output_features[:, bonds_idx] - means)**2
    per_frame = torch.sum(per_bond, 1)
    energy_check = per_frame.reshape(len(output_features), 1) / 2

    np.testing.assert_array_equal(energy.detach().numpy(),
                                  energy_check.detach().numpy())
Пример #14
0
def test_prior_callback_order_2():
    # The order of callback indices should not change the total
    # HarmonicLayer energy. A layer built from shuffled bond tuples is
    # compared with a manual calculation that uses the default bond order.
    geom_feat = GeometryFeature(feature_tuples='all_backbone', n_beads=beads)
    output_features = geom_feat(coords)

    # Bonds are the two-body tuples whose bead indices differ by one
    bonds_tuples = [
        pair for pair in geom_stats.master_description_tuples
        if len(pair) == 2 and abs(pair[0] - pair[1]) == 1
    ]
    np.random.shuffle(bonds_tuples)

    # Build the layer from the shuffled bonds
    bonds_idx = geom_stats.return_indices(bonds_tuples)
    bonds_interactions, _ = geom_stats.get_prior_statistics(
        features=list(bonds_tuples), as_list=True)
    harmonic_potential = HarmonicLayer(bonds_idx, bonds_interactions)

    # Energy from the shuffled layer
    layer_inputs = output_features[:, harmonic_potential.callback_indices]
    energy = harmonic_potential(layer_inputs)

    # Collect the harmonic parameters in the default 'Bonds' order: row 0
    # holds the 'k' values and row 1 the 'mean' values
    feature_stats = geom_stats.get_prior_statistics('Bonds')
    feature_idx = geom_stats.return_indices('Bonds')
    harmonic_parameters = torch.tensor([])
    for stat in feature_stats.values():
        bond_column = torch.tensor([[stat['k']], [stat['mean']]])
        harmonic_parameters = torch.cat((harmonic_parameters, bond_column),
                                        dim=1)
    constants = harmonic_parameters[0, :]
    means = harmonic_parameters[1, :]

    # Manual energy: sum over bonds of k * (x - mean)**2, halved, as one
    # column per frame
    per_bond = constants * (output_features[:, feature_idx] - means)**2
    per_frame = torch.sum(per_bond, 1)
    energy_check = per_frame.reshape(len(output_features), 1) / 2

    # Compare the energies summed over all frames
    np.testing.assert_allclose(np.sum(energy.detach().numpy()),
                               np.sum(energy_check.detach().numpy()),
                               rtol=1e-4)
Пример #15
0
def test_feature_combiner_shapes():
    # Checks the output shapes of a FeatureCombiner that chains a
    # GeometryFeature and a schnet feature.

    full_geometry_feature = GeometryFeature(feature_tuples='all_backbone',
                                            n_beads=n_beads)

    (schnet_feature,
     embedding_property,
     feature_size) = _get_random_schnet_feature(calculate_geometry=False)
    layer_list = [full_geometry_feature, schnet_feature]

    # Distance indices needed by the combiner
    dist_idx = geom_stats.return_indices('Distances')

    # Here, propagate_geometry is set to True
    feature_combiner = FeatureCombiner(layer_list, distance_indices=dist_idx,
                                       propagate_geometry=True)

    # Each feature tuple contributes one value, except four-body dihedrals,
    # which are counted twice to account for cosines and sines
    feature_tuples = full_geometry_feature.feature_tuples
    num_dihedrals = sum(1 for feat in feature_tuples if len(feat) == 4)
    geom_feature_length = len(feature_tuples) + num_dihedrals

    # The forward method returns the object to be propagated to the NN and
    # the geometry features
    feature_output, geometry_features = feature_combiner.forward(
        coords_torch, embedding_property)

    expected_feature_shape = [n_frames, n_beads, feature_size]
    expected_geometry_shape = [n_frames, geom_feature_length]
    np.testing.assert_array_equal(feature_output.shape,
                                  expected_feature_shape)
    np.testing.assert_array_equal(geometry_features.shape,
                                  expected_geometry_shape)
Пример #16
0
def test_lipschitz_full_model_random_mask():
    # Test lipschitz mask functionality for a random binary schnet mask
    # and a random binary terminal network mask for a model that contains
    # both SchnetFeatures and a terminal network,
    # using strong Lipschitz projection ( lambda_ << 1 ).
    # If the mask element is True, a strong Lipschitz projection
    # should occur - else, the weights should remain unchanged.

    # Here we create a CGSchNet model with a GeometryFeature layer,
    # 10 interaction blocks, and the feature size, embedding, and cutoff
    # from the module-level setup. The terminal network has ten Tanh layers
    # of a random width followed by a linear layer with a scalar output.
    width = np.random.randint(10, high=20)
    test_arch = LinearLayer(feature_size, width, activation=nn.Tanh())
    for _ in range(9):
        test_arch += LinearLayer(width, width, activation=nn.Tanh())
    test_arch += LinearLayer(width, 1, activation=None)
    schnet_feature = SchnetFeature(feature_size=feature_size,
                                   embedding_layer=embedding_layer,
                                   rbf_layer=rbf_layer,
                                   n_interaction_blocks=10,
                                   n_beads=beads,
                                   neighbor_cutoff=neighbor_cutoff,
                                   calculate_geometry=False)
    feature_list = FeatureCombiner(
        [
            GeometryFeature(feature_tuples='all_backbone', n_beads=beads),
            schnet_feature
        ],
        distance_indices=dist_idx)
    full_test_model = CGnet(test_arch, ForceLoss(), feature=feature_list)

    # The pre_projection weights are the terminal network weights followed by
    # the SchnetFeature weights.
    # NOTE: `.data` shares storage with the parameter, so the weights are
    # cloned here. Without the copy, a projection that rescales the weights
    # in place would also change the "pre" tensors and the comparison
    # below would compare each tensor with itself.
    lambda_ = float(1e-12)
    pre_projection_terminal_network_weights = [
        layer.weight.data.clone() for layer in full_test_model.arch
        if isinstance(layer, nn.Linear)
    ]
    pre_projection_schnet_weights = [
        weight.clone() for weight in _schnet_feature_linear_extractor(
            full_test_model.feature.layer_list[-1],
            return_weight_data_only=True)
    ]
    full_pre_projection_weights = (pre_projection_terminal_network_weights +
                                   pre_projection_schnet_weights)

    # Next, we assemble the masks for both the terminal network and the
    # SchnetFeature weights: one random boolean per nn.Linear layer of the
    # terminal network, and 5 per interaction block of the SchnetFeature
    # (there are 5 instances of nn.Linear in each interaction block)
    network_lip_mask = [
        np.random.randint(2, dtype=bool)
        for _ in pre_projection_terminal_network_weights
    ]
    schnet_lip_mask = [
        np.random.randint(2, dtype=bool)
        for _ in range(5 * len(schnet_feature.interaction_blocks))
    ]
    full_lip_mask = network_lip_mask + schnet_lip_mask

    # Here we make the lipschitz projection
    lipschitz_projection(full_test_model,
                         lambda_,
                         network_mask=network_lip_mask,
                         schnet_mask=schnet_lip_mask)
    post_projection_terminal_network_weights = [
        layer.weight.data for layer in full_test_model.arch
        if isinstance(layer, nn.Linear)
    ]
    post_projection_schnet_weights = _schnet_feature_linear_extractor(
        full_test_model.feature.layer_list[-1], return_weight_data_only=True)
    full_post_projection_weights = (post_projection_terminal_network_weights +
                                    post_projection_schnet_weights)

    # Here we verify that layers whose mask element is True were projected
    # and that layers whose mask element is False were left untouched
    for mask_element, pre, post in zip(full_lip_mask,
                                       full_pre_projection_weights,
                                       full_post_projection_weights):
        pre_array = pre.numpy()
        post_array = post.numpy()
        if mask_element:
            # The weights must have changed, and their norm must have been
            # reduced by the projection
            np.testing.assert_raises(AssertionError,
                                     np.testing.assert_array_equal,
                                     pre_array, post_array)
            assert np.linalg.norm(pre_array) > np.linalg.norm(post_array)
        else:
            # Unmasked layers must be unaffected
            np.testing.assert_array_equal(pre_array, post_array)
Пример #17
0
                                get_backbone_angles=False,
                                get_backbone_dihedrals=False,
                                get_redundant_distance_mapping=True)
# Bond statistics (as a list) and the matching bond feature indices taken
# from geom_stats (presumably the GeometryStatistics object whose
# construction ends just above - confirm against the full file)
bonds_list, _ = geom_stats.get_prior_statistics('Bonds', as_list=True)
bonds_idx = geom_stats.return_indices('Bonds')
# Here we use the bond statistics to create a HarmonicLayer
bond_potential = HarmonicLayer(bonds_idx, bonds_list)

# Next, we produce the zscore statistics and create a ZscoreLayer
zscores, _ = geom_stats.get_zscore_array()
zscore_layer = ZscoreLayer(zscores)

# Next, we create a GeometryFeature layer for the subsequent tests
# We only want it to calculate the distances, so we specify that
# the feature tuples are the ones we calculated in GeometryStatistics
geometry_feature = GeometryFeature(
    feature_tuples=geom_stats.master_description_tuples)


def test_combiner_geometry_feature():
    # Tests FeatureCombiner for just a single GeometryFeature.
    # In this case, the geometry output should be None.

    # First, we instantiate a FeatureCombiner that does not save geometry
    layer_list = [geometry_feature]
    feature_combiner = FeatureCombiner(layer_list,
                                       save_geometry=False)

    # If there is simply a GeometryFeature, then feature_combiner.forward()
    # should return feature_output, geometry_output, with geometry_output
    # equal to None. The feature output itself is not inspected here.
    _, geometry_output = feature_combiner(coords_torch)

    # A single layer needs no transform between features
    assert feature_combiner.interfeature_transforms == [None]
    # Check the expectation stated above instead of only documenting it
    assert geometry_output is None
Пример #18
0
def test_lipschitz_full_model_all_mask():
    # Test lipschitz mask functionality when the 'all' option is passed for
    # both the schnet mask and the terminal network mask, for a model that
    # contains both a SchnetFeature and a terminal network, using a strong
    # Lipschitz projection ( lambda_ << 1 ).
    # In this case, we expect all weight layers to remain unchanged.

    # Here we create a CGSchNet model with a GeometryFeature layer,
    # 10 interaction blocks, a random feature size, embedding, and
    # cutoff from the setup at the top of this file, and a terminal
    # network of 10 layers and with a random width
    width = np.random.randint(10, high=20)
    test_arch = LinearLayer(feature_size, width, activation=nn.Tanh())
    for _ in range(9):
        test_arch += LinearLayer(width, width, activation=nn.Tanh())
    test_arch += LinearLayer(width, 1, activation=None)
    schnet_feature = SchnetFeature(feature_size=feature_size,
                                   embedding_layer=embedding_layer,
                                   rbf_layer=rbf_layer,
                                   n_interaction_blocks=10,
                                   n_beads=beads,
                                   neighbor_cutoff=neighbor_cutoff,
                                   calculate_geometry=False)
    feature_list = FeatureCombiner([
        GeometryFeature(feature_tuples='all_backbone', n_beads=beads),
        schnet_feature
    ],
                                   distance_indices=dist_idx)
    full_test_model = CGnet(test_arch, ForceLoss(), feature=feature_list)

    def _snapshot_weights():
        # Returns independent copies of the terminal network weights followed
        # by the SchnetFeature weights. The copies are taken with clone()
        # because `weight.data` shares storage with the live parameter: if
        # the projection wrote in place, an un-cloned "pre" snapshot would
        # silently track the change and the comparison below would pass
        # vacuously.
        terminal_network_weights = [
            layer.weight.data.clone() for layer in full_test_model.arch
            if isinstance(layer, nn.Linear)
        ]
        schnet_weights = [
            weight.clone() for weight in _schnet_feature_linear_extractor(
                full_test_model.feature.layer_list[-1],
                return_weight_data_only=True)
        ]
        return terminal_network_weights + schnet_weights

    # Record the weights before the projection
    lambda_ = float(1e-12)
    full_pre_projection_weights = _snapshot_weights()

    # Here we make the lipschitz projection, specifying the 'all' option for
    # both the terminal network mask and the schnet mask
    lipschitz_projection(full_test_model,
                         lambda_,
                         network_mask='all',
                         schnet_mask='all')
    full_post_projection_weights = _snapshot_weights()

    # Here we verify that all weight layers remain unaffected by the strong
    # Lipschitz projection
    for pre, post in zip(full_pre_projection_weights,
                         full_post_projection_weights):
        np.testing.assert_array_equal(pre.numpy(), post.numpy())
Пример #19
0
def test_cgnet_simulation():
    # Runs a short simulation from a CGnet built on a GeometryFeature and
    # checks the shapes of the saved coordinates, forces, and potential.

    # Harmonic bond prior assembled from the shared geometry statistics
    bond_indices = geom_stats.return_indices('Bonds')
    bond_stats, _ = geom_stats.get_prior_statistics(features='Bonds',
                                                    as_list=True)
    bond_prior = HarmonicLayer(bond_indices, bond_stats)

    # Feature layer; its output width sets the input size of the network
    feature_layer = GeometryFeature(feature_tuples='all_backbone',
                                    n_beads=beads)
    featurized = feature_layer(coords)
    num_feats = featurized.size()[1]

    # Terminal network: an input layer, three equally wide hidden layers
    # (random width), and a bias-carrying scalar output layer
    hidden_width = np.random.randint(1, 10)
    arch = LinearLayer(num_feats, hidden_width, bias=True,
                       activation=nn.Tanh())
    for _ in range(3):
        arch = arch + LinearLayer(hidden_width, hidden_width, bias=True,
                                  activation=nn.Tanh())
    arch = arch + LinearLayer(hidden_width, 1, bias=True, activation=None)

    # CGnet with a force-matching loss and the bond prior, put in eval mode
    model = CGnet(arch,
                  ForceLoss(),
                  feature=feature_layer,
                  priors=[bond_prior])
    model.eval()

    # Mock target forces for the protein
    target_forces = torch.randn((frames, beads, 3), requires_grad=False)

    # One optimization step with Adam on the force-matching loss
    optimizer = torch.optim.Adam(model.parameters(), lr=0.05, weight_decay=0)
    optimizer.zero_grad()
    energy, predicted_forces = model.forward(coords)
    loss = model.criterion(predicted_forces, target_forces)
    loss.backward()
    optimizer.step()

    # Random simulation length (4 or 8 steps) and save interval (2 or 4)
    length = np.random.choice([2, 4]) * 2
    save = np.random.choice([2, 4])

    # Set up the simulation and generate a CG trajectory
    sim_options = {
        'beta': geom_stats.beta,
        'length': length,
        'save_interval': save,
        'save_forces': True,
        'save_potential': True,
    }
    my_sim = Simulation(model, coords, **sim_options)
    traj = my_sim.simulate()

    # Every saved quantity should hold length // save entries per frame
    n_saved = length // save
    assert traj.shape == (frames, n_saved, beads, dims)
    assert my_sim.simulated_forces.shape == (frames, n_saved, beads, dims)
    assert my_sim.simulated_potential.shape == (frames, n_saved, 1)
Пример #20
0
# Shared module-level fixtures: sizes are drawn at random, so the order of
# the statements below determines the order of the random draws.

# Number of frames: random integer with 1 <= frames < 10
frames = np.random.randint(1, 10)

# Number of coarse-grained beads: random integer with 8 <= beads < 20.
# We need at least 8 so we can do dihedrals in the backbone tests (where
# every other atom is designated as a backbone atom)
beads = np.random.randint(8, 20)

# Number of dimensions; for now geometry only handles 3
dims = 3

# Create a pseudo simulation dataset of shape (frames, beads, dims) drawn
# from a standard normal distribution, plus a torch tensor built from it
data = np.random.randn(frames, beads, dims)
data_tensor = torch.Tensor(data)

# Feature layer over the data; the forward pass is run once up front and
# its return value is discarded.
# NOTE(review): presumably the forward pass populates attributes on
# geom_feature that the tests read -- confirm.
geom_feature = GeometryFeature(feature_tuples='all_backbone', n_beads=beads)
_ = geom_feature.forward(data_tensor)

# Statistics over the same data, treating every bead as a backbone bead and
# requesting all distances, backbone angles, backbone dihedrals, and the
# redundant distance mapping
stats = GeometryStatistics(data_tensor,
                           backbone_inds='all',
                           get_all_distances=True,
                           get_backbone_angles=True,
                           get_backbone_dihedrals=True,
                           get_redundant_distance_mapping=True)


def test_feature_tuples():
    # Tests to see if the feature_tuples attribute is assembled correctly
    unique_tuples = []

    for desc in stats.order:  # for each type of feature
Пример #21
0
def test_combiner_output_with_geometry_propagation():
    # Checks that a CGnet using a FeatureCombiner with geometry propagation
    # reproduces a hand-assembled calculation of the energy and forces.

    # Geometry feature over the 'all_backbone' feature tuples
    full_geometry_feature = GeometryFeature(feature_tuples='all_backbone',
                                            n_beads=n_beads)
    # Random schnet feature that does not compute geometry on its own
    (schnet_feature,
     embedding_property,
     feature_size) = _get_random_schnet_feature(calculate_geometry=False)
    # Indices of the distance features
    dist_idx = geom_stats.return_indices('Distances')

    # Size of the terminal network input. Each four-body tuple (dihedral)
    # is counted twice in the geometry feature length, once for the cosine
    # and once for the sine.
    feature_tuples = full_geometry_feature.feature_tuples
    n_four_body = sum(1 for tup in feature_tuples if len(tup) == 4)
    geom_feature_length = len(feature_tuples) + n_four_body
    total_size = feature_size * n_beads + geom_feature_length

    # Terminal fully connected network with a random width
    width = np.random.randint(5, high=10)
    arch = LinearLayer(total_size, width, activation=nn.Tanh())
    for _ in range(2):
        arch += LinearLayer(width, width, activation=nn.Tanh())
    arch += LinearLayer(width, 1, activation=None)

    # --- Manual calculation ---
    # The distances handed to the schnet feature are the geometry output
    # reindexed through the redundant distance mapping
    geometry_output = full_geometry_feature(coords_torch)
    redundant_map = geom_stats.redundant_distance_mapping
    distances = geometry_output[:, redundant_map]
    schnet_output = schnet_feature(distances, embedding_property)

    # Flatten the schnet output per frame, then concatenate it with the
    # geometry output along the feature axis
    frame_count = coords_torch.shape[0]
    flat_schnet_output = schnet_output.reshape(frame_count, -1)
    concatenated_features = torch.cat((flat_schnet_output, geometry_output),
                                      dim=1)

    # Push the concatenated features through the terminal network, add the
    # bond prior contribution, and differentiate to obtain the forces
    terminal_network = nn.Sequential(*arch)
    manual_energy = terminal_network(concatenated_features)
    bond_features = geometry_output[:, bond_potential.callback_indices]
    manual_energy += bond_potential(bond_features)
    (manual_forces,) = torch.autograd.grad(-torch.sum(manual_energy),
                                           coords_torch)

    # --- Same calculation through CGnet ---
    # The model gets the same layers through a FeatureCombiner that
    # propagates geometry, plus the same bond prior
    feature_combiner = FeatureCombiner(
        [full_geometry_feature, schnet_feature],
        distance_indices=dist_idx,
        propagate_geometry=True)
    model = CGnet(arch, ForceLoss(), feature=feature_combiner,
                  priors=[bond_potential])
    energy, forces = model.forward(coords_torch,
                                   embedding_property=embedding_property)

    # Both routes must agree exactly
    for model_value, manual_value in ((energy, manual_energy),
                                      (forces, manual_forces)):
        np.testing.assert_array_equal(model_value.detach().numpy(),
                                      manual_value.detach().numpy())