Exemplo n.º 1
0
def test_rmsd_matrix(trajectory_benchmark):
    """Compare the condensed RMSD matrix of three frames to stored values.

    Coordinates of the atoms named ``CA`` are extracted for the first,
    third, and last frames of the benchmark trajectory.
    """
    frames = (0, 2, -1)  # first, third, and last frames
    ca_atoms = trajectory_benchmark.select_atoms('name CA')
    coordinates = idpd.extract_coordinates(trajectory_benchmark,
                                           ca_atoms, frames)
    condensed = idpd.rmsd_matrix(coordinates, condensed=True)
    # One value per pair of frames
    expected = np.array([8.73, 8.92, 8.57])
    assert_allclose(condensed, expected, atol=0.01)
Exemplo n.º 2
0
def trajectory_centroids(a_universe,
                         selection='not name H*',
                         segment_length=1000,
                         n_representatives=1000):
    r"""Cluster a set of consecutive trajectory segments into a set
    of representative structures via structural similarity (RMSD)

    The simulated trajectory is divided into consecutive segments, and
    hierarchical clustering is performed on each segment to yield a
    limited number of representative structures (centroids) per segment.

    A trajectory shorter than ``segment_length`` is processed as a single
    segment spanning all of its frames. For longer trajectories, trailing
    frames that do not fill a whole segment are not considered.

    Parameters
    ----------
    a_universe : :class:`~MDAnalysis.core.universe.Universe`
        Topology and trajectory.
    selection : str
        atoms for which to calculate RMSD. See the
        `selections page <https://www.mdanalysis.org/docs/documentation_pages/selections.html>`_
        for atom selection syntax.
    segment_length: int
        divide trajectory into segments of this length
    n_representatives : int
        Desired total number of representative structures. The final number
        may be close but not equal to the desired number.

    Returns
    -------
    rep_ifr : list
        Sorted frame indexes of representative structures (centroids)

    Raises
    ------
    ValueError
        If ``segment_length`` is smaller than one, or if the trajectory
        contains no frames.
    """  # noqa: E501
    if segment_length < 1:
        raise ValueError('segment_length must be a positive integer')
    group = a_universe.select_atoms(selection)
    # Fragmentation of the trajectory
    n_frame = len(a_universe.trajectory)
    if n_frame < 1:
        raise ValueError('the trajectory contains no frames')
    # A trajectory shorter than one segment would give zero segments and a
    # division by zero below, so it is handled as one single segment.
    segment_length = min(segment_length, n_frame)
    n_segments = n_frame // segment_length
    # clusters per segment
    nc = max(1, int(n_representatives // n_segments))
    rep_ifr = []  # frame indexes of representative structures
    info = """Clustering the trajectory:
Creating {} representatives by partitioning {} frames into {} segments
and retrieving {} representatives from each segment.
    """.format(nc * n_segments, n_frame, n_segments, nc)
    sys.stdout.write(info)
    sys.stdout.flush()

    # Hierarchical clustering on each trajectory fragment
    for i_segment in tqdm(range(n_segments)):
        first_frame = i_segment * segment_length
        indexes = range(first_frame, first_frame + segment_length)
        xyz = extract_coordinates(a_universe, group, indexes)
        rmsd = rmsd_matrix(xyz, condensed=True)
        z = hierarchy.linkage(rmsd, method='complete')
        for node in Tree(z=z).nodes_at_depth(nc - 1):
            # The representative's id is added to the segment's first frame
            # to obtain a frame index of the whole trajectory
            rep_ifr.append(first_frame + node.representative(rmsd).id)
    rep_ifr.sort()
    return rep_ifr
Exemplo n.º 3
0
def cluster_trajectory(a_universe,
                       selection='not name H*',
                       segment_length=1000,
                       n_representatives=1000):
    r"""Build a hierarchical tree out of representative trajectory frames,
    using structural similarity (RMSD)

    Representative frames are first obtained with
    :func:`trajectory_centroids`, which splits the trajectory into segments
    and clusters each of them. The RMSD matrix between the representatives
    is then clustered with complete linkage into the final hierarchical
    tree. Every leaf of the tree receives a scalar property named
    ``iframe`` holding the trajectory frame index of its representative.

    Parameters
    ----------
    a_universe : :class:`~MDAnalysis.core.universe.Universe`
        Topology and trajectory.
    selection : str
        atoms for which to calculate RMSD. See the
        `selections page <https://www.mdanalysis.org/docs/documentation_pages/selections.html>`_
        for atom selection syntax.
    segment_length: int
        divide trajectory into segments of this length
    n_representatives : int
        Desired total number of representative structures. The final number
        may be close but not equal to the desired number.

    Returns
    -------
    :class:`~idpflex.cluster.ClusterTrove`
        clustering results for the representatives
    """  # noqa: E501
    centroid_frames = trajectory_centroids(
        a_universe,
        selection=selection,
        segment_length=segment_length,
        n_representatives=n_representatives)

    # Condensed RMSD matrix between the representative structures
    atoms = a_universe.select_atoms(selection)
    coordinates = extract_coordinates(a_universe, atoms, centroid_frames)
    condensed_rmsd = rmsd_matrix(coordinates, condensed=True)

    # Final tree, with each leaf tagged with its trajectory frame index
    linkage_matrix = hierarchy.linkage(condensed_rmsd, method='complete')
    tree = Tree(z=linkage_matrix)
    for position, leaf in enumerate(tree.leafs):
        frame_tag = ScalarProperty(name='iframe',
                                   y=centroid_frames[position])
        leaf[frame_tag.name] = frame_tag

    return ClusterTrove(centroid_frames, condensed_rmsd, tree)
Exemplo n.º 4
0
def test_extract_coordinates(trajectory_benchmark):
    """Compare extracted coordinates of two frames to stored values.

    The selection is restricted to atoms named ``CA`` in residue number 2,
    and coordinates are read for the first and last trajectory frames.
    """
    frames = (0, -1)  # first and last frame in trajectory
    ca_atoms = trajectory_benchmark.select_atoms('resnum 2 and name CA')
    coordinates = idpd.extract_coordinates(trajectory_benchmark,
                                           ca_atoms, frames)
    expected = np.array([[[53.8, 54.6, 38.8]],
                         [[48.3, 46.6, 43.1]]])
    assert_allclose(coordinates, expected, atol=0.1)
Exemplo n.º 5
0
def cluster_trajectory(a_universe,
                       selection='not name H*',
                       segment_length=1000,
                       n_representatives=1000):
    r"""Cluster a set of representative structures

    The simulated trajectory is divided into segments, and hierarchical
    clustering is performed on each segment to yield a limited number of
    representative structures. These are then clustered into the final
    hierarchical tree.

    Frame indexes from each segment are collected as cluster representatives.

    A trajectory shorter than ``segment_length`` is processed as a single
    segment spanning all of its frames. For longer trajectories, trailing
    frames that do not fill a whole segment are not considered.

    Parameters
    ----------
    a_universe : :class:`~MDAnalysis.core.universe.Universe`
        Topology and trajectory.
    selection : str
        atoms for which to calculate RMSD
    segment_length: int
        divide trajectory into chunks of this length
    n_representatives : int
        Target total number of representative structures. The final number
        may be close but not equal to the target number.

    Returns
    -------
    :class:`~idpflex.cluster.ClusterTrove`
        clustering results for the representatives

    Raises
    ------
    ValueError
        If ``segment_length`` is smaller than one, or if the trajectory
        contains no frames.
    """
    if segment_length < 1:
        raise ValueError('segment_length must be a positive integer')
    group = a_universe.select_atoms(selection)

    # Fragmentation of the trajectory
    n_frame = len(a_universe.trajectory)
    if n_frame < 1:
        raise ValueError('the trajectory contains no frames')
    # A trajectory shorter than one segment would give zero segments and a
    # division by zero below, so it is handled as one single segment.
    segment_length = min(segment_length, n_frame)
    n_segments = n_frame // segment_length
    # clusters per segment
    nc = max(1, int(n_representatives // n_segments))
    rep_ifr = []  # frame indexes of representative structures

    info = """Clustering the trajectory:
Creating {} representatives by partitioning {} frames into {} segments
and retrieving {} representatives from each segment.
    """.format(nc * n_segments, n_frame, n_segments, nc)
    sys.stdout.write(info)
    sys.stdout.flush()

    # Hierarchical clustering on each trajectory fragment
    for i_segment in tqdm(range(n_segments)):
        first_frame = i_segment * segment_length
        indexes = range(first_frame, first_frame + segment_length)
        xyz = extract_coordinates(a_universe, group, indexes)
        rmsd = rmsd_matrix(xyz, condensed=True)
        z = hierarchy.linkage(rmsd, method='complete')
        for node in Tree(z=z).nodes_at_depth(nc - 1):
            # The representative's id is added to the segment's first frame
            # to obtain a frame index of the whole trajectory
            rep_ifr.append(first_frame + node.representative(rmsd).id)
    rep_ifr.sort()

    # Cluster the representative structures
    xyz = extract_coordinates(a_universe, group, rep_ifr)
    rmsd = rmsd_matrix(xyz, condensed=True)
    tree = Tree(z=hierarchy.linkage(rmsd, method='complete'))
    # Tag every leaf with the trajectory frame index of its representative
    for ileaf, leaf in enumerate(tree.leafs):
        leaf.add_property(ScalarProperty(name='iframe', y=rep_ifr[ileaf]))

    return ClusterTrove(rep_ifr, rmsd, tree)