Exemplo n.º 1
0
def test_all_search(nr_points, dim, bucket_size, query_radius):
    """Check that the all-pairs neighbor search is self-consistent.

    A tree is filled with random coordinates, every pair of points within
    ``query_radius`` is searched, and the number of reported index pairs is
    compared with the number of reported radii.  A ``None`` result from
    either accessor is counted as zero entries.

    Arguments:
     - nr_points: number of points used in test
     - dim: dimension of coords
     - bucket_size: nr of points per tree node
     - query_radius: radius of search

    Returns True when both counts agree, False otherwise.
    """
    tree = KDTree(dim, bucket_size)
    tree.set_coords(random.random((nr_points, dim)))
    tree.all_search(query_radius)

    pair_indices = tree.all_get_indices()
    n_indices = 0 if pair_indices is None else len(pair_indices)

    pair_radii = tree.all_get_radii()
    n_radii = 0 if pair_radii is None else len(pair_radii)

    return n_indices == n_radii
Exemplo n.º 2
0
    def _apply_KDTree(self, group):
        """Select the atoms of ``group`` lying in a spherical shell.

        The shell is centred on the center of geometry of the
        sub-selection and spans the hits found within ``self.exRadius``
        minus the hits found within ``self.inRadius``.  A plain KDTree is
        used, so ``periodic = True`` is not supported here.
        """
        center = self.sel.apply(group).center_of_geometry()

        tree = KDTree(dim=3, bucket_size=10)
        tree.set_coords(group.positions)

        # Two searches around the same center: outer sphere, then inner.
        tree.search(center, self.exRadius)
        outer_hits = tree.get_indices()
        tree.search(center, self.inRadius)
        inner_hits = tree.get_indices()

        # Keep what is inside the outer sphere but not the inner one.
        shell = set(outer_hits) - set(inner_hits)
        return unique(group[list(shell)])
Exemplo n.º 3
0
    def __init__(self, atom_group, bucket_size=10):
        """Build a 3-D KDTree over the positions of ``atom_group``.

        Parameters
        ----------
        atom_group : AtomGroup
          atoms whose ``positions`` are loaded into the tree
        bucket_size : int
          Number of entries in leafs of the KDTree. If you suffer poor
          performance you can play around with this number. Increasing the
          `bucket_size` will speed up the construction of the KDTree but
          slow down the search.
        """
        self.atom_group = atom_group
        tree = KDTree(dim=3, bucket_size=bucket_size)
        self.kdtree = tree
        tree.set_coords(atom_group.positions)
Exemplo n.º 4
0
    def __init__(self, atom_group, box=None, bucket_size=10):
        """Build a (optionally periodic) KDTree over ``atom_group``.

        Parameters
        ----------
        atom_group : AtomGroup
          atoms whose ``positions`` are loaded into the tree
        box : array-like or ``None``, optional, default ``None``
          Simulation cell dimensions in the form of
          :attr:`MDAnalysis.trajectory.base.Timestep.dimensions` when
          periodic boundary conditions should be taken into account for
          the calculation of contacts. With ``None`` a plain ``KDTree``
          is used, otherwise a ``PeriodicKDTree`` built from ``box``.
        bucket_size : int
          Number of entries in leafs of the KDTree. If you suffer poor
          performance you can play around with this number. Increasing the
          `bucket_size` will speed up the construction of the KDTree but
          slow down the search.
        """
        self.atom_group = atom_group
        self._u = atom_group.universe
        self._box = box
        # A box switches the tree to its periodic variant.
        if box is not None:
            tree = PeriodicKDTree(box, bucket_size=bucket_size)
        else:
            tree = KDTree(dim=3, bucket_size=bucket_size)
        self.kdtree = tree
        tree.set_coords(atom_group.positions)
Exemplo n.º 5
0
 def __init__(self, atom_list, bucket_size=10):
     """Index the coordinates of ``atom_list`` in a 3-D KD tree.

     o atom_list - list of atoms. This list is used in the queries.
     It can contain atoms from different structures.
     o bucket_size - bucket size of KD tree (must be greater than 1).
     You can play around with this to optimize speed if you feel
     like it.
     """
     self.atom_list = atom_list
     # One coordinate per atom, stored as an array of type code "f";
     # the second axis is asserted to have length 3 below.
     per_atom = [atom.get_coord() for atom in atom_list]
     self.coords = numpy.array(per_atom).astype("f")
     assert bucket_size > 1
     assert self.coords.shape[1] == 3
     tree = KDTree(3, bucket_size)
     self.kdt = tree
     tree.set_coords(self.coords)
Exemplo n.º 6
0
 def __init__(self, atom_group, bucket_size=10):
     """Build a 3-D KDTree over the coordinates of ``atom_group``.

     :Arguments:
      *atom_group*
       list of atoms (:class:`~MDAnalysis.core.AtomGroup.AtomGroup`);
       must provide a ``coordinates()`` method
      *bucket_size*
       Number of entries in leafs of the KDTree. If you suffer poor
       performance you can play around with this number. Increasing the
       `bucket_size` will speed up the construction of the KDTree but
       slow down the search.

     :Raises:
      *TypeError*
       if ``atom_group`` has no ``coordinates`` attribute
     """
     self.atom_group = atom_group
     if not hasattr(atom_group, 'coordinates'):
         # The two literals are joined implicitly, so the separating
         # space has to be part of the first one.
         raise TypeError('atom_group must have a coordinates() method '
                         '(e.g. an AtomGroup from a selection)')
     self.kdtree = KDTree(dim=3, bucket_size=bucket_size)
     self.kdtree.set_coords(atom_group.coordinates())
Exemplo n.º 7
0
    def _apply_KDTree(self, group):
        """Select the atoms of ``group`` within ``self.cutoff`` of ``self.ref``.

        A KDTree is built over ``group.positions`` and queried once; the
        hits are used to index ``group`` and the result is passed through
        ``unique``.
        """
        tree = KDTree(dim=3, bucket_size=10)
        tree.set_coords(group.positions)
        tree.search(self.ref, self.cutoff)
        hits = tree.get_indices()
        selected = group[hits]
        return unique(selected)
Exemplo n.º 8
0
def test_search(nr_points, dim, bucket_size, radius):
    """Check the fixed-radius search around a single center.

    The tree is queried around the first random point with a radius of
    ``radius * 100``; the number of radii it reports is compared with a
    brute-force count of the points whose ``_dist`` to that center does
    not exceed the same radius.

    Arguments:
     - nr_points: number of points used in test
     - dim: dimension of coords
     - bucket_size: nr of points per tree node
     - radius: radius of search (scaled by 100 before use)

    Returns True when both counts agree, False otherwise.
    """
    tree = KDTree(dim, bucket_size)
    points = random.random((nr_points, dim))
    tree.set_coords(points)

    reach = radius * 100
    tree.search(points[0], reach)
    found_radii = tree.get_radii()

    # Brute-force reference count over every point, center included.
    expected = sum(
        1 for idx in range(nr_points)
        if _dist(points[idx], points[0]) <= reach
    )
    return expected == len(found_radii)
Exemplo n.º 9
0
def test_all_search(nr_points, dim, bucket_size, query_radius):
    """Check that the all-pairs neighbor search is self-consistent.

    Random coordinates are loaded into a tree, all point pairs within
    ``query_radius`` are searched, and the count of index pairs is
    compared with the count of radii.  ``None`` from either accessor is
    treated as an empty result.

    Arguments:
     - nr_points: number of points used in test
     - dim: dimension of coords
     - bucket_size: nr of points per tree node
     - query_radius: radius of search

    Returns True when both counts agree, False otherwise.
    """
    def _count(result):
        # The accessors may hand back None instead of a sized container.
        return 0 if result is None else len(result)

    tree = KDTree(dim, bucket_size)
    tree.set_coords(random.random((nr_points, dim)))
    tree.all_search(query_radius)

    n_indices = _count(tree.all_get_indices())
    n_radii = _count(tree.all_get_radii())
    return n_indices == n_radii
Exemplo n.º 10
0
    def _apply_KDTree(self, group):
        """Select the atoms of ``group`` within ``self.cutoff`` of ``self.ref``.

        When ``self.periodic`` is set and ``group.dimensions`` is not
        ``None`` a ``PeriodicKDTree`` built from those dimensions is
        used; otherwise a plain ``KDTree``.
        """
        box = None
        if self.periodic:
            box = group.dimensions

        if box is not None:
            tree = PeriodicKDTree(box, bucket_size=10)
        else:
            tree = KDTree(dim=3, bucket_size=10)

        tree.set_coords(group.positions)
        tree.search(self.ref, self.cutoff)
        hits = tree.get_indices()
        return group[hits].unique
Exemplo n.º 11
0
 def test_KDTree_exceptions(self):
     """Check the error messages raised for invalid KDTree usage."""
     tree = KDTree(dim, bucket_size)

     # Coordinates scaled far beyond the accepted range.
     with self.assertRaises(Exception) as caught:
         tree.set_coords(random.random((nr_points, dim)) * 10 ** 14)
     message = str(caught.exception)
     self.assertTrue("Points should lie between -1e6 and 1e6" in message)

     # Coordinate array with the wrong number of columns.
     with self.assertRaises(Exception) as caught:
         tree.set_coords(random.random((nr_points, dim - 2)))
     message = str(caught.exception)
     self.assertTrue(("Expected a Nx%i NumPy array" % dim) in message)

     # Searching while no coordinates were accepted.
     with self.assertRaises(Exception) as caught:
         tree.search(array([0, 0, 0]), radius)
     message = str(caught.exception)
     self.assertTrue("No point set specified" in message)
Exemplo n.º 12
0
def test_search(nr_points, dim, bucket_size, radius):
    """Check the fixed-radius search around a single center.

    The first random point is used as the center and ``radius * 100`` as
    the search radius.  The number of radii returned by the tree must
    equal the number of points found by a brute-force ``_dist`` scan.

    Arguments:
     - nr_points: number of points used in test
     - dim: dimension of coords
     - bucket_size: nr of points per tree node
     - radius: radius of search (scaled by 100 before use)

    Returns True when both counts agree, False otherwise.
    """
    tree = KDTree(dim, bucket_size)
    points = random.random((nr_points, dim))
    tree.set_coords(points)
    tree.search(points[0], radius * 100)
    found_radii = tree.get_radii()

    # Brute-force reference count over every point, center included.
    in_range = [
        idx for idx in range(nr_points)
        if _dist(points[idx], points[0]) <= radius * 100
    ]
    return len(in_range) == len(found_radii)
Exemplo n.º 13
0
    def _apply_KDTree(self, group):
        """Select the atoms within ``self.cutoff`` of a sub-selection.

        The original author reports the KDTree route to be about 7x
        faster than the distance-matrix one for typical problems.
        Limitations: always ignores periodicity
        """
        # group is wrong, should be universe (?!)
        sel_atoms = self.sel._apply(group)
        # An indexable list is needed to map tree hits back to atoms.
        candidates = list(self._group_atoms - sel_atoms)
        sel_indices = numpy.array([atom.index for atom in sel_atoms],
                                  dtype=int)
        sys_indices = numpy.array([atom.index for atom in candidates],
                                  dtype=int)
        sel_coor = Selection.coord[sel_indices]

        tree = KDTree(dim=3, bucket_size=10)
        tree.set_coords(Selection.coord[sys_indices])

        # One query per selected atom; each query can return a different
        # number of hits, so the results are kept as a list of sequences.
        hits_per_center = []
        for center in numpy.array(sel_coor):
            tree.search(center, self.cutoff)
            hits_per_center.append(tree.get_indices())

        # Translate tree indices back to atoms and drop duplicates.
        neighbours = set()
        for hits in hits_per_center:
            for hit in hits:
                neighbours.add(candidates[hit])
        return neighbours
Exemplo n.º 14
0
 def test_KDTree_exceptions(self):
     """Check the error messages raised for invalid KDTree usage."""
     tree = KDTree(dim, bucket_size)

     # Out-of-range coordinates.
     with self.assertRaises(Exception) as caught:
         tree.set_coords(random.random((nr_points, dim)) * 10 ** 14)
     range_error = str(caught.exception)
     self.assertTrue("Points should lie between -1e6 and 1e6" in range_error)

     # Wrong number of columns.
     with self.assertRaises(Exception) as caught:
         tree.set_coords(random.random((nr_points, dim - 2)))
     shape_error = str(caught.exception)
     self.assertTrue(("Expected a Nx%i NumPy array" % dim) in shape_error)

     # Search while no coordinates were accepted.
     with self.assertRaises(Exception) as caught:
         tree.search(array([0, 0, 0]), radius)
     empty_error = str(caught.exception)
     self.assertTrue("No point set specified" in empty_error)
Exemplo n.º 15
0
 def _apply_KDTree(self, group):
     """Select atoms within ``self.cutoff`` of a sub-selection's center.

     A plain ``KDTree`` is used when ``validate_dimensions`` returns
     ``None`` for the group's dimensions, a ``PeriodicKDTree`` built
     from the validated box otherwise.
     """
     sel = self.sel.apply(group)
     box = self.validate_dimensions(group.dimensions)
     center = sel.center_of_geometry(pbc=self.periodic)

     if box is not None:
         tree = PeriodicKDTree(box, bucket_size=10)
     else:
         tree = KDTree(dim=3, bucket_size=10)

     tree.set_coords(group.positions)
     tree.search(center, self.cutoff)
     hits = tree.get_indices()
     return group[hits].unique
Exemplo n.º 16
0
    def _apply_KDTree(self, group):
        """Select atoms within ``self.cutoff`` of a sub-selection's center.

        The center is the center of geometry of the sub-selection.  A
        plain KDTree is used, so ``periodic = True`` is not supported.
        (The original author notes the KDTree routine is ca 15% slower
        than the distance matrix one.)
        """
        center = self.sel.apply(group).center_of_geometry()

        tree = KDTree(dim=3, bucket_size=10)
        tree.set_coords(group.positions)
        tree.search(center, self.cutoff)
        hits = tree.get_indices()

        selected = group[hits]
        return unique(selected)
Exemplo n.º 17
0
    def _apply_KDTree(self, group):
        """Select the atoms lying in a spherical shell around a sub-selection.

        The shell is centred on the sub-selection's center of geometry
        (also stored in ``self.ref``) and keeps the hits within
        ``self.exRadius`` that are not within ``self.inRadius``.  A plain
        KDTree is used, so ``periodic = True`` is not supported.
        """
        atoms = self._group_atoms_list
        sys_indices = numpy.array([atom.index for atom in atoms])
        sys_coor = Selection.coord[sys_indices]
        # group is wrong, should be universe (?!)
        sel_atoms = self.sel._apply(group)
        cog = AtomGroup(sel_atoms).center_of_geometry()
        self.ref = numpy.array((cog[0], cog[1], cog[2]))
        if self.periodic:
            pass  # or warn? -- no periodic functionality with KDTree search

        tree = KDTree(dim=3, bucket_size=10)
        tree.set_coords(sys_coor)

        # Two searches around the same center: outer sphere, then inner.
        tree.search(self.ref, self.exRadius)
        outer_hits = tree.get_indices()
        tree.search(self.ref, self.inRadius)
        inner_hits = tree.get_indices()

        shell = list(set(outer_hits) - set(inner_hits))
        return {atoms[i] for i in shell}
Exemplo n.º 18
0
 def __init__(self, atom_list, bucket_size=10):
     """Index the coordinates of ``atom_list`` in a 3-D KD tree.

     o atom_list - list of atoms. This list is used in the queries.
     It can contain atoms from different structures.
     o bucket_size - bucket size of KD tree (must be greater than 1).
     You can play around with this to optimize speed if you feel
     like it.
     """
     self.atom_list = atom_list
     # Gather one coordinate per atom and convert to type code "f";
     # the second axis is asserted to have length 3 below.
     per_atom = [atom.get_coord() for atom in atom_list]
     self.coords = numpy.array(per_atom).astype("f")
     assert bucket_size > 1
     assert self.coords.shape[1] == 3
     tree = KDTree(3, bucket_size)
     self.kdt = tree
     tree.set_coords(self.coords)
Exemplo n.º 19
0
    def __init__(self, atom_group, bucket_size=10):
        """Store ``atom_group`` and index its positions in a 3-D KDTree.

        Parameters
        ----------
        atom_group : AtomGroup
          atoms whose ``positions`` are loaded into the tree
        bucket_size : int
          Number of entries in leafs of the KDTree. If you suffer poor
          performance you can play around with this number. Increasing the
          `bucket_size` will speed up the construction of the KDTree but
          slow down the search.
        """
        self.atom_group = atom_group
        tree = KDTree(dim=3, bucket_size=bucket_size)
        self.kdtree = tree
        tree.set_coords(atom_group.positions)
Exemplo n.º 20
0
 def __init__(self, atom_group, bucket_size=10):
     """Build a 3-D KDTree over the coordinates of ``atom_group``.

     :Arguments:
      *atom_group*
       list of atoms (:class:`~MDAnalysis.core.AtomGroup.AtomGroup`);
       must provide a ``coordinates()`` method
      *bucket_size*
       Number of entries in leafs of the KDTree. If you suffer poor
       performance you can play around with this number. Increasing the
       `bucket_size` will speed up the construction of the KDTree but
       slow down the search.

     :Raises:
      *TypeError*
       if ``atom_group`` has no ``coordinates`` attribute
     """
     self.atom_group = atom_group
     if not hasattr(atom_group, 'coordinates'):
         # The two literals are joined implicitly, so the separating
         # space has to be part of the first one.
         raise TypeError('atom_group must have a coordinates() method '
                         '(e.g. an AtomGroup from a selection)')
     self.kdtree = KDTree(dim=3, bucket_size=bucket_size)
     self.kdtree.set_coords(atom_group.coordinates())
Exemplo n.º 21
0
    def _apply_KDTree(self, group):
        """Select the atoms of ``group`` within ``self.cutoff`` of a sub-selection.

        Atoms that belong to the sub-selection itself are excluded from
        the search.  The original author reports the KDTree route to be
        about 7x faster than distmat for typical problems.
        Limitations: always ignores periodicity

        An empty sub-selection yields an empty result instead of the
        ``ValueError`` that ``np.concatenate`` raises for an empty list.
        """
        sel = self.sel.apply(group)
        # All atoms in group that aren't in sel
        others = group[~np.in1d(group.indices, sel.indices)]

        kdtree = KDTree(dim=3, bucket_size=10)
        kdtree.set_coords(others.positions)
        found_indices = []
        for atom in sel.positions:
            kdtree.search(atom, self.cutoff)
            found_indices.append(kdtree.get_indices())

        if found_indices:
            # These are the indices from OTHERS that were seen when
            # probing with SEL
            unique_idx = np.unique(np.concatenate(found_indices))
        else:
            # Nothing was probed, so nothing can have been found.
            unique_idx = np.empty(0, dtype=np.int32)
        return unique(others[unique_idx.astype(np.int32)])
Exemplo n.º 22
0
    def _apply_KDTree(self, group):
        """Select atoms within ``self.cutoff`` of a sub-selection's center.

        The center of geometry of the sub-selection is stored in
        ``self.ref`` and used as the query point.  A plain KDTree is
        used, so ``periodic = True`` is not supported.
        (The original author notes the KDTree routine is ca 15% slower
        than the distance matrix one.)
        """
        atoms = self._group_atoms_list
        sys_indices = numpy.array([atom.index for atom in atoms])
        sys_coor = Selection.coord[sys_indices]
        # group is wrong, should be universe (?!)
        sel_atoms = self.sel._apply(group)
        cog = AtomGroup(sel_atoms).center_of_geometry()
        self.ref = numpy.array((cog[0], cog[1], cog[2]))
        if self.periodic:
            pass  # or warn? -- no periodic functionality with KDTree search

        tree = KDTree(dim=3, bucket_size=10)
        tree.set_coords(sys_coor)
        tree.search(self.ref, self.cutoff)
        hits = tree.get_indices()
        # Map tree indices back to atoms and drop duplicates.
        return {atoms[i] for i in hits}
Exemplo n.º 23
0
    def _apply_KDTree(self, group):
        """Select the atoms of ``group`` within ``self.cutoff`` of a sub-selection.

        Atoms that belong to the sub-selection itself are excluded from
        the search.  The original author reports the KDTree route to be
        about 7x faster than distmat for typical problems.
        Limitations: always ignores periodicity

        An empty sub-selection yields an empty result instead of the
        ``ValueError`` that ``np.concatenate`` raises for an empty list.
        """
        sel = self.sel.apply(group)
        # All atoms in group that aren't in sel
        others = group[~np.in1d(group.indices, sel.indices)]

        kdtree = KDTree(dim=3, bucket_size=10)
        kdtree.set_coords(others.positions)
        found_indices = []
        for atom in sel.positions:
            kdtree.search(atom, self.cutoff)
            found_indices.append(kdtree.get_indices())

        if found_indices:
            # These are the indices from OTHERS that were seen when
            # probing with SEL
            unique_idx = np.unique(np.concatenate(found_indices))
        else:
            # Nothing was probed, so nothing can have been found.
            unique_idx = np.empty(0, dtype=np.int32)
        return others[unique_idx.astype(np.int32)].unique
Exemplo n.º 24
0
class AtomNeighborSearch(object):
    """This class can be used to find all atoms/residues/segments within the
    radius of a given query position.

    This class is using the BioPython KDTree for the neighbor search. This
    class also does not apply PBC to the distance calculations. So you have
    to ensure yourself that the trajectory has been corrected for PBC
    artifacts.
    """

    def __init__(self, atom_group, bucket_size=10):
        """
        :Arguments:
         *atom_group*
          list of atoms (:class:`~MDAnalysis.core.AtomGroup.AtomGroup`);
          must provide a ``coordinates()`` method
         *bucket_size*
          Number of entries in leafs of the KDTree. If you suffer poor
          performance you can play around with this number. Increasing the
          `bucket_size` will speed up the construction of the KDTree but
          slow down the search.

        :Raises:
         *TypeError*
          if ``atom_group`` has no ``coordinates`` attribute
        """
        self.atom_group = atom_group
        if not hasattr(atom_group, 'coordinates'):
            # The two literals are joined implicitly, so the separating
            # space has to be part of the first one.
            raise TypeError('atom_group must have a coordinates() method '
                            '(e.g. an AtomGroup from a selection)')
        self.kdtree = KDTree(dim=3, bucket_size=bucket_size)
        self.kdtree.set_coords(atom_group.coordinates())

    def search(self, atoms, radius, level='A'):
        """
        Return all atoms/residues/segments that are within *radius* of the
        atoms in *atoms*.

        :Arguments:
         *atoms*
          list of atoms (:class:`~MDAnalysis.core.AtomGroup.AtomGroup`)
         *radius*
          float. Radius for search in Angstrom.
         *level* (optional)
          char (A, R, S). Return atoms(A), residues(R) or segments(S) within
          *radius* of *atoms*.
        """
        hits_per_atom = []
        for position in atoms.coordinates():
            self.kdtree.search(position, radius)
            hits_per_atom.append(self.kdtree.get_indices())
        # Flatten the per-atom hit lists and drop duplicate indices.
        unique_idx = numpy.unique(
            [idx for hits in hits_per_atom for idx in hits])
        return self._index2level(unique_idx, level)

    def _index2level(self, indices, level):
        """ Convert list of atom_indices in a AtomGroup to either the
            Atoms or segments/residues containing these atoms.

        :Arguments:
         *indices*
           list of atom indices
         *level*
          char (A, R, S). Return atoms(A), residues(R) or segments(S) within
          *radius* of *atoms*.

        :Raises:
         *NotImplementedError*
          for any other *level*
        """
        n_atom_list = [self.atom_group[i] for i in indices]
        if level == 'A':
            # An empty hit list is returned as a plain empty list rather
            # than being wrapped in an AtomGroup.
            if not n_atom_list:
                return []
            return AtomGroup(n_atom_list)
        elif level == 'R':
            return list(set(a.residue for a in n_atom_list))
        elif level == 'S':
            return list(set(a.segment for a in n_atom_list))
        else:
            raise NotImplementedError('{}: level not implemented'.format(level))
Exemplo n.º 25
0
class AtomNeighborSearch(object):
    """This class can be used to find all atoms/residues/segments within the
    radius of a given query position.

    This class is using the BioPython KDTree for the neighbor search. This
    class also does not apply PBC to the distance calculations. So you have
    to ensure yourself that the trajectory has been corrected for PBC
    artifacts.
    """
    def __init__(self, atom_group, bucket_size=10):
        """
        :Arguments:
         *atom_group*
          list of atoms (:class:`~MDAnalysis.core.AtomGroup.AtomGroup`);
          must provide a ``coordinates()`` method
         *bucket_size*
          Number of entries in leafs of the KDTree. If you suffer poor
          performance you can play around with this number. Increasing the
          `bucket_size` will speed up the construction of the KDTree but
          slow down the search.

        :Raises:
         *TypeError*
          if ``atom_group`` has no ``coordinates`` attribute
        """
        self.atom_group = atom_group
        if not hasattr(atom_group, 'coordinates'):
            # The two literals are joined implicitly, so the separating
            # space has to be part of the first one.
            raise TypeError('atom_group must have a coordinates() method '
                            '(e.g. an AtomGroup from a selection)')
        self.kdtree = KDTree(dim=3, bucket_size=bucket_size)
        self.kdtree.set_coords(atom_group.coordinates())

    def search(self, atoms, radius, level='A'):
        """
        Return all atoms/residues/segments that are within *radius* of the
        atoms in *atoms*.

        :Arguments:
         *atoms*
          list of atoms (:class:`~MDAnalysis.core.AtomGroup.AtomGroup`)
         *radius*
          float. Radius for search in Angstrom.
         *level* (optional)
          char (A, R, S). Return atoms(A), residues(R) or segments(S) within
          *radius* of *atoms*.
        """
        hits_per_atom = []
        for position in atoms.coordinates():
            self.kdtree.search(position, radius)
            hits_per_atom.append(self.kdtree.get_indices())
        # Flatten the per-atom hit lists and drop duplicate indices.
        unique_idx = np.unique(
            [idx for hits in hits_per_atom for idx in hits])
        return self._index2level(unique_idx, level)

    def _index2level(self, indices, level):
        """ Convert list of atom_indices in a AtomGroup to either the
            Atoms or segments/residues containing these atoms.

        :Arguments:
         *indices*
           list of atom indices
         *level*
          char (A, R, S). Return atoms(A), residues(R) or segments(S) within
          *radius* of *atoms*.

        :Raises:
         *NotImplementedError*
          for any other *level*
        """
        n_atom_list = [self.atom_group[i] for i in indices]
        if level == 'A':
            # An empty hit list is returned as a plain empty list rather
            # than being wrapped in an AtomGroup.
            if not n_atom_list:
                return []
            return AtomGroup(n_atom_list)
        elif level == 'R':
            return list(set(a.residue for a in n_atom_list))
        elif level == 'S':
            return list(set(a.segment for a in n_atom_list))
        else:
            raise NotImplementedError(
                '{0}: level not implemented'.format(level))
Exemplo n.º 26
0
class NeighborSearch(object):
    """Class for neighbor searching,

    This class can be used for two related purposes:

     1. To find all atoms/residues/chains/models/structures within radius
        of a given query position.
     2. To find all atoms/residues/chains/models/structures that are within
        a fixed radius of each other.

    NeighborSearch makes use of the Bio.KDTree C++ module, so it's fast.
    """
    def __init__(self, atom_list, bucket_size=10):
        """Create the object.

        Arguments:

         - atom_list - list of atoms. This list is used in the queries.
           It can contain atoms from different structures.
         - bucket_size - bucket size of KD tree (must be greater than 1).
           You can play around with this to optimize speed if you feel
           like it.

        """
        self.atom_list = atom_list
        # get the coordinates
        coord_list = [a.get_coord() for a in atom_list]
        # to array of type code "f"; the second axis must have length 3
        self.coords = numpy.array(coord_list).astype("f")
        assert (bucket_size > 1)
        assert (self.coords.shape[1] == 3)
        self.kdt = KDTree(3, bucket_size)
        self.kdt.set_coords(self.coords)

    # Private

    def _get_unique_parent_pairs(self, pair_list):
        # translate a list of (entity, entity) tuples to
        # a list of (parent entity, parent entity) tuples,
        # thereby removing duplicate (parent entity, parent entity)
        # pairs.
        # o pair_list - a list of (entity, entity) tuples
        parent_pair_list = []
        for (e1, e2) in pair_list:
            p1 = e1.get_parent()
            p2 = e2.get_parent()
            if p1 == p2:
                # both entities share a parent: not a pair at this level
                continue
            elif p1 < p2:
                parent_pair_list.append((p1, p2))
            else:
                # store each pair in one canonical order so that
                # (a, b) and (b, a) collapse to a single entry
                parent_pair_list.append((p2, p1))
        return uniqueify(parent_pair_list)

    # Public

    def search(self, center, radius, level="A"):
        """Neighbor search.

        Return all atoms/residues/chains/models/structures
        that have at least one atom within radius of center.
        What entity level is returned (e.g. atoms or residues)
        is determined by level (A=atoms, R=residues, C=chains,
        M=models, S=structures).

        Arguments:
         - center - Numeric array
         - radius - float
         - level - char (A, R, C, M, S)

        Raises PDBException when level is not in entity_levels.

        """
        if level not in entity_levels:
            raise PDBException("%s: Unknown level" % level)
        self.kdt.search(center, radius)
        indices = self.kdt.get_indices()
        if indices is None:
            # The tree accessor may report "nothing found" as None
            # rather than as an empty sequence; treat both alike.
            indices = ()
        atom_list = self.atom_list
        n_atom_list = [atom_list[i] for i in indices]
        if level == "A":
            return n_atom_list
        else:
            return unfold_entities(n_atom_list, level)

    def search_all(self, radius, level="A"):
        """All neighbor search.

        Search all entities that have atoms pairs within
        radius.

        Arguments:
         - radius - float
         - level - char (A, R, C, M, S)

        Raises PDBException when level is not in entity_levels.

        """
        if level not in entity_levels:
            raise PDBException("%s: Unknown level" % level)
        self.kdt.all_search(radius)
        indices = self.kdt.all_get_indices()
        if indices is None:
            # The tree accessor may report "no pairs" as None rather
            # than as an empty sequence; treat both alike.
            indices = ()
        atom_list = self.atom_list
        atom_pair_list = [(atom_list[i1], atom_list[i2])
                          for i1, i2 in indices]
        if level == "A":
            # return atoms
            return atom_pair_list
        # Climb the hierarchy one level at a time, reducing the pairs to
        # unique parent pairs, until the requested level is reached.
        next_level_pair_list = atom_pair_list
        for parent_level in ["R", "C", "M", "S"]:
            next_level_pair_list = self._get_unique_parent_pairs(
                next_level_pair_list)
            if level == parent_level:
                return next_level_pair_list
Exemplo n.º 27
0
class AtomNeighborSearch(object):
    """This class can be used to find all atoms/residues/segments within the
    radius of a given query position.

    This class is using the BioPython KDTree for the neighbor search. This
    class also does not apply PBC to the distance calculations. So you have
    to ensure yourself that the trajectory has been corrected for PBC
    artifacts.
    """

    def __init__(self, atom_group, bucket_size=10):
        """

        Parameters
        ----------
        atom_group : AtomGroup
          atoms whose ``positions`` are loaded into the tree
        bucket_size : int
          Number of entries in leafs of the KDTree. If you suffer poor
          performance you can play around with this number. Increasing the
          `bucket_size` will speed up the construction of the KDTree but
          slow down the search.
        """
        self.atom_group = atom_group
        self.kdtree = KDTree(dim=3, bucket_size=bucket_size)
        self.kdtree.set_coords(atom_group.positions)

    def search(self, atoms, radius, level='A'):
        """
        Return all atoms/residues/segments that are within *radius* of the
        atoms in *atoms*.

        Parameters
        ----------
        atoms : AtomGroup
          query atoms; their ``positions`` are used as search centers
        radius : float
          Radius for search in Angstrom.
        level : str
          char (A, R, S). Return atoms(A), residues(R) or segments(S) within
          *radius* of *atoms*.
        """
        hits_per_atom = []
        # Read the query coordinates through ``positions``, the same
        # attribute the constructor uses to fill the tree.
        for position in atoms.positions:
            self.kdtree.search(position, radius)
            hits_per_atom.append(self.kdtree.get_indices())
        # Flatten the per-atom hit lists and drop duplicate indices.
        unique_idx = np.unique(
            [idx for hits in hits_per_atom for idx in hits])
        return self._index2level(unique_idx, level)

    def _index2level(self, indices, level):
        """Convert list of atom_indices in a AtomGroup to either the
        Atoms or segments/residues containing these atoms.

        Parameters
        ----------
        indices
           list of atom indices
        level : str
          char (A, R, S). Return atoms(A), residues(R) or segments(S) within
          *radius* of *atoms*.

        Raises
        ------
        NotImplementedError
          for any other *level*
        """
        n_atom_list = [self.atom_group[i] for i in indices]
        if level == 'A':
            # An empty hit list is returned as a plain empty list rather
            # than being wrapped in an AtomGroup.
            if not n_atom_list:
                return []
            return AtomGroup(n_atom_list)
        elif level == 'R':
            return list({a.residue for a in n_atom_list})
        elif level == 'S':
            return list({a.segment for a in n_atom_list})
        else:
            raise NotImplementedError('{0}: level not implemented'.format(level))
Exemplo n.º 28
0
def anal_jointdist(prot,
                   rfirst,
                   mols,
                   cutoff,
                   midz,
                   box,
                   mat,
                   matburr,
                   buried=None):
    """
  Calculates the joint probability of residue interactions

  For every molecule that is in contact with the protein, each pair of
  residues that are both in contact with that molecule (including a
  residue paired with itself) is flagged once in this call's increment;
  the increment is added to ``mat`` (or to ``matburr`` for molecules
  flagged as buried).

  Parameters
  ----------
  prot : AtomSelection
    the protein atoms
  rfirst : NumpyArray
    the first atom of each residue in the protein; consecutive entries
    delimit one residue, so the last entry only closes the final residue
  mols : NumpyArray
    the centroid of the molecule of interest
  cutoff : float
    the contact cut-off
  midz : float
    the middle of the bilayer (not used by this function)
  box : NumpyArray
    the box sides; when None a KDTree search is used instead of the
    full distance array
  mat : NumpyArray
    the joint probability for non-buried molecules
  matburr : NumpyArray
    the joint probability for buried molecules
  buried : NumpyArray of boolean, optional
    flag to indicate buried molecule

  Returns
  -------
  tuple of NumpyArray
    ``(mat + increment, matburr + buried increment)``
  """
    imat = np.zeros(mat.shape)
    imatburr = np.zeros(matburr.shape)

    # Fetch the protein coordinates once instead of once per molecule.
    positions = prot.get_positions()
    use_box = box is not None

    # Calculate all distances at once
    if use_box:
        dist_all = distances.distance_array(np.array(mols), positions, box)
    else:
        kdtree = KDTree(dim=3, bucket_size=10)
        kdtree.set_coords(positions)
        natoms = positions.shape[0]

    # (first atom, one-past-last atom) for every residue
    bounds = list(zip(rfirst[:-1], rfirst[1:]))

    # Loop over all molecules
    for mi, mol in enumerate(mols):
        if use_box:
            dist = dist_all[mi, :]
        else:
            # Without a box only contact/no-contact is known: start every
            # atom beyond the cut-off and zero the atoms the tree reports.
            dist = np.ones(natoms) + cutoff
            kdtree.search(np.array([mol]), cutoff)
            for i in kdtree.get_indices():
                dist[i] = 0.0

        # Check if this molecule is on
        if dist.min() >= cutoff:
            continue

        # Residues in contact with this molecule; each residue minimum is
        # evaluated once rather than once per residue pair.
        touched = [ri for ri, (start, stop) in enumerate(bounds)
                   if not dist[start:stop].min() >= cutoff]
        if not touched:
            continue

        if buried is not None and buried[mi]:
            target = imatburr
        else:
            target = imat
        # Flag every (i, j) combination of touched residues, diagonal
        # included; the result is symmetric by construction.
        target[np.ix_(touched, touched)] = 1

    return (mat + imat, matburr + imatburr)
Exemplo n.º 29
0
def anal_contacts(prot,
                  rfirst,
                  mols,
                  cutoff,
                  midz,
                  box,
                  molfile,
                  resfile,
                  burresfile=None,
                  buried=None,
                  reslist=None,
                  reslistfile=None):
    """
    Calculate a range of contacts and write the contact vectors to files.

    A molecule is flagged "on" when at least one protein atom lies within
    `cutoff` of it. A residue is flagged "on" when at least one of its atoms
    lies within `cutoff` of an "on" molecule; the flag goes to the buried
    vector when buried tracking is active and that molecule is buried,
    otherwise to the regular residue vector.

    Parameters
    ----------
    prot : AtomSelection
      the protein atoms; must provide ``get_positions()``
    rfirst : NumpyArray
      the first atom of each residue in the protein; residue ``i`` spans
      atoms ``rfirst[i]:rfirst[i + 1]``, so it holds one more entry than
      there are residues
    mols : NumpyArray
      the centroid of the molecule of interest
    cutoff : float
      the contact cut-off
    midz : float
      the middle of the bilayer (not used by this function)
    box : NumpyArray
      the box sides; if None a KDTree search is used instead of a full
      distance matrix
    molfile : fileobject
      the file to write molecular contacts to
    resfile : fileobject
      the file to write residue contacts to
    burresfile : fileobject, optional
      the file to write buried residue contacts to
    buried : NumpyArray of boolean, optional
      flag to indicate buried molecule
    reslist : list
      a list of residues to write out individual mol contacts
    reslistfile : fileobject
      the file to write out individual mol contacts to

    Notes
    -----
    Buried contacts are tracked only when both `buried` and `burresfile`
    are given. If only one of them is supplied, every contact is recorded
    in the regular residue vector and nothing is written to `burresfile`.
    """
    nres = len(rfirst) - 1
    nmol = len(mols)

    # One flag decides allocation, classification and output, so the three
    # steps can never disagree about whether buried data exists.
    track_buried = buried is not None and burresfile is not None

    molon = np.zeros(nmol, dtype=bool)
    reson = np.zeros(nres, dtype=bool)
    burreson = np.zeros(nres, dtype=bool) if track_buried else None

    # Map residue -> row in resonlist; the first occurrence wins, which is
    # what list.index() would have returned.
    resrow = {}
    resonlist = None
    if reslist is not None:
        resonlist = np.zeros([len(reslist), nmol], dtype=bool)
        for row, res in enumerate(reslist):
            resrow.setdefault(res, row)

    # Calculate all distances at once, or prepare the tree for the search
    positions = prot.get_positions()
    if box is not None:
        dist_all = distances.distance_array(np.array(mols), positions, box)
    else:
        kdtree = KDTree(dim=3, bucket_size=10)
        kdtree.set_coords(positions)
        natom = positions.shape[0]

    # Loop over all molecules
    for mi, mol in enumerate(mols):
        if box is not None:
            dist = dist_all[mi, :]
        else:
            # Pseudo-distances: everything starts beyond the cut-off and the
            # atoms returned by the tree search are set to zero.
            dist = np.ones(natom) + cutoff
            kdtree.search(np.array([mol]), cutoff)
            hits = kdtree.get_indices()
            if hits is not None:
                dist[np.asarray(hits, dtype=np.intp)] = 0.0

        # Check if this molecule is on
        molon[mi] = dist.min() < cutoff
        if not molon[mi]:
            continue

        mol_is_buried = track_buried and buried[mi]

        # Check contacts for each residue
        for i in range(nres):
            if dist[rfirst[i]:rfirst[i + 1]].min() < cutoff:
                if mol_is_buried:
                    burreson[i] = True
                else:
                    reson[i] = True
                if i in resrow:
                    resonlist[resrow[i], mi] = True

    # Write state information to file
    write_booleans(molfile, molon)
    write_booleans(resfile, reson)
    if track_buried:
        write_booleans(burresfile, burreson)
    if reslist is not None:
        write_booleans(reslistfile,
                       resonlist.reshape(len(reslist) * nmol))
Exemplo n.º 30
0
    def _apply_KDTree(self, group):
        """KDTree based selection is about 7x faster than distmat
        for typical problems.

        Applies ``self.sel`` to `group`, builds a tree over the atoms of
        `group` that are NOT part of that selection, and probes the tree
        with every selected position using ``self.cutoff`` as the radius.

        Parameters
        ----------
        group
          the atoms to search in; must provide ``indices``, ``positions``
          and ``dimensions`` and support fancy indexing

        Returns
        -------
        The ``unique`` attribute of the sub-group of non-selected atoms that
        were found by the search. An empty selection yields an empty
        sub-group instead of raising.
        """
        sel = self.sel.apply(group)
        # All atoms in group that aren't in sel
        others = group[~np.in1d(group.indices, sel.indices)]

        box = self.validate_dimensions(group.dimensions)
        if box is None:
            kdtree = KDTree(dim=3, bucket_size=10)
            kdtree.set_coords(others.positions)
            found_indices = []
            for atom in sel.positions:
                kdtree.search(atom, self.cutoff)
                found_indices.append(kdtree.get_indices())
            if found_indices:
                unique_idx = np.unique(np.concatenate(found_indices))
            else:
                # np.concatenate refuses an empty sequence; no probe atoms
                # simply means nothing was found.
                unique_idx = np.empty(0, dtype=np.int32)

        else:
            kdtree = PeriodicKDTree(box, bucket_size=10)
            kdtree.set_coords(others.positions)
            kdtree.search(sel.positions, self.cutoff)
            unique_idx = np.asarray(kdtree.get_indices())

        # These are the indices from OTHERS that were seen when
        # probing with SEL
        return others[unique_idx.astype(np.int32)].unique
Exemplo n.º 31
0
def anal_contacts(prot,rfirst,mols,cutoff,midz,box,molfile,resfile,
                    burresfile=None,buried=None,reslist=None,reslistfile=None):
  """
  Calculate a range of contacts and write the contact vectors to files.

  A molecule is flagged "on" when at least one protein atom lies within
  `cutoff` of it. A residue is flagged "on" when at least one of its atoms
  lies within `cutoff` of an "on" molecule; the flag goes to the buried
  vector when buried tracking is active and that molecule is buried,
  otherwise to the regular residue vector.

  Parameters
  ----------
  prot : AtomSelection
    the protein atoms; must provide ``get_positions()``
  rfirst : NumpyArray
    the first atom of each residue in the protein; residue ``i`` spans
    atoms ``rfirst[i]:rfirst[i+1]``, so it holds one more entry than
    there are residues
  mols : NumpyArray
    the centroid of the molecule of interest
  cutoff : float
    the contact cut-off
  midz : float
    the middle of the bilayer (not used by this function)
  box : NumpyArray
    the box sides; if None a KDTree search is used instead of a full
    distance matrix
  molfile : fileobject
    the file to write molecular contacts to
  resfile : fileobject
    the file to write residue contacts to
  burresfile : fileobject, optional
    the file to write buried residue contacts to
  buried : NumpyArray of boolean, optional
    flag to indicate buried molecule
  reslist : list
    a list of residues to write out individual mol contacts
  reslistfile : fileobject
    the file to write out individual mol contacts to

  Notes
  -----
  Buried contacts are tracked only when both `buried` and `burresfile`
  are given. If only one of them is supplied, every contact is recorded
  in the regular residue vector and nothing is written to `burresfile`.
  """
  nres = len(rfirst)-1
  nmol = len(mols)

  # A single flag drives allocation, classification and output of the
  # buried vector so the three steps always agree.
  track_buried = buried is not None and burresfile is not None

  molon = np.zeros(nmol,dtype=bool)
  reson = np.zeros(nres,dtype=bool)
  burreson = np.zeros(nres,dtype=bool) if track_buried else None

  # Residue -> row of resonlist; first occurrence wins, like list.index()
  resrow = {}
  resonlist = None
  if reslist is not None:
    resonlist = np.zeros([len(reslist),nmol],dtype=bool)
    for row,res in enumerate(reslist):
      resrow.setdefault(res,row)

  # Calculate all distances at once, or prepare the tree for the search
  positions = prot.get_positions()
  if box is not None:
    dist_all = distances.distance_array(np.array(mols), positions, box)
  else:
    kdtree = KDTree(dim=3, bucket_size=10)
    kdtree.set_coords(positions)
    natom = positions.shape[0]

  # Loop over all molecules
  for mi,mol in enumerate(mols):
    if box is not None:
      dist = dist_all[mi,:]
    else:
      # Pseudo-distances: start beyond the cut-off, zero the atoms that the
      # tree search returned.
      dist = np.ones(natom)+cutoff
      kdtree.search(np.array([mol]), cutoff)
      hits = kdtree.get_indices()
      if hits is not None:
        dist[np.asarray(hits,dtype=np.intp)] = 0.0

    # Check if this molecule is on
    molon[mi] = dist.min() < cutoff
    if not molon[mi]:
      continue

    mol_is_buried = track_buried and buried[mi]

    # Check contacts for each residue
    for i in range(nres):
      if dist[rfirst[i]:rfirst[i+1]].min() < cutoff:
        if mol_is_buried:
          burreson[i] = True
        else:
          reson[i] = True
        if i in resrow:
          resonlist[resrow[i],mi] = True

  # Write state information to file
  write_booleans(molfile,molon)
  write_booleans(resfile,reson)
  if track_buried:
    write_booleans(burresfile,burreson)
  if reslist is not None:
    write_booleans(reslistfile,resonlist.reshape(len(reslist)*nmol))
Exemplo n.º 32
0
def anal_jointdist(prot,rfirst,mols,cutoff,midz,box,mat,matburr,buried=None):
  """
  Calculate the joint probability of residue interactions.

  For every molecule that is in contact with the protein, all residues
  that have an atom within `cutoff` of it are collected, and every pair of
  those residues (including each residue with itself) is marked with 1 in
  an increment matrix. The increment goes to the buried matrix when
  `buried` is given and flags the molecule, otherwise to the regular one.

  Parameters
  ----------
  prot : AtomSelection
    the protein atoms; must provide ``get_positions()``
  rfirst : NumpyArray
    the first atom of each residue in the protein; residue ``i`` spans
    atoms ``rfirst[i]:rfirst[i+1]``
  mols : NumpyArray
    the centroid of the molecule of interest
  cutoff : float
    the contact cut-off
  midz : float
    the middle of the bilayer (not used by this function)
  box : NumpyArray
    the box sides; if None a KDTree search is used instead of a full
    distance matrix
  mat : NumpyArray
    the joint probability for non-buried molecules
  matburr : NumpyArray
    the joint probability for buried molecules
  buried : NumpyArray of boolean, optional
    flag to indicate buried molecule

  Returns
  -------
  tuple of NumpyArray
    ``(mat + increment, matburr + buried_increment)``; the inputs are not
    modified in place
  """
  imat = np.zeros(mat.shape)
  imatburr = np.zeros(matburr.shape)
  nres = len(rfirst)-1

  # Calculate all distances at once, or prepare the tree for the search
  positions = prot.get_positions()
  if box is not None:
    dist_all = distances.distance_array(np.array(mols), positions, box)
  else:
    kdtree = KDTree(dim=3, bucket_size=10)
    kdtree.set_coords(positions)
    natom = positions.shape[0]

  # Loop over all molecules
  for mi,mol in enumerate(mols):
    if box is not None:
      dist = dist_all[mi,:]
    else:
      # Pseudo-distances: start beyond the cut-off, zero the atoms that the
      # tree search returned.
      dist = np.ones(natom)+cutoff
      kdtree.search(np.array([mol]), cutoff)
      hits = kdtree.get_indices()
      if hits is not None:
        dist[np.asarray(hits,dtype=np.intp)] = 0.0

    # Check if this molecule is on
    if dist.min() >= cutoff:
      continue

    # Evaluate each residue's minimum distance exactly once; the test is
    # kept in its original ">= cutoff" form so comparisons behave the same.
    touched = [i for i in range(nres)
               if not (dist[rfirst[i]:rfirst[i+1]].min() >= cutoff)]
    if not touched:
      continue

    if buried is not None and buried[mi]:
      target = imatburr
    else:
      target = imat

    # Mark every (i, j) pair of contacting residues, diagonal included
    idx = np.asarray(touched,dtype=np.intp)
    target[idx[:,None],idx] = 1

  return (mat+imat,matburr+imatburr)
Exemplo n.º 33
0
class NeighborSearch(object):
    """Class for neighbor searching.

    This class can be used for two related purposes:

     1. To find all atoms/residues/chains/models/structures within radius
        of a given query position.
     2. To find all atoms/residues/chains/models/structures that are within
        a fixed radius of each other.

    NeighborSearch makes use of the Bio.KDTree C++ module, so it's fast.
    """

    def __init__(self, atom_list, bucket_size=10):
        """Create the object.

        Arguments:

         - atom_list - list of atoms. This list is used in the queries.
           It can contain atoms from different structures.
         - bucket_size - bucket size of KD tree. You can play around
           with this to optimize speed if you feel like it.
        """
        self.atom_list = atom_list
        # get the coordinates
        coord_list = [a.get_coord() for a in atom_list]
        # to Nx3 array of type float
        self.coords = numpy.array(coord_list).astype("f")
        # NOTE(review): these sanity checks vanish under ``python -O``; they
        # are kept as asserts so the raised exception type stays the same.
        assert bucket_size > 1
        assert self.coords.shape[1] == 3
        self.kdt = KDTree(3, bucket_size)
        self.kdt.set_coords(self.coords)

    # Private

    def _get_unique_parent_pairs(self, pair_list):
        """Translate (entity, entity) pairs to unique parent pairs.

        Each pair is replaced by the pair of the entities' parents, ordered
        so that the smaller parent comes first. Pairs whose two entities
        share the same parent are dropped, and duplicates are removed.

        Arguments:

         - pair_list - a list of (entity, entity) tuples
        """
        parent_pair_list = []
        for (e1, e2) in pair_list:
            p1 = e1.get_parent()
            p2 = e2.get_parent()
            if p1 == p2:
                continue
            parent_pair_list.append((p1, p2) if p1 < p2 else (p2, p1))
        return uniqueify(parent_pair_list)

    # Public

    def search(self, center, radius, level="A"):
        """Neighbor search.

        Return all atoms/residues/chains/models/structures
        that have at least one atom within radius of center.
        What entity level is returned (e.g. atoms or residues)
        is determined by level (A=atoms, R=residues, C=chains,
        M=models, S=structures).

        Arguments:

         - center - Numeric array
         - radius - float
         - level - char (A, R, C, M, S)

        Raises PDBException if level is not a known entity level.
        """
        if level not in entity_levels:
            raise PDBException("%s: Unknown level" % level)
        self.kdt.search(center, radius)
        atom_list = self.atom_list
        n_atom_list = [atom_list[i] for i in self.kdt.get_indices()]
        if level == "A":
            return n_atom_list
        return unfold_entities(n_atom_list, level)

    def search_all(self, radius, level="A"):
        """All neighbor search.

        Search all entities that have atoms pairs within
        radius.

        Arguments:

         - radius - float
         - level - char (A, R, C, M, S)

        Returns a list of (entity, entity) pairs at the requested level;
        the list is empty when no atom pair lies within radius.
        Raises PDBException if level is not a known entity level.
        """
        if level not in entity_levels:
            raise PDBException("%s: Unknown level" % level)
        self.kdt.all_search(radius)
        indices = self.kdt.all_get_indices()
        if indices is None:
            # The tree can hand back None instead of an empty array; treat
            # it as "no pairs" rather than failing while iterating.
            indices = ()
        atom_list = self.atom_list
        atom_pair_list = [(atom_list[i1], atom_list[i2]) for i1, i2 in indices]
        if level == "A":
            # return atoms
            return atom_pair_list
        # Climb the hierarchy one level at a time until the requested one
        next_level_pair_list = atom_pair_list
        for parent_level in ["R", "C", "M", "S"]:
            next_level_pair_list = self._get_unique_parent_pairs(next_level_pair_list)
            if level == parent_level:
                return next_level_pair_list
Exemplo n.º 34
0
class AtomNeighborSearch(object):
    """Find all atoms/residues/segments within a radius of query atoms.

    The neighbor search is backed by the BioPython KDTree. Periodic boundary
    conditions are NOT applied to the distance calculations, so the
    trajectory has to be corrected for PBC artifacts beforehand.
    """

    def __init__(self, atom_group, bucket_size=10):
        """

        Parameters
        ----------
        atom_group : AtomGroup
          the atoms that are indexed by the tree and returned by searches
        bucket_size : int
          Number of entries in leafs of the KDTree. If you suffer poor
          performance you can play around with this number. Increasing the
          `bucket_size` will speed up the construction of the KDTree but
          slow down the search.
        """
        self.atom_group = atom_group
        self._u = atom_group.universe
        self.kdtree = KDTree(dim=3, bucket_size=bucket_size)
        self.kdtree.set_coords(atom_group.positions)

    def search(self, atoms, radius, level='A'):
        """
        Return all atoms/residues/segments that are within *radius* of the
        atoms in *atoms*.

        Parameters
        ----------
        atoms : AtomGroup, MDAnalysis.core.groups.Atom
          the query atoms; a single Atom is accepted as well
        radius : float
          Radius for search in Angstrom.
        level : str
          char (A, R, S). Return atoms(A), residues(R) or segments(S) within
          *radius* of *atoms*.
        """
        # A single Atom only has ``position``; promote it to one row
        positions = (atoms.position.reshape(1, 3) if isinstance(atoms, Atom)
                     else atoms.positions)

        # Probe the tree once per query position and pool all hits
        hits = []
        for query in positions:
            self.kdtree.search(query, radius)
            hits.extend(self.kdtree.get_indices())
        unique_idx = np.unique(hits).astype(np.int64)
        return self._index2level(unique_idx, level)

    def _index2level(self, indices, level):
        """Convert atom indices of the stored AtomGroup to the atoms
        themselves or to the residues/segments containing them.

        Parameters
        ----------
        indices
           list of atom indices
        level : str
          char (A, R, S). Return atoms(A), residues(R) or segments(S)
          for the given indices.

        Raises
        ------
        NotImplementedError
          if `level` is not one of A, R or S
        """
        found = self.atom_group[indices]
        if level == 'A':
            # an empty result is reported as a plain empty list
            return found if found else []
        if level == 'R':
            return list(set(a.residue for a in found))
        if level == 'S':
            return list({a.segment for a in found})
        raise NotImplementedError('{0}: level not implemented'.format(level))