Пример #1
0
def get_layer_heights_kmeans(traj, cell, n,
                             surface_normal=np.array([0, 0, 1])):
    """Locate the layer-center heights in `traj`, measured along `surface_normal`.

    Every position of every frame is passed through
    ``PBCCalculator.wrap_points``, projected onto `surface_normal`, and the
    resulting one-dimensional heights are clustered with k-means.

    Args:
        - traj (ndarray n_frames x n_atoms x 3)
        - cell (ndarray 3x3 matrix)
        - n (int): How many layers to look for (the k of k-means).
        - surface_normal (3-vector): Unit vector normal to the surface.
            Defaults to the z direction <0, 0, 1>.
    Returns:
        ndarray holding the `n` cluster-center heights in ascending order.
    """
    from sklearn.cluster import KMeans

    # Wrapping has to happen before projecting, otherwise the heights along
    # the surface normal are not consistent. Work on a flattened copy so the
    # caller's trajectory is left untouched.
    points = traj.copy().reshape(-1, 3)
    PBCCalculator(cell).wrap_points(points)

    projected = np.dot(surface_normal, points.T).reshape(-1, 1)
    model = KMeans(n_clusters=n).fit(projected)

    centers = model.cluster_centers_.reshape(-1)
    centers.sort()
    return centers
Пример #2
0
def calculate_coord_numbers(traj, atoms, cutoff):
    """Compute the coordination number of every atom in every frame of `traj`.

    An atom's coordination number in a frame is the number of *other* atoms
    whose distance to it, as reported by
    ``PBCCalculator.pairwise_distances``, is less than or equal to `cutoff`.

    Args:
        - traj (ndarray n_frames x n_atoms x 3)
        - atoms (ase.Atoms): Supplies the number of atoms and the cell.
        - cutoff (float, distance units)
    Returns:
        ndarray of int, n_frames x n_atoms
    """
    n_atoms = len(atoms)

    # Preallocate the buffers once and reuse them for every frame. The builtin
    # `int`/`bool` are used as dtypes because the `np.int`/`np.bool` aliases
    # were removed in NumPy 1.24.
    out = np.full(shape=(len(traj), n_atoms), fill_value=-1, dtype=int)
    distbuf = np.empty(shape=(n_atoms, n_atoms))
    neighborbuf = np.empty(shape=(n_atoms, n_atoms), dtype=bool)

    pbcc = PBCCalculator(atoms.cell)

    for f_idex, frame in enumerate(tqdm(traj)):
        pbcc.pairwise_distances(frame, out=distbuf)
        np.less_equal(distbuf, cutoff, out=neighborbuf)
        np.sum(neighborbuf, axis=1, out=out[f_idex])

    out -= 1  # Previous sum always counted atom itself in its CN, which is wrong

    # Sanity check; skipped for an empty result since np.min raises on it.
    assert out.size == 0 or np.min(out) >= 0

    return out
Пример #3
0
    def run(self, st):
        """Replace every site by up to ``self.n`` averaged sub-centers.

        The real-space positions assigned to each site are dealt into
        ``self.n`` interleaved subsets (``pts[i::self.n]``); each subset is
        averaged with ``PBCCalculator.average`` and becomes one center of the
        returned network. When ``self.weighted`` is set the averages are
        weighted by the assignment confidences, otherwise by uniform weights.

        A site with no more than ``self.n`` assigned positions is not
        subdivided: its raw positions are used as centers instead, unless
        ``self.error_on_insufficient`` is set, in which case an error is raised.

        Args:
            st (SiteTrajectory)
        Returns:
            A ``SiteNetwork``: a copy of ``st.site_network`` whose ``centers``
            are the computed points and whose ``site_types`` hold, for each
            new center, the index of the site it was derived from.
        Raises:
            ValueError: if ``st`` has no real trajectory, or if a site has too
                few positions and ``self.error_on_insufficient`` is set.
        """
        assert isinstance(st, SiteTrajectory)
        if st.real_trajectory is None:
            raise ValueError(
                "SiteTrajectory must have associated real trajectory.")

        pbcc = PBCCalculator(st.site_network.structure.cell)
        # Allocate for the maximum possible number of output centers
        centers = np.empty(shape=(self.n * st.site_network.n_sites, 3),
                           dtype=st.real_trajectory.dtype)
        centers.fill(np.nan)
        # An integer array cannot hold NaN, so -1 marks "not yet assigned".
        # (`np.int` was removed in NumPy 1.24; the builtin is equivalent.)
        types = np.full(shape=centers.shape[0], fill_value=-1, dtype=int)

        current_idex = 0
        for site in range(st.site_network.n_sites):
            if self.weighted:
                pts, confs = st.real_positions_for_site(
                    site, return_confidences=True)
            else:
                pts = st.real_positions_for_site(site)
                confs = np.ones(shape=len(pts), dtype=int)

            old_idex = current_idex

            if len(pts) > self.n:
                sanity = 0
                for i in range(self.n):
                    # Interleaved (round-robin) subset number i
                    ps = pts[i::self.n]
                    sanity += len(ps)
                    c = confs[i::self.n]
                    centers[current_idex] = pbcc.average(ps, weights=c)
                    current_idex += 1

                # Every point must have landed in exactly one subset
                assert sanity == len(pts)
                assert current_idex - old_idex == self.n
            else:
                if self.error_on_insufficient:
                    raise ValueError(
                        "Insufficient points assigned to site %i (%i) to take %i averages."
                        % (site, len(pts), self.n))
                # Too few points to average: keep them as they are
                centers[current_idex:current_idex + len(pts)] = pts
                current_idex += len(pts)

            types[old_idex:current_idex] = site

        new_centers = centers[:current_idex]
        new_types = types[:current_idex]

        # Every used slot must have been written
        assert not np.isnan(np.sum(new_centers))
        assert np.all(new_types >= 0)

        sn = st.site_network.copy()
        sn.centers = new_centers
        sn.site_types = new_types

        return sn
Пример #4
0
    def compute_volumes(self, sn):
        """Computes the volume of the convex hull defined by each site's static vertices.

        Requires vertex information in the SiteNetwork.

        Adds the ``site_volumes`` and ``site_surface_areas`` attributes.

        A site with fewer than four vertices gets volume 0 and area NaN
        (or raises, if ``self.error_on_insufficient_coord`` is set). Volume
        and area are both NaN for degenerate hulls/point sets on which QHull
        fails.

        Args:
            - sn (SiteNetwork)
        Raises:
            ValueError: if ``sn`` has no vertices.
            InsufficientCoordinatingAtomsError: if a site has fewer than four
                vertices and ``self.error_on_insufficient_coord`` is set.
        """
        assert isinstance(sn, SiteNetwork)
        if sn.vertices is None:
            raise ValueError(
                "SiteNetwork must have verticies to compute volumes!")

        # `np.float` was removed in NumPy 1.24; the builtin is equivalent.
        vols = np.empty(shape=sn.n_sites, dtype=float)
        areas = np.empty(shape=sn.n_sites, dtype=float)

        pbcc = PBCCalculator(sn.structure.cell)

        for site in range(sn.n_sites):
            pos = sn.static_structure.positions[list(sn.vertices[site])]
            if len(pos) < 4:
                if self.error_on_insufficient_coord:
                    raise InsufficientCoordinatingAtomsError(
                        "Site %i had only %i vertices (less than needed 4)" %
                        (site, len(pos)))
                else:
                    vols[site] = 0
                    areas[site] = np.nan
                    continue

            # Indexing with an index list yields a copy, so the in-place
            # shift and wrap below cannot touch the static structure.
            assert pos.flags['OWNDATA']
            # Recenter: move the site center onto the cell centroid, then
            # wrap (presumably so that the vertices are not split across a
            # periodic boundary).
            offset = pbcc.cell_centroid - sn.centers[site]
            pos += offset
            pbcc.wrap_points(pos)

            try:
                hull = ConvexHull(pos)
                vols[site] = hull.volume
                areas[site] = hull.area
            except QhullError:
                logger.warning(
                    "Had QHull failure when computing volume of site %i",
                    site)
                vols[site] = np.nan
                areas[site] = np.nan

        sn.add_site_attribute('site_volumes', vols)
        sn.add_site_attribute('site_surface_areas', areas)
Пример #5
0
    def compute_accessable_volumes(self, st, n_recenterings=8):
        """Computes the volumes of convex hulls around all positions associated with a site.

        Uses the shift-and-wrap trick for dealing with periodicity, so sites that
        take up the majority of the unit cell may give bogus results.

        Adds the ``accessable_site_volumes`` attribute to the ``SiteNetwork``.
        The volume of a site is NaN when it has no positions or when QHull
        fails for every recentering.

        Args:
            st (SiteTrajectory)
            n_recenterings (int): How many different recenterings to try (the
                algorithm will recenter around n of the points and take the minimal
                resulting volume; this deals with cases where there is one outlier
                where recentering around it gives very bad results.)
        """
        assert isinstance(st, SiteTrajectory)
        # `np.float` was removed in NumPy 1.24; the builtin is equivalent.
        vols = np.empty(shape=st.site_network.n_sites, dtype=float)

        pbcc = PBCCalculator(st.site_network.structure.cell)

        for site in range(st.site_network.n_sites):
            pos = st.real_positions_for_site(site)

            # `pos` is shifted and wrapped in place below, so it must be a
            # private copy rather than a view of the trajectory.
            assert pos.flags['OWNDATA']

            if len(pos) == 0:
                # Nothing to recenter around and no hull to build
                vols[site] = np.nan
                continue

            vol = np.inf
            for i in range(n_recenterings):
                # Recenter around points spread evenly through `pos`
                offset = pbcc.cell_centroid - pos[int(
                    i * (len(pos) / n_recenterings))]
                pos += offset
                pbcc.wrap_points(pos)

                try:
                    hull = ConvexHull(pos)
                except QhullError as qhe:
                    logger.warning("For site %i, iter %i: %s", site, i, qhe)
                    continue

                if hull.volume < vol:
                    vol = hull.volume

            # `vol` is still infinite if no recentering produced a hull;
            # report that as NaN rather than as an infinite volume.
            vols[site] = vol if np.isfinite(vol) else np.nan

        st.site_network.add_site_attribute('accessable_site_volumes', vols)
Пример #6
0
    def plot_site(self, site, **kwargs):
        """Plot one site together with its point cloud and the static lattice.

        All three point sets (the real positions assigned to `site`, the
        static structure's positions, and the site center) are shifted by the
        same offset and wrapped before plotting. The offset places the
        position at index 3 of the site's point cloud on the cell centroid.

        Args:
            site (int): Index of the site to plot.
            **kwargs: Forwarded to ``plot_points`` and ``plot_atoms``; the
                ``'ax'`` entry is also used to set the title.
        """
        network = self._sn
        pbcc = PBCCalculator(network.structure.cell)

        cloud = self.real_positions_for_site(site).copy()
        shift = pbcc.cell_centroid - cloud[3]

        def recenter(points):
            # Apply the common shift in place, then wrap in place
            points += shift
            pbcc.wrap_points(points)
            return points

        cloud = recenter(cloud)
        lattice = recenter(network.static_structure.positions.copy())
        center = recenter(network.centers[site:site + 1].copy())

        # Point cloud first, then the site itself, then everything else
        plot_points(points=cloud, alpha=0.3, marker='.', color='k', **kwargs)
        plot_points(points=center, color='cyan', **kwargs)
        plot_atoms(network.static_structure, positions=lattice, **kwargs)

        title = "Site %i/%i" % (site, len(network))
        if network.site_types is not None:
            title += " (type %i)" % network.site_types[site]

        kwargs['ax'].set_title(title)
    def _get_sites_to_merge(self, st):
        """Cluster sites into merge groups with Markov clustering.

        Steps:
          1. Run a ``JumpAnalysis`` on `st` if its site network has no
             ``n_ij`` attribute yet.
          2. Build a connectivity matrix with
             ``self.connectivity_matrix_generator``.
          3. Zero every edge between sites farther apart than
             ``self.distance_threshold``, warning if unusually strong edges
             were dropped that way.
          4. Run ``markov_clustering`` with ``self.markov_parameters``.

        Args:
            st (SiteTrajectory)
        Returns:
            The clusters as returned by ``markov_clustering``.
        """
        # -- Compute jump statistics
        if not st.site_network.has_attribute('n_ij'):
            ja = JumpAnalysis()
            ja.run(st)

        pbcc = PBCCalculator(st.site_network.structure.cell)

        # -- Build connectivity_matrix
        # Copied because edges are zeroed in place below
        connectivity_matrix = self.connectivity_matrix_generator(
            st.site_network).copy()
        n_sites_before = st.site_network.n_sites
        assert n_sites_before == connectivity_matrix.shape[0]

        centers_before = st.site_network.centers

        # For diagnostic purposes: statistics over the off-diagonal edges.
        # (`np.float` was removed in NumPy 1.24; the builtin is equivalent.)
        no_diag_graph = connectivity_matrix.astype(dtype=float, copy=True)
        np.fill_diagonal(no_diag_graph, np.nan)
        # Rather arbitrary, but this is really just an alarm for if things
        # are really, really wrong
        edge_threshold = np.nanmean(
            no_diag_graph) + 3 * np.nanstd(no_diag_graph)
        n_alarming_ignored_edges = 0

        # Apply distance threshold
        for i in range(n_sites_before):
            # Only sites after i are examined; the symmetric entry is
            # cleared alongside.
            dists = pbcc.distances(centers_before[i], centers_before[i + 1:])
            js_too_far = np.where(dists > self.distance_threshold)[0]
            js_too_far += i + 1  # Offsets within the slice -> site indices

            if np.any(connectivity_matrix[i, js_too_far] > edge_threshold) or \
               np.any(connectivity_matrix[js_too_far, i] > edge_threshold):
                n_alarming_ignored_edges += 1

            connectivity_matrix[i, js_too_far] = 0
            connectivity_matrix[js_too_far, i] = 0  # Symmetry

        if n_alarming_ignored_edges > 0:
            logger.warning(
                "  At least %i site pairs with high (z-score > 3) fluxes were over the given distance cutoff.\n"
                "  This may or may not be a problem; but if `distance_threshold` is low, consider raising it."
                % n_alarming_ignored_edges)

        # -- Do Markov Clustering
        clusters = markov_clustering(connectivity_matrix,
                                     **self.markov_parameters)
        return clusters
Пример #8
0
    def run(self, sn):
        """Return a copy of `sn` with every center replaced by ``self.n`` noisy copies.

        Each center is repeated ``self.n`` times, perturbed by standard-normal
        noise scaled by ``self.sigma``, and then passed through
        ``PBCCalculator.wrap_points``.

        Args:
            sn (SiteNetwork)
        Returns:
            A ``SiteNetwork`` copy carrying the perturbed centers.
        """
        assert isinstance(sn, SiteNetwork)
        result = sn.copy()
        pbcc = PBCCalculator(sn.structure.cell)

        n_original = len(result.centers)
        jittered = np.repeat(result.centers, self.n, axis=0)
        assert len(jittered) == self.n * n_original

        noise = np.random.standard_normal(size=jittered.shape)
        jittered += self.sigma * noise
        pbcc.wrap_points(jittered)

        result.centers = jittered
        return result
    def _get_sites_to_merge(self, st, threshold=0):
        """Group sites that are connected according to an edge attribute.

        The edge attribute named by ``self.attrname`` is turned into a boolean
        connectivity matrix by ``self.relation(attrmat, threshold)``. Edges
        are then removed between sites farther apart than
        ``self.distance_threshold`` and, if ``self.forbid_multiple_occupancy``
        is set, between sites that are occupied in the same frame. The
        connected components of what remains are the merge groups.

        Args:
            st (SiteTrajectory)
            threshold: Passed through to ``self.relation``. Defaults to 0.
        Returns:
            list of ndarray: one array of site indices per merge group.
        """
        sn = st.site_network

        attrmat = getattr(sn, self.attrname)
        assert attrmat.shape == (
            sn.n_sites, sn.n_sites
        ), "`attrname` doesn't seem to indicate an edge property."
        connmat = self.relation(attrmat, threshold)

        # Apply distance threshold
        if self.distance_threshold < np.inf:
            pbcc = PBCCalculator(sn.structure.cell)
            centers = sn.centers
            for i in range(sn.n_sites):
                # Only sites after i are examined; the symmetric entry is
                # cleared alongside.
                dists = pbcc.distances(centers[i], centers[i + 1:])
                js_too_far = np.where(dists > self.distance_threshold)[0]
                js_too_far += i + 1  # Offsets within the slice -> site indices

                connmat[i, js_too_far] = False
                connmat[js_too_far, i] = False  # Symmetry

        if self.forbid_multiple_occupancy:
            for frame in st.traj:
                # Negative entries mark unknown assignments; skip them
                occupied = [s for s in frame if s >= 0]
                for site in occupied:
                    # Can't merge an occupied site with other simultaneously
                    # occupied sites
                    connmat[site, occupied] = False

        # Everything is always mergeable with itself.
        np.fill_diagonal(connmat, True)

        # Get mergeable groups
        n_merged_sites, labels = connected_components(
            connmat, directed=self.directed, connection=self.connection)
        # MergeSites will check pairwise distances; we just need to make it the
        # right format.
        return [np.where(labels == lbl)[0] for lbl in range(n_merged_sites)]
Пример #10
0
    def func(atoms, **kwargs):
        nonlocal pbcc, dmat, connmat, newtags, layer_mask
        # preallocate buffers
        if pbcc is None:
            pbcc = PBCCalculator(atoms.cell)
            dmat = np.empty(shape=(len(atoms), len(atoms)))
            connmat = np.empty(shape=(len(atoms), len(atoms)), dtype=np.bool)
            newtags = np.empty(shape=len(atoms), dtype=np.int)
            layer_mask = np.empty(shape=len(atoms), dtype=np.bool)

        tags = groupfunc(atoms, **kwargs)
        layers = np.unique(tags)
        layers.sort()
        newtags.fill(-1)

        pbcc.pairwise_distances(atoms.positions, out=dmat)
        np.less_equal(dmat, cutoff, out=connmat)

        agreegrp_conns = []
        nexttag = 0
        for layer in layers:
            np.equal(tags, layer, out=layer_mask)
            layer_conrows = connmat[layer_mask]
            layer_conmat = layer_conrows[:, layer_mask]
            n_groups_layer, group_tags = connected_components(layer_conmat,
                                                              directed=False)
            group_tags += nexttag
            newtags[layer_mask] = group_tags
            neighbor_groups = newtags[np.logical_or.reduce(layer_conrows,
                                                           axis=0)]
            agreegrp_conns.append(neighbor_groups)
            nexttag += n_groups_layer

        agreegrp_connmat = np.zeros(shape=(nexttag + 1, nexttag + 1),
                                    dtype=np.bool)
        for agreegrp, neighbors in enumerate(agreegrp_conns):
            agreegrp_connmat[agreegrp, neighbors] = True
        agreegrp_connmat = agreegrp_connmat[:-1, :-1]

        agreegrp_connmat |= agreegrp_connmat.T

        return newtags, np.arange(nexttag), agreegrp_connmat
Пример #11
0
def plot_atoms(atoms, positions = None, hide_species = (), wrap = False, fig = None, ax = None, i = None):
    """Scatter-plot the atoms of `atoms` in 3D and outline the cell.

    Args:
        atoms: Structure providing chemical symbols, positions and the cell.
        positions: Optional positions to plot instead of
            ``atoms.get_positions()``; one row per atom of `atoms`.
        hide_species: Chemical symbols whose atoms are left out.
        wrap (bool): If True, the plotted points are first passed through
            ``PBCCalculator.wrap_points``.
        fig, i: Not used in this function body (presumably consumed by a
            wrapping decorator; verify against the caller).
        ax: Axes object to draw on.
    """
    symbols = atoms.get_chemical_symbols()
    mask = np.array([sym not in hide_species for sym in symbols], dtype=bool)

    pts = atoms.get_positions() if positions is None else positions
    pts = np.asarray(pts)[mask]
    species = [sym for sym, keep in zip(symbols, mask) if keep]

    if wrap:
        # Wrap the points actually being plotted, so that `positions` and
        # `hide_species` are still honored and `pts` stays aligned with
        # `species`. Copy first: wrapping is done in place.
        pbcc = PBCCalculator(atoms.cell)
        pts = pts.copy()
        pbcc.wrap_points(pts)

    ax.scatter(pts[:,0], pts[:,1], pts[:,2],
               c = [color_for_species(s) for s in species],
               s = [20.0 * ase.data.covalent_radii[ase.data.atomic_numbers[s]] for s in species])

    # Collect the line segments outlining the cell
    all_cvecs = []
    whos_left = set(range(len(atoms.cell)))
    for idx, cvec1 in enumerate(atoms.cell):
        # From the origin along this cell vector
        all_cvecs.append(np.array([[0.0, 0.0, 0.0], cvec1]))
        # From the tip of this cell vector along each of the other ones
        for cvec2 in atoms.cell[list(whos_left - {idx})]:
            all_cvecs.append(np.array([cvec1, cvec1 + cvec2]))
    for idx, cvec1 in enumerate(atoms.cell):
        # From the sum of the other cell vectors along this one
        start = np.sum(atoms.cell[list(whos_left - {idx})], axis = 0)
        all_cvecs.append(np.array([start, start + cvec1]))

    for cvec in all_cvecs:
        ax.plot(cvec[:,0],
                cvec[:,1],
                cvec[:,2],
                color = "gray",
                alpha=0.5,
                linewidth = 0.7,
                linestyle="--")

    set_axes_equal(ax)
        def replace_with_closer(st, mobile_atom, before_site, start_frame,
                                after_site, end_frame):
            """Assign each frame of a gap to the nearer of its two bounding sites.

            For every frame in ``[start_frame, end_frame)`` the real position
            of `mobile_atom` is compared with the centers of `before_site`
            and `after_site`; the frame gets `before_site` if that center is
            strictly closer, otherwise `after_site`.

            Returns:
                ``SiteTrajectory.SITE_UNKNOWN`` if either bounding site is
                unknown; otherwise a float ndarray of length
                ``end_frame - start_frame`` holding the chosen site indices.
            """
            if before_site == SiteTrajectory.SITE_UNKNOWN or \
               after_site == SiteTrajectory.SITE_UNKNOWN:
                return SiteTrajectory.SITE_UNKNOWN

            # Build the calculator under a fresh local name. Testing and then
            # assigning `pbcc` here without a `nonlocal` declaration makes it
            # a function-local name, so the test itself raises
            # UnboundLocalError.
            calc = PBCCalculator(st.site_network.structure.cell)
            centers = st.site_network.centers
            n_frames = end_frame - start_frame
            out = np.empty(shape=n_frames)
            for i in range(n_frames):
                # Refilled every iteration because the call below is made
                # with in_place=True (presumably overwriting `ptbuf`).
                ptbuf[0] = centers[before_site]
                ptbuf[1] = centers[after_site]
                calc.distances(st.real_trajectory[start_frame + i,
                                                  mobile_atom],
                               ptbuf,
                               in_place=True,
                               out=distbuf)
                if distbuf[0] < distbuf[1]:
                    out[i] = before_site
                else:
                    out[i] = after_site
            return out
Пример #13
0
        def cfunc(sn):
            """Build the site connectivity from jump probabilities, lags and distances.

            Returns ``(p_ij + jump_lag_coeff * L) * (distance_coeff * D +
            (1 - distance_coeff))``, where ``L`` is a Gaussian of
            ``jump_lag - 1`` with width ``jump_lag_sigma`` (zeroed where the
            lag exceeds ``jump_lag_cutoff``) and ``D`` is a Gaussian of the
            pairwise center distances with width ``distance_sigma``.
            """
            def gaussian_inplace(values, sigma):
                # Overwrite `values` with exp(-0.5 * (values / sigma) ** 2)
                values /= sigma
                np.square(values, out=values)
                values *= -0.5
                np.exp(values, out=values)
                return values

            # Jump-lag term. The lag is centered around 1 since that's the
            # minimum lag (one frame); infinite lags end up as exactly 0.
            lag = sn.jump_lag
            lag_term = lag.copy()
            lag_term -= 1.0
            gaussian_inplace(lag_term, jump_lag_sigma)
            lag_term[lag > jump_lag_cutoff] = 0.

            # Distance term: strongly boosts the similarity of *very* close
            # sites.
            calc = PBCCalculator(sn.structure.cell)
            dist_term = gaussian_inplace(
                calc.pairwise_distances(sn.centers).copy(), distance_sigma)

            flux = sn.p_ij + jump_lag_coeff * lag_term
            weight = distance_coeff * dist_term + (1 - distance_coeff)
            return flux * weight
Пример #14
0
    def run(self, st):
        """Compute convex-hull volumes and surface areas of every site's positions.

        For each site, the positions assigned to it are recentered around
        ``self.n_recenterings`` different points of the set (shifted so the
        chosen point sits at the cell centroid, then wrapped), a convex hull
        is built each time, and the hull with the smallest volume is kept.

        Adds the ``site_volumes`` and ``site_surface_areas`` attributes to
        ``st.site_network``. Both are NaN for a site that has no positions or
        for which QHull fails on every recentering.

        Args:
            st (SiteTrajectory)
        """
        # `np.float` was removed in NumPy 1.24; the builtin is equivalent.
        vols = np.empty(shape = st.site_network.n_sites, dtype = float)
        areas = np.empty(shape = st.site_network.n_sites, dtype = float)

        pbcc = PBCCalculator(st.site_network.structure.cell)

        for site in range(st.site_network.n_sites):
            pos = st.real_positions_for_site(site)

            # `pos` is shifted and wrapped in place below, so it must be a
            # private copy rather than a view of the trajectory.
            assert pos.flags['OWNDATA']

            if len(pos) == 0:
                # Nothing to recenter around and no hull to build
                vols[site] = np.nan
                areas[site] = np.nan
                continue

            vol = np.inf
            area = np.nan
            for i in range(self.n_recenterings):
                # Recenter around points spread evenly through `pos`
                offset = pbcc.cell_centroid - pos[int(i * (len(pos)/self.n_recenterings))]
                pos += offset
                pbcc.wrap_points(pos)

                try:
                    hull = ConvexHull(pos)
                except QhullError as qhe:
                    print("For site %i, iter %i: %s" % (site, i, qhe))
                    continue

                if hull.volume < vol:
                    vol = hull.volume
                    area = hull.area

            # `vol` is still infinite if no recentering produced a hull;
            # report that as NaN rather than as an infinite volume.
            vols[site] = vol if np.isfinite(vol) else np.nan
            areas[site] = area

        st.site_network.add_site_attribute('site_volumes', vols)
        st.site_network.add_site_attribute('site_surface_areas', areas)
Пример #15
0
class LandmarkAnalysis(object):
    """Track a mobile species through a fixed lattice using landmark vectors."""
    def __init__(self,
                 clustering_algorithm='dotprod',
                 clustering_params=None,
                 cutoff=2.0,
                 minimum_site_occupancy=0.1,
                 peak_evening='none',
                 weighted_site_positions=True,
                 check_for_zero_landmarks=True,
                 static_movement_threshold=1.0,
                 dynamic_lattice_mapping=False,
                 relaxed_lattice_checks=False,
                 max_mobile_per_site=1,
                 force_no_memmap=False,
                 verbose=True):
        """
        :param str clustering_algorithm: Name of the module under the sibling
            ``cluster`` package whose ``do_landmark_clustering`` function is
            used to cluster the landmark vectors.
        :param dict clustering_params: Parameters for the chosen
            clustering_algorithm. Defaults to an empty dict.
        :param double cutoff: The distance cutoff for the landmark vectors. (unitless)
        :param double minimum_site_occupancy = 0.1: Minimum occupancy (% of time occupied)
            for a site to qualify as such.
        :param str peak_evening: Whether and what kind of peak "evening" to apply;
            that is, processing that makes all large peaks in the landmark vector
            more similar in magnitude. This can help in site clustering.

            Valid options: 'none', 'clip'
        :param bool weighted_site_positions: When computing site positions, whether
            to weight the average by assignment confidence.
        :param bool check_for_zero_landmarks: Whether to check for and raise exceptions
            when all-zero landmark vectors are computed.
        :param float static_movement_threshold: (Angstrom) the maximum allowed
            distance between an instantaneous static atom position and its ideal position.
        :param bool dynamic_lattice_mapping: Whether to dynamically decide each
            frame which static atom represents each average lattice position;
            this allows the LandmarkAnalysis to deal with, say, a rare exchange of
            two static atoms that does not change the structure of the lattice.

            It does NOT allow LandmarkAnalysis to deal with lattices whose structures
            actually change over the course of the trajectory.

            In certain cases this is better dealt with by MergeSitesByDynamics.
        :param bool relaxed_lattice_checks: Stored on the instance; not read
            anywhere within this class.
        :param int max_mobile_per_site: The maximum number of mobile atoms that can
            be assigned to a single site without throwing an error. Regardless of the
            value, assignments of more than one mobile atom to a single site will
            be recorded and reported.

            Setting this to 2 can be necessary for very diffusive, liquid-like
            materials at high temperatures.

            Statistics related to this are reported in self.avg_mobile_per_site
            and self.n_multiple_assignments.
        :param bool force_no_memmap: if True, landmark vectors will be stored only in memory.
            Only useful if access to landmark vectors after the analysis has run is desired.
        :param bool verbose: If `True`, progress bars and messages will be printed to stdout.
        :raises ValueError: if `peak_evening` is not one of the valid options.
        """

        self._cutoff = cutoff
        self._minimum_site_occupancy = minimum_site_occupancy

        self._cluster_algo = clustering_algorithm
        # A fresh dict per instance; a `{}` default would be shared between
        # all instances created without explicit parameters.
        self._clustering_params = {} if clustering_params is None else clustering_params

        if peak_evening not in ['none', 'clip']:
            raise ValueError("Invalid value `%s` for peak_evening" %
                             peak_evening)
        self._peak_evening = peak_evening

        self.verbose = verbose
        self.check_for_zero_landmarks = check_for_zero_landmarks
        self.weighted_site_positions = weighted_site_positions
        self.dynamic_lattice_mapping = dynamic_lattice_mapping
        self.relaxed_lattice_checks = relaxed_lattice_checks

        self._landmark_vectors = None
        self._landmark_dimension = None

        self.static_movement_threshold = static_movement_threshold
        self.max_mobile_per_site = max_mobile_per_site

        self.force_no_memmap = force_no_memmap

        self._has_run = False

    @property
    def cutoff(self):
        """The distance cutoff for the landmark vectors."""
        return self._cutoff

    @analysis_result
    def landmark_vectors(self):
        """A read-only view of the computed landmark vectors."""
        view = self._landmark_vectors[:]
        view.flags.writeable = False
        return view

    @analysis_result
    def landmark_dimension(self):
        """The length of one landmark vector (the number of input sites)."""
        return self._landmark_dimension

    def run(self, sn, frames):
        """Run the landmark analysis.

        The input SiteNetwork is a network of predicted sites; its sites will
        be used as the "basis" for the landmark vectors.

        Takes a SiteNetwork and returns a SiteTrajectory.

        :param SiteNetwork sn: The predicted sites; must have vertices.
        :param ndarray frames: Positions, n_frames x sn.n_total x 3.
        :raises ValueError: if the analysis has already been run, `frames` has
            the wrong shape, `sn` has no vertices, fewer sites than mobile
            particles are identified, or more than ``max_mobile_per_site``
            mobile particles are assigned to one site in the same frame.
        """
        assert isinstance(sn, SiteNetwork)

        if self._has_run:
            raise ValueError("Cannot rerun LandmarkAnalysis!")

        if frames.shape[1:] != (sn.n_total, 3):
            raise ValueError("Wrong shape %s for frames." % frames.shape)

        if sn.vertices is None:
            raise ValueError("Input SiteNetwork must have vertices")

        n_frames = len(frames)

        if self.verbose:
            print("--- Running Landmark Analysis ---")

        # Create PBCCalculator
        self._pbcc = PBCCalculator(sn.structure.cell)

        # -- Step 1: Compute site-to-vertex distances
        self._landmark_dimension = sn.n_sites

        # Pad every vertex list with -1 up to the longest one so they fit in
        # a rectangular array; the matching distances stay NaN.
        longest_vert_set = np.max([len(v) for v in sn.vertices])
        verts_np = np.array(
            [v + [-1] * (longest_vert_set - len(v)) for v in sn.vertices])
        # `np.float` was removed in NumPy 1.24; the builtin is equivalent.
        site_vert_dists = np.empty(shape=verts_np.shape, dtype=float)
        site_vert_dists.fill(np.nan)

        for i, polyhedron in enumerate(sn.vertices):
            verts_poses = sn.static_structure.get_positions()[polyhedron]
            dists = self._pbcc.distances(sn.centers[i], verts_poses)
            site_vert_dists[i, :len(polyhedron)] = dists

        # -- Step 2: Compute landmark vectors
        if self.verbose:
            print("  - computing landmark vectors -")
        # Compute landmark vectors

        # The dimension of one landmark vector is the number of Voronoi regions
        shape = (n_frames * sn.n_mobile, self._landmark_dimension)

        with tempfile.NamedTemporaryFile() as mmap_backing:
            if self.force_no_memmap:
                self._landmark_vectors = np.empty(shape=shape, dtype=float)
            else:
                self._landmark_vectors = np.memmap(mmap_backing.name,
                                                   mode='w+',
                                                   dtype=float,
                                                   shape=shape)

            helpers._fill_landmark_vectors(
                self,
                sn,
                verts_np,
                site_vert_dists,
                frames,
                check_for_zeros=self.check_for_zero_landmarks,
                tqdm=tqdm)

            # -- Step 3: Cluster landmark vectors
            if self.verbose:
                print("  - clustering landmark vectors -")
            #  - Preprocess -
            self._do_peak_evening()

            #  - Cluster -
            cluster_func = importlib.import_module(
                "..cluster." + self._cluster_algo,
                package=__name__).do_landmark_clustering

            cluster_counts, lmk_lbls, lmk_confs = \
                cluster_func(self._landmark_vectors,
                             clustering_params = self._clustering_params,
                             min_samples = self._minimum_site_occupancy / float(sn.n_mobile),
                             verbose = self.verbose)

        if self.verbose:
            print("    Failed to assign %i%% of mobile particle positions to sites." % (
                100.0 * np.sum(lmk_lbls < 0) / float(len(lmk_lbls))))

        # reshape labels and confidences
        lmk_lbls.shape = (n_frames, sn.n_mobile)
        lmk_confs.shape = (n_frames, sn.n_mobile)

        n_sites = len(cluster_counts)

        if n_sites < sn.n_mobile:
            raise ValueError(
                "There are %i mobile particles, but only identified %i sites. Check clustering_params."
                % (sn.n_mobile, n_sites))

        if self.verbose:
            print("    Identified %i sites with assignment counts %s" % (
                n_sites, cluster_counts))

        # Check that multiple particles are never assigned to one site at the
        # same time, cause that would be wrong.
        n_more_than_ones = 0
        avg_mobile_per_site = 0
        divisor = 0
        for frame_i, site_frame in enumerate(lmk_lbls):
            # Negative labels are unassigned positions and are not counted
            sites, counts = np.unique(site_frame[site_frame >= 0],
                                      return_counts=True)
            count_msk = counts > self.max_mobile_per_site
            if np.any(count_msk):
                # Report the offending site labels themselves, not their
                # positions within `counts`.
                raise ValueError(
                    "%i mobile particles were assigned to only %i site(s) (%s) at frame %i."
                    % (np.sum(counts[count_msk]), np.sum(count_msk),
                       sites[count_msk], frame_i))
            n_more_than_ones += np.sum(counts > 1)
            avg_mobile_per_site += np.sum(counts)
            divisor += len(counts)

        self.n_multiple_assignments = n_more_than_ones
        self.avg_mobile_per_site = avg_mobile_per_site / float(divisor)

        # -- Do output
        # - Compute site centers
        site_centers = np.empty(shape=(n_sites, 3), dtype=frames.dtype)

        # Loop-invariant: the mobile atoms' positions in every frame
        mobile_frames = frames[:, sn.mobile_mask]

        for site in range(n_sites):
            mask = lmk_lbls == site
            pts = mobile_frames[mask]
            if self.weighted_site_positions:
                site_centers[site] = self._pbcc.average(
                    pts, weights=lmk_confs[mask])
            else:
                site_centers[site] = self._pbcc.average(pts)

        # Build output objects
        out_sn = sn.copy()

        out_sn.centers = site_centers
        assert out_sn.vertices is None

        out_st = SiteTrajectory(out_sn, lmk_lbls, lmk_confs)
        out_st.set_real_traj(frames)
        self._has_run = True

        return out_st

    # -------- "private" methods --------

    def _do_peak_evening(self):
        """Apply the configured peak evening to the landmark vectors in place."""
        if self._peak_evening == 'none':
            return
        elif self._peak_evening == 'clip':
            lvec_peaks = np.max(self._landmark_vectors, axis=1)
            # Clip all peaks to the lowest "normal" (stdev.) peak
            lvec_clip = np.mean(lvec_peaks) - np.std(lvec_peaks)
            # Do the clipping
            self._landmark_vectors[
                self._landmark_vectors > lvec_clip] = lvec_clip
Пример #16
0
    def run(self, sn, frames):
        """Run the landmark analysis.

        The input SiteNetwork is a network of predicted sites; its sites will
        be used as the "basis" for the landmark vectors.

        Takes a SiteNetwork and returns a SiteTrajectory.

        Args:
            sn (SiteNetwork): The landmark basis; must have vertices.
            frames (ndarray): The trajectory; its shape must be
                ``(n_frames, sn.n_total, 3)``.
        Returns:
            A SiteTrajectory built on a copy of `sn` whose centers are the
            identified sites, with `frames` set as its real trajectory.
        Raises:
            ValueError: If the analysis was already run, if `frames` has the
                wrong shape, if `sn` has no vertices, if fewer sites than
                mobile particles were identified, or if more than
                ``self.max_mobile_per_site`` particles share one site in a
                frame.
        """
        assert isinstance(sn, SiteNetwork)

        if self._has_run:
            raise ValueError("Cannot rerun LandmarkAnalysis!")

        if frames.shape[1:] != (sn.n_total, 3):
            # The shape must be wrapped in a 1-tuple: a bare tuple on the
            # right of `%` is unpacked into several format arguments and
            # raises TypeError instead of the intended ValueError.
            raise ValueError("Wrong shape %s for frames." % (frames.shape, ))

        if sn.vertices is None:
            raise ValueError("Input SiteNetwork must have vertices")

        n_frames = len(frames)

        if self.verbose:
            print("--- Running Landmark Analysis ---")

        # Create PBCCalculator
        self._pbcc = PBCCalculator(sn.structure.cell)

        # -- Step 1: Compute site-to-vertex distances
        self._landmark_dimension = sn.n_sites

        longest_vert_set = np.max([len(v) for v in sn.vertices])
        # Pad each vertex list with -1 so that they form a rectangular array
        verts_np = np.array(
            [v + [-1] * (longest_vert_set - len(v)) for v in sn.vertices])
        # Unused (padding) entries stay NaN
        site_vert_dists = np.empty(shape=verts_np.shape, dtype=float)
        site_vert_dists.fill(np.nan)

        static_positions = sn.static_structure.get_positions()
        for i, polyhedron in enumerate(sn.vertices):
            verts_poses = static_positions[polyhedron]
            dists = self._pbcc.distances(sn.centers[i], verts_poses)
            site_vert_dists[i, :len(polyhedron)] = dists

        # -- Step 2: Compute landmark vectors
        if self.verbose:
            print("  - computing landmark vectors -")

        # The dimension of one landmark vector is the number of Voronoi regions
        shape = (n_frames * sn.n_mobile, self._landmark_dimension)

        # The landmark vectors may be backed by a temporary file, so
        # everything that reads them happens inside this `with`.
        with tempfile.NamedTemporaryFile() as mmap_backing:
            if self.force_no_memmap:
                self._landmark_vectors = np.empty(shape=shape, dtype=float)
            else:
                self._landmark_vectors = np.memmap(mmap_backing.name,
                                                   mode='w+',
                                                   dtype=float,
                                                   shape=shape)

            helpers._fill_landmark_vectors(
                self,
                sn,
                verts_np,
                site_vert_dists,
                frames,
                check_for_zeros=self.check_for_zero_landmarks,
                tqdm=tqdm)

            # -- Step 3: Cluster landmark vectors
            if self.verbose:
                print("  - clustering landmark vectors -")
            #  - Preprocess -
            self._do_peak_evening()

            #  - Cluster -
            cluster_func = importlib.import_module(
                "..cluster." + self._cluster_algo,
                package=__name__).do_landmark_clustering

            cluster_counts, lmk_lbls, lmk_confs = cluster_func(
                self._landmark_vectors,
                clustering_params=self._clustering_params,
                min_samples=self._minimum_site_occupancy / float(sn.n_mobile),
                verbose=self.verbose)

        if self.verbose:
            print("    Failed to assign %i%% of mobile particle positions to sites." % (
                100.0 * np.sum(lmk_lbls < 0) / float(len(lmk_lbls))))

        # reshape labels and confidences
        lmk_lbls.shape = (n_frames, sn.n_mobile)
        lmk_confs.shape = (n_frames, sn.n_mobile)

        n_sites = len(cluster_counts)

        if n_sites < sn.n_mobile:
            raise ValueError(
                "There are %i mobile particles, but only identified %i sites. Check clustering_params."
                % (sn.n_mobile, n_sites))

        if self.verbose:
            print("    Identified %i sites with assignment counts %s" % (
                n_sites, cluster_counts))

        # Check that multiple particles are never assigned to one site at the
        # same time, cause that would be wrong.
        n_more_than_ones = 0
        avg_mobile_per_site = 0
        divisor = 0
        for frame_i, site_frame in enumerate(lmk_lbls):
            # Negative labels mark unassigned positions; ignore them
            _, counts = np.unique(site_frame[site_frame >= 0],
                                  return_counts=True)
            count_msk = counts > self.max_mobile_per_site
            if np.any(count_msk):
                raise ValueError(
                    "%i mobile particles were assigned to only %i site(s) (%s) at frame %i."
                    % (np.sum(counts[count_msk]), np.sum(count_msk),
                       np.where(count_msk)[0], frame_i))
            n_more_than_ones += np.sum(counts > 1)
            avg_mobile_per_site += np.sum(counts)
            divisor += len(counts)

        self.n_multiple_assignments = n_more_than_ones
        self.avg_mobile_per_site = avg_mobile_per_site / float(divisor)

        # -- Do output
        # - Compute site centers
        site_centers = np.empty(shape=(n_sites, 3), dtype=frames.dtype)

        # Positions of the mobile atoms only; the same for every site
        mobile_frames = frames[:, sn.mobile_mask]
        for site in range(n_sites):
            mask = lmk_lbls == site
            pts = mobile_frames[mask]
            if self.weighted_site_positions:
                site_centers[site] = self._pbcc.average(
                    pts, weights=lmk_confs[mask])
            else:
                site_centers[site] = self._pbcc.average(pts)

        # Build output objects
        out_sn = sn.copy()

        out_sn.centers = site_centers
        assert out_sn.vertices is None

        out_st = SiteTrajectory(out_sn, lmk_lbls, lmk_confs)
        out_st.set_real_traj(frames)
        self._has_run = True

        return out_st
Пример #17
0
    def _get_sites_to_merge(self, st, coordinating_mask = None):
        """Group sites that are not separated by a significant energy barrier.

        Candidate pairs are those within ``self.maximum_pairwise_distance``
        of each other and with at least ``self.minimum_jumps_mergable``
        in ``n_ij``. For every candidate pair a single mobile atom is driven
        along the straight line between the two site centers, and the
        energies from ``self.calculator`` decide whether the pair stays
        mergable.

        Args:
            st: The site trajectory; jump statistics are computed on it if
                its site network has no ``n_ij`` attribute.
            coordinating_mask: Mask of the atoms kept around the driven
                atom. Defaults to ``sn.static_mask``; must not overlap the
                mobile mask.
        Returns:
            A list of arrays of site indexes, one array per strongly
            connected component of the mergable graph.
        """
        sn = st.site_network

        # -- Compute jump statistics
        if not sn.has_attribute('n_ij'):
            JumpAnalysis().run(st)

        centers = sn.centers
        if coordinating_mask is None:
            coordinating_mask = sn.static_mask
        else:
            assert not np.any(coordinating_mask & sn.mobile_mask)

        # -- Build images
        # The coordinating atoms plus one mobile atom, which gets appended
        # at the end and is therefore addressed with index -1.
        first_mobile = np.where(sn.mobile_mask)[0][0]
        driven_structure = sn.structure[coordinating_mask]
        driven_structure.extend(sn.structure[first_mobile])
        driven_atom = -1
        driven_structure.set_calculator(self.calculator)
        n_images = self.n_driven_images
        fractions = np.linspace(0, 1, n_images)
        energies = np.empty(shape = n_images)

        # -- Decide on pairs to check
        pbcc = PBCCalculator(sn.structure.cell)
        # At the start, all within distance cutoff are mergable
        mergable = pbcc.pairwise_distances(centers) <= self.maximum_pairwise_distance
        mergable &= sn.n_ij >= self.minimum_jumps_mergable

        # -- Check pairs' barriers
        # Symmetric, and diagonal is trivially true. Combinations avoids those cases.
        endpoint = centers[0].copy()
        candidate_pairs = (
            (a, b)
            for a, b in itertools.combinations(range(sn.n_sites), 2)
            if mergable[a, b] or mergable[b, a]
        )
        n_mergable = (np.sum(mergable) - sn.n_sites) // 2
        for i, j in tqdm(candidate_pairs, total = n_mergable):
            endpoint[:] = centers[j]
            # Moves `endpoint` (in place) to its minimum image w.r.t. site i
            pbcc.min_image(centers[i], endpoint)
            # Do coordinate driving
            vector = endpoint - centers[i]
            for image_i, fraction in enumerate(fractions):
                # position = centers[i] + fraction * vector, built in place
                driven_structure.positions[driven_atom] = vector
                driven_structure.positions[driven_atom] *= fraction
                driven_structure.positions[driven_atom] += centers[i]
                energies[image_i] = driven_structure.get_potential_energy()
            # Check barrier
            top = np.argmax(energies)
            # If it's an actual maxima barrier between them, then we want to
            # check its height. Otherwise, if there's no maxima between them,
            # they are in the same basin and stay mergable.
            if top not in (0, n_images - 1):
                peak_energy = energies[top]
                mergable[i, j] = (peak_energy - energies[0]) <= self.barrier_threshold
                mergable[j, i] = (peak_energy - energies[-1]) <= self.barrier_threshold

        # Get mergable groups
        n_merged_sites, labels = connected_components(
            mergable,
            directed = True,
            connection = 'strong'
        )
        # MergeSites will check pairwise distances; we just need to make it the
        # right format.
        return [np.where(labels == lbl)[0] for lbl in range(n_merged_sites)]
Пример #18
0
    def run(self, st, **kwargs):
        """Takes a ``SiteTrajectory`` and returns a new ``SiteTrajectory``.

        The groups of sites to merge come from ``self._get_sites_to_merge``;
        every group becomes one site of the new site network.

        Args:
            st: The site trajectory whose sites should be merged.
            **kwargs: Passed through to ``self._get_sites_to_merge``.
        Returns:
            A new ``SiteTrajectory`` (without confidences) on a copy of the
            site network that holds the merged sites.
        Raises:
            ValueError: If ``check_types`` is set but the site network has no
                type information, or if a site ends up in more than one
                merge group.
            InsufficientSitesError: If there would be fewer merged sites than
                mobile particles.
            MergedSitesTooDistantError: If sites in one group are farther
                than ``self.maximum_merge_distance`` from the group's first
                site.
        """

        if self.check_types and st.site_network.site_types is None:
            raise ValueError(
                "Cannot run a check_types=True MergeSites on a SiteTrajectory without type information."
            )

        # -- Compute jump statistics
        pbcc = PBCCalculator(st.site_network.structure.cell)
        site_centers = st.site_network.centers
        if self.check_types:
            site_types = st.site_network.site_types

        clusters = self._get_sites_to_merge(st, **kwargs)

        new_n_sites = len(clusters)

        logger.info(
            "After merging %i sites there will be %i sites for %i mobile particles"
            % (len(site_centers), new_n_sites, st.site_network.n_mobile))

        if new_n_sites < st.site_network.n_mobile:
            raise InsufficientSitesError(verb="Merging",
                                         n_sites=new_n_sites,
                                         n_mobile=st.site_network.n_mobile)

        # NOTE: builtin `int` is used instead of the `np.int` alias, which
        # was removed from NumPy; it is the same dtype.
        if self.check_types:
            new_types = np.empty(shape=new_n_sites, dtype=int)
        merge_verts = st.site_network.vertices is not None
        if merge_verts:
            new_verts = []

        # -- Merge Sites
        new_centers = np.empty(shape=(new_n_sites, 3),
                               dtype=st.site_network.centers.dtype)
        # translation[old site] -> new site; -1 means "not assigned yet"
        translation = np.empty(shape=st.site_network.n_sites, dtype=int)
        translation.fill(-1)

        for newsite in range(new_n_sites):
            mask = list(clusters[newsite])
            # Update translation table
            if np.any(translation[mask] != -1):
                # We've assigned a different cluster for this before... weird
                # degeneracy
                raise ValueError(
                    "Site merging tried to merge site(s) into more than one new site. This shouldn't happen."
                )
            translation[mask] = newsite

            to_merge = site_centers[mask]

            # Check distances
            if self.maximum_merge_distance is not None:
                dists = pbcc.distances(to_merge[0], to_merge[1:])
                if not np.all(dists <= self.maximum_merge_distance):
                    raise MergedSitesTooDistantError(
                        "Markov clustering tried to merge sites more than %.2f apart. Lower your distance_threshold?"
                        % self.maximum_merge_distance)

            # New site center: occupancy-weighted only when asked for.
            # (The two branches used to be swapped.)
            if self.weighted_spatial_average:
                occs = st.site_network.occupancies[mask]
                new_centers[newsite] = pbcc.average(to_merge, weights=occs)
            else:
                new_centers[newsite] = pbcc.average(to_merge)

            if self.check_types:
                assert np.all(site_types[mask] == site_types[mask][0])
                new_types[newsite] = site_types[mask][0]
            if merge_verts:
                # The merged site gets the union of its members' vertices
                new_verts.append(
                    set.union(
                        *[set(st.site_network.vertices[i]) for i in mask]))

        newsn = st.site_network.copy()
        newsn.centers = new_centers
        if self.check_types:
            newsn.site_types = new_types
        if merge_verts:
            newsn.vertices = new_verts

        newtraj = translation[st._traj]
        # Indexing with SITE_UNKNOWN picked up some arbitrary translation
        # entry; restore the "unknown" marker at those positions.
        newtraj[st._traj ==
                SiteTrajectory.SITE_UNKNOWN] = SiteTrajectory.SITE_UNKNOWN

        # It doesn't make sense to propagate confidence information through a
        # transform that might completely invalidate it
        newst = SiteTrajectory(newsn, newtraj, confidences=None)

        if st.real_trajectory is not None:
            newst.set_real_traj(st.real_trajectory)

        if self.set_merged_into:
            # Recorded on the ORIGINAL site network: which new site each old
            # site was merged into.
            if st.site_network.has_attribute("merged_into"):
                st.site_network.remove_attribute("merged_into")
            st.site_network.add_site_attribute("merged_into", translation)

        return newst
Пример #19
0
    def run(self, st):
        """Takes a SiteTrajectory and returns a SiteTrajectory, including a new SiteNetwork.

        Sites are grouped by ``self._markov_clustering`` applied to the
        ``p_ij`` matrix of the site network (computed with ``JumpAnalysis``
        if missing); every group becomes one site of the new network.

        Args:
            st: The site trajectory whose sites should be merged.
        Returns:
            A new SiteTrajectory (without confidences) on a copy of the site
            network that holds the merged sites.
        Raises:
            ValueError: If ``check_types`` is set but the site network has no
                type information, or if a site ends up in more than one
                cluster.
            AssertionError: If sites in one cluster are not all closer than
                ``self.distance_threshold`` to the cluster's first site, or
                do not share one type when ``check_types`` is set.
        """

        if self.check_types and st.site_network.site_types is None:
            raise ValueError(
                "Cannot run a check_types=True MergeSitesByDynamics on a SiteTrajectory without type information."
            )

        # Compute jump statistics
        if not st.site_network.has_attribute('p_ij'):
            ja = JumpAnalysis(verbose=self.verbose)
            ja.run(st)

        pbcc = PBCCalculator(st.site_network.structure.cell)
        site_centers = st.site_network.centers
        if self.check_types:
            site_types = st.site_network.site_types

        connectivity_matrix = st.site_network.p_ij
        assert st.site_network.n_sites == connectivity_matrix.shape[0]

        clusters = self._markov_clustering(connectivity_matrix,
                                           **self.markov_parameters)

        new_n_sites = len(clusters)

        if self.verbose:
            print("After merge there will be %i sites" % new_n_sites)

        # NOTE: builtin `int` is used instead of the `np.int` alias, which
        # was removed from NumPy; it is the same dtype.
        if self.check_types:
            new_types = np.empty(shape=new_n_sites, dtype=int)

        new_centers = np.empty(shape=(new_n_sites, 3),
                               dtype=st.site_network.centers.dtype)
        # translation[old site] -> new site; -1 means "not assigned yet"
        translation = np.empty(shape=st.site_network.n_sites, dtype=int)
        translation.fill(-1)

        for newsite in range(new_n_sites):
            mask = list(clusters[newsite])
            # Update translation table
            if np.any(translation[mask] != -1):
                # We've assigned a different cluster for this before... weird
                # degeneracy
                raise ValueError(
                    "Markov clustering tried to merge site(s) into more than one new site"
                )
            translation[mask] = newsite

            to_merge = site_centers[mask]

            # Check distances
            dists = pbcc.distances(to_merge[0], to_merge[1:])

            assert np.all(
                dists < self.distance_threshold
            ), "Markov clustering tried to merge sites more than %f apart -- this may be valid, and the distance threshold may need to be increased." % self.distance_threshold

            # New site center
            new_centers[newsite] = pbcc.average(to_merge)
            if self.check_types:
                assert np.all(site_types[mask] == site_types[mask][0])
                new_types[newsite] = site_types[mask][0]

        newsn = st.site_network.copy()
        newsn.centers = new_centers
        if self.check_types:
            newsn.site_types = new_types

        newtraj = translation[st._traj]
        # Indexing with SITE_UNKNOWN picked up some arbitrary translation
        # entry; restore the "unknown" marker at those positions.
        newtraj[st._traj ==
                SiteTrajectory.SITE_UNKNOWN] = SiteTrajectory.SITE_UNKNOWN

        # It doesn't make sense to propagate confidence information through a
        # transform that might completely invalidate it
        newst = SiteTrajectory(newsn, newtraj, confidences=None)

        if st.real_trajectory is not None:
            newst.set_real_traj(st.real_trajectory)

        return newst
Пример #20
0
    def _plot_edges(self, sn, ax = None, *args, **kwargs):
        """Draw the edges between the sites of `sn` as a Line3DCollection on `ax`.

        Which edge attributes of `sn` drive the plot is given by
        ``self.edge_mappings``, with the keys ``'intensity'`` (edge alpha,
        required), ``'width'`` (line width) and ``'group'`` (edge color,
        integer valued). Edges are drawn from each site to the minimum image
        of the other site; when that image is not the site itself, the image
        is additionally plotted as a faint extra site.

        Args:
            sn: The site network to plot.
            ax: The axes the line collection is added to.
        Returns:
            ``[]`` when there is no ``'intensity'`` mapping or nothing to
            draw; otherwise the result of ``self._site_layers`` for the
            extra (out-of-cell image) sites, or ``[]`` if there are none.
        Raises:
            KeyError: For an unknown key in ``self.edge_mappings``.
            ValueError: If an edge group has no usable entry in
                ``SiteNetworkPlotter.EDGE_GROUP_COLORS``.
        """
        if 'intensity' not in self.edge_mappings:
            return []

        pbcc = PBCCalculator(sn.structure.cell)

        n_sites = sn.n_sites
        centers = sn.centers

        # -- Edge attributes
        all_cs = None
        all_linewidths = None
        all_groups = None
        # Get value arrays as they exist
        for edgekey, attrname in self.edge_mappings.items():
            edgeval = getattr(sn, attrname)
            if edgekey == 'intensity':
                all_cs = edgeval.copy()
            elif edgekey == 'width':
                all_linewidths = edgeval.copy()
            elif edgekey == 'group':
                # builtin `int` replaces the removed `np.int` alias
                assert edgeval.dtype == int
                all_groups = edgeval
            else:
                raise KeyError("Invalid edge mapping key `%s`" % edgekey)

        do_widths = all_linewidths is not None
        do_groups = all_groups is not None

        # - Normalize
        # Ignore values on the diagonal since we ignore them in the loop
        diag_mask = np.ones(shape = all_cs.shape, dtype = bool)
        np.fill_diagonal(diag_mask, False)

        self._normalize(all_cs, diag_mask)

        if do_widths:
            self._normalize(all_linewidths, diag_mask)

        # -- Construct Line3DCollection segments

        # Whether an edge has already been added
        done_already = np.zeros(shape = (n_sites, n_sites), dtype = bool)
        # For the Line3DCollection
        segments = []
        cs = []
        linewidths = []
        groups = []
        # To plot minimum images that are outside unit cell
        sites_to_plot = []
        sites_to_plot_positions = []

        for i in range(n_sites):
            for j in range(n_sites):
                # No self edges
                if i == j:
                    continue
                # If was already done
                if done_already[i, j]:
                    continue
                # Ignore anything below the threshold
                if all_cs[i, j] <= self.min_color_threshold:
                    continue
                if do_widths and all_linewidths[i, j] <= self.min_width_threshold:
                    continue

                segment = np.empty(shape = (2, 3), dtype = centers.dtype)
                segment[0] = centers[i]
                ptbuf = centers[j].copy()

                # Moves ptbuf, in place, to its minimum image w.r.t. site i
                minimg = pbcc.min_image(segment[0], ptbuf)
                # NOTE(review): 111 appears to be the code for "the point
                # already was the minimum image" -- confirm against
                # PBCCalculator.min_image.
                was_already_min_img = minimg == 111

                segment[1] = ptbuf

                segments.append(segment)

                # If they are eachother's minimum image, then don't bother plotting
                # j -> i
                if was_already_min_img:
                    done_already[j, i] = True
                else:
                    # We'll plot it
                    sites_to_plot.append(j)
                    sites_to_plot_positions.append(segment[1])

                # The mean
                cs.append(np.mean([all_cs[i, j], all_cs[j, i]]))

                if do_widths:
                    linewidths.append(np.mean([all_linewidths[i, j], all_linewidths[j, i]]))
                if do_groups:
                    # Assumes symmetric
                    groups.append(all_groups[i, j])

                done_already[i, j] = True

        # -- Construct final Line3DCollection
        assert len(cs) == len(segments)

        if len(cs) == 0:
            return []

        lccolors = np.empty(shape = (len(cs), 4), dtype = float)
        # Group colors
        if do_groups:
            group_colors = SiteNetworkPlotter.EDGE_GROUP_COLORS
            for edge_i, group in enumerate(groups):
                if group >= len(group_colors) - 1:
                    raise ValueError("Too many groups, not enough group colors")
                lccolors[edge_i] = matplotlib.colors.to_rgba(group_colors[group])
        else:
            lccolors[:] = matplotlib.colors.to_rgba(SiteNetworkPlotter.EDGE_GROUP_COLORS[0])
        # Intensity alpha
        lccolors[:,3] = np.array(cs) * self.minmax_edge_alpha[1]
        lccolors[:,3] += self.minmax_edge_alpha[0]

        if do_widths:
            linewidths = np.asarray(linewidths)
            linewidths *= self.minmax_linewidth[1]
            linewidths += self.minmax_linewidth[0]
        else:
            linewidths = self.minmax_linewidth[1] * 0.5

        lc = Line3DCollection(segments, linewidths = linewidths, colors = lccolors, zorder = -20)
        ax.add_collection(lc)

        # -- Plot new sites
        if len(sites_to_plot) == 0:
            return []

        sn2 = sn[sites_to_plot]
        sn2.update_centers(np.asarray(sites_to_plot_positions))
        pts_params = dict(self.plot_points_params)
        pts_params['alpha'] = 0.2
        return self._site_layers(sn2, pts_params, same_normalization = True)
Пример #21
0
def periodic_voronoi(structure, logfile=sys.stdout):
    """Compute a periodic Voronoi decomposition of `structure` with ``qvoronoi``.

    The structure is repeated into a 3x3x3 supercell, ``qvoronoi`` is run on
    it as a subprocess, and only the Voronoi vertices that fall in the
    central image of the cell are kept.

    :param ASE.Atoms structure:
    :param logfile: A writable text stream for progress messages and the
        output of ``qvoronoi``.
    :returns: A tuple ``(centers, vertices, ridges_in_main_cell)``:
        the kept Voronoi vertex positions shifted back into the original
        cell; for each of them, a tuple of the indexes (in `structure`) of
        the atoms defining it; and a set of frozensets of indexes into
        `centers`, one per kept ridge.
    :raises RuntimeError: If ``qvoronoi`` exits with a non-zero code, or if
        none of its Voronoi vertices lies in the central cell.
    """
    # Local import so that this function does not depend on a file-level
    # `import os`.
    import os

    pbcc = PBCCalculator(structure.cell)

    # Make a 3x3x3 supercell
    supercell = structure.repeat((3, 3, 3))

    logfile.write("Qvoronoi ---")

    # Run qhull
    infile = tempfile.NamedTemporaryFile('w',
                                         prefix='qvor',
                                         suffix='.in',
                                         delete=False)
    try:
        with infile, \
             tempfile.NamedTemporaryFile('r',
                                         prefix='qvor',
                                         suffix='.out',
                                         delete=True) as outfile:
            #  -- Write input file --
            infile.write("3\n")  # num of dimensions
            infile.write("%i\n" % len(supercell))  # num of points
            np.savetxt(infile, supercell.get_positions(), fmt='%.16f')
            infile.flush()

            cmdline = [
                "qvoronoi", "TI", infile.name, "FF", "Fv", "TO", outfile.name
            ]
            # Text mode, so the captured output can go to a text logfile
            process = subprocess.Popen(cmdline,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.STDOUT,
                                       universal_newlines=True)
            # communicate() drains the pipe while waiting; wait() followed
            # by read() can deadlock once the pipe buffer fills up.
            process_output, _ = process.communicate()
            retcode = process.returncode
            logfile.write(process_output)
            if retcode != 0:
                raise RuntimeError("qvoronoi returned exit code %i" % retcode)

            qhull_output = outfile.read()
    finally:
        # The input file is created with delete=False, so it has to be
        # removed explicitly or it stays behind in the temp directory.
        try:
            os.remove(infile.name)
        except OSError:
            pass

    # Raw string: the pattern is the same, but `\-`, `\.` and `\(` are no
    # longer invalid string escapes.
    facets_regex = re.compile(
        r"""
                -[ \t](?P<facetkey>f[0-9]+)  [\n]
                [ \t]*-[ ]flags: .* [\n]
                [ \t]*-[ ]normal: .* [\n]
                [ \t]*-[ ]offset: .* [\n]
                [ \t]*-[ ]center:(?P<center>([ ][\-]?[0-9]*[\.]?[0-9]*(e[-?[0-9]+)?){3}) [ \t] [\n]
                [ \t]*-[ ]vertices:(?P<vertices>([ ]p[0-9]+\(v[0-9]+\))+) [ \t]? [\n]
                [ \t]*-[ ]neighboring[ ]facets:(?P<neighbors>([ ]f[0-9]+)+)
                """, re.X | re.M)

    vertices_re = re.compile('(?<=p)[0-9]+')

    # Allocate stuff
    centers = []
    vertices = []
    facet_indexes_taken = set()

    facet_index_to_our_index = {}
    all_facets_centers = []

    # ---- Read facets
    facet_index = -1
    next_our_index = 0
    end_of_facets = None
    for facet_match in facets_regex.finditer(qhull_output):
        # A real list, not a lazy `map` object: np.asarray() of a Python 3
        # `map` gives a useless 0-d object array.
        center = np.asarray(
            [float(c) for c in facet_match.group('center').split()])
        facet_index += 1

        all_facets_centers.append(center)

        if not pbcc.is_in_image_of_cell(center, (1, 1, 1)):
            continue

        verts = [
            int(v) for v in vertices_re.findall(facet_match.group('vertices'))
        ]
        # Supercell atom index -> index of the atom in the original cell
        verts_in_main_cell = tuple(v % len(structure) for v in verts)

        facet_indexes_taken.add(facet_index)

        centers.append(center)
        vertices.append(verts_in_main_cell)

        facet_index_to_our_index[facet_index] = next_our_index

        next_our_index += 1

        end_of_facets = facet_match.end()

    facet_count = facet_index + 1

    logfile.write("  qhull gave %i vertices; kept %i" %
                  (facet_count, len(centers)))

    if end_of_facets is None:
        # Nothing was kept; everything below needs at least one center.
        raise RuntimeError(
            "qvoronoi gave no Voronoi vertices inside the central unit cell")

    # ---- Read ridges
    qhull_output_after_facets = qhull_output[end_of_facets:].strip()
    ridge_re = re.compile(r'^\d+ \d+ \d+(?P<verts>( \d+)+)$', re.M)

    ridges = [[int(v) for v in match.group('verts').split()]
              for match in ridge_re.finditer(qhull_output_after_facets)]
    # only take ridges with at least 1 facet in main unit cell.
    ridges = [r for r in ridges if any(f in facet_indexes_taken for f in r)]

    # shift centers back into normal unit cell
    centers = np.asarray(centers) - np.sum(structure.cell, axis=0)

    nearest_center = KDTree(centers)

    ridges_in_main_cell = set()
    threw_out = 0
    for r in ridges:
        ridge_centers = np.asarray(
            [all_facets_centers[f] for f in r if f < len(all_facets_centers)])
        if not pbcc.all_in_unit_cell(ridge_centers):
            continue

        pbcc.wrap_points(ridge_centers)
        dists, ridge_centers_in_main = nearest_center.query(
            ridge_centers, return_distance=True)

        # Every ridge vertex must coincide with one of the kept centers
        if np.any(dists > 0.00001):
            threw_out += 1
            continue

        assert ridge_centers_in_main.shape == (
            len(ridge_centers), 1), "%s" % ridge_centers_in_main.shape
        ridge_centers_in_main = ridge_centers_in_main[:, 0]

        ridges_in_main_cell.add(frozenset(ridge_centers_in_main))

    logfile.write("  Threw out %i ridges" % threw_out)

    logfile.flush()

    return centers, vertices, ridges_in_main_cell
Пример #22
0
    def run(self, sn, frames):
        """Run the landmark analysis.

        The input ``SiteNetwork`` is a network of predicted sites; its sites will
        be used as the "basis" for the landmark vectors.

        Wraps a copy of ``frames`` into the unit cell.

        Args:
            sn (SiteNetwork): The landmark basis. Each site is a landmark defined
                by its vertex static atoms, as indicated by `sn.vertices`.
                (Typically from ``VoronoiSiteGenerator``.)
            frames (ndarray n_frames x n_atoms x 3): A trajectory. Can be unwrapped;
                a copy will be wrapped before the analysis.
        Returns:
            A ``SiteTrajectory`` on a copy of `sn` that holds the identified
            sites; the original (unwrapped) `frames` are set as its real
            trajectory.
        Raises:
            ValueError: If the analysis was already run, if `frames` has the
                wrong shape, if `sn` has no vertices, or if the site centers
                method is invalid or unusable with the clustering result.
            InsufficientSitesError: If too few sites were identified for the
                number of mobile particles.
        """
        assert isinstance(sn, SiteNetwork)

        if self._has_run:
            raise ValueError("Cannot rerun LandmarkAnalysis!")

        if frames.shape[1:] != (sn.n_total, 3):
            raise ValueError("Wrong shape %s for frames." % (frames.shape, ))

        if sn.vertices is None:
            raise ValueError("Input SiteNetwork must have vertices")

        n_frames = len(frames)

        logger.info("--- Running Landmark Analysis ---")

        # Create PBCCalculator
        self._pbcc = PBCCalculator(sn.structure.cell)

        # -- Step 0: Wrap to Unit Cell
        orig_frames = frames  # Keep a reference around
        frames = frames.copy()
        # Flatten to list of points for wrapping
        orig_frame_shape = frames.shape
        frames.shape = (orig_frame_shape[0] * orig_frame_shape[1], 3)
        self._pbcc.wrap_points(frames)
        # Back to list of frames
        frames.shape = orig_frame_shape

        # -- Step 1: Compute site-to-vertex distances
        self._landmark_dimension = sn.n_sites

        # NOTE: builtin `int` / `float` are used instead of the `np.int` /
        # `np.float` aliases, which were removed from NumPy; they are the
        # same dtypes.
        longest_vert_set = np.max([len(v) for v in sn.vertices])
        # Pad each vertex list with -1 so that they form a rectangular array
        verts_np = np.array([
            np.concatenate((v, [-1] * (longest_vert_set - len(v))))
            for v in sn.vertices
        ],
                            dtype=int)
        # Unused (padding) entries stay NaN
        site_vert_dists = np.empty(shape=verts_np.shape, dtype=float)
        site_vert_dists.fill(np.nan)

        static_positions = sn.static_structure.get_positions()
        for i, polyhedron in enumerate(sn.vertices):
            verts_poses = static_positions[polyhedron]
            dists = self._pbcc.distances(sn.centers[i], verts_poses)
            site_vert_dists[i, :len(polyhedron)] = dists

        # -- Step 2: Compute landmark vectors
        logger.info("  - computing landmark vectors -")

        # The dimension of one landmark vector is the number of Voronoi regions
        shape = (n_frames * sn.n_mobile, self._landmark_dimension)

        # The landmark vectors may be backed by a temporary file, so the
        # clustering that reads them happens inside this `with`.
        with tempfile.NamedTemporaryFile() as mmap_backing:
            if self.force_no_memmap:
                self._landmark_vectors = np.empty(shape=shape, dtype=float)
            else:
                self._landmark_vectors = np.memmap(mmap_backing.name,
                                                   mode='w+',
                                                   dtype=float,
                                                   shape=shape)

            helpers._fill_landmark_vectors(
                self,
                sn,
                verts_np,
                site_vert_dists,
                frames,
                check_for_zeros=self.check_for_zero_landmarks,
                tqdm=tqdm,
                logger=logger)

            if not self.check_for_zero_landmarks and self.n_all_zero_lvecs > 0:
                logger.warning(
                    "     Had %i all-zero landmark vectors; no error because `check_for_zero_landmarks = False`."
                    % self.n_all_zero_lvecs)
            elif self.check_for_zero_landmarks:
                assert self.n_all_zero_lvecs == 0

            # -- Step 3: Cluster landmark vectors
            logger.info("  - clustering landmark vectors -")

            #  - Cluster -
            # FIXME: remove reload after development done
            clustermod = importlib.import_module("..cluster." +
                                                 self._cluster_algo,
                                                 package=__name__)
            importlib.reload(clustermod)
            cluster_func = clustermod.do_landmark_clustering

            clustering = cluster_func(
                self._landmark_vectors,
                clustering_params=self._clustering_params,
                min_samples=self._minimum_site_occupancy / float(sn.n_mobile),
                verbose=self.verbose)

        cluster_counts = clustering[LandmarkAnalysis.CLUSTERING_CLUSTER_SIZE]
        lmk_lbls = clustering[LandmarkAnalysis.CLUSTERING_LABELS]
        lmk_confs = clustering[LandmarkAnalysis.CLUSTERING_CONFIDENCES]
        # The next two entries are optional in the clustering result
        if LandmarkAnalysis.CLUSTERING_LANDMARK_GROUPINGS in clustering:
            landmark_clusters = clustering[
                LandmarkAnalysis.CLUSTERING_LANDMARK_GROUPINGS]
            assert len(cluster_counts) == len(landmark_clusters)
        else:
            landmark_clusters = None
        if LandmarkAnalysis.CLUSTERING_REPRESENTATIVE_LANDMARKS in clustering:
            rep_lvecs = np.asarray(clustering[
                LandmarkAnalysis.CLUSTERING_REPRESENTATIVE_LANDMARKS])
            assert rep_lvecs.shape == (len(cluster_counts),
                                       self._landmark_vectors.shape[1])
        else:
            rep_lvecs = None

        # Use the module logger, like everywhere else in this method, rather
        # than the root logger (`logging.info`).
        logger.info(
            "    Failed to assign %i%% of mobile particle positions to sites."
            % (100.0 * np.sum(lmk_lbls < 0) / float(len(lmk_lbls))))

        # reshape labels and confidences
        lmk_lbls.shape = (n_frames, sn.n_mobile)
        lmk_confs.shape = (n_frames, sn.n_mobile)

        n_sites = len(cluster_counts)

        if n_sites < (sn.n_mobile / self.max_mobile_per_site):
            raise InsufficientSitesError(verb="Landmark analysis",
                                         n_sites=n_sites,
                                         n_mobile=sn.n_mobile)

        logger.info("    Identified %i sites with assignment counts %s" %
                    (n_sites, cluster_counts))

        # -- Do output
        out_sn = sn.copy()
        # - Compute site centers
        site_centers = np.empty(shape=(n_sites, 3), dtype=frames.dtype)
        if self.site_centers_method == LandmarkAnalysis.SITE_CENTERS_REAL_WEIGHTED or \
           self.site_centers_method == LandmarkAnalysis.SITE_CENTERS_REAL_UNWEIGHTED:
            weighted = \
                self.site_centers_method == LandmarkAnalysis.SITE_CENTERS_REAL_WEIGHTED
            # Positions of the mobile atoms only; the same for every site
            mobile_frames = frames[:, sn.mobile_mask]
            for site in range(n_sites):
                mask = lmk_lbls == site
                pts = mobile_frames[mask]
                if weighted:
                    site_centers[site] = self._pbcc.average(
                        pts, weights=lmk_confs[mask])
                else:
                    site_centers[site] = self._pbcc.average(pts)
        elif self.site_centers_method == LandmarkAnalysis.SITE_CENTERS_REPRESENTATIVE_LANDMARK:
            if rep_lvecs is None:
                raise ValueError(
                    "Chosen clustering method (with current parameters) didn't return representative landmark vectors; can't use SITE_CENTERS_REPRESENTATIVE_LANDMARK."
                )
            for site in range(n_sites):
                # Average the landmark centers, weighted by the site's
                # representative landmark vector (nonzero entries only)
                weights_nonzero = rep_lvecs[site] > 0
                site_centers[site] = self._pbcc.average(
                    sn.centers[weights_nonzero],
                    weights=rep_lvecs[site, weights_nonzero])
        else:
            raise ValueError("Invalid site centers method '%s'" %
                             self.site_centers_method)
        out_sn.centers = site_centers
        # - If clustering gave us that, compute site vertices
        if landmark_clusters is not None:
            vertices = []
            for lclust in landmark_clusters:
                # A site gets the union of the vertices of its landmarks
                vertices.append(
                    set.union(*[set(sn.vertices[l]) for l in lclust]))
            out_sn.vertices = vertices

        out_st = SiteTrajectory(out_sn, lmk_lbls, lmk_confs)

        # Check that multiple particles are never assigned to one site at the
        # same time, cause that would be wrong.
        self.n_multiple_assignments, self.avg_mobile_per_site = out_st.check_multiple_occupancy(
            max_mobile_per_site=self.max_mobile_per_site)

        out_st.set_real_traj(orig_frames)
        self._has_run = True

        return out_st
Пример #23
0
class LandmarkAnalysis(object):
    """Site analysis of mobile atoms in a static lattice with landmark analysis.

    :param str clustering_algorithm: The landmark clustering algorithm. ``sitator``
        supplies two:
         - ``"dotprod"``: The method described in our "Unsupervised landmark
            analysis for jump detection in molecular dynamics simulations" paper.
         - ``"mcl"``: A newer method we are developing.
    :param dict clustering_params: Parameters for the chosen ``clustering_algorithm``.
        ``None`` (the default) means an empty dict.
    :param double cutoff_midpoint: The midpoint for the logistic function used
        as the landmark cutoff function. (unitless)
    :param double cutoff_steepness: Steepness of the logistic cutoff function.
    :param double minimum_site_occupancy = 0.01: Minimum occupancy (fraction of
        time occupied) for a site to qualify as such.
    :param str site_centers_method: The method to use for computing the real
        space positions of the sites. Options:
         - ``SITE_CENTERS_REAL_UNWEIGHTED``: A spatial average of all real-space
            mobile atom positions assigned to the site is taken.
         - ``SITE_CENTERS_REAL_WEIGHTED``: A spatial average of all real-space
            mobile atom positions assigned to the site is taken, weighted
            by the confidences with which they assigned to the site.
         - ``SITE_CENTERS_REPRESENTATIVE_LANDMARK``: A spatial average over
            all landmarks' centers is taken, weighted by the representative
            or "typical" landmark vector at the site.
        The "real" methods will generally be more faithful to the simulation,
        but the representative landmark method can work better in cases with
        short trajectories, producing a more "ideal" site location.
    :param bool check_for_zero_landmarks: Whether to check for and raise exceptions
        when all-zero landmark vectors are computed.
    :param float static_movement_threshold: (Angstrom) the maximum allowed
        distance between an instantaneous static atom position and its ideal position.
    :param bool dynamic_lattice_mapping: Whether to dynamically decide each
        frame which static atom represents each average lattice position;
        this allows the LandmarkAnalysis to deal with, say, a rare exchange of
        two static atoms that does not change the structure of the lattice.

        It does NOT allow LandmarkAnalysis to deal with lattices whose structures
        actually change over the course of the trajectory.

        In certain cases this is better dealt with by ``MergeSitesByDynamics``.
    :param bool relaxed_lattice_checks: Stored on the instance as
        ``self.relaxed_lattice_checks``; it is not read anywhere in this class
        (presumably consumed by the landmark-vector helpers -- TODO confirm).
    :param int max_mobile_per_site: The maximum number of mobile atoms that can
        be assigned to a single site without throwing an error. Regardless of the
        value, assignments of more than one mobile atom to a single site will
        be recorded and reported.

        Setting this to 2 can be necessary for very diffusive, liquid-like
        materials at high temperatures.

        Statistics related to this are reported in ``self.avg_mobile_per_site``
        and ``self.n_multiple_assignments``.
    :param bool force_no_memmap: if True, landmark vectors will be stored only in memory.
        Only useful if access to landmark vectors after the analysis has run is desired.
    :param bool verbose: Verbosity for the ``clustering_algorithm``. Other output
        controlled through ``logging``.
    """

    # Accepted values for ``site_centers_method``.
    SITE_CENTERS_REAL_UNWEIGHTED = 'real-unweighted'
    SITE_CENTERS_REAL_WEIGHTED = 'real-weighted'
    SITE_CENTERS_REPRESENTATIVE_LANDMARK = 'representative-landmark'

    # Keys of the dict returned by a clustering module's
    # ``do_landmark_clustering``; the last two are optional.
    CLUSTERING_CLUSTER_SIZE = 'cluster-size'
    CLUSTERING_LABELS = 'cluster-labels'
    CLUSTERING_CONFIDENCES = 'cluster-confs'
    CLUSTERING_LANDMARK_GROUPINGS = 'cluster-landmark-groupings'
    CLUSTERING_REPRESENTATIVE_LANDMARKS = 'cluster-representative-lvecs'

    def __init__(self,
                 clustering_algorithm='dotprod',
                 clustering_params=None,
                 cutoff_midpoint=1.5,
                 cutoff_steepness=30,
                 minimum_site_occupancy=0.01,
                 site_centers_method=SITE_CENTERS_REAL_WEIGHTED,
                 check_for_zero_landmarks=True,
                 static_movement_threshold=1.0,
                 dynamic_lattice_mapping=False,
                 relaxed_lattice_checks=False,
                 max_mobile_per_site=1,
                 force_no_memmap=False,
                 verbose=True):
        self._cutoff_midpoint = cutoff_midpoint
        self._cutoff_steepness = cutoff_steepness
        self._minimum_site_occupancy = minimum_site_occupancy

        self._cluster_algo = clustering_algorithm
        # A fresh dict per instance: a `{}` default in the signature would be
        # shared (and mutable) across every LandmarkAnalysis ever created.
        self._clustering_params = \
            {} if clustering_params is None else clustering_params

        self.verbose = verbose
        self.check_for_zero_landmarks = check_for_zero_landmarks
        self.site_centers_method = site_centers_method
        self.dynamic_lattice_mapping = dynamic_lattice_mapping
        self.relaxed_lattice_checks = relaxed_lattice_checks

        self._landmark_vectors = None
        self._landmark_dimension = None

        self.static_movement_threshold = static_movement_threshold
        self.max_mobile_per_site = max_mobile_per_site

        self.force_no_memmap = force_no_memmap

        self._has_run = False

    @property
    def cutoff(self):
        # NOTE(review): `_cutoff` is never assigned anywhere in this class;
        # unless something external sets it, this raises AttributeError.
        return self._cutoff

    @analysis_result
    def landmark_vectors(self):
        """Landmark vectors from the last invocation of ``run()``"""
        view = self._landmark_vectors[:]
        view.flags.writeable = False
        return view

    @analysis_result
    def landmark_dimension(self):
        """Number of components in a single landmark vector."""
        return self._landmark_dimension

    def run(self, sn, frames):
        """Run the landmark analysis.

        The input ``SiteNetwork`` is a network of predicted sites; its sites will
        be used as the "basis" for the landmark vectors.

        Wraps a copy of ``frames`` into the unit cell.

        Args:
            sn (SiteNetwork): The landmark basis. Each site is a landmark defined
                by its vertex static atoms, as indicated by `sn.vertices`.
                (Typically from ``VoronoiSiteGenerator``.)
            frames (ndarray n_frames x n_atoms x 3): A trajectory. Can be unwrapped;
                a copy will be wrapped before the analysis.
        Returns:
            SiteTrajectory: built on a copy of ``sn`` carrying the new site
                centers (and vertices, if the clustering returned landmark
                groupings), with ``frames`` (unwrapped, as given) set as its
                real trajectory.
        Raises:
            ValueError: if this object has already been run, ``frames`` has the
                wrong shape, ``sn`` has no vertices, ``site_centers_method`` is
                invalid, or representative landmarks are required but the
                clustering did not return them.
            InsufficientSitesError: if fewer than
                ``sn.n_mobile / max_mobile_per_site`` sites were identified.
        """
        assert isinstance(sn, SiteNetwork)

        if self._has_run:
            raise ValueError("Cannot rerun LandmarkAnalysis!")

        if frames.shape[1:] != (sn.n_total, 3):
            raise ValueError("Wrong shape %s for frames." % (frames.shape, ))

        if sn.vertices is None:
            raise ValueError("Input SiteNetwork must have vertices")

        n_frames = len(frames)

        logger.info("--- Running Landmark Analysis ---")

        # Create PBCCalculator
        self._pbcc = PBCCalculator(sn.structure.cell)

        # -- Step 0: Wrap to Unit Cell
        orig_frames = frames  # Keep a reference around
        frames = frames.copy()
        # Flatten to list of points for wrapping
        orig_frame_shape = frames.shape
        frames.shape = (orig_frame_shape[0] * orig_frame_shape[1], 3)
        self._pbcc.wrap_points(frames)
        # Back to list of frames
        frames.shape = orig_frame_shape

        # -- Step 1: Compute site-to-vertex distances
        self._landmark_dimension = sn.n_sites

        # Vertex lists are ragged; pad each with -1 up to the longest one so
        # they fit in a rectangular integer array.
        longest_vert_set = max(len(v) for v in sn.vertices)
        verts_np = np.array([
            np.concatenate((v, [-1] * (longest_vert_set - len(v))))
            for v in sn.vertices
        ],
                            dtype=int)
        # NaN marks the padded (nonexistent) vertex slots.
        site_vert_dists = np.full(shape=verts_np.shape,
                                  fill_value=np.nan,
                                  dtype=float)

        for i, polyhedron in enumerate(sn.vertices):
            verts_poses = sn.static_structure.get_positions()[polyhedron]
            dists = self._pbcc.distances(sn.centers[i], verts_poses)
            site_vert_dists[i, :len(polyhedron)] = dists

        # -- Step 2: Compute landmark vectors
        logger.info("  - computing landmark vectors -")

        # The dimension of one landmark vector is the number of Voronoi regions
        shape = (n_frames * sn.n_mobile, self._landmark_dimension)

        with tempfile.NamedTemporaryFile() as mmap_backing:
            if self.force_no_memmap:
                self._landmark_vectors = np.empty(shape=shape, dtype=float)
            else:
                self._landmark_vectors = np.memmap(mmap_backing.name,
                                                   mode='w+',
                                                   dtype=float,
                                                   shape=shape)

            # Presumably fills `self._landmark_vectors` and sets
            # `self.n_all_zero_lvecs` (read just below) -- TODO confirm.
            helpers._fill_landmark_vectors(
                self,
                sn,
                verts_np,
                site_vert_dists,
                frames,
                check_for_zeros=self.check_for_zero_landmarks,
                tqdm=tqdm,
                logger=logger)

            if not self.check_for_zero_landmarks and self.n_all_zero_lvecs > 0:
                logger.warning(
                    "     Had %i all-zero landmark vectors; no error because `check_for_zero_landmarks = False`."
                    % self.n_all_zero_lvecs)
            elif self.check_for_zero_landmarks:
                assert self.n_all_zero_lvecs == 0

            # -- Step 3: Cluster landmark vectors
            logger.info("  - clustering landmark vectors -")

            #  - Cluster -
            # FIXME: remove reload after development done
            clustermod = importlib.import_module("..cluster." +
                                                 self._cluster_algo,
                                                 package=__name__)
            importlib.reload(clustermod)
            cluster_func = clustermod.do_landmark_clustering

            clustering = \
                cluster_func(self._landmark_vectors,
                             clustering_params = self._clustering_params,
                             min_samples = self._minimum_site_occupancy / float(sn.n_mobile),
                             verbose = self.verbose)

        cluster_counts = clustering[LandmarkAnalysis.CLUSTERING_CLUSTER_SIZE]
        lmk_lbls = clustering[LandmarkAnalysis.CLUSTERING_LABELS]
        lmk_confs = clustering[LandmarkAnalysis.CLUSTERING_CONFIDENCES]
        if LandmarkAnalysis.CLUSTERING_LANDMARK_GROUPINGS in clustering:
            landmark_clusters = clustering[
                LandmarkAnalysis.CLUSTERING_LANDMARK_GROUPINGS]
            assert len(cluster_counts) == len(landmark_clusters)
        else:
            landmark_clusters = None
        if LandmarkAnalysis.CLUSTERING_REPRESENTATIVE_LANDMARKS in clustering:
            rep_lvecs = np.asarray(clustering[
                LandmarkAnalysis.CLUSTERING_REPRESENTATIVE_LANDMARKS])
            assert rep_lvecs.shape == (len(cluster_counts),
                                       self._landmark_vectors.shape[1])
        else:
            rep_lvecs = None

        # Negative labels are counted as unassigned positions.
        logger.info(
            "    Failed to assign %i%% of mobile particle positions to sites."
            % (100.0 * np.sum(lmk_lbls < 0) / float(len(lmk_lbls))))

        # reshape labels and confidences
        lmk_lbls.shape = (n_frames, sn.n_mobile)
        lmk_confs.shape = (n_frames, sn.n_mobile)

        n_sites = len(cluster_counts)

        if n_sites < (sn.n_mobile / self.max_mobile_per_site):
            raise InsufficientSitesError(verb="Landmark analysis",
                                         n_sites=n_sites,
                                         n_mobile=sn.n_mobile)

        logger.info("    Identified %i sites with assignment counts %s" %
                    (n_sites, cluster_counts))

        # -- Do output
        out_sn = sn.copy()
        # - Compute site centers
        site_centers = np.empty(shape=(n_sites, 3), dtype=frames.dtype)
        if self.site_centers_method == LandmarkAnalysis.SITE_CENTERS_REAL_WEIGHTED or \
           self.site_centers_method == LandmarkAnalysis.SITE_CENTERS_REAL_UNWEIGHTED:
            for site in range(n_sites):
                mask = lmk_lbls == site
                pts = frames[:, sn.mobile_mask][mask]
                if self.site_centers_method == LandmarkAnalysis.SITE_CENTERS_REAL_WEIGHTED:
                    site_centers[site] = self._pbcc.average(
                        pts, weights=lmk_confs[mask])
                else:
                    site_centers[site] = self._pbcc.average(pts)
        elif self.site_centers_method == LandmarkAnalysis.SITE_CENTERS_REPRESENTATIVE_LANDMARK:
            if rep_lvecs is None:
                raise ValueError(
                    "Chosen clustering method (with current parameters) didn't return representative landmark vectors; can't use SITE_CENTERS_REPRESENTATIVE_LANDMARK."
                )
            for site in range(n_sites):
                # Only landmarks with positive weight contribute to the average.
                weights_nonzero = rep_lvecs[site] > 0
                site_centers[site] = self._pbcc.average(
                    sn.centers[weights_nonzero],
                    weights=rep_lvecs[site, weights_nonzero])
        else:
            raise ValueError("Invalid site centers method '%s'" %
                             self.site_centers_method)
        out_sn.centers = site_centers
        # - If clustering gave us that, compute site vertices
        if landmark_clusters is not None:
            # A new site's vertices are the union of the vertices of every
            # landmark grouped into it. `set().union(...)` (rather than
            # `set.union(*...)`) also yields an empty set for an empty cluster.
            out_sn.vertices = [
                set().union(*(sn.vertices[lmk] for lmk in lclust))
                for lclust in landmark_clusters
            ]

        out_st = SiteTrajectory(out_sn, lmk_lbls, lmk_confs)

        # Check that multiple particles are never assigned to one site at the
        # same time, cause that would be wrong.
        self.n_multiple_assignments, self.avg_mobile_per_site = out_st.check_multiple_occupancy(
            max_mobile_per_site=self.max_mobile_per_site)

        out_st.set_real_traj(orig_frames)
        self._has_run = True

        return out_st
Пример #24
0
    def voronoi(self, structure, radial=False, verbose=True):
        """Compute the Voronoi decomposition of `structure` with Zeo++.

        Writes `structure` to a CIF file in the temporary directory, runs the
        Zeo++ executable on it, and parses the resulting ``.nt2`` (network)
        and ``.v1`` (cell) output files.

        :param Atoms structure: The ASE Atoms to compute the Voronoi decomposition of.
        :param bool radial: If False (the default), ``-nor`` is passed to Zeo++.
        :param bool verbose: If True, print the raw output captured from Zeo++.
        :returns: tuple ``(vert_coords, region_atom_indexes, edges, edge_radius)``:
            the Voronoi vertex coordinates (converted from the Zeo++ cell to
            `structure`'s cell), each vertex's ``'region-atom-indexes'`` entry,
            an ``(n_edges, 2)`` int array of ``from``/``to`` vertex indexes, and
            a float array with each edge's radius.
        :raises ValueError: if called outside of the ``with`` statement (no
            temporary directory is available).
        :raises subprocess.CalledProcessError: if Zeo++ exits with an error;
            its output is echoed to stderr first.
        """

        if self._tmpdir is None:
            raise ValueError("Cannot use Zeopy outside with statement")

        inp = os.path.join(self._tmpdir, "in.cif")
        outp = os.path.join(self._tmpdir, "out.nt2")
        v1out = os.path.join(self._tmpdir, "out.v1")

        ase.io.write(inp, structure)

        args = [] if radial else ["-nor"]

        try:
            output = subprocess.check_output([self._exe] + args +
                                             ["-v1", v1out, "-nt2", outp, inp],
                                             stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            print("Zeo++ returned an error:", file=sys.stderr)
            print(e.output, file=sys.stderr)
            raise

        if verbose:
            print(output)

        with open(outp, "r") as outf:
            verts, edges = self.parse_nt2(outf.readlines())
        with open(v1out, "r") as outf:
            zeocell = self.parse_v1_cell(outf.readlines())

        # Confirm things really are in order -- sort of
        # Looking at the Zeo code, I don't think it reorders cell vectors --
        # it just rotates them. A rotation preserves lengths, so compare the
        # cell vector norms; the absolute value makes the check two-sided
        # (a bare `a - b < tol` accepts any Zeo cell *shorter* than ours).
        assert np.all(
            np.abs(
                np.linalg.norm(zeocell, axis=1) -
                np.linalg.norm(structure.cell, axis=1)) < 0.0001)

        vert_coords = np.asarray([v['coords'] for v in verts])

        zeopbcc = PBCCalculator(zeocell)
        real_pbcc = PBCCalculator(structure.cell)

        # Both conversions presumably operate on `vert_coords` in place (their
        # return values are unused) -- TODO confirm against PBCCalculator.
        # Bring into Zeo crystal coordinates
        zeopbcc.to_cell_coords(vert_coords)
        # Bring into our real coords
        real_pbcc.to_real_coords(vert_coords)

        # Builtin `int`/`float`: the `np.int`/`np.float` aliases were removed
        # from NumPy and were only ever aliases of these builtins.
        edges_np = np.empty(shape=(len(edges), 2), dtype=int)
        edge_radius = np.empty(shape=len(edges), dtype=float)
        for i, edge in enumerate(edges):
            edges_np[i, 0] = edge['from']
            edges_np[i, 1] = edge['to']
            edge_radius[i] = edge['radius']

        return (vert_coords, [v['region-atom-indexes']
                              for v in verts], edges_np, edge_radius)
Пример #25
0
    def _build_mic_connmat(self, sn, connectivity_matrix):
        """Expand a unit-cell connectivity matrix onto a 3x3x3 supercell.

        Site ``s`` of periodic image number ``i`` (its position in the returned
        ``images`` array) is row/column ``i * n_sites + s`` of the result.
        Connections whose minimum image lies in the same cell are replicated
        on the block diagonal; connections that cross a cell boundary are
        linked to the appropriate neighbouring image, and dropped if that
        image lies outside the 3x3x3 supercell.

        Args:
            sn: The site network; ``len(sn)``, ``sn.centers`` and
                ``sn.structure.cell`` are used.
            connectivity_matrix: ``n_sites x n_sites`` matrix whose nonzero
                entries mark connected ``(from_site, to_site)`` pairs.
        Returns:
            tuple ``(newmat, mask_000, images)``: the boolean ``lil_matrix`` of
            shape ``(27 * n_sites, 27 * n_sites)``; a boolean mask over its
            rows selecting the sites of the central ``(0, 0, 0)`` image; and
            the ``(27, 3)`` array of image offsets.
        """
        # We use a 3x3x3 = 27 supercell, so there are 27x as many sites
        n_sites = len(sn)
        assert n_sites == connectivity_matrix.shape[0]

        images = np.asarray(list(itertools.product(range(-1, 2), repeat=3)))
        n_images = len(images)
        assert n_images == 27
        # An image offset (a, b, c), each in {-1, 0, 1}, is encoded as the
        # three-digit code 100*(a+1) + 10*(b+1) + (c+1); 111 is the home cell.
        image_to_idex = {
            100 * (image[0] + 1) + 10 * (image[1] + 1) + (image[2] + 1): i
            for i, image in enumerate(images)
        }

        pos = sn.centers  #.copy() # TODO: copy not needed after reinstall of sitator!
        n_total_sites = n_images * n_sites
        newmat = lil_matrix((n_total_sites, n_total_sites), dtype=bool)

        # The central image occupies block `index_000`, i.e. rows
        # [index_000 * n_sites, (index_000 + 1) * n_sites) -- the same layout
        # used to fill `newmat` below.
        mask_000 = np.zeros(shape=n_total_sites, dtype=bool)
        index_000 = image_to_idex[111]
        first_000 = index_000 * n_sites
        mask_000[first_000:first_000 + n_sites] = True
        assert np.sum(mask_000) == n_sites

        pbcc = PBCCalculator(sn.structure.cell)
        # `min_image` is handed a scratch copy of the target position
        # (presumably because it modifies its second argument in place).
        buf = np.empty(shape=3)

        internal_mat = np.zeros_like(connectivity_matrix)
        # (from_site, to_site, image offset of to_site relative to from_site)
        external_connections = []
        for from_site, to_site in zip(*np.where(connectivity_matrix)):
            buf[:] = pos[to_site]
            to_mic = pbcc.min_image(pos[from_site], buf)
            if to_mic == 111:
                # If we're in the main image, keep the connection: it's internal
                internal_mat[from_site, to_site] = True
            else:
                # Decode the three digits back into an offset in {-1, 0, 1}^3.
                # The offset does not depend on which image `from_site` is
                # in, so it is computed once here rather than per image.
                # NOTE(review): assumes `min_image` uses the same digit
                # encoding as `image_to_idex` (original carried a
                # "FIXME: is the -1 right").
                offset = np.array([(to_mic // 10**(2 - axis)) % 10 - 1
                                   for axis in range(3)])
                external_connections.append((from_site, to_site, offset))

        for image_idex, image in enumerate(images):
            block_start = image_idex * n_sites

            # Make the block diagonal
            newmat[block_start:block_start + n_sites,
                   block_start:block_start + n_sites] = internal_mat

            # Check all external connections from this image; add other sparse entries
            for from_site, to_site, offset in external_connections:
                to_in_image = image + offset
                assert np.max(np.abs(to_in_image)) <= 2
                # Targets outside the 3x3x3 supercell are dropped.
                if not np.any(np.abs(to_in_image) > 1):
                    to_code = 100 * (to_in_image[0] + 1) + 10 * (
                        to_in_image[1] + 1) + 1 * (to_in_image[2] + 1)
                    newmat[block_start + from_site,
                           image_to_idex[to_code] * n_sites + to_site] = True

        assert np.sum(newmat) >= n_images * np.sum(
            internal_mat)  # Lowest it can be is if every one is internal

        return newmat, mask_000, images