def geo_prior_distance(zones: np.array, network: dict, scale: float):
    """Compute the distance-based geo-prior of a set of zones.

    For every zone the lengths of the connecting edges are scored under an
    exponential distribution. The result is the mean log-density over the
    edges of *all* zones.

    Args:
        zones (np.array): The current zones (boolean array); iterating it
            yields one boolean site mask per zone.
        network (dict): The full network containing all sites. The keys
            'dist_mat' and 'locations' are read.
        scale (float): The scale of the exponential distribution
            (estimated from the data).

    Returns:
        float: The geo-prior of the zones (mean log-density of the edge
            lengths), or 0.0 when ``zones`` contains no zone at all.

    Raises:
        ValueError: If a zone contains fewer than two locations.
    """
    # One flat array of edge log-densities per zone. A plain list avoids
    # seeding the result with an uninitialised ``np.ndarray([])`` value.
    edge_log_priors = []

    for z in zones:
        dist_mat = network['dist_mat'][z][:, z]
        locations = network['locations'][z]
        n_locations = len(locations)

        if n_locations > 3:
            # Weight the triangulation returned by compute_delaunay with the
            # pairwise distances and keep only the minimum-spanning-tree edges.
            delaunay = compute_delaunay(locations)
            mst = minimum_spanning_tree(delaunay.multiply(dist_mat))
            distances = mst.tocsr()[mst.nonzero()]

        elif n_locations == 3:
            distances = n_smallest_distances(dist_mat, n=2, return_idx=False)

        elif n_locations == 2:
            distances = n_smallest_distances(dist_mat, n=1, return_idx=False)

        else:
            raise ValueError("Too few locations to compute distance.")

        zone_log_prior = stats.expon.logpdf(distances, loc=0, scale=scale)
        # Sparse indexing may hand back a 2-d matrix; flatten so that every
        # edge contributes exactly one entry to the mean.
        edge_log_priors.append(np.asarray(zone_log_prior).ravel())

    if not edge_log_priors:
        # No zones means no edges to score: return a neutral log-prior
        # instead of the mean of an empty (or uninitialised) array.
        return 0.0

    return np.mean(np.concatenate(edge_log_priors))
    def areas_to_graph(self, area, burn_in, post_freq):
        """Turn posterior samples of an area into a graph of connected points.

        Args:
            area: Sequence of posterior samples; each sample is a boolean
                assignment of the points to the area.
            burn_in (float): Fraction of the samples discarded from the start
                (the cut-off index is ``ceil(len(area) * burn_in)``).
            post_freq (float): Minimum share of the remaining samples in which
                a point must be part of the area to be included in the graph.

        Returns:
            tuple: ``(in_graph, lines, line_weights)`` where ``in_graph`` is
                the boolean mask of the points in the graph, ``lines`` holds
                the two endpoint locations (rows of ``self.locations``) of
                every edge, and ``line_weights`` holds, per edge, the share of
                samples in which both endpoints are in the area together.

        Raises:
            ValueError: If no samples remain after the burn-in, or if fewer
                than two points reach ``post_freq``.
        """
        # exclude burn-in
        end_bi = math.ceil(len(area) * burn_in)
        area = np.asarray(area[end_bi:])
        n_samples = area.shape[0]
        if n_samples == 0:
            raise ValueError('No samples left after removing the burn-in!')

        # compute frequency of each point in zone
        zone_freq = np.sum(area, axis=0) / n_samples
        in_graph = zone_freq >= post_freq

        locations = self.locations[in_graph]
        n_graph = len(locations)

        # indices (into all points) of the points that are part of the graph
        area_indices = np.flatnonzero(in_graph)

        if n_graph > 3:
            # boolean matrix denoting which points the triangulation connects
            graph_connections = compute_delaunay(locations).toarray() > 0

        elif n_graph >= 2:
            # two or three points: connect every point with every other one
            graph_connections = ~np.eye(n_graph, dtype=bool)

        else:
            raise ValueError('No points in contact zone!')

        # Collect each undirected edge once, in row-major order. The set gives
        # constant-time detection of the mirrored pair.
        point_tuples = []
        seen_pairs = set()
        for (row, col), connected in np.ndenumerate(graph_connections):
            if not connected:
                continue
            i1, i2 = int(area_indices[row]), int(area_indices[col])
            if (i2, i1) not in seen_pairs:
                seen_pairs.add((i1, i2))
                point_tuples.append([i1, i2])

        lines = []
        line_weights = []
        for p in point_tuples:
            # count how often i1 and i2 are together in the posterior of the area
            together_in_area = np.sum(np.all(area[:, p], axis=1)) / n_samples
            lines.append(self.locations[p])
            line_weights.append(together_in_area)

        return in_graph, lines, line_weights
def geo_prior_gaussian(zones: np.array, network: dict, cov: np.array):
    """Compute the two-dimensional Gaussian geo-prior for all edges in the zones.

    For every zone the coordinate differences between the two endpoints of
    each connecting edge are scored under a zero-mean bivariate normal
    distribution. The result is the mean log-density over the edges of all
    zones.

    Args:
        zones (np.array): Boolean array representing the current zones;
            iterating it yields one boolean site mask per zone.
        network (dict): Network containing the graph, locations, ... The keys
            'dist_mat' and 'locations' are read.
        cov (np.array): Covariance matrix of the multivariate Gaussian
            (estimated from the data).

    Returns:
        float: The log geo-prior of the zones, or 0.0 when ``zones`` contains
            no zone at all.

    Raises:
        ValueError: If a zone contains fewer than two locations.
    """
    # Collect per-zone results in a list. Appending to ``np.ndarray([])``
    # would fold one uninitialised value into the mean.
    edge_log_priors = []

    for z in zones:
        dist_mat = network['dist_mat'][z][:, z]
        locations = network['locations'][z]
        n_locations = len(locations)

        if n_locations > 3:
            # Weight the triangulation returned by compute_delaunay with the
            # pairwise distances and keep the minimum-spanning-tree edges.
            delaunay = compute_delaunay(locations)
            mst = minimum_spanning_tree(delaunay.multiply(dist_mat))
            i1, i2 = mst.nonzero()

        elif n_locations == 3:
            i1, i2 = n_smallest_distances(dist_mat, n=2, return_idx=True)

        elif n_locations == 2:
            i1, i2 = n_smallest_distances(dist_mat, n=1, return_idx=True)

        else:
            raise ValueError("Too few locations to compute distance.")

        diffs = locations[i1] - locations[i2]
        prior_z = stats.multivariate_normal.logpdf(diffs, mean=[0, 0], cov=cov)
        # logpdf returns a scalar for a single edge; normalise to 1-d.
        edge_log_priors.append(np.atleast_1d(prior_z))

    if not edge_log_priors:
        # No zones means no edges to score: return a neutral log-prior.
        return 0.0

    return np.mean(np.concatenate(edge_log_priors))
def compute_network(sites, subset=None):
    """Convert a set of sites into a network (graph).

    This function converts a set of sites (language locations plus
    attributes) into a network. If a subset is defined, only those sites in
    the subset go into the network and the vertices are renumbered from zero.

    Args:
        sites (dict): A dict of sites; the keys 'locations', 'names' and
            (when no subset is given) 'id' are read.
        subset (list): Boolean assignment of sites to the subset, or None to
            use all sites.

    Returns:
        dict: A network with the keys 'vertices', 'edges', 'locations',
            'names', 'adj_mat', 'n', 'm' and 'dist_mat'.
    """
    if subset is None:
        vertices = sites['id']
        locations = sites['locations']
        names = sites['names']
    else:
        sub_idx = np.nonzero(subset)[0]
        vertices = list(range(len(sub_idx)))
        locations = sites['locations'][sub_idx, :]
        names = [sites['names'][i] for i in sub_idx]

    # Delaunay triangulation; every non-zero entry becomes an edge
    delaunay = compute_delaunay(locations)
    v1, v2 = delaunay.toarray().nonzero()
    edges = np.column_stack((v1, v2))

    # Adjacency matrix
    adj_mat = delaunay.tocsr()

    # Distance matrix: Euclidean norm of all pairwise coordinate differences
    diff = locations[:, None] - locations
    dist_mat = np.linalg.norm(diff, axis=-1)

    net = {
        'vertices': vertices,
        'edges': edges,
        'locations': locations,
        'names': names,
        'adj_mat': adj_mat,
        'n': len(vertices),
        'm': edges.shape[0],
        'dist_mat': dist_mat,
    }
    return net
    def __init__(self, sites, subset=None, crs=None):
        """Convert a set of sites into a network.

        This function converts a set of language locations, with their
        attributes, into a network (graph). If a subset is defined, only those
        sites in the subset go into the network.

        NOTE(review): the parameter list is reconstructed from the names the
        body reads (``sites``, ``subset``, ``crs``) — confirm order and
        defaults against the callers.

        Args:
            sites (dict): A dict of sites; the keys 'locations', 'names' and
                (when no subset is given) 'id' are read.
            subset (list): Boolean assignment of sites to the subset, or None
                to use all sites.
            crs: Coordinate reference system of the locations. When None,
                distances are plain Euclidean distances between the
                coordinates; otherwise the locations are transformed to
                EPSG:4326 and geodesic distances are computed with cartopy.

        Raises:
            ImportError: If ``crs`` is given but cartopy is not installed.
        """
        if crs is not None:
            try:
                from cartopy import crs as ccrs, geodesic
            except ImportError as e:
                print("Using a coordinate reference system (crs) requires the ´cartopy´ library:")
                print("pip install cartopy")
                raise e

        if subset is None:
            # Define vertices and edges
            vertices = sites['id']
            locations = sites['locations']
            self.names = sites['names']
        else:
            sub_idx = np.nonzero(subset)[0]
            vertices = list(range(len(sub_idx)))
            locations = sites['locations'][sub_idx, :]
            self.names = [sites['names'][i] for i in sub_idx]

        # Delaunay triangulation; every non-zero entry becomes an edge
        delaunay = compute_delaunay(locations)
        v1, v2 = delaunay.toarray().nonzero()
        edges = np.column_stack((v1, v2))

        # Adjacency Matrix
        adj_mat = delaunay.tocsr()

        # Distance matrix
        if crs is None:
            # Use the (possibly subset) locations so the matrix matches the
            # vertices, edges and adjacency matrix of this network.
            loc = np.asarray(locations)
            diff = loc[:, None] - loc
            dist_mat = np.linalg.norm(diff, axis=-1)
        else:
            transformer = pyproj.transformer.Transformer.from_crs(
                crs_from=crs, crs_to=pyproj.crs.CRS("epsg:4326"))
            # transform() returns one array per axis; stack and transpose to
            # get one (n, 2) row per location.
            # NOTE(review): without always_xy=True pyproj returns EPSG:4326
            # coordinates in (lat, lon) order, while cartopy's Geodesic
            # expects (lon, lat) — confirm the axis order.
            w_locations = np.vstack(
                transformer.transform(locations[:, 0], locations[:, 1])
            ).T
            geod = geodesic.Geodesic()
            # Geodesic.inverse returns distance plus azimuths per end point;
            # keep only the distance column so the matrix is (n, n).
            dist_mat = np.hstack([geod.inverse(location, w_locations)[:, :1] for location in w_locations])

        self.vertices = vertices
        self.edges = edges
        self.locations = locations
        self.adj_mat = adj_mat
        self.n = len(vertices)
        self.m = edges.shape[0]
        self.dist_mat = dist_mat