def geo_prior_distance(zones: np.ndarray, network: dict, scale: float):
    """Compute the distance-based geo-prior of a set of zones.

    For every zone the relevant edge lengths are collected and scored under
    an exponential distribution; the result is the mean log-density over the
    edges of *all* zones.

    Args:
        zones (np.ndarray): Boolean array; each row selects the sites of one zone.
        network (dict): The full network. Only the keys ``'dist_mat'`` and
            ``'locations'`` are read here.
        scale (float): Scale of the exponential distribution (estimated from
            the data).

    Returns:
        float: Mean exponential log-density of the collected distances, or
        ``0.0`` when ``zones`` contains no zone at all.

    Raises:
        ValueError: If a zone contains fewer than two locations.
    """
    # Collect the log-densities of every zone. The previous implementation
    # started from an uninitialised ``np.ndarray([])`` and overwrote the
    # result in each iteration, so only the last zone was ever scored.
    zone_log_priors = []

    for z in zones:
        dist_mat = network['dist_mat'][z][:, z]
        locations = network['locations'][z]
        n_locations = len(locations)

        if n_locations > 3:
            # Edge lengths of the minimum spanning tree of the zone's
            # Delaunay graph, weighted by pairwise distance.
            delaunay = compute_delaunay(locations)
            mst = minimum_spanning_tree(delaunay.multiply(dist_mat))
            distances = mst.tocsr()[mst.nonzero()]
        elif n_locations == 3:
            # NOTE(review): n_smallest_distances is defined elsewhere;
            # presumably it returns the n smallest pairwise distances.
            distances = n_smallest_distances(dist_mat, n=2, return_idx=False)
        elif n_locations == 2:
            distances = n_smallest_distances(dist_mat, n=1, return_idx=False)
        else:
            raise ValueError("Too few locations to compute distance.")

        log_pdf = stats.expon.logpdf(distances, loc=0, scale=scale)
        # Flatten: sparse indexing may hand back a 2-d matrix.
        zone_log_priors.append(np.asarray(log_pdf, dtype=float).ravel())

    if not zone_log_priors:
        # No zones: nothing to score, return a neutral log-prior instead of
        # the mean of uninitialised memory.
        return 0.0

    return np.mean(np.concatenate(zone_log_priors))
def areas_to_graph(self, area, burn_in, post_freq):
    """Turn posterior samples of an area into a weighted graph.

    Args:
        area: Sequence of posterior samples; after the burn-in is dropped it
            is converted to an array whose rows are samples and whose columns
            are points (truthy = point is in the area).
        burn_in (float): Fraction of the samples discarded from the start.
        post_freq (float): Minimum fraction of samples a point must appear in
            to become part of the graph.

    Returns:
        tuple: ``(in_graph, lines, line_weights)`` where ``in_graph`` is the
        boolean mask of points in the graph, ``lines`` holds the coordinates
        (rows of ``self.locations``) of the two end points of each edge, and
        ``line_weights`` holds, per edge, the fraction of samples in which
        both end points are in the area together.

    Raises:
        ValueError: If no point reaches ``post_freq``.
    """
    # exclude burn-in
    end_bi = math.ceil(len(area) * burn_in)
    area = np.asarray(area[end_bi:])
    n_samples = area.shape[0]

    # frequency of each point in the area, thresholded by post_freq
    zone_freq = np.sum(area, axis=0) / n_samples
    in_graph = zone_freq >= post_freq
    locations = self.locations[in_graph]
    n_graph = len(locations)

    # The original condition ``n_graph <= 3 or n_graph >= 2`` was always
    # true, which made this error unreachable.
    if n_graph == 0:
        raise ValueError('No points in contact zone!')

    # indices (into the full point set) of the points in the graph
    area_indices = np.flatnonzero(in_graph)

    if n_graph > 3:
        # Delaunay graph as a boolean "is connected" matrix
        graph_connections = compute_delaunay(locations).toarray() > 0
    else:
        # up to three points: connect every point with every other point
        # (a single point yields a graph without edges)
        graph_connections = ~np.eye(n_graph, dtype=bool)

    # Collect each undirected edge once, in row-major order of the
    # connection matrix. A set gives O(1) duplicate detection.
    point_tuples = []
    seen = set()
    for row, col in np.argwhere(graph_connections):
        i1, i2 = int(area_indices[row]), int(area_indices[col])
        if (i2, i1) not in seen:
            seen.add((i1, i2))
            point_tuples.append([i1, i2])

    lines = []
    line_weights = []
    for pair in point_tuples:
        # how often are both end points together in the posterior of the area
        together_in_area = np.sum(np.all(area[:, pair], axis=1)) / n_samples
        line_weights.append(together_in_area)
        lines.append(self.locations[pair])

    return in_graph, lines, line_weights
def geo_prior_gaussian(zones: np.ndarray, network: dict, cov: np.ndarray):
    """Compute the two-dimensional Gaussian geo-prior of a set of zones.

    For every zone a set of edges is selected, the coordinate difference
    along each edge is scored under a zero-mean bivariate normal
    distribution, and the mean log-density over the edges of all zones is
    returned.

    Args:
        zones (np.ndarray): Boolean array; each row selects the sites of one zone.
        network (dict): The full network. Only the keys ``'dist_mat'`` and
            ``'locations'`` are read here.
        cov (np.ndarray): Covariance matrix of the bivariate normal
            (estimated from the data).

    Returns:
        float: Mean log-density over all edges, or ``0.0`` when ``zones``
        contains no zone at all.

    Raises:
        ValueError: If a zone contains fewer than two locations.
    """
    # Gather per-edge log-densities in a list. Appending to an uninitialised
    # ``np.ndarray([])`` (as before) leaked one arbitrary value into the mean.
    edge_log_priors = []

    for z in zones:
        dist_mat = network['dist_mat'][z][:, z]
        locations = network['locations'][z]
        n_locations = len(locations)

        if n_locations > 3:
            # Edges of the minimum spanning tree of the zone's Delaunay
            # graph, weighted by pairwise distance.
            delaunay = compute_delaunay(locations)
            mst = minimum_spanning_tree(delaunay.multiply(dist_mat))
            i1, i2 = mst.nonzero()
        elif n_locations == 3:
            # NOTE(review): n_smallest_distances is defined elsewhere;
            # presumably it returns the end-point indices of the n shortest
            # pairwise distances when return_idx=True.
            i1, i2 = n_smallest_distances(dist_mat, n=2, return_idx=True)
        elif n_locations == 2:
            i1, i2 = n_smallest_distances(dist_mat, n=1, return_idx=True)
        else:
            raise ValueError("Too few locations to compute distance.")

        diffs = locations[i1] - locations[i2]
        prior_z = stats.multivariate_normal.logpdf(diffs, mean=[0, 0], cov=cov)
        # logpdf returns a scalar for a single edge; normalise to 1-d.
        edge_log_priors.append(np.atleast_1d(prior_z).ravel())

    if not edge_log_priors:
        # No zones: return a neutral log-prior.
        return 0.0

    return np.mean(np.concatenate(edge_log_priors))
def compute_network(sites, subset=None):
    """Build a network (graph) from a set of sites.

    The sites (language locations plus attributes) are connected through a
    Delaunay triangulation. When ``subset`` is given, only the selected
    sites enter the network and the vertices are renumbered from zero.

    Args:
        sites(dict): sites with the keys "locations", "id" and "names"
        subset(list): boolean assignment of sites to the subset

    Returns:
        dict: the network, with the keys 'vertices', 'edges', 'locations',
            'names', 'adj_mat', 'n', 'm' and 'dist_mat'
    """
    # Pick the vertices, coordinates and names that go into the network.
    if subset is None:
        vertices = sites['id']
        locations = sites['locations']
        names = sites['names']
    else:
        selected = np.nonzero(subset)[0]
        vertices = list(range(len(selected)))
        locations = sites['locations'][selected, :]
        names = [sites['names'][idx] for idx in selected]

    # Delaunay triangulation: edge list and sparse adjacency matrix
    triangulation = compute_delaunay(locations)
    rows, cols = triangulation.toarray().nonzero()
    edges = np.column_stack((rows, cols))
    adj_mat = triangulation.tocsr()

    # Pairwise Euclidean distances between all selected locations
    dist_mat = np.linalg.norm(locations[:, None] - locations, axis=-1)

    return {
        'vertices': vertices,
        'edges': edges,
        'locations': locations,
        'names': names,
        'adj_mat': adj_mat,
        'n': len(vertices),
        'm': edges.shape[0],
        'dist_mat': dist_mat,
    }
def __init__(self, sites, subset=None, crs=None):
    """Convert a set of sites into a network.

    The language locations are connected through a Delaunay triangulation
    and a pairwise distance matrix is computed. If a subset is defined, only
    the sites in the subset go into the network and the vertices are
    renumbered from zero.

    The result is stored on the instance as ``names``, ``vertices``,
    ``edges``, ``locations``, ``adj_mat``, ``n``, ``m`` and ``dist_mat``.

    Args:
        sites(dict): sites with the keys "locations", "id" and "names"
        subset(list): boolean assignment of sites to the subset
        crs: coordinate reference system of the locations. When given,
            distances are geodesic distances computed with cartopy after
            transforming the locations to EPSG:4326; otherwise they are
            Euclidean distances in the coordinates of the locations.

    Raises:
        ImportError: If ``crs`` is given but cartopy is not installed.
    """
    if crs is not None:
        try:
            from cartopy import geodesic
        except ImportError as e:
            print("Using a coordinate reference system (crs) requires the ´cartopy´ library:")
            print("pip install cartopy")
            raise e

    # Select the vertices, coordinates and names that go into the network.
    if subset is None:
        vertices = sites['id']
        locations = sites['locations']
        self.names = sites['names']
    else:
        sub_idx = np.nonzero(subset)[0]
        vertices = list(range(len(sub_idx)))
        locations = sites['locations'][sub_idx, :]
        self.names = [sites['names'][i] for i in sub_idx]

    # Delaunay triangulation: edge list and sparse adjacency matrix
    delaunay = compute_delaunay(locations)
    v1, v2 = delaunay.toarray().nonzero()
    edges = np.column_stack((v1, v2))
    adj_mat = delaunay.tocsr()

    # Distance matrix
    if crs is None:
        # Use the (possibly subsetted) locations so the matrix matches the
        # vertices of this network; it was previously built from all sites.
        loc = np.asarray(locations)
        diff = loc[:, None] - loc
        dist_mat = np.linalg.norm(diff, axis=-1)
    else:
        # always_xy=True makes pyproj take (x, y) and return (lon, lat),
        # which is the order cartopy's Geodesic expects. Without it the
        # EPSG:4326 output would be (lat, lon).
        transformer = pyproj.transformer.Transformer.from_crs(
            crs_from=crs, crs_to=pyproj.crs.CRS("epsg:4326"), always_xy=True)
        w_locations = np.vstack(
            transformer.transform(locations[:, 0], locations[:, 1])
        ).T
        geod = geodesic.Geodesic()
        # Geodesic.inverse returns (distance, azimuth, azimuth) per end
        # point; only the distance column belongs in the (n, n) matrix.
        dist_mat = np.column_stack([
            np.asarray(geod.inverse(location, w_locations))[:, 0]
            for location in w_locations
        ])

    self.vertices = vertices
    self.edges = edges
    self.locations = locations
    self.adj_mat = adj_mat
    self.n = len(vertices)
    self.m = edges.shape[0]
    self.dist_mat = dist_mat