def best_cluster(self):
    """Return the best cluster from this collection.

    A collection holding zero or one results is returned unchanged.
    Otherwise the results are ordered by their ``accuracy`` attribute and
    one candidate cluster is built per result: the result itself plus
    every later result (in that order) that lies within the larger of
    the two buffered accuracy radii. The candidate with the highest
    summed score wins; ties go to the candidate containing the highest
    individual score.
    """
    if len(self) <= 1:
        return self

    by_accuracy = sorted(self, key=operator.attrgetter("accuracy"))

    candidates = []
    for index, anchor in enumerate(by_accuracy):
        # allow a 50% buffer zone around each result
        anchor_reach = anchor.accuracy * 1.5
        members = [anchor]
        # Only pair the anchor with results that come after it, which
        # covers the upper triangle of the pair matrix.
        for other in by_accuracy[index + 1:]:
            reach = max(anchor_reach, other.accuracy * 1.5)
            gap = distance(anchor.lat, anchor.lon, other.lat, other.lon)
            if gap <= reach:
                members.append(other)
        candidates.append(members)

    def rank(members):
        # Primary key: cumulative score; secondary key: best single score.
        scores = [member.score for member in members]
        return (sum(scores), max(scores))

    ranked = sorted(candidates, key=rank, reverse=True)
    return ranked[0]
def confirm_station_obs(self):
    """Tell whether every observation agrees with the station position.

    Returns ``False`` when ``self.has_position()`` is false. Otherwise
    returns ``True`` unless at least one observation lies farther than
    ``self.MAX_DIST_METERS`` from the station, as measured by
    ``distance``.
    """
    if not self.has_position():
        # No station position to compare against.
        return False

    # any() stops at the first observation that is too far away.
    has_outlier = any(
        distance(obs.lat, obs.lon, self.station.lat, self.station.lon)
        > self.MAX_DIST_METERS
        for obs in self.observations
    )
    return not has_outlier
def aggregate_mac_position(networks, minimum_accuracy):
    """Estimate a position and accuracy from a cluster of networks.

    The weighted mean of the network positions seeds a least-squares fit
    that minimizes the weighted distances to all networks. If the fit
    does not report success, the weighted mean is used instead.

    :param networks: Iterable of records exposing ``lat``, ``lon``,
        ``age``, ``signalStrength`` and ``score`` by key.
    :param minimum_accuracy: Lower bound for the returned accuracy.
    :returns: A ``(lat, lon, accuracy)`` tuple of floats.
    """
    # Idea based on https://gis.stackexchange.com/questions/40660

    def residuals(candidate, nets):
        # One weighted distance per network: scaled down for old
        # observations and by the squared signal strength.
        values = []
        for net in nets:
            meters = distance(net["lat"], net["lon"], candidate[0], candidate[1])
            age_factor = min(math.sqrt(2000.0 / net["age"]), 1.0)
            values.append(meters * age_factor / math.pow(net["signalStrength"], 2))
        return numpy.array(values)

    # Guess initial position as the weighted mean over all networks.
    coords = numpy.array(
        [(net["lat"], net["lon"]) for net in networks], dtype=numpy.double
    )
    mean_weights = []
    for net in networks:
        age_factor = min(math.sqrt(2000.0 / net["age"]), 1.0)
        mean_weights.append(
            net["score"] * age_factor / math.pow(net["signalStrength"], 2)
        )
    mean_weights = numpy.array(mean_weights, dtype=numpy.double)
    start = numpy.average(coords, axis=0, weights=mean_weights)

    (lat, lon), _cov_x, _info, _mesg, status = leastsq(
        residuals, start, args=networks, full_output=True
    )

    solved = status in (1, 2, 3, 4)
    if not solved:
        # No solution found, use initial estimate.
        lat, lon = start

    # Guess the accuracy as the 95th percentile of the distances
    # from the lat/lon to the positions of all networks.
    spread = numpy.array(
        [distance(lat, lon, net["lat"], net["lon"]) for net in networks],
        dtype=numpy.double,
    )
    accuracy = max(numpy.percentile(spread, 95), minimum_accuracy)

    return (float(lat), float(lon), float(accuracy))
def aggregate_obs(self):
    """Combine ``self.observations`` into one weighted position summary.

    Returns ``None`` when the diagonal of the observations' bounding box
    is longer than ``self.MAX_DIST_METERS``. Otherwise returns a dict
    with the raw positions and weights, the weighted mean ``lat``/``lon``,
    the bounding box, a ``radius`` from ``circle_radius``, the ``region``
    from ``GEOCODER`` and the bounded ``samples``/``weight`` values.
    """
    observations = self.observations
    coords = numpy.array(
        [(obs.lat, obs.lon) for obs in observations], dtype=numpy.double
    )

    # Bounding box of all observed positions.
    upper_lat, upper_lon = coords.max(axis=0)
    lower_lat, lower_lon = coords.min(axis=0)

    diagonal = distance(lower_lat, lower_lon, upper_lat, upper_lon)
    if diagonal > self.MAX_DIST_METERS:
        # Observations are spread too far apart to aggregate.
        return None

    obs_weights = numpy.array(
        [obs.weight for obs in observations], dtype=numpy.double
    )
    mean_lat, mean_lon = numpy.average(coords, axis=0, weights=obs_weights)
    mean_lat = float(mean_lat)
    mean_lon = float(mean_lon)

    radius = circle_radius(
        mean_lat, mean_lon, upper_lat, upper_lon, lower_lat, lower_lon
    )
    region = GEOCODER.region(mean_lat, mean_lon)
    samples, weight = self.bounded_samples_weight(
        len(observations), float(obs_weights.sum())
    )

    summary = {
        "positions": coords,
        "weights": obs_weights,
        "lat": mean_lat,
        "lon": mean_lon,
        "max_lat": float(upper_lat),
        "min_lat": float(lower_lat),
        "max_lon": float(upper_lon),
        "min_lon": float(lower_lon),
        "radius": radius,
        "region": region,
        "samples": samples,
        "weight": weight,
    }
    return summary
def aggregate_cell_position(networks, min_accuracy, max_accuracy):
    """
    Calculate the aggregate position of the user inside the given
    cluster of networks.

    A single network is returned as-is, with its radius clamped into the
    accuracy bounds. For several networks the position is the weighted
    mean of their positions and the accuracy is the 95th percentile of
    the distances from that mean to each network, clamped the same way.

    :returns: A ``(lat, lon, accuracy, score)`` tuple of floats, where
        the accuracy lies between ``min_accuracy`` and ``max_accuracy``.
    """

    def clamp(value):
        # Keep the value inside [min_accuracy, max_accuracy].
        return min(max(value, min_accuracy), max_accuracy)

    if len(networks) == 1:
        only = networks[0]
        lat = only["lat"]
        lon = only["lon"]
        radius = clamp(only["radius"])
        score = only["score"]
        return (float(lat), float(lon), float(radius), float(score))

    coords = numpy.array(
        [(net["lat"], net["lon"]) for net in networks], dtype=numpy.double
    )

    # Weight by score, discount old observations and divide by the
    # squared signal strength.
    mean_weights = []
    for net in networks:
        age_factor = min(math.sqrt(2000.0 / net["age"]), 1.0)
        mean_weights.append(
            net["score"] * age_factor / math.pow(net["signalStrength"], 2)
        )
    mean_weights = numpy.array(mean_weights, dtype=numpy.double)

    lat, lon = numpy.average(coords, axis=0, weights=mean_weights)
    score = networks["score"].sum()

    # Guess the accuracy as the 95th percentile of the distances
    # from the lat/lon to the positions of all networks.
    spread = numpy.array(
        [distance(lat, lon, net["lat"], net["lon"]) for net in networks],
        dtype=numpy.double,
    )
    accuracy = clamp(numpy.percentile(spread, 95))

    return (float(lat), float(lon), float(accuracy), float(score))
def get(self, query):
    """
    Get a cached result for the query.

    All cache keys derived from the query are looked up at once. The
    cached values are grouped into clusters by rounded latitude, rounded
    longitude and fallback name. A result is only returned when every
    cached value falls into one single cluster; disagreeing values are
    reported as ``inconsistent`` and ignored.

    :param query: The query for which to look for a cached value.
    :type query: :class:`ichnaea.api.locate.query.Query`

    :returns: The cache result or None.
    :rtype: :class:`~ichnaea.api.locate.fallback.ExternalResult`
    """
    fallback_name = query.api_key.fallback_name

    if not self._should_cache(query):
        self._capture_incr(fallback_name, "bypassed")
        return None

    cache_keys = self._cache_keys(query)

    # dict of (lat, lon, fallback) tuples to ExternalResult list
    # lat/lon clustered into ~100x100 meter grid cells
    clustered_results = defaultdict(list)
    not_found_cluster = (None, None, None)
    try:
        for value in self.redis_client.mget(cache_keys):
            if not value:
                continue

            value = json.loads(value)
            if value == LOCATION_NOT_FOUND:
                value = ExternalResult(None, None, None, None)
                clustered_results[not_found_cluster] = [value]
            else:
                value = ExternalResult(**value)
                # ~100x100m clusters. The key must include the
                # longitude; keying on the latitude twice would merge
                # results that share a latitude but are far apart.
                cluster_key = (
                    round(value.lat, 3),
                    round(value.lon, 3),
                    value.fallback,
                )
                clustered_results[cluster_key].append(value)
    except (json.JSONDecodeError, RedisError):
        self.raven_client.captureException()
        self._capture_incr(fallback_name, "failure")
        return None

    if not clustered_results:
        self._capture_incr(fallback_name, "miss")
        return None

    if list(clustered_results.keys()) == [not_found_cluster]:
        # the only match was for not found results
        self._capture_incr(fallback_name, "hit")
        return clustered_results[not_found_cluster][0]

    if len(clustered_results) == 1:
        # all the cached values agree with each other
        self._capture_incr(fallback_name, "hit")
        results = list(clustered_results.values())[0]

        circles = numpy.array(
            [(res.lat, res.lon, res.accuracy) for res in results],
            dtype=numpy.double,
        )
        # Center of the combined result: mean of the lat/lon columns.
        lat, lon = circles[:, :2].mean(axis=0)
        lat = float(lat)
        lon = float(lon)

        # Smallest radius around the center that still covers every
        # cached circle (distance to its center plus its own accuracy).
        radius = 0.0
        for circle in circles:
            p_dist = distance(lat, lon, circle[0], circle[1]) + circle[2]
            radius = max(radius, p_dist)

        return ExternalResult(
            lat=lat,
            lon=lon,
            accuracy=float(radius),
            fallback=results[0].fallback,
        )

    # inconsistent results
    self._capture_incr(fallback_name, "inconsistent")
    return None
def cluster_networks(models, lookups, min_age=0, min_radius=None, min_signal=None, max_distance=None):
    """
    Given a list of database models and lookups, return a list of
    clusters of nearby networks.

    Each returned cluster is a ``NETWORK_DTYPE`` array holding at least
    two networks. Networks are grouped with complete-linkage
    hierarchical clustering, cut at ``max_distance``.
    """
    now = util.utcnow()
    today = now.date()

    # Map each decoded mac to its observed (age, signal strength),
    # falling back to the given minimums for missing values.
    obs_data = {}
    for lookup in lookups:
        age = max(abs(lookup.age or min_age), 1000)
        signal = lookup.signalStrength or min_signal
        obs_data[decode_mac(lookup.mac)] = (age, signal)

    rows = []
    for model in models:
        seen_today = bool(model.last_seen is not None and model.last_seen >= today)
        rows.append((
            model.lat,
            model.lon,
            model.radius or min_radius,
            obs_data[model.mac][0],
            obs_data[model.mac][1],
            station_score(model, now),
            encode_mac(model.mac, codec="base64"),
            seen_today,
        ))
    networks = numpy.array(rows, dtype=NETWORK_DTYPE)

    # Only consider clusters that have at least 2 found networks
    # inside them. Otherwise someone could use a combination of
    # one real network and one fake, and therefore not found, network
    # to get the position of the real network.
    count = len(networks)
    if count < 2:
        # Not enough networks to form a valid cluster.
        return []

    positions = networks[["lat", "lon"]]

    if count == 2:
        first, second = positions[0], positions[1]
        agree = distance(first[0], first[1], second[0], second[1]) <= max_distance
        # Two agreeing networks form one cluster. If they disagree they
        # form two clusters of size one, neither large enough to return.
        return [networks] if agree else []

    # Calculate the condensed distance matrix based on distance in meters.
    # This avoids calculating the square form, which would calculate
    # each value twice and avoids calculating the diagonal of zeros.
    # The special cases for fewer than three networks are handled above.
    # See scipy.spatial.distance.squareform and
    # https://stackoverflow.com/questions/13079563
    condensed = numpy.zeros(count * (count - 1) // 2, dtype=numpy.double)
    for slot, (left, right) in enumerate(itertools.combinations(positions, 2)):
        condensed[slot] = distance(left[0], left[1], right[0], right[1])

    linkage = hierarchy.linkage(condensed, method="complete")
    labels = hierarchy.fcluster(
        linkage, max_distance, criterion="distance", depth=2
    )

    # Group the networks by their assigned cluster label.
    grouped = defaultdict(list)
    for label, net in zip(labels, networks):
        grouped[label].append(net)

    return [
        numpy.array(members, dtype=NETWORK_DTYPE)
        for members in grouped.values()
        if len(members) >= 2
    ]
def func(point, points):
    """Return one weighted distance from ``point`` per entry in ``points``.

    Each distance is multiplied by an age factor that is capped at 1.0
    and divided by the squared signal strength of the entry.

    :param point: Indexable pair read as ``(lat, lon)``.
    :param points: Iterable of records exposing ``lat``, ``lon``,
        ``age`` and ``signalStrength`` by key.
    :returns: A numpy array with one value per entry.
    """
    weighted = []
    for entry in points:
        meters = distance(entry["lat"], entry["lon"], point[0], point[1])
        age_factor = min(math.sqrt(2000.0 / entry["age"]), 1.0)
        weighted.append(meters * age_factor / math.pow(entry["signalStrength"], 2))
    return numpy.array(weighted)
def region(self, lat, lon):
    """
    Return a region code matching the provided position.

    The lookup narrows down in stages: the RTree of buffered region
    envelopes, then the buffered polygon shapes, then the precise
    polygon shapes. If more than one candidate remains, the distance to
    the region boundaries is used as the tie-breaker.

    If the position is not found inside any region return None.
    """
    point = geometry.Point(lon, lat)

    def boundary_distances(codes):
        # Map the distance between the point and every boundary vertex
        # of the given regions to the region code of that vertex. Equal
        # distances overwrite each other, so the last one wins.
        by_distance = {}
        for code in codes:
            boundary = self._shapes[code].boundary
            if isinstance(boundary, geometry.base.BaseMultipartGeometry):
                vertices = []
                for part in boundary.geoms:
                    vertices.extend(part.coords)
            else:
                vertices = boundary.coords
            for vertex in vertices:
                # Vertices are passed as (vertex[1], vertex[0]) to match
                # the (lat, lon) argument order.
                by_distance[
                    geocalc.distance(vertex[1], vertex[0], lat, lon)
                ] = code
        return by_distance

    # Look up point in RTree of buffered region envelopes.
    # This is a coarse-grained but very fast match.
    envelope_codes = {
        self._tree_ids[id_] for id_ in self._tree.intersection(point.bounds)
    }
    if not envelope_codes:
        return None

    # match point against the buffered polygon shapes
    buffered_codes = {
        code
        for code in envelope_codes
        if self._buffered_shapes[code].contains(point)
    }
    if not buffered_codes:
        return None
    if len(buffered_codes) == 1:
        return next(iter(buffered_codes))

    # match point against the precise polygon shapes
    precise_codes = {
        code
        for code in buffered_codes
        if self._prepared_shapes[code].contains(point)
    }
    if len(precise_codes) == 1:
        return next(iter(precise_codes))

    if not precise_codes:
        # point wasn't in any precise region, which one of the buffered
        # regions is it closest to?
        nearest = boundary_distances(buffered_codes)
        return nearest[min(nearest)]

    # point was in multiple overlapping regions, take the one where it
    # is farthest away from the border / the most inside a region
    farthest = boundary_distances(precise_codes)
    return farthest[max(farthest)]