def aggregate_wifi_position(cluster, result_type):
    """Return the aggregate position of the user inside a wifi cluster.

    Only the strongest :data:`ichnaea.api.locate.constants.MAX_WIFIS_IN_CLUSTER`
    networks are used for the position estimate. The centroid calculation
    is unweighted, so distant outliers would skew it; and since signal
    strength is a poor distance proxy at range, a weighted variant would
    need heavy underweighting anyway.
    """
    # Sort ascending by signal in place, then view it strongest-first.
    cluster.sort(order='signal')
    strongest_first = cluster[::-1]
    sample = strongest_first[:min(len(strongest_first), MAX_WIFIS_IN_CLUSTER)]
    circles = numpy.array(
        [(row['lat'], row['lon'], row['radius']) for row in sample],
        dtype=numpy.double)
    lat, lon, accuracy = aggregate_position(circles, WIFI_MIN_ACCURACY)
    return result_type(
        lat=lat,
        lon=lon,
        accuracy=min(accuracy, WIFI_MAX_ACCURACY),
        score=float(strongest_first['score'].sum()))
def test_circle_radius(self):
    # Two overlapping circles of equal radius; the aggregate should sit
    # at their midpoint, with a radius covering the farthest circle edge.
    pair = [(1.0, 1.0, 100.0), (1.001, 1.001, 100.0)]
    circles = numpy.array(pair, dtype=numpy.double)
    lat, lon, radius = aggregate_position(circles, 10.0)
    self.assertEqual((lat, lon), (1.0005, 1.0005))
    expected_radius = distance(lat, lon, 1.0, 1.0) + 100.0
    self.assertAlmostEqual(expected_radius, radius, 7)
def aggregate_cluster_position(cluster, result_type):
    """Return the aggregate position of the user inside a single cluster.

    Only the strongest :data:`ichnaea.api.locate.constants.MAX_WIFIS_IN_CLUSTER`
    networks are used for the position estimate. The centroid calculation
    is unweighted, so distant outliers would skew it; and since signal
    strength is a poor distance proxy at range, a weighted variant would
    need heavy underweighting anyway.
    """
    # Sort ascending by signal in place, then view it strongest-first.
    cluster.sort(order='signal')
    strongest_first = cluster[::-1]
    sample = strongest_first[:min(len(strongest_first), MAX_WIFIS_IN_CLUSTER)]
    circles = numpy.array(
        [(row['lat'], row['lon'], row['radius']) for row in sample],
        dtype=numpy.double)
    lat, lon, accuracy = aggregate_position(circles, WIFI_MIN_ACCURACY)
    return result_type(
        lat=lat,
        lon=lon,
        accuracy=min(accuracy, WIFI_MAX_ACCURACY))
def aggregate_cell_position(cells, result_type):
    """Return the aggregate position of the user inside a cell cluster.

    Each cell contributes a (lat, lon, range) circle to an unweighted
    centroid calculation.
    """
    rows = [(cell.lat, cell.lon, cell.range) for cell in cells]
    circles = numpy.array(rows, dtype=numpy.double)
    lat, lon, accuracy = aggregate_position(circles, CELL_MIN_ACCURACY)
    return result_type(lat=lat, lon=lon, accuracy=accuracy)
def aggregate_cell_position(cells, result_type):
    """Return the aggregate position of the user inside a cell cluster.

    Each cell contributes a (lat, lon, radius) circle to an unweighted
    centroid calculation; the reported accuracy is capped at
    CELL_MAX_ACCURACY.
    """
    rows = [(cell.lat, cell.lon, cell.radius) for cell in cells]
    circles = numpy.array(rows, dtype=numpy.double)
    lat, lon, accuracy = aggregate_position(circles, CELL_MIN_ACCURACY)
    return result_type(
        lat=lat,
        lon=lon,
        accuracy=min(accuracy, CELL_MAX_ACCURACY))
def aggregate_cell_position(cluster, result_type):
    """Return the aggregate position of the user inside a cell cluster.

    Each record contributes a (lat, lon, radius) circle to an unweighted
    centroid calculation; the reported accuracy is capped at
    CELL_MAX_ACCURACY and the cluster scores are summed.
    """
    circles = numpy.array(
        [(row['lat'], row['lon'], row['radius']) for row in cluster],
        dtype=numpy.double)
    lat, lon, accuracy = aggregate_position(circles, CELL_MIN_ACCURACY)
    return result_type(
        lat=lat,
        lon=lon,
        accuracy=min(accuracy, CELL_MAX_ACCURACY),
        score=float(cluster['score'].sum()))
def aggregate_cluster_position(cluster, result_type):
    """Return the aggregate position of the user inside a single cluster.

    Only the first :data:`ichnaea.api.locate.constants.MAX_WIFIS_IN_CLUSTER`
    networks are used for the position estimate. The centroid calculation
    is unweighted, so distant outliers would skew it; and since signal
    strength is a poor distance proxy at range, a weighted variant would
    need heavy underweighting anyway.
    """
    sample = cluster[:min(len(cluster), MAX_WIFIS_IN_CLUSTER)]
    rows = [(wifi.lat, wifi.lon, wifi.radius) for wifi in sample]
    circles = numpy.array(rows, dtype=numpy.double)
    lat, lon, accuracy = aggregate_position(circles, WIFI_MIN_ACCURACY)
    return result_type(lat=lat, lon=lon, accuracy=accuracy)
def test_minimum(self):
    # A single circle: the position is its center, and the accuracy is
    # raised to the supplied minimum.
    circles = numpy.array([(1.0, 1.0, 100.0)], dtype=numpy.double)
    result = aggregate_position(circles, 333.0)
    self.assertEqual(result, (1.0, 1.0, 333.0))
def test_circle_radius(self):
    # Two overlapping circles of equal radius; the aggregate should sit
    # at their midpoint, with a radius covering the farthest circle edge.
    pair = [(1.0, 1.0, 100.0), (1.001, 1.001, 100.0)]
    circles = numpy.array(pair, dtype=numpy.double)
    lat, lon, radius = aggregate_position(circles, 10.0)
    self.assertEqual((lat, lon), (1.0005, 1.0005))
    expected_radius = distance(lat, lon, 1.0, 1.0) + 100.0
    self.assertAlmostEqual(expected_radius, radius, 7)
def get(self, query):
    """
    Get a cached result for the query.

    :param query: The query for which to look for a cached value.
    :type query: :class:`ichnaea.api.locate.query.Query`

    :returns: The cache result or None.
    :rtype: :class:`~ichnaea.api.locate.fallback.ExternalResult`
    """
    if not self._should_cache(query):
        self._stat_count('cache', tags=['status:bypassed'])
        return None

    cache_keys = self._cache_keys(query)
    # dict of (lat, lon, fallback) tuples to ExternalResult list
    # lat/lon clustered into ~100x100 meter grid cells
    clustered_results = defaultdict(list)
    not_found_cluster = (None, None, None)
    try:
        for value in self.redis_client.mget(cache_keys):
            if not value:
                continue

            value = simplejson.loads(value)
            if value == LOCATION_NOT_FOUND:
                value = ExternalResult(None, None, None, None)
                clustered_results[not_found_cluster] = [value]
            else:
                value = ExternalResult(**value)
                # ~100x100m clusters
                # BUG FIX: previously both key components were rounded
                # from value.lat, so longitude never entered the cluster
                # key and all results in a latitude band collapsed into
                # one cluster.
                clustered_results[(round(value.lat, 3),
                                   round(value.lon, 3),
                                   value.fallback)].append(value)
    except (simplejson.JSONDecodeError, RedisError):
        self.raven_client.captureException()
        self._stat_count('cache', tags=['status:failure'])
        return None

    if not clustered_results:
        self._stat_count('cache', tags=['status:miss'])
        return None

    if list(clustered_results.keys()) == [not_found_cluster]:
        # the only match was for not found results
        self._stat_count('cache', tags=['status:hit'])
        return clustered_results[not_found_cluster][0]

    if len(clustered_results) == 1:
        # all the cached values agree with each other
        self._stat_count('cache', tags=['status:hit'])
        results = list(clustered_results.values())[0]
        circles = numpy.array(
            [(res.lat, res.lon, res.accuracy) for res in results],
            dtype=numpy.double)
        lat, lon, accuracy = aggregate_position(circles, 10.0)
        # report the largest cached accuracy, not the aggregate one
        _, accuracies = numpy.hsplit(circles, [2])
        return ExternalResult(
            lat=lat,
            lon=lon,
            accuracy=float(numpy.nanmax(accuracies)),
            fallback=results[0].fallback,
        )

    # inconsistent results
    self._stat_count('cache', tags=['status:inconsistent'])
    return None
def get(self, query):
    """
    Get a cached result for the query.

    :param query: The query for which to look for a cached value.
    :type query: :class:`ichnaea.api.locate.query.Query`

    :returns: The cache result or None.
    :rtype: :class:`~ichnaea.api.locate.fallback.ExternalResult`
    """
    if not self._should_cache(query):
        self._stat_count('cache', tags=['status:bypassed'])
        return None

    cache_keys = self._cache_keys(query)
    # dict of (lat, lon, fallback) tuples to ExternalResult list
    # lat/lon clustered into ~100x100 meter grid cells
    clustered_results = defaultdict(list)
    not_found_cluster = (None, None, None)
    try:
        for value in self.redis_client.mget(cache_keys):
            if not value:
                continue

            value = simplejson.loads(value)
            if value == LOCATION_NOT_FOUND:
                value = ExternalResult(None, None, None, None)
                clustered_results[not_found_cluster] = [value]
            else:
                value = ExternalResult(**value)
                # ~100x100m clusters
                # BUG FIX: previously both key components were rounded
                # from value.lat, so longitude never entered the cluster
                # key and all results in a latitude band collapsed into
                # one cluster.
                clustered_results[(round(value.lat, 3),
                                   round(value.lon, 3),
                                   value.fallback)].append(value)
    except (simplejson.JSONDecodeError, RedisError):
        self.raven_client.captureException()
        self._stat_count('cache', tags=['status:failure'])
        return None

    if not clustered_results:
        self._stat_count('cache', tags=['status:miss'])
        return None

    if list(clustered_results.keys()) == [not_found_cluster]:
        # the only match was for not found results
        self._stat_count('cache', tags=['status:hit'])
        return clustered_results[not_found_cluster][0]

    if len(clustered_results) == 1:
        # all the cached values agree with each other
        self._stat_count('cache', tags=['status:hit'])
        results = list(clustered_results.values())[0]
        circles = numpy.array(
            [(res.lat, res.lon, res.accuracy) for res in results],
            dtype=numpy.double)
        lat, lon, accuracy = aggregate_position(circles, 10.0)
        # report the largest cached accuracy, not the aggregate one
        _, accuracies = numpy.hsplit(circles, [2])
        return ExternalResult(
            lat=lat,
            lon=lon,
            accuracy=float(numpy.nanmax(accuracies)),
            fallback=results[0].fallback,
        )

    # inconsistent results
    self._stat_count('cache', tags=['status:inconsistent'])
    return None