Пример #1
0
    def best_cluster(self):
        """Pick the highest-scoring group of mutually nearby results.

        A collection holding at most one result is returned unchanged.
        Otherwise one candidate group is built per result (visited in
        order of ascending accuracy) and the group with the best score
        is returned as a list.
        """
        if len(self) < 2:
            return self

        ordered = sorted(self, key=operator.attrgetter("accuracy"))

        groups = []
        for index, anchor in enumerate(ordered):
            members = [anchor]
            # Every result gets a 50% buffer zone around its accuracy.
            anchor_radius = anchor.accuracy * 1.5
            # Pair the anchor only with the results that follow it, so
            # each pair is measured once (upper triangle).
            for candidate in ordered[index + 1:]:
                candidate_radius = candidate.accuracy * 1.5
                reach = max(anchor_radius, candidate_radius)
                gap = distance(anchor.lat, anchor.lon,
                               candidate.lat, candidate.lon)
                if gap <= reach:
                    members.append(candidate)
            groups.append(members)

        def rank(members):
            # Highest cumulative score wins, ties are broken by the
            # highest individual score.
            scores = [member.score for member in members]
            return (sum(scores), max(scores))

        groups.sort(key=rank, reverse=True)
        return groups[0]
Пример #2
0
    def confirm_station_obs(self):
        """Tell whether all observations lie close to the station.

        Returns False when the station has no position. Otherwise returns
        True only if no observation is farther than MAX_DIST_METERS from
        the station position.
        """
        if not self.has_position():
            return False

        # station with position: a single far-away observation is enough
        # to reject, and `any` stops at the first one it finds
        return not any(
            distance(obs.lat, obs.lon, self.station.lat, self.station.lon)
            > self.MAX_DIST_METERS
            for obs in self.observations
        )
Пример #3
0
def aggregate_mac_position(networks, minimum_accuracy):
    """
    Estimate a position for the given networks.

    The starting guess is the weighted mean of the network positions.
    ``leastsq`` then refines it using the weighted distances to all
    networks; if the solver does not report success, the starting guess
    is kept.

    Return a (lat, lon, accuracy) tuple of floats. The accuracy is the
    95th percentile of the distances from the position to the networks,
    but never less than minimum_accuracy.
    """
    # Idea based on https://gis.stackexchange.com/questions/40660

    def residuals(guess, nets):
        # One weighted distance per network, measured from the guess.
        values = []
        for net in nets:
            gap = distance(net["lat"], net["lon"], guess[0], guess[1])
            age_factor = min(math.sqrt(2000.0 / net["age"]), 1.0)
            values.append(
                gap * age_factor / math.pow(net["signalStrength"], 2))
        return numpy.array(values)

    coords = numpy.array(
        [(net["lat"], net["lon"]) for net in networks],
        dtype=numpy.double,
    )

    # Weight: score, scaled by an age factor capped at 1.0 and divided
    # by the squared signal strength.
    raw_weights = []
    for net in networks:
        age_factor = min(math.sqrt(2000.0 / net["age"]), 1.0)
        raw_weights.append(
            net["score"] * age_factor / math.pow(net["signalStrength"], 2))
    weights = numpy.array(raw_weights, dtype=numpy.double)

    # Weighted mean over all networks serves as the initial position.
    start = numpy.average(coords, axis=0, weights=weights)

    solution, _cov_x, _info, _mesg, status = leastsq(
        residuals, start, args=networks, full_output=True)
    lat, lon = solution

    if status not in (1, 2, 3, 4):
        # No solution found, fall back to the initial estimate.
        lat, lon = start

    # Guess the accuracy as the 95th percentile of the distances
    # from the lat/lon to the positions of all networks.
    spread = numpy.array(
        [distance(lat, lon, net["lat"], net["lon"]) for net in networks],
        dtype=numpy.double,
    )
    accuracy = max(numpy.percentile(spread, 95), minimum_accuracy)

    return (float(lat), float(lon), float(accuracy))
Пример #4
0
    def aggregate_obs(self):
        """Combine the observations into one weighted position summary.

        Returns None when the diagonal of the bounding box around all
        observation positions is longer than MAX_DIST_METERS. Otherwise
        returns a dict with the raw positions and weights, the weighted
        mean lat/lon, the bounding box, a radius, a region and the
        bounded samples/weight values.
        """
        coords = numpy.array(
            [(obs.lat, obs.lon) for obs in self.observations],
            dtype=numpy.double,
        )

        # Corners of the bounding box around all observations.
        upper = coords.max(axis=0)
        lower = coords.min(axis=0)
        max_lat, max_lon = upper
        min_lat, min_lon = lower

        if distance(min_lat, min_lon, max_lat, max_lon) > self.MAX_DIST_METERS:
            # Observations are spread too far apart to be aggregated.
            return None

        obs_weights = numpy.array(
            [obs.weight for obs in self.observations], dtype=numpy.double
        )

        center = numpy.average(coords, axis=0, weights=obs_weights)
        lat = float(center[0])
        lon = float(center[1])
        radius = circle_radius(lat, lon, max_lat, max_lon, min_lat, min_lon)
        region = GEOCODER.region(lat, lon)

        samples, weight = self.bounded_samples_weight(
            len(self.observations), float(obs_weights.sum())
        )

        return dict(
            positions=coords,
            weights=obs_weights,
            lat=lat,
            lon=lon,
            max_lat=float(max_lat),
            min_lat=float(min_lat),
            max_lon=float(max_lon),
            min_lon=float(min_lon),
            radius=radius,
            region=region,
            samples=samples,
            weight=weight,
        )
Пример #5
0
def aggregate_cell_position(networks, min_accuracy, max_accuracy):
    """
    Compute the aggregate position of the user inside the given
    cluster of networks.

    Return a (lat, lon, accuracy, score) tuple of floats. The accuracy
    is clamped to the range from min_accuracy to max_accuracy. A cluster
    of exactly one network yields that network's position, its clamped
    radius and its score.
    """

    def clamp(value):
        # Raise to the lower bound first, then cap at the upper bound.
        return min(max(value, min_accuracy), max_accuracy)

    if len(networks) == 1:
        only = networks[0]
        lat = only["lat"]
        lon = only["lon"]
        radius = clamp(only["radius"])
        score = only["score"]
        return (float(lat), float(lon), float(radius), float(score))

    coords = numpy.array(
        [(net["lat"], net["lon"]) for net in networks],
        dtype=numpy.double,
    )

    # Weight: score, scaled by an age factor capped at 1.0 and divided
    # by the squared signal strength.
    raw_weights = []
    for net in networks:
        age_factor = min(math.sqrt(2000.0 / net["age"]), 1.0)
        raw_weights.append(
            net["score"] * age_factor / math.pow(net["signalStrength"], 2))
    weights = numpy.array(raw_weights, dtype=numpy.double)

    lat, lon = numpy.average(coords, axis=0, weights=weights)
    score = networks["score"].sum()

    # Guess the accuracy as the 95th percentile of the distances
    # from the lat/lon to the positions of all networks.
    spread = numpy.array(
        [distance(lat, lon, net["lat"], net["lon"]) for net in networks],
        dtype=numpy.double,
    )
    accuracy = clamp(numpy.percentile(spread, 95))

    return (float(lat), float(lon), float(accuracy), float(score))
Пример #6
0
    def get(self, query):
        """
        Get a cached result for the query.

        All cache keys of the query are fetched with a single ``mget``.
        Cached positions are grouped into clusters keyed by latitude and
        longitude (both rounded to three decimals) plus the fallback
        name; cached "not found" markers share one dedicated cluster.
        A result is returned only when exactly one cluster exists. For a
        position cluster the result is the mean position with a radius
        large enough to cover every cached circle.

        A metric is emitted for each outcome: "bypassed", "failure",
        "miss", "hit" or "inconsistent".

        :param query: The query for which to look for a cached value.
        :type query: :class:`ichnaea.api.locate.query.Query`

        :returns: The cache result or None.
        :rtype: :class:`~ichnaea.api.locate.fallback.ExternalResult`
        """
        fallback_name = query.api_key.fallback_name

        if not self._should_cache(query):
            self._capture_incr(fallback_name, "bypassed")
            return None

        cache_keys = self._cache_keys(query)
        # dict of (lat, lon, fallback) tuples to ExternalResult list
        # lat/lon clustered into ~100x100 meter grid cells
        clustered_results = defaultdict(list)
        not_found_cluster = (None, None, None)
        try:
            for value in self.redis_client.mget(cache_keys):
                if not value:
                    # nothing cached under this key
                    continue

                value = json.loads(value)
                if value == LOCATION_NOT_FOUND:
                    value = ExternalResult(None, None, None, None)
                    clustered_results[not_found_cluster] = [value]
                else:
                    value = ExternalResult(**value)
                    # ~100x100m clusters; the key must use both the
                    # latitude and the longitude, otherwise positions
                    # that differ only in longitude would be merged.
                    cluster_key = (
                        round(value.lat, 3),
                        round(value.lon, 3),
                        value.fallback,
                    )
                    clustered_results[cluster_key].append(value)
        except (json.JSONDecodeError, RedisError):
            self.raven_client.captureException()
            self._capture_incr(fallback_name, "failure")
            return None

        if not clustered_results:
            self._capture_incr(fallback_name, "miss")
            return None

        if len(clustered_results) > 1:
            # inconsistent results
            self._capture_incr(fallback_name, "inconsistent")
            return None

        # exactly one cluster: all the cached values agree with each other
        self._capture_incr(fallback_name, "hit")

        if not_found_cluster in clustered_results:
            # the only match was for not found results
            return clustered_results[not_found_cluster][0]

        results = next(iter(clustered_results.values()))

        circles = numpy.array(
            [(res.lat, res.lon, res.accuracy) for res in results],
            dtype=numpy.double,
        )

        # The first two columns hold the positions; average them.
        lat, lon = circles[:, :2].mean(axis=0)
        lat = float(lat)
        lon = float(lon)

        # Grow the radius until it reaches the far edge of every cached
        # circle: distance to its center plus its own accuracy.
        radius = 0.0
        for circle in circles:
            p_dist = distance(lat, lon, circle[0], circle[1]) + circle[2]
            radius = max(radius, p_dist)

        return ExternalResult(
            lat=lat, lon=lon, accuracy=float(radius), fallback=results[0].fallback
        )
Пример #7
0
def cluster_networks(models,
                     lookups,
                     min_age=0,
                     min_radius=None,
                     min_signal=None,
                     max_distance=None):
    """
    Group the given database models, paired with their lookups, into
    clusters of nearby networks.

    Return a list of arrays of dtype NETWORK_DTYPE, one per cluster.
    Clusters with fewer than two networks are never returned, so the
    result is an empty list when nothing clusters.
    """
    now = util.utcnow()
    today = now.date()

    # Map each mac to the (age, signal strength) pair of its lookup.
    observed = {}
    for lookup in lookups:
        entry = (
            max(abs(lookup.age or min_age), 1000),
            lookup.signalStrength or min_signal,
        )
        observed[decode_mac(lookup.mac)] = entry

    def to_row(model):
        # Field order follows the tuple layout given to NETWORK_DTYPE.
        return (
            model.lat,
            model.lon,
            model.radius or min_radius,
            observed[model.mac][0],
            observed[model.mac][1],
            station_score(model, now),
            encode_mac(model.mac, codec="base64"),
            bool(model.last_seen is not None and model.last_seen >= today),
        )

    networks = numpy.array([to_row(model) for model in models],
                           dtype=NETWORK_DTYPE)

    # Only consider clusters that have at least 2 found networks
    # inside them. Otherwise someone could use a combination of
    # one real network and one fake and therefore not found network to
    # get the position of the real network.
    count = len(networks)
    if count < 2:
        # Not enough networks to form a valid cluster.
        return []

    positions = networks[["lat", "lon"]]
    if count == 2:
        first = positions[0]
        second = positions[1]
        agree = distance(first[0], first[1],
                         second[0], second[1]) <= max_distance
        # Two agreeing networks form the single cluster. If they
        # disagree they form two clusters of size one, neither of which
        # is large enough to be returned.
        return [networks] if agree else []

    # Build the condensed distance matrix (in meters): one entry per
    # unordered pair, which skips the zero diagonal and avoids computing
    # every distance twice as the square form would.
    # The count < 2 special cases are already handled above.
    # See scipy.spatial.distance.squareform and
    # https://stackoverflow.com/questions/13079563
    condensed = numpy.array(
        [
            distance(left[0], left[1], right[0], right[1])
            for left, right in itertools.combinations(positions, 2)
        ],
        dtype=numpy.double,
    )

    linkage = hierarchy.linkage(condensed, method="complete")
    labels = hierarchy.fcluster(linkage,
                                max_distance,
                                criterion="distance",
                                depth=2)

    # Collect the networks under their assigned cluster label.
    by_label = {}
    for label, net in zip(labels, networks):
        by_label.setdefault(label, []).append(net)

    return [
        numpy.array(members, dtype=NETWORK_DTYPE)
        for members in by_label.values()
        if len(members) >= 2
    ]
Пример #8
0
 def func(point, points):
     """Return one weighted distance from `point` for each entry in `points`.

     Each distance is scaled by an age factor capped at 1.0 and divided
     by the squared signal strength of the entry.
     """
     weighted = []
     for p in points:
         gap = distance(p["lat"], p["lon"], point[0], point[1])
         age_factor = min(math.sqrt(2000.0 / p["age"]), 1.0)
         weighted.append(gap * age_factor / math.pow(p["signalStrength"], 2))
     return numpy.array(weighted)
Пример #9
0
    def region(self, lat, lon):
        """
        Return a region code matching the provided position.
        If the position is not found inside any region return None.

        Candidates are narrowed in three steps: the RTree of buffered
        region envelopes, then the buffered polygon shapes, then the
        precise polygon shapes. When that leaves no precise match or
        several, distances from the position to the boundary vertices
        of the remaining regions decide.
        """

        def border_distances(codes):
            # Map the distance from the position to each boundary vertex
            # onto the code of the region owning that vertex. Distances
            # are the dict keys, so if two vertices are equally far away
            # the one visited last keeps the entry.
            found = {}
            for code in codes:
                boundary = self._shapes[code].boundary
                if isinstance(boundary,
                              geometry.base.BaseMultipartGeometry):
                    # A multipart boundary exposes its coordinates only
                    # through its parts.
                    coords = [
                        coord
                        for geom in boundary.geoms
                        for coord in geom.coords
                    ]
                else:
                    coords = boundary.coords
                for coord in coords:
                    # Shape coordinates are (lon, lat) pairs.
                    found[geocalc.distance(coord[1], coord[0], lat,
                                           lon)] = code
            return found

        # Look up point in RTree of buffered region envelopes.
        # This is a coarse-grained but very fast match.
        point = geometry.Point(lon, lat)
        codes = {
            self._tree_ids[id_]
            for id_ in self._tree.intersection(point.bounds)
        }

        if not codes:
            return None

        # match point against the buffered polygon shapes
        buffered_codes = {
            code for code in codes
            if self._buffered_shapes[code].contains(point)
        }
        if len(buffered_codes) < 2:
            # Zero or one candidate left: nothing to disambiguate.
            return next(iter(buffered_codes), None)

        # match point against the precise polygon shapes
        precise_codes = {
            code for code in buffered_codes
            if self._prepared_shapes[code].contains(point)
        }

        if len(precise_codes) == 1:
            return next(iter(precise_codes))

        # Use distance from the border of each region as the tie-breaker.
        if not precise_codes:
            # point wasn't in any precise region, which one of the
            # buffered regions is it closest to?
            distances = border_distances(buffered_codes)
            return distances[min(distances)]

        # point was in multiple overlapping regions, take the one that
        # owns the boundary vertex farthest away from the point
        distances = border_distances(precise_codes)
        return distances[max(distances)]