Exemplo n.º 1
0
    def predict_probability_area(self, upper_bound, lower_bound,
                                 estimated_loc):
        '''
        Return a confidence range for an estimate relative to a bounding box.

        The estimated point is projected onto each of the four box edges
        (keeping its own longitude for the top/bottom edges and its own
        latitude for the right/left edges) and the haversine distance to each
        projection is measured. The smallest and the largest of those four
        distances are normalised as
        (distance - dispersion) / dispersion_std_dev and handed to self.lookup.

        Args:
            upper_bound (geoCoord): bounding box top right geoCoord
            lower_bound (geoCoord): bounding box bottom left geoCoord
            estimated_loc (LocEstimate): the estimate; its geo_coord,
                dispersion and dispersion_std_dev attributes are read

        Returns:
            Probability Tuple(Tuple(float,float)): (self.lookup result for the
            nearest edge, self.lookup result for the farthest edge)

        Note:
            dispersion_std_dev is used as a divisor without a zero check.
        '''
        center = estimated_loc.geo_coord

        # Edge projections in the order: top, bottom, right, left.
        projections = (
            (upper_bound.lat, center.lon),
            (lower_bound.lat, center.lon),
            (center.lat, upper_bound.lon),
            (center.lat, lower_bound.lon),
        )
        edge_dists = [haversine(center, GeoCoord(lat, lon))
                      for lat, lon in projections]

        nearest = min(edge_dists)
        farthest = max(edge_dists)

        low = self.lookup(
            (nearest - estimated_loc.dispersion) / estimated_loc.dispersion_std_dev)
        high = self.lookup(
            (farthest - estimated_loc.dispersion) / estimated_loc.dispersion_std_dev)
        return (low, high)
Exemplo n.º 2
0
    def predict_probability_area(self, upper_bound, lower_bound, estimated_loc):
        '''
        Return a confidence range for an estimate relative to a bounding box.

        Measures the haversine distance from the estimated point to each edge
        of the box (top and bottom along the estimate's longitude, right and
        left along its latitude), then looks up the normalised nearest and
        farthest edge distances with self.lookup.

        Args:
            upper_bound (geoCoord): bounding box top right geoCoord
            lower_bound (geoCoord): bounding box bottom left geoCoord
            estimated_loc (LocEstimate): the estimate; its geo_coord,
                dispersion and dispersion_std_dev attributes are read

        Returns:
            Probability Tuple(Tuple(float,float)): (self.lookup result for the
            nearest edge, self.lookup result for the farthest edge)

        Note:
            dispersion_std_dev is used as a divisor without a zero check.
        '''
        origin = estimated_loc.geo_coord

        def distance_to(lat, lon):
            # Haversine distance from the estimate to the point (lat, lon).
            return haversine(origin, GeoCoord(lat, lon))

        def normalised(dist):
            # Offset by the dispersion and scale by its standard deviation.
            return (dist - estimated_loc.dispersion) / estimated_loc.dispersion_std_dev

        to_top = distance_to(upper_bound.lat, origin.lon)
        to_bottom = distance_to(lower_bound.lat, origin.lon)
        to_right = distance_to(origin.lat, upper_bound.lon)
        to_left = distance_to(origin.lat, lower_bound.lon)

        distances = [to_top, to_bottom, to_right, to_left]
        closest = min(distances)
        furthest = max(distances)

        return (self.lookup(normalised(closest)),
                self.lookup(normalised(furthest)))
Exemplo n.º 3
0
    def load_from_rdds(locs_known,
                       edges,
                       desired_samples=1000,
                       dispersion_threshold=150,
                       neighbor_threshold=3):
        '''
        Creates an EstimatorCurve

        Every node with a known location is re-estimated as the weighted
        median of its neighbors' known locations. The distance between that
        estimate and the node's known location is then used to build two
        curves: one from the dispersion-normalised distances and one from the
        raw distances of estimates whose dispersion std dev is zero.

        Args:
            locs_known (rdd of LocEstimate): RDD of locations that are known,
                keyed by node id (it is joined against the edge ids)
            edges (rdd of (src_id (dest_id, weight)): RDD of edges in the network
            desired_samples (int): Limit the curve to just a sample of data
            dispersion_threshold: Only estimates whose dispersion is strictly
                below this value are kept
            neighbor_threshold (int): Minimum number of neighbors with a known
                location a node needs before it is estimated

        Returns:
            EstimatorCurve:  A new EstimatorCurve representing the known input data
        '''

        # The helpers below unpack their argument in the body rather than in
        # the parameter list: tuple parameters were removed in Python 3
        # (PEP 3113), while this form works on both Python 2 and 3.

        def edge_destination(edge):
            src_id, (dst_id, weight) = edge
            return dst_id

        def edge_if_destination_known(joined):
            dst_id, (edge, loc_known) = joined
            return [edge] if loc_known is not None else []

        def neighbor_by_destination(joined):
            src_id, ((dst_id, weight), src_loc) = joined
            return (dst_id, (src_loc, weight))

        def has_enough_neighbors(grouped):
            dst_id, neighbors = grouped
            return len(neighbors) >= neighbor_threshold

        def estimate_from_neighbors(neighbors):
            return median(haversine,
                          [loc for loc, w in neighbors],
                          [w for loc, w in neighbors])

        def with_error_distance(locs):
            found_loc, known_loc = locs
            return (found_loc, known_loc,
                    haversine(known_loc.geo_coord, found_loc.geo_coord))

        def below_dispersion_threshold(record):
            node_id, (found_loc, known_loc, dist) = record
            return found_loc.dispersion < dispersion_threshold

        def has_zero_std_dev(record):
            node_id, (found_loc, known_loc, dist) = record
            return found_loc.dispersion_std_dev == 0

        def normalised_error(record):
            node_id, (found_loc, known_loc, dist) = record
            if found_loc.dispersion_std_dev != 0:
                return (dist - found_loc.dispersion) / found_loc.dispersion_std_dev
            # A zero std dev cannot be divided by; such estimates contribute 0.
            return 0

        def raw_error(record):
            node_id, (found_loc, known_loc, dist) = record
            return dist

        # Keep only edges whose destination has a known location, so every
        # estimate produced below can be compared against ground truth.
        known_edges = edges.keyBy(edge_destination)\
            .leftOuterJoin(locs_known)\
            .flatMap(edge_if_destination_known)

        # Attach each source's known location, regroup by destination and
        # estimate the destination from its neighbors.
        medians = known_edges.join(locs_known)\
            .map(neighbor_by_destination)\
            .groupByKey()\
            .filter(has_enough_neighbors)\
            .mapValues(estimate_from_neighbors)\
            .join(locs_known)\
            .mapValues(with_error_distance)\
            .filter(below_dispersion_threshold)

        # some medians might have std_devs of zero
        close_locs = medians.filter(has_zero_std_dev)

        values = medians.map(normalised_error)
        values_wo_stdev = close_locs.map(raw_error)

        return EstimatorCurve(EstimatorCurve.build_curve(values, desired_samples),
                              EstimatorCurve.build_curve(values_wo_stdev, desired_samples))
Exemplo n.º 4
0
    def load_from_rdds(locs_known, edges, desired_samples=1000, dispersion_threshold=150, neighbor_threshold=3):
        '''
        Creates an EstimatorCurve

        Every node with a known location is re-estimated as the weighted
        median of its neighbors' known locations. The distance between that
        estimate and the node's known location is then used to build two
        curves: one from the dispersion-normalised distances and one from the
        raw distances of estimates whose dispersion std dev is zero.

        Args:
            locs_known (rdd of LocEstimate): RDD of locations that are known,
                keyed by node id (it is joined against the edge ids)
            edges (rdd of (src_id (dest_id, weight)): RDD of edges in the network
            desired_samples (int): Limit the curve to just a sample of data
            dispersion_threshold: Only estimates whose dispersion is strictly
                below this value are kept
            neighbor_threshold (int): Minimum number of neighbors with a known
                location a node needs before it is estimated

        Returns:
            EstimatorCurve:  A new EstimatorCurve representing the known input data
        '''

        # The helpers below unpack their argument in the body rather than in
        # the parameter list: tuple parameters were removed in Python 3
        # (PEP 3113), while this form works on both Python 2 and 3.

        def edge_destination(edge):
            src_id, (dst_id, weight) = edge
            return dst_id

        def edge_if_destination_known(joined):
            dst_id, (edge, loc_known) = joined
            return [edge] if loc_known is not None else []

        def neighbor_by_destination(joined):
            src_id, ((dst_id, weight), src_loc) = joined
            return (dst_id, (src_loc, weight))

        def has_enough_neighbors(grouped):
            dst_id, neighbors = grouped
            return len(neighbors) >= neighbor_threshold

        def estimate_from_neighbors(neighbors):
            return median(haversine,
                          [loc for loc, w in neighbors],
                          [w for loc, w in neighbors])

        def with_error_distance(locs):
            found_loc, known_loc = locs
            return (found_loc, known_loc,
                    haversine(known_loc.geo_coord, found_loc.geo_coord))

        def below_dispersion_threshold(record):
            node_id, (found_loc, known_loc, dist) = record
            return found_loc.dispersion < dispersion_threshold

        def has_zero_std_dev(record):
            node_id, (found_loc, known_loc, dist) = record
            return found_loc.dispersion_std_dev == 0

        def normalised_error(record):
            node_id, (found_loc, known_loc, dist) = record
            if found_loc.dispersion_std_dev != 0:
                return (dist - found_loc.dispersion) / found_loc.dispersion_std_dev
            # A zero std dev cannot be divided by; such estimates contribute 0.
            return 0

        def raw_error(record):
            node_id, (found_loc, known_loc, dist) = record
            return dist

        # Keep only edges whose destination has a known location, so every
        # estimate produced below can be compared against ground truth.
        known_edges = edges.keyBy(edge_destination)\
            .leftOuterJoin(locs_known)\
            .flatMap(edge_if_destination_known)

        # Attach each source's known location, regroup by destination and
        # estimate the destination from its neighbors.
        medians = known_edges.join(locs_known)\
            .map(neighbor_by_destination)\
            .groupByKey()\
            .filter(has_enough_neighbors)\
            .mapValues(estimate_from_neighbors)\
            .join(locs_known)\
            .mapValues(with_error_distance)\
            .filter(below_dispersion_threshold)

        # some medians might have std_devs of zero
        close_locs = medians.filter(has_zero_std_dev)

        values = medians.map(normalised_error)
        values_wo_stdev = close_locs.map(raw_error)

        return EstimatorCurve(EstimatorCurve.build_curve(values, desired_samples),
                              EstimatorCurve.build_curve(values_wo_stdev, desired_samples))