Пример #1
0
def cluster_areas(areas, lookups, min_age=0):
    """
    Wrap every area in a cluster of its own.

    Each area is combined with the age and signal strength taken from the
    lookup that carries the same area id, and turned into a one-row numpy
    array of dtype ``NETWORK_DTYPE``. The resulting arrays are returned as
    a list, in the order of ``areas``.

    An area whose id has no corresponding lookup raises ``KeyError``.
    """
    now = util.utcnow()
    today = now.date()

    # Decoded area id -> (age, signal strength) as reported by the lookups.
    obs_data = {}
    for lookup in lookups:
        # A missing age falls back to min_age; the age never drops below 1000.
        age = max(abs(lookup.age or min_age), 1000)
        # A missing signal falls back to the per-radio-type minimum.
        signal = lookup.signalStrength or MIN_CELL_SIGNAL[lookup.radioType]
        obs_data[decode_cellarea(lookup.areaid)] = (age, signal)

    def as_cluster(area):
        # Build the single-row structured array describing one area.
        age, signal = obs_data[area.areaid]
        row = (
            area.lat,
            area.lon,
            area.radius,
            age,
            signal,
            area_score(area, now),
            encode_cellarea(*area.areaid, codec="base64"),
            # True only when the area was last seen today or later.
            bool(area.last_seen is not None and area.last_seen >= today),
        )
        return numpy.array([row], dtype=NETWORK_DTYPE)

    return [as_cluster(area) for area in areas]
Пример #2
0
    def test_ambiguous_mcc(self, geoip_db, http_session, session, source,
                           stats):
        """
        Search with one cell area for MCC 234 and verify the per-region
        scores as well as the emitted source counter.
        """
        now = util.utcnow()
        expected_regions = GEOCODER.regions_for_mcc(234, metadata=True)
        area = CellAreaFactory(mcc=234, num_cells=10)
        session.flush()

        query = self.model_query(
            geoip_db, http_session, session, stats, cells=[area])
        results = source.search(query)
        self.check_model_results(results, expected_regions)
        assert results.best().region_code == 'GB'

        for result in results:
            # Every region is expected at 0.25; only GB additionally
            # carries the score of the stored area.
            if result.region_code == 'GB':
                expected = 0.25 + area_score(area, now)
            else:
                expected = 0.25
            assert result.score == expected

        source_tags = [
            'key:test', 'region:none', 'source:internal', 'accuracy:low',
            'status:hit',
        ]
        stats.check(counter=[(self.api_type + '.source', source_tags)])
Пример #3
0
def cluster_areas(areas, lookups, min_age=0):
    """
    Cluster areas, treating each area as its own cluster.

    :param areas: Iterable of area models. Each must expose ``lat``,
        ``lon``, ``radius``, ``areaid`` and ``last_seen``.
    :param lookups: Iterable of area lookups. Each must expose ``areaid``,
        ``age``, ``signalStrength`` and ``radioType``.
    :param min_age: Age used for lookups that do not report one.
    :returns: A list with one single-row numpy array of dtype
        ``NETWORK_DTYPE`` per area, in the order of ``areas``.
    :raises KeyError: If an area's id has no corresponding lookup.
    """
    now = util.utcnow()
    today = now.date()

    # Create a dict of area ids mapped to their age and signal strength.
    obs_data = {}
    for lookup in lookups:
        obs_data[decode_cellarea(lookup.areaid)] = (
            # A missing age falls back to min_age; never go below 1000.
            max(abs(lookup.age or min_age), 1000),
            # A missing signal falls back to the per-radio-type minimum.
            lookup.signalStrength or MIN_CELL_SIGNAL[lookup.radioType],
        )

    clusters = []
    for area in areas:
        age, signal = obs_data[area.areaid]
        # last_seen may be unset; comparing None against a date raises
        # TypeError, so an area without last_seen counts as not seen today.
        seen_today = area.last_seen is not None and area.last_seen >= today
        clusters.append(
            numpy.array(
                [(
                    area.lat,
                    area.lon,
                    area.radius,
                    age,
                    signal,
                    area_score(area, now),
                    encode_cellarea(*area.areaid),
                    bool(seen_today),
                )],
                dtype=NETWORK_DTYPE,
            ))

    return clusters
Пример #4
0
    def test_ambiguous_mcc(self, geoip_db, http_session, session, source, metricsmock):
        """
        Search with one cell area for MCC 234 and verify the per-region
        scores as well as the recorded source metric.
        """
        now = util.utcnow()
        expected_regions = GEOCODER.regions_for_mcc(234, metadata=True)
        area = CellAreaFactory(mcc=234, num_cells=10)
        session.flush()

        results = source.search(
            self.model_query(geoip_db, http_session, session, cells=[area])
        )
        self.check_model_results(results, expected_regions)
        assert results.best().region_code == "GB"

        for result in results:
            # Every region is expected at 0.25; only GB additionally
            # carries the score of the stored area.
            if result.region_code == "GB":
                expected = 0.25 + area_score(area, now)
            else:
                expected = 0.25
            assert result.score == expected

        metric_name = self.api_type + ".source"
        metric_tags = [
            "key:test",
            "region:none",
            "source:internal",
            "accuracy:low",
            "status:hit",
        ]
        assert metricsmock.has_record(
            "incr", metric_name, value=1, tags=metric_tags
        )
Пример #5
0
    def test_smallest_area(self, geoip_db, http_session, session, source):
        """
        Query with two cell areas of different radius and expect the
        result to match the one created with the smaller radius.
        """
        now = util.utcnow()
        smaller = CellAreaFactory(radius=25000, num_cells=8)
        larger = CellAreaFactory(
            radius=30000, lat=smaller.lat + 0.2, num_cells=6)
        session.flush()

        results = source.search(
            self.model_query(
                geoip_db, http_session, session, cells=[smaller, larger]))
        self.check_model_results(results, [smaller])

        best = results.best()
        assert best.score == area_score(smaller, now)
Пример #6
0
    def test_smallest_area(self, geoip_db, http_session,
                           session, source, stats):
        """
        Query with two cell areas of different radius and expect the
        result to match the one created with the smaller radius.
        """
        now = util.utcnow()
        smaller = CellAreaFactory(radius=25000, num_cells=8)
        larger = CellAreaFactory(
            radius=30000, lat=smaller.lat + 0.2, num_cells=6)
        session.flush()

        cells = [smaller, larger]
        query = self.model_query(
            geoip_db, http_session, session, stats, cells=cells)
        results = source.search(query)
        self.check_model_results(results, [smaller])

        best = results.best()
        assert best.score == area_score(smaller, now)
Пример #7
0
    def test_multiple_mcc(self, geoip_db, http_session, session, source):
        """
        Query with cell areas for MCC 234 and MCC 235 and check which
        region wins and with what score.
        """
        now = util.utcnow()
        expected_region = GEOCODER.regions_for_mcc(235, metadata=True)[0]
        area = CellAreaFactory(mcc=234, num_cells=6)
        area2 = CellAreaFactory(mcc=235, num_cells=8)
        session.flush()

        results = source.search(
            self.model_query(
                geoip_db, http_session, session, cells=[area, area2]))
        assert len(results) > 2

        winner = results.best()
        assert winner.region_code == expected_region.code
        # Expected total: 1.25 from the MCC matches plus the score of the
        # first area.
        assert winner.score == 1.25 + area_score(area, now)
Пример #8
0
    def search_cell(self, query):
        """
        Score candidate regions for the cell and cell area lookups of a query.

        The mobile country code of each lookup is mapped to its possible
        regions, and every such region receives ``1 / number_of_regions``
        from that lookup. If at least one code maps to more than one
        region, the matching areas are queried and the score of each area
        is added to the region it is stored with. Returns a result list
        holding one result per candidate region.
        """
        results = self.result_list()
        now = util.utcnow()

        ambiguous_cells = []
        # Region code -> [region metadata, accumulated score].
        scored = {}
        for cell in list(query.cell) + list(query.cell_area):
            candidates = GEOCODER.regions_for_mcc(
                cell.mobileCountryCode, metadata=True)
            # A code shared by several regions splits its weight evenly.
            share = 1.0 / (len(candidates) or 1.0)
            for candidate in candidates:
                code = candidate.code
                if code in scored:
                    # Repeated matches for a region add up.
                    scored[code][1] += share
                else:
                    scored[code] = [candidate, share]
            if len(candidates) > 1:
                ambiguous_cells.append(cell)

        if ambiguous_cells:
            # The database is consulted only for ambiguous codes. Area
            # models serve both cell and area lookups, because only the
            # region matters here and it is the same for all cells
            # inside an area.
            matched_areas = query_areas(
                query, ambiguous_cells, self.area_model, self.raven_client)
            for area in matched_areas:
                code = area.region
                if not code or code not in scored:
                    continue
                scored[code][1] += area_score(area, now)

        for region, score in scored.values():
            result = self.result_type(
                region_code=region.code,
                region_name=region.name,
                accuracy=region.radius,
                score=score,
            )
            results.add(result)

        return results
Пример #9
0
    def test_multiple_mcc(self, geoip_db, http_session,
                          session, source, stats):
        """
        Query with cell areas for MCC 234 and MCC 235 and check which
        region wins and with what score.
        """
        now = util.utcnow()
        expected_region = GEOCODER.regions_for_mcc(235, metadata=True)[0]
        area = CellAreaFactory(mcc=234, num_cells=6)
        area2 = CellAreaFactory(mcc=235, num_cells=8)
        session.flush()

        cells = [area, area2]
        query = self.model_query(
            geoip_db, http_session, session, stats, cells=cells)
        results = source.search(query)
        assert len(results) > 2

        winner = results.best()
        assert winner.region_code == expected_region.code
        # Expected total: 1.25 from the MCC matches plus the score of the
        # first area.
        assert winner.score == 1.25 + area_score(area, now)
Пример #10
0
    def search_cell(self, query):
        """
        Build region results from the cell and cell area lookups of a query.

        Every lookup contributes ``1 / number_of_regions`` to each region
        its mobile country code maps to. When any code maps to more than
        one region, the matching areas are queried and each area's score
        is added to the region stored on that area. One result is added
        per candidate region and the result list is returned.
        """
        results = self.result_list()
        now = util.utcnow()

        ambiguous_cells = []
        weighted = []
        for cell in list(query.cell) + list(query.cell_area):
            mcc_regions = GEOCODER.regions_for_mcc(
                cell.mobileCountryCode, metadata=True)
            # Split the weight evenly between all regions of the code.
            weight = 1.0 / (len(mcc_regions) or 1.0)
            weighted.extend(
                (mcc_region, weight) for mcc_region in mcc_regions)
            if len(mcc_regions) > 1:
                ambiguous_cells.append(cell)

        # Accumulate the weights per region code, keeping the region
        # metadata of the first match next to the running total.
        totals = {}
        for region, weight in weighted:
            slot = totals.setdefault(region.code, [region, 0.0])
            slot[1] += weight

        if ambiguous_cells:
            # Query the database only when a code is ambiguous. The area
            # models cover both area and cell entries, since only the
            # region is of interest and it does not differ between the
            # individual cells inside an area.
            for area in query_areas(query, ambiguous_cells,
                                    self.area_model, self.raven_client):
                code = area.region
                if code and code in totals:
                    totals[code][1] += area_score(area, now)

        for region, total in totals.values():
            results.add(
                self.result_type(
                    region_code=region.code,
                    region_name=region.name,
                    accuracy=region.radius,
                    score=total,
                )
            )

        return results
Пример #11
0
    def test_ambiguous_mcc(self, geoip_db, http_session,
                           session, source, stats):
        """
        Search with one cell area for MCC 234 and verify the per-region
        scores as well as the emitted source counter.
        """
        now = util.utcnow()
        expected_regions = GEOCODER.regions_for_mcc(234, metadata=True)
        area = CellAreaFactory(mcc=234, num_cells=10)
        session.flush()

        results = source.search(
            self.model_query(
                geoip_db, http_session, session, stats, cells=[area]))
        self.check_model_results(results, expected_regions)
        assert results.best().region_code == 'GB'

        for result in results:
            # Base score for every region; GB alone gets the area score
            # on top.
            expected = 0.25
            if result.region_code == 'GB':
                expected = expected + area_score(area, now)
            assert result.score == expected

        counter_name = self.api_type + '.source'
        counter_tags = ['key:test', 'region:none', 'source:internal',
                        'accuracy:low', 'status:hit']
        stats.check(counter=[(counter_name, counter_tags)])
 def test_score_area(self):
     """
     Check the rounded area score for two freshly created and modified
     dummy areas.
     """
     now = util.utcnow()
     # (area attributes, expected score rounded to two decimals)
     cases = (
         ({'radius': 10, 'num_cells': 4}, 0.2),
         ({'radius': 0, 'num_cells': 100}, 0.1),
     )
     for attrs, expected in cases:
         area = AreaDummy(created=now, modified=now, **attrs)
         assert round(area_score(area, now), 2) == expected