def cluster_areas(areas, lookups, min_age=0):
    """
    Build one single-row cluster per area.

    Every area becomes its own one-element ``numpy`` array of dtype
    ``NETWORK_DTYPE``; areas are never merged with each other.

    :param areas: Area records. Each one needs a matching entry in
        ``lookups`` (matched on the decoded area id), otherwise a
        ``KeyError`` is raised.
    :param lookups: Query lookups providing ``areaid``, ``age``,
        ``signalStrength`` and ``radioType``.
    :param min_age: Age used when a lookup has no (or a zero) age.
    :returns: A list of one-row arrays, in the order of ``areas``.
    """
    now = util.utcnow()
    today = now.date()

    # Map each decoded area id to its observed (age, signal strength).
    observed = {}
    for lookup in lookups:
        # The absolute age is used and is never taken as less than 1000.
        age = max(abs(lookup.age or min_age), 1000)
        # A missing signal falls back to the minimum for the radio type.
        signal = lookup.signalStrength or MIN_CELL_SIGNAL[lookup.radioType]
        observed[decode_cellarea(lookup.areaid)] = (age, signal)

    return [
        numpy.array(
            [
                (
                    area.lat,
                    area.lon,
                    area.radius,
                    observed[area.areaid][0],
                    observed[area.areaid][1],
                    area_score(area, now),
                    encode_cellarea(*area.areaid, codec="base64"),
                    # True only when the area was last seen today or later.
                    bool(
                        area.last_seen is not None
                        and area.last_seen >= today
                    ),
                )
            ],
            dtype=NETWORK_DTYPE,
        )
        for area in areas
    ]
def test_ambiguous_mcc(self, geoip_db, http_session, session, source, stats):
    """
    Search with a single cell area for MCC 234 and verify the per-region
    scores as well as the emitted source counter.
    """
    now = util.utcnow()
    candidate_regions = GEOCODER.regions_for_mcc(234, metadata=True)
    area = CellAreaFactory(mcc=234, num_cells=10)
    session.flush()

    query = self.model_query(
        geoip_db, http_session, session, stats, cells=[area]
    )
    results = source.search(query)

    self.check_model_results(results, candidate_regions)
    assert results.best().region_code == 'GB'

    # The test expects a base score of 0.25 for every region and the
    # area's own score on top of that for GB only.
    for result in results:
        if result.region_code == 'GB':
            expected = 0.25 + area_score(area, now)
        else:
            expected = 0.25
        assert result.score == expected

    expected_tags = [
        'key:test',
        'region:none',
        'source:internal',
        'accuracy:low',
        'status:hit',
    ]
    stats.check(counter=[(self.api_type + '.source', expected_tags)])
def cluster_areas(areas, lookups, min_age=0):
    """
    Cluster areas, treat each area as its own cluster.

    Every area becomes its own one-element ``numpy`` array of dtype
    ``NETWORK_DTYPE``; areas are never merged with each other.

    :param areas: Area records. Each one needs a matching entry in
        ``lookups`` (matched on the decoded area id), otherwise a
        ``KeyError`` is raised.
    :param lookups: Query lookups providing ``areaid``, ``age``,
        ``signalStrength`` and ``radioType``.
    :param min_age: Age used when a lookup has no (or a zero) age.
    :returns: A list of one-row arrays, in the order of ``areas``.
    """
    now = util.utcnow()
    today = now.date()

    # Create a dict of area ids mapped to their age and signal strength.
    obs_data = {}
    for lookup in lookups:
        obs_data[decode_cellarea(lookup.areaid)] = (
            # The absolute age is used and is never taken as less than 1000.
            max(abs(lookup.age or min_age), 1000),
            # A missing signal falls back to the minimum for the radio type.
            lookup.signalStrength or MIN_CELL_SIGNAL[lookup.radioType],
        )

    clusters = []
    for area in areas:
        # An area without a ``last_seen`` value cannot be compared to a
        # date (that raises TypeError), so it counts as "not seen today".
        seen_today = bool(
            area.last_seen is not None and area.last_seen >= today
        )
        clusters.append(
            numpy.array(
                [
                    (
                        area.lat,
                        area.lon,
                        area.radius,
                        obs_data[area.areaid][0],
                        obs_data[area.areaid][1],
                        area_score(area, now),
                        encode_cellarea(*area.areaid),
                        seen_today,
                    )
                ],
                dtype=NETWORK_DTYPE,
            )
        )

    return clusters
def test_ambiguous_mcc(self, geoip_db, http_session, session, source, metricsmock):
    """
    Search with a single cell area for MCC 234 and verify the per-region
    scores as well as the recorded source metric.
    """
    now = util.utcnow()
    candidate_regions = GEOCODER.regions_for_mcc(234, metadata=True)
    area = CellAreaFactory(mcc=234, num_cells=10)
    session.flush()

    query = self.model_query(geoip_db, http_session, session, cells=[area])
    results = source.search(query)

    self.check_model_results(results, candidate_regions)
    assert results.best().region_code == "GB"

    # The test expects a base score of 0.25 for every region and the
    # area's own score on top of that for GB only.
    for result in results:
        if result.region_code == "GB":
            expected = 0.25 + area_score(area, now)
        else:
            expected = 0.25
        assert result.score == expected

    expected_tags = [
        "key:test",
        "region:none",
        "source:internal",
        "accuracy:low",
        "status:hit",
    ]
    assert metricsmock.has_record(
        "incr", self.api_type + ".source", value=1, tags=expected_tags
    )
def test_smallest_area(self, geoip_db, http_session, session, source):
    """
    Given two areas of different radius, the search result is expected
    to match the one with the smaller radius and carry its area score.
    """
    now = util.utcnow()
    smaller = CellAreaFactory(radius=25000, num_cells=8)
    larger = CellAreaFactory(radius=30000, lat=smaller.lat + 0.2, num_cells=6)
    session.flush()

    query = self.model_query(
        geoip_db, http_session, session, cells=[smaller, larger]
    )
    results = source.search(query)

    self.check_model_results(results, [smaller])
    best = results.best()
    assert best.score == area_score(smaller, now)
def test_smallest_area(self, geoip_db, http_session, session, source, stats):
    """
    Given two areas of different radius, the search result is expected
    to match the one with the smaller radius and carry its area score.
    """
    now = util.utcnow()
    smaller = CellAreaFactory(radius=25000, num_cells=8)
    larger = CellAreaFactory(radius=30000, lat=smaller.lat + 0.2, num_cells=6)
    session.flush()

    query = self.model_query(
        geoip_db, http_session, session, stats, cells=[smaller, larger]
    )
    results = source.search(query)

    self.check_model_results(results, [smaller])
    best = results.best()
    assert best.score == area_score(smaller, now)
def test_multiple_mcc(self, geoip_db, http_session, session, source):
    """
    Search with two cell areas of different MCCs (234 and 235) and check
    which region wins and with what score.
    """
    now = util.utcnow()
    expected_region = GEOCODER.regions_for_mcc(235, metadata=True)[0]
    area_234 = CellAreaFactory(mcc=234, num_cells=6)
    area_235 = CellAreaFactory(mcc=235, num_cells=8)
    session.flush()

    query = self.model_query(
        geoip_db, http_session, session, cells=[area_234, area_235]
    )
    results = source.search(query)
    assert len(results) > 2

    # The best result is expected to be the first region listed for
    # MCC 235, scored 1.25 plus the score of the MCC 234 area.
    winner = results.best()
    assert winner.region_code == expected_region.code
    assert winner.score == 1.25 + area_score(area_234, now)
def search_cell(self, query):
    """
    Score candidate regions from the MCCs of the queried cells and areas.

    Each cell / cell area contributes a score of 1.0 that is split evenly
    across the regions its mobile country code maps to; contributions to
    the same region are summed. If at least one MCC maps to more than one
    region, the matching area records are fetched and each one's area
    score is added to the region it is stored under.

    :param query: The query providing ``cell`` and ``cell_area`` lookups.
    :returns: A result list holding one result per candidate region.
    """
    results = self.result_list()
    now = util.utcnow()

    candidates = []  # (region, share) pairs, one per cell/region match
    ambiguous_cells = []
    for cell in list(query.cell) + list(query.cell_area):
        mcc_regions = GEOCODER.regions_for_mcc(
            cell.mobileCountryCode, metadata=True
        )
        # Divide the cell's score by the number of possible regions.
        share = 1.0 / (len(mcc_regions) or 1.0)
        candidates.extend((mcc_region, share) for mcc_region in mcc_regions)
        if len(mcc_regions) > 1:
            ambiguous_cells.append(cell)

    # Group by region code; each entry is a mutable [region, total score].
    by_code = {}
    for region, share in candidates:
        code = region.code
        entry = by_code.get(code)
        if entry is None:
            by_code[code] = [region, share]
        else:
            # Sum up the scores of multiple matches.
            entry[1] += share

    if ambiguous_cells:
        # Only do a database query if the mcc is ambiguous.
        # Use the area models for both area and cell entries, as only
        # the region is of interest here, which won't differ between
        # the individual cells inside an area.
        matched_areas = query_areas(
            query, ambiguous_cells, self.area_model, self.raven_client
        )
        for area in matched_areas:
            area_code = area.region
            if area_code and area_code in by_code:
                by_code[area_code][1] += area_score(area, now)

    for region, total in by_code.values():
        result = self.result_type(
            region_code=region.code,
            region_name=region.name,
            accuracy=region.radius,
            score=total,
        )
        results.add(result)

    return results
def test_multiple_mcc(self, geoip_db, http_session, session, source, stats):
    """
    Search with two cell areas of different MCCs (234 and 235) and check
    which region wins and with what score.
    """
    now = util.utcnow()
    expected_region = GEOCODER.regions_for_mcc(235, metadata=True)[0]
    area_234 = CellAreaFactory(mcc=234, num_cells=6)
    area_235 = CellAreaFactory(mcc=235, num_cells=8)
    session.flush()

    query = self.model_query(
        geoip_db, http_session, session, stats, cells=[area_234, area_235]
    )
    results = source.search(query)
    assert len(results) > 2

    # The best result is expected to be the first region listed for
    # MCC 235, scored 1.25 plus the score of the MCC 234 area.
    winner = results.best()
    assert winner.region_code == expected_region.code
    assert winner.score == 1.25 + area_score(area_234, now)
def search_cell(self, query):
    """
    Score candidate regions from the MCCs of the queried cells and areas.

    Each cell / cell area contributes a score of 1.0 that is split evenly
    across the regions its mobile country code maps to; contributions to
    the same region are summed. If at least one MCC maps to more than one
    region, the matching area records are fetched and each one's area
    score is added to the region it is stored under.

    :param query: The query providing ``cell`` and ``cell_area`` lookups.
    :returns: A result list holding one result per candidate region.
    """
    results = self.result_list()
    now = util.utcnow()

    candidates = []  # (region, share) pairs, one per cell/region match
    ambiguous_cells = []
    for cell in list(query.cell) + list(query.cell_area):
        mcc_regions = GEOCODER.regions_for_mcc(
            cell.mobileCountryCode, metadata=True)
        # Divide the cell's score by the number of possible regions.
        share = 1.0 / (len(mcc_regions) or 1.0)
        candidates.extend((mcc_region, share) for mcc_region in mcc_regions)
        if len(mcc_regions) > 1:
            ambiguous_cells.append(cell)

    # Group by region code; each entry is a mutable [region, total score].
    by_code = {}
    for region, share in candidates:
        code = region.code
        entry = by_code.get(code)
        if entry is None:
            by_code[code] = [region, share]
        else:
            # Sum up the scores of multiple matches.
            entry[1] += share

    if ambiguous_cells:
        # Only do a database query if the mcc is ambiguous.
        # Use the area models for both area and cell entries, as only
        # the region is of interest here, which won't differ between
        # the individual cells inside an area.
        matched_areas = query_areas(
            query, ambiguous_cells, self.area_model, self.raven_client)
        for area in matched_areas:
            area_code = area.region
            if area_code and area_code in by_code:
                by_code[area_code][1] += area_score(area, now)

    for region, total in by_code.values():
        result = self.result_type(
            region_code=region.code,
            region_name=region.name,
            accuracy=region.radius,
            score=total)
        results.add(result)

    return results
def test_ambiguous_mcc(self, geoip_db, http_session, session, source, stats):
    """
    Search with a single cell area for MCC 234 and verify the per-region
    scores as well as the emitted source counter.
    """
    now = util.utcnow()
    candidate_regions = GEOCODER.regions_for_mcc(234, metadata=True)
    area = CellAreaFactory(mcc=234, num_cells=10)
    session.flush()

    query = self.model_query(
        geoip_db, http_session, session, stats, cells=[area])
    results = source.search(query)

    self.check_model_results(results, candidate_regions)
    assert results.best().region_code == 'GB'

    # The test expects a base score of 0.25 for every region and the
    # area's own score on top of that for GB only.
    for result in results:
        if result.region_code == 'GB':
            expected = 0.25 + area_score(area, now)
        else:
            expected = 0.25
        assert result.score == expected

    expected_tags = [
        'key:test', 'region:none', 'source:internal',
        'accuracy:low', 'status:hit',
    ]
    stats.check(counter=[(self.api_type + '.source', expected_tags)])
def test_score_area(self):
    """
    Check ``area_score`` (rounded to two decimals) for two freshly
    created and modified dummy areas.
    """
    now = util.utcnow()

    # (radius, num_cells, expected rounded score)
    cases = (
        (10, 4, 0.2),
        (0, 100, 0.1),
    )
    for radius, num_cells, expected in cases:
        area = AreaDummy(
            created=now, modified=now, radius=radius, num_cells=num_cells
        )
        assert round(area_score(area, now), 2) == expected