def cluster_areas(areas, lookups, min_age=0):
    """
    Cluster areas, treat each area as its own cluster.

    :param areas: Area model instances to cluster.
    :param lookups: Lookup objects carrying an encoded area id, an
        optional age and an optional signal strength.
    :param min_age: Fallback age used when a lookup reports no age.
    :returns: A list of single-row numpy arrays of NETWORK_DTYPE,
        one per area.
    """
    now = util.utcnow()
    today = now.date()

    # Create a dict of area ids mapped to their age and signal strength.
    obs_data = {}
    for lookup in lookups:
        obs_data[decode_cellarea(lookup.areaid)] = (
            max(abs(lookup.age or min_age), 1000),
            lookup.signalStrength or MIN_CELL_SIGNAL[lookup.radioType])

    clusters = []
    for area in areas:
        clusters.append(numpy.array([(
            area.lat, area.lon, area.radius,
            obs_data[area.areaid][0], obs_data[area.areaid][1],
            area_score(area, now),
            encode_cellarea(*area.areaid),
            # Guard against a missing last_seen value; comparing None
            # against a date raises a TypeError in Python 3. The later
            # revision of this function applies the same guard.
            bool(area.last_seen is not None and area.last_seen >= today))],
            dtype=NETWORK_DTYPE))

    return clusters
def test_update(self, celery, session):
    """An existing area is refreshed from its current cells."""
    today = util.utcnow().date()
    yesterday = today - timedelta(days=1)
    area = self.area_factory(
        num_cells=2, radius=500, avg_cell_radius=100, last_seen=yesterday)
    # Both cells share the area's identity key.
    key = dict(radio=area.radio, mcc=area.mcc, mnc=area.mnc, lac=area.lac)
    cell = self.cell_factory(
        lat=area.lat, lon=area.lon, radius=200, last_seen=today, **key)
    self.cell_factory(
        lat=area.lat, lon=area.lon, radius=300, last_seen=yesterday, **key)
    session.commit()

    self.area_queue(celery).enqueue([encode_cellarea(*area.areaid)])
    self.task.delay().get()

    session.refresh(area)
    assert area.lat == cell.lat
    assert area.lon == cell.lon
    assert area.radius == 0
    assert area.region == 'GB'
    assert area.avg_cell_radius == 250
    assert area.num_cells == 2
    assert area.last_seen == today
def cluster_areas(areas, lookups, min_age=0):
    """
    Cluster areas, treat each area as its own cluster.
    """
    now = util.utcnow()
    today = now.date()

    # Map each decoded area id to the (age, signal) pair of its lookup.
    obs_data = {}
    for lookup in lookups:
        age = max(abs(lookup.age or min_age), 1000)
        signal = lookup.signalStrength or MIN_CELL_SIGNAL[lookup.radioType]
        obs_data[decode_cellarea(lookup.areaid)] = (age, signal)

    clusters = []
    for area in areas:
        age, signal = obs_data[area.areaid]
        seen_today = bool(
            area.last_seen is not None and area.last_seen >= today)
        row = (
            area.lat,
            area.lon,
            area.radius,
            age,
            signal,
            area_score(area, now),
            encode_cellarea(*area.areaid, codec="base64"),
            seen_today,
        )
        clusters.append(numpy.array([row], dtype=NETWORK_DTYPE))
    return clusters
def import_stations(self, session, pipe, filename):
    """
    Import cell stations from a gzipped CSV export into the database.

    Rows are parsed, upserted in batches of ``self.batch_size``, and
    the ids of every touched cell area are enqueued afterwards for a
    later area update.

    :param session: Database session used for the bulk inserts.
    :param pipe: Redis pipeline handed to the stat counter.
    :param filename: Path of the gzipped CSV file to read.
    """
    today = util.utcnow().date()
    # Columns to overwrite when an inserted row already exists.
    on_duplicate = (
        '`modified` = values(`modified`)'
        ', `lat` = values(`lat`)'
        ', `lon` = values(`lon`)'
        ', `psc` = values(`psc`)'
        ', `max_lat` = values(`max_lat`)'
        ', `min_lat` = values(`min_lat`)'
        ', `max_lon` = values(`max_lon`)'
        ', `min_lon` = values(`min_lon`)'
        ', `radius` = values(`radius`)'
        ', `samples` = values(`samples`)'
    )
    table_insert = self.cell_model.__table__.insert(
        mysql_on_duplicate=on_duplicate)

    def commit_batch(rows):
        # Insert one batch and derive the number of newly created rows
        # from the rowcount alone.
        result = session.execute(table_insert, rows)
        count = result.rowcount
        # apply trick to avoid querying for existing rows,
        # MySQL claims 1 row for an inserted row, 2 for an updated row
        inserted_rows = 2 * len(rows) - count
        changed_rows = count - len(rows)
        assert inserted_rows + changed_rows == len(rows)
        StatCounter(self.stat_key, today).incr(pipe, inserted_rows)

    areaids = set()
    with util.gzip_open(filename, 'r') as gzip_wrapper:
        with gzip_wrapper as gzip_file:
            csv_reader = csv.reader(gzip_file)
            parse_row = partial(self.make_import_dict,
                                self.cell_model.validate,
                                self.import_spec)
            rows = []
            for row in csv_reader:
                # skip any header row
                if (csv_reader.line_num == 1 and
                        row[0] == 'radio'):  # pragma: no cover
                    continue

                data = parse_row(row)
                if data is not None:
                    rows.append(data)
                    # Remember the area this cell belongs to.
                    areaids.add((int(data['radio']), data['mcc'],
                                 data['mnc'], data['lac']))

                if len(rows) == self.batch_size:  # pragma: no cover
                    commit_batch(rows)
                    session.flush()
                    rows = []

            # Commit any remaining partial batch.
            if rows:
                commit_batch(rows)

    self.area_queue.enqueue(
        [encode_cellarea(*id_) for id_ in areaids], json=False)
def test_update(self):
    """An existing area is refreshed from its current cells."""
    today = util.utcnow().date()
    yesterday = today - timedelta(days=1)
    area = self.area_factory(
        num_cells=2, radius=500, avg_cell_radius=100, last_seen=yesterday)
    # Both cells share the area's identity key.
    key = dict(radio=area.radio, mcc=area.mcc, mnc=area.mnc, lac=area.lac)
    cell = self.cell_factory(
        lat=area.lat, lon=area.lon, radius=200, last_seen=today, **key)
    self.cell_factory(
        lat=area.lat, lon=area.lon, radius=300, last_seen=yesterday, **key)
    self.session.commit()

    self.area_queue.enqueue([encode_cellarea(*area.areaid)])
    self.task.delay().get()

    self.session.refresh(area)
    self.assertAlmostEqual(area.lat, cell.lat)
    self.assertAlmostEqual(area.lon, cell.lon)
    self.assertEqual(area.radius, 0)
    self.assertEqual(area.region, 'GB')
    self.assertEqual(area.avg_cell_radius, 250)
    self.assertEqual(area.num_cells, 2)
    self.assertEqual(area.last_seen, today)
def test_remove(self):
    """An area with no backing cells is deleted by the task."""
    area = self.area_factory()
    self.session.flush()

    self.area_queue.enqueue([encode_cellarea(*area.areaid)])
    self.task.delay().get()
    self.assertEqual(self.session.query(self.area_model).count(), 0)
def test_remove(self):
    """An area with no backing cells is deleted by the task."""
    area = CellAreaFactory()
    self.session.flush()

    self.area_queue.enqueue([encode_cellarea(*area.areaid)], json=False)
    update_cellarea.delay().get()
    self.assertEqual(self.session.query(CellArea).count(), 0)
def test_remove(self, celery, session):
    """An area with no backing cells is deleted by the task."""
    area = self.area_factory()
    session.flush()

    self.area_queue(celery).enqueue([encode_cellarea(*area.areaid)])
    self.task.delay().get()
    assert session.query(self.area_model).count() == 0
def test_new(self):
    """A fresh area is created from a single existing cell."""
    cell = CellFactory()
    self.session.flush()

    areaid = encode_cellarea(cell.radio, cell.mcc, cell.mnc, cell.lac)
    self.area_queue.enqueue([areaid], json=False)
    update_cellarea.delay().get()

    area = self.session.query(CellArea).one()
    self.assertAlmostEqual(area.lat, cell.lat)
    self.assertAlmostEqual(area.lon, cell.lon)
    self.assertEqual(area.radius, 0)
    self.assertEqual(area.region, 'GB')
    self.assertEqual(area.num_cells, 1)
    self.assertEqual(area.avg_cell_radius, cell.radius)
def test_new(self, celery, session):
    """A fresh area is created from a single existing cell."""
    cell = self.cell_factory()
    session.flush()

    areaid = encode_cellarea(cell.radio, cell.mcc, cell.mnc, cell.lac)
    self.area_queue(celery).enqueue([areaid])
    self.task.delay().get()

    area = session.query(self.area_model).one()
    assert area.lat == cell.lat
    assert area.lon == cell.lon
    assert area.radius == 0
    assert area.region == "GB"
    assert area.avg_cell_radius == cell.radius
    assert area.num_cells == 1
    assert area.last_seen == cell.last_seen
def test_new(self):
    """A fresh area is created from a single existing cell."""
    cell = self.cell_factory()
    self.session.flush()

    areaid = encode_cellarea(cell.radio, cell.mcc, cell.mnc, cell.lac)
    self.area_queue.enqueue([areaid])
    self.task.delay().get()

    area = self.session.query(self.area_model).one()
    self.assertAlmostEqual(area.lat, cell.lat)
    self.assertAlmostEqual(area.lon, cell.lon)
    self.assertEqual(area.radius, 0)
    self.assertEqual(area.region, 'GB')
    self.assertEqual(area.avg_cell_radius, cell.radius)
    self.assertEqual(area.num_cells, 1)
    self.assertEqual(area.last_seen, cell.last_seen)
def test_new(self, celery, session):
    """A fresh area is created from a single existing cell."""
    cell = self.cell_factory()
    session.flush()

    areaid = encode_cellarea(cell.radio, cell.mcc, cell.mnc, cell.lac)
    self.area_queue(celery).enqueue([areaid])
    self.task.delay().get()

    area = session.query(self.area_model).one()
    assert area.lat == cell.lat
    assert area.lon == cell.lon
    assert area.radius == 0
    assert area.region == 'GB'
    assert area.avg_cell_radius == cell.radius
    assert area.num_cells == 1
    assert area.last_seen == cell.last_seen
def test_update_incomplete_cell(self):
    """Cells missing a position or bounding-box data are not counted."""
    area = CellAreaFactory(radius=500)
    area_key = dict(
        radio=area.radio, mcc=area.mcc, mnc=area.mnc, lac=area.lac)
    cell = CellFactory(lat=area.lat + 0.0002, lon=area.lon, **area_key)
    # No position at all.
    CellFactory(lat=None, lon=None, **area_key)
    # Position, but an incomplete bounding box.
    CellFactory(lat=area.lat, lon=area.lon,
                max_lat=None, min_lon=None, **area_key)
    self.session.commit()

    self.area_queue.enqueue([encode_cellarea(*area.areaid)], json=False)
    update_cellarea.delay().get()

    self.session.refresh(area)
    self.assertAlmostEqual(area.lat, cell.lat - 0.0001)
    self.assertAlmostEqual(area.lon, cell.lon)
    self.assertEqual(area.num_cells, 2)
def test_update(self):
    """An existing area is refreshed from its single current cell."""
    area = CellAreaFactory(num_cells=2, radius=500, avg_cell_radius=100)
    cell = CellFactory(
        lat=area.lat, lon=area.lon, radius=200,
        radio=area.radio, mcc=area.mcc, mnc=area.mnc, lac=area.lac)
    self.session.commit()

    self.area_queue.enqueue([encode_cellarea(*area.areaid)], json=False)
    update_cellarea.delay().get()

    self.session.refresh(area)
    self.assertAlmostEqual(area.lat, cell.lat)
    self.assertAlmostEqual(area.lon, cell.lon)
    self.assertEqual(area.radius, 0)
    self.assertEqual(area.region, 'GB')
    self.assertEqual(area.num_cells, 1)
    self.assertEqual(area.avg_cell_radius, 200)
def cluster_areas(areas, lookups):
    """
    Cluster areas, treat each area as its own cluster.
    """
    now = util.utcnow()

    # Map each encoded area id to its reported signal strength,
    # falling back to the minimum cell signal.
    signals = {
        lookup.areaid: lookup.signal or MIN_CELL_SIGNAL
        for lookup in lookups
    }

    clusters = []
    for area in areas:
        row = (area.lat, area.lon, area.radius,
               signals[encode_cellarea(*area.areaid)],
               area.score(now))
        clusters.append(numpy.array([row], dtype=NETWORK_DTYPE))
    return clusters
def test_update_incomplete_cell(self):
    """Cells missing a position or bounding-box data are not counted."""
    area = self.area_factory(radius=500)
    area_key = dict(
        radio=area.radio, mcc=area.mcc, mnc=area.mnc, lac=area.lac)
    cell = self.cell_factory(
        lat=area.lat + 0.0002, lon=area.lon, **area_key)
    # No position at all.
    self.cell_factory(lat=None, lon=None, **area_key)
    # Position, but an incomplete bounding box.
    self.cell_factory(lat=area.lat, lon=area.lon,
                      max_lat=None, min_lon=None, **area_key)
    self.session.commit()

    self.area_queue.enqueue([encode_cellarea(*area.areaid)])
    self.task.delay().get()

    self.session.refresh(area)
    self.assertAlmostEqual(area.lat, cell.lat - 0.0001)
    self.assertAlmostEqual(area.lon, cell.lon)
    self.assertEqual(area.num_cells, 2)
def test_update_incomplete_cell(self, celery, session):
    """Cells missing a position or bounding-box data are not counted."""
    area = self.area_factory(radius=500)
    area_key = dict(
        radio=area.radio, mcc=area.mcc, mnc=area.mnc, lac=area.lac)
    cell = self.cell_factory(
        lat=area.lat + 0.0002, lon=area.lon, **area_key)
    # No position at all.
    self.cell_factory(lat=None, lon=None, **area_key)
    # Position, but an incomplete bounding box.
    self.cell_factory(
        lat=area.lat, lon=area.lon, max_lat=None, min_lon=None, **area_key)
    session.commit()

    self.area_queue(celery).enqueue([encode_cellarea(*area.areaid)])
    self.task.delay().get()

    session.refresh(area)
    assert round(area.lat, 7) == round(cell.lat - 0.0001, 7)
    assert round(area.lon, 7) == round(cell.lon, 7)
    assert area.num_cells == 2
def cluster_areas(areas, lookups):
    """
    Cluster areas, treat each area as its own cluster.
    """
    now = util.utcnow()

    # Map each encoded area id to its reported signal strength,
    # falling back to the per-radio minimum.
    signals = {}
    for lookup in lookups:
        signals[lookup.areaid] = (
            lookup.signal or MIN_CELL_SIGNAL[lookup.radio])

    clusters = []
    for area in areas:
        row = (area.lat, area.lon, area.radius,
               signals[encode_cellarea(*area.areaid)],
               area.score(now))
        clusters.append(numpy.array([row], dtype=NETWORK_DTYPE))
    return clusters
def add_area_update(self, key):
    """Record the area containing the given cell id as needing an update."""
    area_parts = decode_cellid(key)[:4]
    self.updated_areas.add(encode_cellarea(*area_parts))
def check_areas(self, celery, obs):
    """Assert the area update queue holds exactly the areas of the obs."""
    queue = celery.data_queues["update_cellarea"]
    queued = set(queue.dequeue())
    expected = set()
    for ob in obs:
        cellid = decode_cellid(ob.unique_key)
        expected.add(encode_cellarea(*cellid[:4]))
    assert queued == expected
def import_stations(self, session, pipe, filename):
    """
    Import cell stations from a gzipped CSV export into sharded tables.

    Rows are parsed, grouped per shard, upserted in batches of
    ``self.batch_size`` rows total, and the ids of every touched cell
    area are enqueued afterwards for a later area update.

    :param session: Database session used for the bulk inserts.
    :param pipe: Redis pipeline handed to the stat counter.
    :param filename: Path of the gzipped CSV file to read.
    """
    today = util.utcnow().date()
    shards = self.cell_model.shards()
    # Columns to overwrite when an inserted row already exists.
    on_duplicate = ('`modified` = values(`modified`)'
                    ', `lat` = values(`lat`)'
                    ', `lon` = values(`lon`)'
                    ', `psc` = values(`psc`)'
                    ', `max_lat` = values(`max_lat`)'
                    ', `min_lat` = values(`min_lat`)'
                    ', `max_lon` = values(`max_lon`)'
                    ', `min_lon` = values(`min_lon`)'
                    ', `radius` = values(`radius`)'
                    ', `samples` = values(`samples`)')

    def commit_batch(rows):
        # rows maps a shard id to the list of row dicts for that shard;
        # insert each shard's rows and derive the number of newly
        # created rows from the rowcount alone.
        all_inserted_rows = 0
        for shard_id, shard_rows in rows.items():
            table_insert = shards[shard_id].__table__.insert(
                mysql_on_duplicate=on_duplicate)
            result = session.execute(table_insert, shard_rows)
            count = result.rowcount
            # apply trick to avoid querying for existing rows,
            # MySQL claims 1 row for an inserted row, 2 for an updated row
            inserted_rows = 2 * len(shard_rows) - count
            changed_rows = count - len(shard_rows)
            assert inserted_rows + changed_rows == len(shard_rows)
            all_inserted_rows += inserted_rows

        StatCounter(self.stat_key, today).incr(pipe, all_inserted_rows)

    areaids = set()
    with util.gzip_open(filename, 'r') as gzip_wrapper:
        with gzip_wrapper as gzip_file:
            cell_model = self.cell_model
            csv_reader = csv.reader(gzip_file)
            parse_row = partial(self.make_import_dict,
                                self.cell_model.validate,
                                self.import_spec)
            rows = defaultdict(list)
            row_count = 0
            for row in csv_reader:
                # skip any header row
                if (csv_reader.line_num == 1 and
                        row[0] == 'radio'):  # pragma: no cover
                    continue

                data = parse_row(row)
                if data is not None:
                    rows[cell_model.shard_id(data['radio'])].append(data)
                    row_count += 1
                    # Remember the area this cell belongs to.
                    areaids.add((int(data['radio']), data['mcc'],
                                 data['mnc'], data['lac']))

                if row_count == self.batch_size:  # pragma: no cover
                    commit_batch(rows)
                    session.flush()
                    rows = defaultdict(list)
                    row_count = 0

            # Commit any remaining partial batch.
            if rows:
                commit_batch(rows)

    self.area_queue.enqueue([encode_cellarea(*id_) for id_ in areaids],
                            json=False)
def check_areas(self, celery, obs):
    """Assert the area update queue holds exactly the areas of the obs."""
    queue = celery.data_queues['update_cellarea']
    queued = set(queue.dequeue())
    expected = {
        encode_cellarea(*decode_cellid(ob.unique_key)[:4]) for ob in obs}
    assert queued == expected
def add_area_update(self, updated_areas, key):
    """Record the area containing the given cell id in updated_areas."""
    area_parts = decode_cellid(key)[:4]
    updated_areas.add(encode_cellarea(*area_parts))