def import_stations(session, pipe, filename, fields):
    today = util.utcnow().date()

    def commit_batch(ins, rows, commit=True):
        result = session.execute(ins, rows)
        count = result.rowcount
        # apply trick to avoid querying for existing rows,
        # MySQL claims 1 row for an inserted row, 2 for an updated row
        inserted_rows = 2 * len(rows) - count
        changed_rows = count - len(rows)
        assert inserted_rows + changed_rows == len(rows)
        StatCounter(StatKey.unique_ocid_cell, today).incr(pipe, inserted_rows)
        if commit:
            session.commit()
        else:  # pragma: no cover
            session.flush()

    with GzipFile(filename, 'rb') as zip_file:
        csv_reader = csv.DictReader(zip_file, fields)
        batch = 10000
        rows = []
        area_keys = set()
        ins = OCIDCell.__table__.insert(
            on_duplicate=((
                'changeable = values(changeable), '
                'modified = values(modified), '
                'total_measures = values(total_measures), '
                'lat = values(lat), '
                'lon = values(lon), '
                'psc = values(psc), '
                '`range` = values(`range`)')))
        for row in csv_reader:
            # skip any header row
            if csv_reader.line_num == 1 and \
               'radio' in row.values():  # pragma: no cover
                continue
            data = make_ocid_cell_import_dict(row)
            if data is not None:
                rows.append(data)
                area_keys.add(CellArea.to_hashkey(data))
            if len(rows) == batch:  # pragma: no cover
                commit_batch(ins, rows, commit=False)
                rows = []
        if rows:
            commit_batch(ins, rows)
    for area_key in area_keys:
        update_area.delay(area_key, cell_type='ocid')
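A worked example of the rowcount trick used in commit_batch above; this is an illustration only, not part of the import code. With MySQL's INSERT ... ON DUPLICATE KEY UPDATE, rowcount reports 1 for each newly inserted row and 2 for each updated row, so the split can be recovered without querying for existing rows.

# Illustration only: recover inserted/updated counts from MySQL's rowcount.
def split_rowcount(num_rows, rowcount):
    # num_rows submitted rows produced `rowcount` affected rows
    inserted_rows = 2 * num_rows - rowcount
    changed_rows = rowcount - num_rows
    assert inserted_rows + changed_rows == num_rows
    return inserted_rows, changed_rows

# e.g. 10 rows submitted, rowcount == 14 -> 6 inserted, 4 updated
assert split_rowcount(10, 14) == (6, 4)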
def remove(self, cell_keys):
    cells_removed = 0
    changed_areas = set()
    for key in cell_keys:
        query = Cell.querykey(self.session, key)
        cells_removed += query.delete()
        changed_areas.add(CellArea.to_hashkey(key))
    if changed_areas:
        self.session.on_post_commit(
            enqueue_areas,
            self.redis_client,
            changed_areas,
            UPDATE_KEY['cell_lac'])
    return cells_removed
def __call__(self, cell_keys):
    cells_removed = 0
    changed_areas = set()
    area_queue = self.task.app.data_queues['update_cellarea']
    for key in cell_keys:
        query = Cell.querykey(self.session, key)
        cells_removed += query.delete()
        changed_areas.add(CellArea.to_hashkey(key))
    if changed_areas:
        area_queue.enqueue(changed_areas, pipe=self.pipe)
    return cells_removed
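A minimal sketch, not the project's actual DataQueue implementation, of how a Redis-backed queue like data_queues['update_cellarea'] could enqueue keys onto a shared pipeline. The class name is hypothetical and the keys are assumed to be JSON-serializable dicts.

import json


class SimpleDataQueue(object):
    # Hypothetical sketch of a Redis-list based queue with pipeline support.

    def __init__(self, key, redis_client):
        self.key = key
        self.redis_client = redis_client

    def enqueue(self, items, pipe=None):
        # serialize each item and push it onto a Redis list; when a pipeline
        # is passed in, the push is buffered and sent with the caller's batch
        data = [json.dumps(item) for item in items]
        client = pipe if pipe is not None else self.redis_client
        client.rpush(self.key, *data)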
def test_new(self):
    cell = CellFactory()
    self.session.flush()

    area_key = CellArea.to_hashkey(cell)
    self.area_queue.enqueue([area_key])
    self.assertEqual(scan_areas.delay().get(), 1)

    area = self.session.query(CellArea).one()
    self.assertAlmostEqual(area.lat, cell.lat)
    self.assertAlmostEqual(area.lon, cell.lon)
    self.assertEqual(area.range, 0)
    self.assertEqual(area.num_cells, 1)
    self.assertEqual(area.avg_cell_range, cell.range)
def test_scan_lacs_remove(self):
    session = self.db_master_session
    redis_client = self.redis_client

    # create an orphaned lac entry
    key = dict(radio=1, mcc=1, mnc=1, lac=1)
    session.add(CellArea(**key))
    session.flush()
    enqueue_lacs(session, redis_client,
                 [CellArea.to_hashkey(key)], UPDATE_KEY['cell_lac'])

    # after scanning the orphaned record gets removed
    self.assertEqual(scan_lacs.delay().get(), 1)
    lacs = session.query(CellArea).all()
    self.assertEqual(lacs, [])
def remove(self, cell_keys):
    cells_removed = 0
    changed_areas = set()
    for key in cell_keys:
        query = Cell.querykey(self.session, key)
        cells_removed += query.delete()
        changed_areas.add(CellArea.to_hashkey(key))
    if changed_areas:
        redis_key = self.task.app.data_queues['cell_area_update']
        self.session.on_post_commit(
            enqueue_areas,
            self.redis_client,
            changed_areas,
            redis_key)
    return cells_removed
def remove_cell(self, cell_keys):
    cells_removed = 0
    redis_client = self.app.redis_client
    with self.db_session() as session:
        changed_lacs = set()
        for k in cell_keys:
            key = Cell.to_hashkey(k)
            query = session.query(Cell).filter(*Cell.joinkey(key))
            cells_removed += query.delete()
            changed_lacs.add(CellArea.to_hashkey(key))
        if changed_lacs:
            session.on_post_commit(
                enqueue_lacs,
                redis_client,
                changed_lacs,
                UPDATE_KEY['cell_lac'])
        session.commit()
    return cells_removed
def scan_lacs(self, batch=100):
    """
    Find cell LACs that have changed and update the bounding box.
    This includes adding new LAC entries and removing them.
    """
    redis_client = self.app.redis_client
    redis_lacs = dequeue_lacs(
        redis_client, UPDATE_KEY['cell_lac'], batch=batch)
    lacs = set([CellArea.to_hashkey(lac) for lac in redis_lacs])

    for lac in lacs:
        update_lac.delay(lac.radio, lac.mcc, lac.mnc, lac.lac,
                         cell_model_key='cell',
                         cell_area_model_key='cell_area')
    return len(lacs)
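A plausible sketch of the consumer side used by scan_lacs above; this is an assumption, not the real dequeue_lacs. It reads up to batch serialized keys from a Redis list and trims them off in the same pipeline so each key is handed out only once.

import json


def dequeue_batch(redis_client, queue_key, batch=100):
    # Hypothetical sketch: atomic read-and-trim of a Redis list queue.
    pipe = redis_client.pipeline()
    pipe.lrange(queue_key, 0, batch - 1)   # peek at the first `batch` entries
    pipe.ltrim(queue_key, batch, -1)       # drop them from the list
    serialized, _ = pipe.execute()
    return [json.loads(item) for item in serialized]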
def import_stations(session, filename, fields):
    with GzipFile(filename, 'rb') as zip_file:
        csv_reader = csv.DictReader(zip_file, fields)
        batch = 10000
        rows = []
        lacs = set()
        ins = OCIDCell.__table__.insert(
            on_duplicate=(('changeable = values(changeable), '
                           'modified = values(modified), '
                           'total_measures = values(total_measures), '
                           'lat = values(lat), '
                           'lon = values(lon), '
                           'psc = values(psc), '
                           '`range` = values(`range`)')))
        for row in csv_reader:
            # skip any header row
            if csv_reader.line_num == 1 and \
               'radio' in row.values():  # pragma: no cover
                continue
            data = make_ocid_cell_import_dict(row)
            if data is not None:
                rows.append(data)
                lacs.add(CellArea.to_hashkey(data))
            if len(rows) == batch:  # pragma: no cover
                session.execute(ins, rows)
                session.commit()
                rows = []
        if rows:
            session.execute(ins, rows)
            session.commit()
    for lac in lacs:
        update_lac.delay(lac.radio, lac.mcc, lac.mnc, lac.lac,
                         cell_model_key='ocid_cell',
                         cell_area_model_key='ocid_cell_area')
def location_update_cell(self, min_new=10, max_new=100, batch=10):
    cells = []
    redis_client = self.app.redis_client
    with self.db_session() as session:
        emit_new_observation_metric(self.stats_client, session,
                                    self.shortname, Cell,
                                    min_new, max_new)
        query = (session.query(Cell)
                        .filter(Cell.new_measures >= min_new)
                        .filter(Cell.new_measures < max_new)
                        .limit(batch))
        cells = query.all()
        if not cells:
            return 0

        moving_cells = set()
        updated_lacs = set()
        for cell in cells:
            query = session.query(
                CellObservation.lat,
                CellObservation.lon,
                CellObservation.id).filter(
                *CellObservation.joinkey(cell))
            # only take the last X new_measures
            query = query.order_by(
                CellObservation.created.desc()).limit(cell.new_measures)
            observations = query.all()
            if observations:
                moving = calculate_new_position(
                    cell, observations, CELL_MAX_DIST_KM)
                if moving:
                    moving_cells.add(cell)
                updated_lacs.add(CellArea.to_hashkey(cell))

        if updated_lacs:
            session.on_post_commit(
                enqueue_lacs,
                redis_client,
                updated_lacs,
                UPDATE_KEY['cell_lac'])

        if moving_cells:
            # some cells found to be moving too much
            blacklist_and_remove_moving_cells(session, moving_cells)

        session.commit()
    return (len(cells), len(moving_cells))
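A rough sketch of the kind of check that can flag a cell as "moving"; this is an assumption for illustration, not the project's calculate_new_position. If any recent observation lies farther from the cell's current position than a maximum distance, the station is treated as moving. The helper uses the haversine great-circle distance.

import math

EARTH_RADIUS_KM = 6371.0


def distance_km(lat1, lon1, lat2, lon2):
    # haversine great-circle distance between two lat/lon points, in km
    lat1, lon1, lat2, lon2 = map(math.radians, (lat1, lon1, lat2, lon2))
    d_lat = lat2 - lat1
    d_lon = lon2 - lon1
    a = (math.sin(d_lat / 2.0) ** 2 +
         math.cos(lat1) * math.cos(lat2) * math.sin(d_lon / 2.0) ** 2)
    return 2 * EARTH_RADIUS_KM * math.asin(math.sqrt(a))


def looks_moving(cell_lat, cell_lon, observations, max_dist_km):
    # observations: iterable of (lat, lon, id) tuples as queried above;
    # hypothetical helper, not the real moving-cell detection
    return any(distance_km(cell_lat, cell_lon, obs[0], obs[1]) > max_dist_km
               for obs in observations)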
def import_stations(session, filename, fields):
    with GzipFile(filename, 'rb') as zip_file:
        csv_reader = csv.DictReader(zip_file, fields)
        batch = 10000
        rows = []
        area_keys = set()
        ins = OCIDCell.__table__.insert(
            on_duplicate=((
                'changeable = values(changeable), '
                'modified = values(modified), '
                'total_measures = values(total_measures), '
                'lat = values(lat), '
                'lon = values(lon), '
                'psc = values(psc), '
                '`range` = values(`range`)')))
        for row in csv_reader:
            # skip any header row
            if csv_reader.line_num == 1 and \
               'radio' in row.values():  # pragma: no cover
                continue
            data = make_ocid_cell_import_dict(row)
            if data is not None:
                rows.append(data)
                area_keys.add(CellArea.to_hashkey(data))
            if len(rows) == batch:  # pragma: no cover
                session.execute(ins, rows)
                session.commit()
                rows = []
        if rows:
            session.execute(ins, rows)
            session.commit()
    for area_key in area_keys:
        update_area.delay(area_key, cell_type='ocid')
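For illustration only: the custom on_duplicate insert above corresponds roughly to MySQL's INSERT ... ON DUPLICATE KEY UPDATE. Under current SQLAlchemy, a similar statement could be built with the MySQL dialect helper; this is a sketch assuming the OCIDCell table from the surrounding module, not code from the project.

from sqlalchemy.dialects.mysql import insert as mysql_insert


def build_ocid_upsert(rows):
    # Hypothetical sketch: multi-row upsert that updates the listed columns
    # from the incoming VALUES() when a duplicate key is hit.
    stmt = mysql_insert(OCIDCell.__table__).values(rows)
    return stmt.on_duplicate_key_update(
        changeable=stmt.inserted.changeable,
        modified=stmt.inserted.modified,
        total_measures=stmt.inserted.total_measures,
        lat=stmt.inserted.lat,
        lon=stmt.inserted.lon,
        psc=stmt.inserted.psc,
        range=stmt.inserted.range,
    )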
def add_area_update(self, station):
    self.updated_areas.add(CellArea.to_hashkey(station))
def add_area_update(self, station_key):
    area_key = CellArea.to_hashkey(station_key)
    self.updated_areas.add(area_key)