def test_remove(self):
    """An area whose cells have all vanished is removed by the task."""
    stale_area = CellAreaFactory()
    self.session.flush()
    encoded_id = encode_cellarea(*stale_area.areaid)
    self.area_queue.enqueue([encoded_id], json=False)
    update_cellarea.delay().get()
    remaining = self.session.query(CellArea).count()
    self.assertEqual(remaining, 0)
def import_csv(self, lo=1, hi=10, time=1408604686, cell_type='ocid'):
    """Import a generated cell CSV and refresh the affected cell areas."""
    fake_task = FakeTask(self.celery_app)
    with self.get_csv(lo=lo, hi=hi, time=time) as csv_path:
        with redis_pipeline(self.redis_client) as pipeline:
            importer = ImportLocal(fake_task, pipeline, cell_type=cell_type)
            importer(self.session, filename=csv_path)
    # OCID imports are followed up by their dedicated area-update task.
    if cell_type == 'ocid':
        update_cellarea_ocid.delay().get()
    else:
        update_cellarea.delay().get()
def import_csv(self, lo=1, hi=10, time=1408604686, cell_type='ocid'):
    """Import a generated cell CSV and refresh the affected cell areas."""
    fake_task = FakeTask(self.celery_app)
    with self.get_csv(lo=lo, hi=hi, time=time) as csv_path:
        with redis_pipeline(self.redis_client) as pipeline:
            importer = ImportLocal(
                fake_task, self.session, pipeline, cell_type=cell_type)
            importer(filename=csv_path)
    # OCID imports are followed up by their dedicated area-update task.
    if cell_type == 'ocid':
        update_cellarea_ocid.delay().get()
    else:
        update_cellarea.delay().get()
def import_csv(self, celery, redis, session, cell,
               lo=1, hi=10, time=1408604686, cell_type='ocid'):
    """Import a generated cell CSV and refresh the affected cell areas."""
    fake_task = FakeTask(celery)
    with self.get_csv(cell, lo=lo, hi=hi, time=time) as csv_path:
        with redis_pipeline(redis) as pipeline:
            importer = ImportLocal(fake_task, cell_type=cell_type)
            importer(pipeline, session, filename=csv_path)
    # OCID imports are followed up by their dedicated area-update task.
    if cell_type == 'ocid':
        update_cellarea_ocid.delay().get()
    else:
        update_cellarea.delay().get()
def test_new(self):
    """A queued area ID with one live cell produces a fresh CellArea row."""
    new_cell = CellFactory()
    self.session.flush()
    encoded_id = encode_cellarea(
        new_cell.radio, new_cell.mcc, new_cell.mnc, new_cell.lac)
    self.area_queue.enqueue([encoded_id], json=False)
    update_cellarea.delay().get()
    created_area = self.session.query(CellArea).one()
    # A single-cell area collapses onto the cell's own position.
    self.assertAlmostEqual(created_area.lat, new_cell.lat)
    self.assertAlmostEqual(created_area.lon, new_cell.lon)
    self.assertEqual(created_area.radius, 0)
    self.assertEqual(created_area.region, 'GB')
    self.assertEqual(created_area.num_cells, 1)
    self.assertEqual(created_area.avg_cell_radius, new_cell.radius)
def test_update_incomplete_cell(self):
    """Cells without a position are skipped when recomputing an area."""
    target_area = CellAreaFactory(radius=500)
    shared_key = {
        'radio': target_area.radio,
        'mcc': target_area.mcc,
        'mnc': target_area.mnc,
        'lac': target_area.lac,
    }
    located_cell = CellFactory(
        lat=target_area.lat + 0.0002, lon=target_area.lon, **shared_key)
    # No position at all — must be ignored entirely.
    CellFactory(lat=None, lon=None, **shared_key)
    # Positioned but with incomplete bounding box — still counted.
    CellFactory(lat=target_area.lat, lon=target_area.lon,
                max_lat=None, min_lon=None, **shared_key)
    self.session.commit()
    encoded_id = encode_cellarea(*target_area.areaid)
    self.area_queue.enqueue([encoded_id], json=False)
    update_cellarea.delay().get()
    self.session.refresh(target_area)
    self.assertAlmostEqual(target_area.lat, located_cell.lat - 0.0001)
    self.assertAlmostEqual(target_area.lon, located_cell.lon)
    self.assertEqual(target_area.num_cells, 2)
def test_update(self):
    """Stale area statistics are recomputed from the current cells."""
    existing_area = CellAreaFactory(
        num_cells=2, radius=500, avg_cell_radius=100)
    only_cell = CellFactory(
        lat=existing_area.lat, lon=existing_area.lon, radius=200,
        radio=existing_area.radio, mcc=existing_area.mcc,
        mnc=existing_area.mnc, lac=existing_area.lac)
    self.session.commit()
    encoded_id = encode_cellarea(*existing_area.areaid)
    self.area_queue.enqueue([encoded_id], json=False)
    update_cellarea.delay().get()
    self.session.refresh(existing_area)
    # Only one cell remains, so the area collapses onto it.
    self.assertAlmostEqual(existing_area.lat, only_cell.lat)
    self.assertAlmostEqual(existing_area.lon, only_cell.lon)
    self.assertEqual(existing_area.radius, 0)
    self.assertEqual(existing_area.region, 'GB')
    self.assertEqual(existing_area.num_cells, 1)
    self.assertEqual(existing_area.avg_cell_radius, 200)
def test_empty(self):
    # With nothing enqueued, the task must complete without raising.
    update_cellarea.delay().get()
def read_stations_from_csv(session, file_handle, redis_client, cellarea_queue):
    """
    Read stations from a public cell export CSV.

    :arg session: a database session
    :arg file_handle: an open file handle for the CSV data
    :arg redis_client: a Redis client
    :arg cellarea_queue: the DataQueue for updating cellarea IDs

    Raises InvalidCSV on an unknown radio type or when the very first
    data row fails validation; later invalid rows are logged and skipped.
    Commits in batches of 1000 stations and enqueues changed cell areas
    for asynchronous recomputation.
    """
    # Avoid circular imports
    from ichnaea.data.tasks import update_cellarea, update_statregion

    # peekable lets us inspect the first row (header detection) without
    # consuming it; it is also falsy when the file is empty.
    csv_content = peekable(reader(file_handle))
    # UMTS was the original name for WCDMA stations
    radio_type = {"UMTS": "wcdma", "GSM": "gsm", "LTE": "lte", "": "Unknown"}
    counts = defaultdict(Counter)  # per-radio counters of new/updated/found
    areas = set()       # area IDs touched since the last flush to Redis
    areas_total = 0
    total = 0

    if not csv_content:
        LOGGER.warning("Nothing to process.")
        return

    first_row = csv_content.peek()
    if first_row == _FIELD_NAMES:
        # Skip the first row because it's a header row
        next(csv_content)
    else:
        LOGGER.warning("Expected header row, got data: %s", first_row)

    for row in csv_content:
        try:
            radio = radio_type[row[0]]
        except KeyError:
            raise InvalidCSV("Unknown radio type in row: %s" % row)

        if radio == "Unknown":
            LOGGER.warning("Skipping unknown radio: %s", row)
            continue

        try:
            data = {
                "radio": radio,
                "mcc": int(row[1]),
                "mnc": int(row[2]),
                "lac": int(row[3]),
                "cid": int(row[4]),
                "psc": int(row[5]) if row[5] else 0,
                "lon": float(row[6]),
                "lat": float(row[7]),
                # Some exported radiuses exceed the max and fail validation
                "radius": min(int(row[8]), CELL_MAX_RADIUS),
                "samples": int(row[9]),
                # row[10] is "changable", always 1 and not imported
                "created": datetime.fromtimestamp(int(row[11]), UTC),
                "modified": datetime.fromtimestamp(int(row[12]), UTC),
            }
            shard = CellShard.create(_raise_invalid=True, **data)
        except (colander.Invalid, ValueError) as e:
            if total == 0:
                # If the first row is invalid, it's likely the rest of the
                # file is, too--drop out here.
                raise InvalidCSV("first row %s is invalid: %s" % (row, e))
            else:
                LOGGER.warning("row %s is invalid: %s", row, e)
                continue

        # Is this station in the database?
        shard_type = shard.__class__
        existing = (session.query(shard_type).filter(
            shard_type.cellid == shard.cellid).options(
            load_only("modified")).one_or_none())
        if existing:
            if existing.modified < data["modified"]:
                # Update existing station with new data
                operation = "updated"
                existing.psc = shard.psc
                existing.lon = shard.lon
                existing.lat = shard.lat
                existing.radius = shard.radius
                existing.samples = shard.samples
                existing.created = shard.created
                existing.modified = shard.modified
            else:
                # Do nothing to existing station record
                operation = "found"
        else:
            # Add a new station record
            operation = "new"
            # Initialize the bounding box to the station's single position.
            shard.min_lat = shard.lat
            shard.max_lat = shard.lat
            shard.min_lon = shard.lon
            shard.max_lon = shard.lon
            session.add(shard)
        counts[data["radio"]][operation] += 1

        # Process the cell area?
        if operation in {"new", "updated"}:
            areas.add(area_id(shard))

        # Process a chunk of stations, report on progress
        total += 1
        if total % 1000 == 0:
            session.commit()
            LOGGER.info("Processed %d stations", total)
        if areas and (len(areas) % 1000 == 0):
            # Flush accumulated area IDs to the queue and kick off the
            # async area-update task, then start a fresh batch.
            session.commit()
            areas_total += len(areas)
            LOGGER.info("Processed %d station areas", areas_total)
            with redis_pipeline(redis_client) as pipe:
                cellarea_queue.enqueue(list(areas), pipe=pipe)
            update_cellarea.delay()
            areas = set()

    # Commit remaining station data
    session.commit()

    # Update the remaining cell areas
    if areas:
        areas_total += len(areas)
        with redis_pipeline(redis_client) as pipe:
            cellarea_queue.enqueue(list(areas), pipe=pipe)
        update_cellarea.delay()

    # Now that we've updated all the cell areas, we need to update the
    # statregion
    update_statregion.delay()

    # Summarize results
    LOGGER.info("Complete, processed %d station%s:",
                total, "" if total == 1 else "s")
    for radio_type, op_counts in sorted(counts.items()):
        LOGGER.info(
            "  %s: %d new, %d updated, %d already loaded",
            radio_type,
            op_counts["new"],
            op_counts["updated"],
            op_counts["found"],
        )
    if areas_total:
        LOGGER.info("  %d station area%s updated",
                    areas_total, "" if areas_total == 1 else "s")
def test_blocklist_temporary_and_permanent(self):
    # This test simulates a cell that moves once a month, for 2 years.
    # The first 2 * PERMANENT_BLOCKLIST_THRESHOLD (12) moves should be
    # temporary, forgotten after a week; after that it should be
    # permanently blocklisted.
    now = util.utcnow()
    # Station moves between these 4 points, all in the USA:
    points = [
        (40.0, -74.0),   # NYC
        (37.0, -122.0),  # SF
        (47.0, -122.0),  # Seattle
        (25.0, -80.0),   # Miami
    ]
    obs = CellObservationFactory(
        mcc=310, lat=points[0][0], lon=points[0][1])
    N = 4 * PERMANENT_BLOCKLIST_THRESHOLD
    for month in range(0, N):
        # Spread the inserts 30 days apart, ending at "now".
        days_ago = (N - (month + 1)) * 30
        time = now - timedelta(days=days_ago)
        obs.lat = points[month % 4][0]
        obs.lon = points[month % 4][1]
        # Assuming PERMANENT_BLOCKLIST_THRESHOLD == 6:
        #
        # 0th insert will create the station
        # 1st insert will create first blocklist entry, delete station
        # 2nd insert will recreate the station at new position
        # 3rd insert will update blocklist, re-delete station
        # 4th insert will recreate the station at new position
        # 5th insert will update blocklist, re-delete station
        # 6th insert will recreate the station at new position
        # ...
        # 11th insert will make blocklisting permanent, re-delete station
        # 12th insert will not recreate station
        # 13th insert will not recreate station
        # ...
        # 23rd insert will not recreate station
        blocks = self.session.query(CellBlocklist).all()
        if month < 2:
            self.assertEqual(len(blocks), 0)
        else:
            self.assertEqual(len(blocks), 1)
            # force the blocklist back in time to whenever the
            # observation was supposedly inserted.
            block = blocks[0]
            block.time = time
            self.session.commit()
        if month < N / 2:
            # We still haven't exceeded the threshold, so the
            # observation was admitted.
            self.data_queue.enqueue([obs])
            if month % 2 == 0:
                # The station was (re)created.
                # update_cell returns (stations, blocklisted) counts.
                self.assertEqual(update_cell.delay().get(), (1, 0))
                # Update cell areas
                update_cellarea.delay().get()
                # One cell + one cell-LAC record should exist.
                self.assertEqual(self.session.query(Cell).count(), 1)
                self.assertEqual(self.session.query(CellArea).count(), 1)
            else:
                # The station existed and was seen moving,
                # thereby activating the blocklist and deleting the cell.
                self.assertEqual(update_cell.delay().get(), (1, 1))
                # Update cell areas to delete orphaned area entry
                update_cellarea.delay().get()
                if month > 1:
                    # NOTE(review): (month + 1) / 2 is true division on
                    # Python 3 (e.g. 4 / 2 == 2.0); the comparison still
                    # passes since int == float for equal values, but
                    # integer division // would be cleaner — confirm.
                    self.assertEqual(block.count, ((month + 1) / 2))
                self.assertEqual(
                    self.session.query(CellBlocklist).count(), 1)
                self.assertEqual(self.session.query(Cell).count(), 0)
                # Try adding one more observation
                # to be sure it is dropped by the now-active blocklist.
                self.data_queue.enqueue([obs])
                self.assertEqual(update_cell.delay().get(), (0, 0))
        else:
            # Blocklist has exceeded threshold, gone to permanent mode,
            # so no observation accepted, no stations seen.
            self.data_queue.enqueue([obs])
            self.assertEqual(update_cell.delay().get(), (0, 0))