示例#1
0
    def test_remove(self):
        # An area is created without any cells; after its id is queued and
        # the update task has run, the test expects no CellArea rows.
        orphan = CellAreaFactory()
        self.session.flush()

        queued_id = encode_cellarea(*orphan.areaid)
        self.area_queue.enqueue([queued_id], json=False)
        update_cellarea.delay().get()

        remaining = self.session.query(CellArea).count()
        self.assertEqual(remaining, 0)
示例#2
0
 def import_csv(self, lo=1, hi=10, time=1408604686, cell_type='ocid'):
     """Import a generated CSV file, then run the matching area update.

     The CSV comes from ``self.get_csv(lo=..., hi=..., time=...)`` and is
     fed to ``ImportLocal``; afterwards the area update task matching
     ``cell_type`` is run synchronously.
     """
     fake_task = FakeTask(self.celery_app)
     with self.get_csv(lo=lo, hi=hi, time=time) as csv_path, \
             redis_pipeline(self.redis_client) as pipeline:
         importer = ImportLocal(fake_task, pipeline, cell_type=cell_type)
         importer(self.session, filename=csv_path)

     # OCID imports have a dedicated area update task.
     area_task = (
         update_cellarea_ocid if cell_type == 'ocid' else update_cellarea)
     area_task.delay().get()
示例#3
0
 def import_csv(self, lo=1, hi=10, time=1408604686, cell_type='ocid'):
     """Import a generated CSV file, then run the matching area update.

     In this variant the session and the Redis pipeline are both handed to
     the ``ImportLocal`` constructor and only the filename is passed to the
     call itself.
     """
     fake_task = FakeTask(self.celery_app)
     with self.get_csv(lo=lo, hi=hi, time=time) as csv_path, \
             redis_pipeline(self.redis_client) as pipeline:
         importer = ImportLocal(
             fake_task, self.session, pipeline, cell_type=cell_type)
         importer(filename=csv_path)

     # OCID imports have a dedicated area update task.
     area_task = (
         update_cellarea_ocid if cell_type == 'ocid' else update_cellarea)
     area_task.delay().get()
示例#4
0
 def import_csv(self, celery, redis, session, cell,
                lo=1, hi=10, time=1408604686, cell_type='ocid'):
     """Import a generated CSV file, then run the matching area update.

     The collaborators (``celery``, ``redis``, ``session``, ``cell``) are
     passed in explicitly instead of being read from ``self``. The pipeline
     and session go to the importer call, not to its constructor.
     """
     fake_task = FakeTask(celery)
     with self.get_csv(cell, lo=lo, hi=hi, time=time) as csv_path, \
             redis_pipeline(redis) as pipeline:
         importer = ImportLocal(fake_task, cell_type=cell_type)
         importer(pipeline, session, filename=csv_path)

     # OCID imports have a dedicated area update task.
     area_task = (
         update_cellarea_ocid if cell_type == 'ocid' else update_cellarea)
     area_task.delay().get()
示例#5
0
    def test_new(self):
        # A single cell exists but no area yet; after the update task runs
        # the test expects exactly one area that mirrors the cell.
        station = CellFactory()
        self.session.flush()

        queued_id = encode_cellarea(
            station.radio, station.mcc, station.mnc, station.lac)
        self.area_queue.enqueue([queued_id], json=False)
        update_cellarea.delay().get()

        # .one() also asserts that exactly one area row was created.
        created = self.session.query(CellArea).one()
        self.assertAlmostEqual(created.lat, station.lat)
        self.assertAlmostEqual(created.lon, station.lon)
        self.assertEqual(created.radius, 0)
        self.assertEqual(created.region, 'GB')
        self.assertEqual(created.num_cells, 1)
        self.assertEqual(created.avg_cell_radius, station.radius)
示例#6
0
    def test_update_incomplete_cell(self):
        # Three cells share the area's key: one complete and slightly north
        # of the area, one without any position, and one at the area's
        # position but with part of its bounding box missing. The
        # assertions below expect only two of them to count.
        area = CellAreaFactory(radius=500)
        shared_key = dict(
            radio=area.radio, mcc=area.mcc, mnc=area.mnc, lac=area.lac)

        north_cell = CellFactory(
            lat=area.lat + 0.0002, lon=area.lon, **shared_key)
        CellFactory(lat=None, lon=None, **shared_key)
        CellFactory(
            lat=area.lat, lon=area.lon,
            max_lat=None, min_lon=None, **shared_key)
        self.session.commit()

        queued_id = encode_cellarea(*area.areaid)
        self.area_queue.enqueue([queued_id], json=False)
        update_cellarea.delay().get()

        self.session.refresh(area)
        # Expected latitude is halfway between the two positioned cells.
        self.assertAlmostEqual(area.lat, north_cell.lat - 0.0001)
        self.assertAlmostEqual(area.lon, north_cell.lon)
        self.assertEqual(area.num_cells, 2)
示例#7
0
    def test_update(self):
        # The area starts with stale aggregates (two cells, radius 500,
        # average cell radius 100) while only one matching cell exists.
        # After the update the test expects the aggregates to follow it.
        area = CellAreaFactory(num_cells=2, radius=500, avg_cell_radius=100)
        area_key = dict(
            radio=area.radio, mcc=area.mcc, mnc=area.mnc, lac=area.lac)
        only_cell = CellFactory(
            lat=area.lat, lon=area.lon, radius=200, **area_key)
        self.session.commit()

        queued_id = encode_cellarea(*area.areaid)
        self.area_queue.enqueue([queued_id], json=False)
        update_cellarea.delay().get()

        self.session.refresh(area)
        self.assertAlmostEqual(area.lat, only_cell.lat)
        self.assertAlmostEqual(area.lon, only_cell.lon)
        self.assertEqual(area.radius, 0)
        self.assertEqual(area.region, 'GB')
        self.assertEqual(area.num_cells, 1)
        self.assertEqual(area.avg_cell_radius, 200)
示例#8
0
 def test_empty(self):
     # Nothing has been queued; the task only has to finish without
     # raising.
     pending = update_cellarea.delay()
     pending.get()
示例#9
0
def read_stations_from_csv(session, file_handle, redis_client, cellarea_queue):
    """
    Read stations from a public cell export CSV.

    Stations that are not in the database are added. Stations whose CSV
    ``modified`` timestamp is newer than the stored one are updated, and all
    other stations are left untouched. The areas of new or updated stations
    are put on ``cellarea_queue`` and an ``update_cellarea`` task is
    scheduled for them. An ``update_statregion`` task is scheduled once all
    rows have been handled.

    Blank rows are skipped. Rows that are too short, cannot be parsed or
    fail validation are logged and skipped, unless no valid station has been
    processed yet, in which case the import is aborted.

    If the CSV has no rows at all, a warning is logged and the function
    returns without scheduling anything.

    :arg session: a database session
    :arg file_handle: an open file handle for the CSV data
    :arg redis_client: a Redis client
    :arg cellarea_queue: the DataQueue for updating cellarea IDs
    :raises InvalidCSV: if a row has an unrecognised radio type, or if an
        invalid row is found before any valid station has been processed
    """
    # Avoid circular imports
    from ichnaea.data.tasks import update_cellarea, update_statregion

    csv_content = peekable(reader(file_handle))
    # UMTS was the original name for WCDMA stations
    radio_type = {"UMTS": "wcdma", "GSM": "gsm", "LTE": "lte", "": "Unknown"}

    counts = defaultdict(Counter)
    areas = set()
    areas_total = 0
    total = 0

    # NOTE(review): relies on the peekable wrapper being falsy when the
    # underlying reader has no rows -- confirm against more_itertools.
    if not csv_content:
        LOGGER.warning("Nothing to process.")
        return

    first_row = csv_content.peek()
    if first_row == _FIELD_NAMES:
        # Skip the first row because it's a header row
        next(csv_content)
    else:
        LOGGER.warning("Expected header row, got data: %s", first_row)

    for row in csv_content:
        if not row:
            # csv.reader yields an empty list for a blank line (for example
            # a trailing one). Indexing it would raise IndexError and abort
            # the import before the final commit, so skip it.
            continue

        try:
            radio = radio_type[row[0]]
        except KeyError:
            raise InvalidCSV("Unknown radio type in row: %s" % row)

        if radio == "Unknown":
            LOGGER.warning("Skipping unknown radio: %s", row)
            continue

        try:
            data = {
                "radio": radio,
                "mcc": int(row[1]),
                "mnc": int(row[2]),
                "lac": int(row[3]),
                "cid": int(row[4]),
                "psc": int(row[5]) if row[5] else 0,
                "lon": float(row[6]),
                "lat": float(row[7]),
                # Some exported radiuses exceed the max and fail validation
                "radius": min(int(row[8]), CELL_MAX_RADIUS),
                "samples": int(row[9]),
                # row[10] is "changable", always 1 and not imported
                "created": datetime.fromtimestamp(int(row[11]), UTC),
                "modified": datetime.fromtimestamp(int(row[12]), UTC),
            }
            shard = CellShard.create(_raise_invalid=True, **data)
        except (colander.Invalid, ValueError, IndexError) as e:
            # IndexError covers truncated rows with fewer than 13 columns;
            # they are handled like any other invalid row.
            if total == 0:
                # If the first row is invalid, it's likely the rest of the
                # file is, too--drop out here.
                raise InvalidCSV("first row %s is invalid: %s" % (row, e))
            else:
                LOGGER.warning("row %s is invalid: %s", row, e)
                continue

        # Is this station in the database?
        shard_type = shard.__class__
        existing = (session.query(shard_type).filter(
            shard_type.cellid == shard.cellid).options(
                load_only("modified")).one_or_none())

        if existing:
            if existing.modified < data["modified"]:
                # Update existing station with new data
                operation = "updated"
                existing.psc = shard.psc
                existing.lon = shard.lon
                existing.lat = shard.lat
                existing.radius = shard.radius
                existing.samples = shard.samples
                existing.created = shard.created
                existing.modified = shard.modified
            else:
                # Do nothing to existing station record
                operation = "found"
        else:
            # Add a new station record; its bounding box starts out as the
            # single reported position.
            operation = "new"
            shard.min_lat = shard.lat
            shard.max_lat = shard.lat
            shard.min_lon = shard.lon
            shard.max_lon = shard.lon
            session.add(shard)

        counts[data["radio"]][operation] += 1

        # Process the cell area?
        if operation in {"new", "updated"}:
            areas.add(area_id(shard))

        # Process a chunk of stations, report on progress
        total += 1
        if total % 1000 == 0:
            session.commit()
            LOGGER.info("Processed %d stations", total)

        # Flush a full batch of pending areas. The set is reset right after,
        # so a batch is never queued twice.
        if areas and (len(areas) % 1000 == 0):
            session.commit()
            areas_total += len(areas)
            LOGGER.info("Processed %d station areas", areas_total)
            with redis_pipeline(redis_client) as pipe:
                cellarea_queue.enqueue(list(areas), pipe=pipe)
            update_cellarea.delay()
            areas = set()

    # Commit remaining station data
    session.commit()

    # Update the remaining cell areas
    if areas:
        areas_total += len(areas)
        with redis_pipeline(redis_client) as pipe:
            cellarea_queue.enqueue(list(areas), pipe=pipe)
        update_cellarea.delay()

    # Now that we've updated all the cell areas, we need to update the
    # statregion
    update_statregion.delay()

    # Summarize results
    LOGGER.info("Complete, processed %d station%s:", total,
                "" if total == 1 else "s")
    # The loop variable has its own name so it does not shadow the
    # radio_type mapping above.
    for radio_name, op_counts in sorted(counts.items()):
        LOGGER.info(
            "  %s: %d new, %d updated, %d already loaded",
            radio_name,
            op_counts["new"],
            op_counts["updated"],
            op_counts["found"],
        )
    if areas_total:
        LOGGER.info("  %d station area%s updated", areas_total,
                    "" if areas_total == 1 else "s")
示例#10
0
    def test_blocklist_temporary_and_permanent(self):
        # Simulates a cell that relocates once a month for two years.
        # For the first 2 * PERMANENT_BLOCKLIST_THRESHOLD (12) moves the
        # blocklisting should be temporary and forgotten after a week;
        # from then on the cell should stay blocklisted permanently.

        now = util.utcnow()
        # The station hops between these four spots, all in the USA:
        locations = [
            (40.0, -74.0),  # NYC
            (37.0, -122.0),  # SF
            (47.0, -122.0),  # Seattle
            (25.0, -80.0),  # Miami
        ]

        first_lat, first_lon = locations[0]
        obs = CellObservationFactory(mcc=310, lat=first_lat, lon=first_lon)

        total_months = 4 * PERMANENT_BLOCKLIST_THRESHOLD
        for month in range(total_months):
            age_in_days = (total_months - (month + 1)) * 30
            inserted_at = now - timedelta(days=age_in_days)

            obs.lat, obs.lon = locations[month % 4]

            # With PERMANENT_BLOCKLIST_THRESHOLD == 6 the inserts go:
            #
            # 0: station is created
            # 1: first blocklist entry is created, station deleted
            # 2: station is recreated at the new position
            # 3: blocklist entry is updated, station deleted again
            # 4: station is recreated at the new position
            # 5: blocklist entry is updated, station deleted again
            # 6: station is recreated at the new position
            # ...
            # 11: blocklisting becomes permanent, station deleted again
            # 12 through 23: station is never recreated

            blocks = self.session.query(CellBlocklist).all()
            if month >= 2:
                self.assertEqual(len(blocks), 1)
                # Push the blocklist entry back in time to the moment the
                # observation was supposedly inserted.
                block = blocks[0]
                block.time = inserted_at
                self.session.commit()
            else:
                self.assertEqual(len(blocks), 0)

            # Every scenario below starts by queueing the observation.
            self.data_queue.enqueue([obs])

            if not (month < total_months / 2):
                # The blocklist went over the threshold and is permanent,
                # so no observation is accepted and no station is seen.
                self.assertEqual(update_cell.delay().get(), (0, 0))
                continue

            # Still under the threshold, so the observation was admitted.
            if month % 2 == 0:
                # The station was (re)created.
                self.assertEqual(update_cell.delay().get(), (1, 0))
                # Update cell areas
                update_cellarea.delay().get()
                # One cell + one cell-LAC record should exist.
                self.assertEqual(self.session.query(Cell).count(), 1)
                self.assertEqual(self.session.query(CellArea).count(), 1)
            else:
                # The station existed and was seen moving, which
                # activates the blocklist and deletes the cell.
                self.assertEqual(update_cell.delay().get(), (1, 1))
                # Update cell areas to delete the orphaned area entry
                update_cellarea.delay().get()
                if month > 1:
                    self.assertEqual(block.count, ((month + 1) / 2))
                self.assertEqual(
                    self.session.query(CellBlocklist).count(), 1)
                self.assertEqual(self.session.query(Cell).count(), 0)

                # Add one more observation to be sure the now-active
                # blocklist drops it.
                self.data_queue.enqueue([obs])
                self.assertEqual(update_cell.delay().get(), (0, 0))