Exemplo n.º 1
0
def cluster_areas(areas, lookups, min_age=0):
    """
    Cluster areas, treat each area as its own cluster.

    :param areas: Iterable of area records; each must expose ``lat``,
        ``lon``, ``radius``, ``areaid`` and ``last_seen``.
    :param lookups: Iterable of lookup records exposing ``areaid``,
        ``age``, ``signalStrength`` and ``radioType``.
    :param min_age: Fallback age used when a lookup has no (or a zero)
        ``age`` value.
    :returns: A list with one single-row ``NETWORK_DTYPE`` numpy array
        per area.
    :raises KeyError: If an area has no matching entry in ``lookups``.
    """
    now = util.utcnow()
    today = now.date()

    # Map decoded area ids to an (age, signal strength) pair. The age is
    # made non-negative and never drops below 1000; a missing signal
    # falls back to the per-radio-type minimum.
    obs_data = {}
    for lookup in lookups:
        obs_data[decode_cellarea(lookup.areaid)] = (
            max(abs(lookup.age or min_age), 1000),
            lookup.signalStrength or MIN_CELL_SIGNAL[lookup.radioType],
        )

    clusters = []
    for area in areas:
        age, signal = obs_data[area.areaid]
        # An area without a last_seen date cannot have been seen today;
        # guard explicitly because comparing None with a date raises
        # TypeError.
        seen_today = bool(
            area.last_seen is not None and area.last_seen >= today)
        clusters.append(numpy.array([(
            area.lat, area.lon, area.radius,
            age,
            signal,
            area_score(area, now),
            encode_cellarea(*area.areaid),
            seen_today)],
            dtype=NETWORK_DTYPE))

    return clusters
Exemplo n.º 2
0
    def test_update(self, celery, session):
        """
        Check that the update task refreshes an area with stale values.

        The area starts out with outdated values and gets two cells at
        its own position; after the task has run, the refreshed area
        must carry the values asserted below.
        """
        current_day = util.utcnow().date()
        previous_day = current_day - timedelta(days=1)
        area = self.area_factory(
            num_cells=2, radius=500, avg_cell_radius=100,
            last_seen=previous_day)
        # Both cells share the area's key and position.
        shared = dict(
            lat=area.lat, lon=area.lon,
            radio=area.radio, mcc=area.mcc, mnc=area.mnc, lac=area.lac)
        cell = self.cell_factory(radius=200, last_seen=current_day, **shared)
        self.cell_factory(radius=300, last_seen=previous_day, **shared)
        session.commit()

        queue = self.area_queue(celery)
        queue.enqueue([encode_cellarea(*area.areaid)])
        self.task.delay().get()

        session.refresh(area)
        assert area.lat == cell.lat
        assert area.lon == cell.lon
        assert area.radius == 0
        assert area.region == 'GB'
        # 250 is the mean of the two cell radii created above.
        assert area.avg_cell_radius == 250
        assert area.num_cells == 2
        assert area.last_seen == current_day
Exemplo n.º 3
0
def cluster_areas(areas, lookups, min_age=0):
    """
    Build one single-row cluster array per area.

    Every area becomes its own cluster. The age and signal strength of
    each row come from the lookup whose decoded area id equals the
    area's ``areaid``.
    """
    now = util.utcnow()
    today = now.date()

    # Decoded area id -> (age, signal strength).
    obs_data = {}
    for lookup in lookups:
        age = max(abs(lookup.age or min_age), 1000)
        signal = lookup.signalStrength or MIN_CELL_SIGNAL[lookup.radioType]
        obs_data[decode_cellarea(lookup.areaid)] = (age, signal)

    def to_cluster(area):
        # Wrap the values of a single area in a one-element array.
        row = (
            area.lat,
            area.lon,
            area.radius,
            obs_data[area.areaid][0],
            obs_data[area.areaid][1],
            area_score(area, now),
            encode_cellarea(*area.areaid, codec="base64"),
            bool(area.last_seen is not None and area.last_seen >= today),
        )
        return numpy.array([row], dtype=NETWORK_DTYPE)

    return [to_cluster(area) for area in areas]
Exemplo n.º 4
0
    def import_stations(self, session, pipe, filename):
        """
        Import cell rows from a gzipped CSV file in batches.

        Each CSV row is converted through ``self.make_import_dict`` and,
        if the result is not ``None``, inserted with an
        "on duplicate key update" clause. The number of newly inserted
        rows is added to the stat counter for ``self.stat_key`` and the
        area ids of all imported rows are enqueued on ``self.area_queue``.

        :param session: Database session used to execute the inserts.
        :param pipe: Passed through to ``StatCounter.incr``.
        :param filename: Path of the gzipped CSV file to read.
        """
        today = util.utcnow().date()

        # Columns that are overwritten when a row already exists.
        on_duplicate = (
            '`modified` = values(`modified`)'
            ', `lat` = values(`lat`)'
            ', `lon` = values(`lon`)'
            ', `psc` = values(`psc`)'
            ', `max_lat` = values(`max_lat`)'
            ', `min_lat` = values(`min_lat`)'
            ', `max_lon` = values(`max_lon`)'
            ', `min_lon` = values(`min_lon`)'
            ', `radius` = values(`radius`)'
            ', `samples` = values(`samples`)'
        )

        table_insert = self.cell_model.__table__.insert(
            mysql_on_duplicate=on_duplicate)

        def commit_batch(rows):
            # Execute one multi-row insert and record how many rows were new.
            result = session.execute(table_insert, rows)
            count = result.rowcount
            # apply trick to avoid querying for existing rows,
            # MySQL claims 1 row for an inserted row, 2 for an updated row
            inserted_rows = 2 * len(rows) - count
            changed_rows = count - len(rows)
            # NOTE(review): this sum is algebraically always len(rows), so
            # the assert cannot fail; it does not validate ``count``.
            assert inserted_rows + changed_rows == len(rows)
            StatCounter(self.stat_key, today).incr(pipe, inserted_rows)

        # Unique (radio, mcc, mnc, lac) tuples of all imported rows.
        areaids = set()

        with util.gzip_open(filename, 'r') as gzip_wrapper:
            with gzip_wrapper as gzip_file:
                csv_reader = csv.reader(gzip_file)
                parse_row = partial(self.make_import_dict,
                                    self.cell_model.validate,
                                    self.import_spec)
                rows = []
                for row in csv_reader:
                    # skip any header row
                    if (csv_reader.line_num == 1 and
                            row[0] == 'radio'):  # pragma: no cover
                        continue

                    data = parse_row(row)
                    if data is not None:
                        rows.append(data)
                        areaids.add((int(data['radio']), data['mcc'],
                                    data['mnc'], data['lac']))

                    # Write out a full batch and start collecting a new one.
                    if len(rows) == self.batch_size:  # pragma: no cover
                        commit_batch(rows)
                        session.flush()
                        rows = []

                # Commit whatever is left over after the last full batch.
                if rows:
                    commit_batch(rows)

        self.area_queue.enqueue(
            [encode_cellarea(*id_) for id_ in areaids], json=False)
Exemplo n.º 5
0
    def test_update(self):
        """
        Run the update task for an area whose stored values are stale.

        Two cells are created at the area's position, the area id is
        queued and the task executed; the refreshed area is then
        compared against the expected values.
        """
        today = util.utcnow().date()
        yesterday = today - timedelta(days=1)
        area = self.area_factory(
            num_cells=2, radius=500, avg_cell_radius=100,
            last_seen=yesterday)
        # Keyword arguments common to both cells of this area.
        common = dict(
            lat=area.lat, lon=area.lon,
            radio=area.radio, mcc=area.mcc, mnc=area.mnc, lac=area.lac)
        cell = self.cell_factory(radius=200, last_seen=today, **common)
        self.cell_factory(radius=300, last_seen=yesterday, **common)
        self.session.commit()

        self.area_queue.enqueue([encode_cellarea(*area.areaid)])
        pending = self.task.delay()
        pending.get()

        self.session.refresh(area)
        self.assertAlmostEqual(area.lat, cell.lat)
        self.assertAlmostEqual(area.lon, cell.lon)
        self.assertEqual(area.radius, 0)
        self.assertEqual(area.region, 'GB')
        self.assertEqual(area.avg_cell_radius, 250)
        self.assertEqual(area.num_cells, 2)
        self.assertEqual(area.last_seen, today)
    def test_update(self, celery, session):
        """
        An existing area is refreshed from the two cells that belong to it.

        The area is created with values that differ from what the cells
        imply; the assertions at the end pin the values expected once
        the queued update has been processed.
        """
        today = util.utcnow().date()
        yesterday = today - timedelta(days=1)

        area = self.area_factory(
            num_cells=2,
            radius=500,
            avg_cell_radius=100,
            last_seen=yesterday,
        )
        area_key = dict(
            radio=area.radio, mcc=area.mcc, mnc=area.mnc, lac=area.lac)
        newest_cell = self.cell_factory(
            lat=area.lat, lon=area.lon, radius=200, last_seen=today,
            **area_key)
        self.cell_factory(
            lat=area.lat, lon=area.lon, radius=300, last_seen=yesterday,
            **area_key)
        session.commit()

        encoded = encode_cellarea(*area.areaid)
        self.area_queue(celery).enqueue([encoded])
        result = self.task.delay()
        result.get()

        session.refresh(area)
        assert area.lat == newest_cell.lat
        assert area.lon == newest_cell.lon
        assert area.radius == 0
        assert area.region == 'GB'
        assert area.avg_cell_radius == 250
        assert area.num_cells == 2
        assert area.last_seen == today
Exemplo n.º 7
0
    def test_remove(self):
        """
        Queue an area that was created without any cells and check that
        no area rows remain once the task has run.
        """
        orphan = self.area_factory()
        self.session.flush()

        queued_ids = [encode_cellarea(*orphan.areaid)]
        self.area_queue.enqueue(queued_ids)
        self.task.delay().get()

        remaining = self.session.query(self.area_model).count()
        self.assertEqual(remaining, 0)
Exemplo n.º 8
0
    def test_remove(self):
        """
        A cell area created without any cells must be gone after
        ``update_cellarea`` has processed its queued id.
        """
        lone_area = CellAreaFactory()
        self.session.flush()

        encoded = encode_cellarea(*lone_area.areaid)
        self.area_queue.enqueue([encoded], json=False)
        outcome = update_cellarea.delay()
        outcome.get()

        area_count = self.session.query(CellArea).count()
        self.assertEqual(area_count, 0)
Exemplo n.º 9
0
    def test_remove(self, celery, session):
        """
        Queue an area that was created without any cells and check that
        no area rows remain once the task has run.
        """
        orphan = self.area_factory()
        session.flush()

        queue = self.area_queue(celery)
        queue.enqueue([encode_cellarea(*orphan.areaid)])
        self.task.delay().get()

        remaining = session.query(self.area_model).count()
        assert remaining == 0
Exemplo n.º 10
0
    def test_remove(self, celery, session):
        """
        After the update task has processed an area created without any
        cells, the area table is expected to be empty.
        """
        lone_area = self.area_factory()
        session.flush()

        encoded_ids = [encode_cellarea(*lone_area.areaid)]
        self.area_queue(celery).enqueue(encoded_ids)
        task_result = self.task.delay()
        task_result.get()

        area_query = session.query(self.area_model)
        assert area_query.count() == 0
Exemplo n.º 11
0
    def test_new(self):
        """
        Queue the area id of a single new cell and check the one area
        row that exists after ``update_cellarea`` has run.
        """
        cell = CellFactory()
        self.session.flush()

        area_key = (cell.radio, cell.mcc, cell.mnc, cell.lac)
        self.area_queue.enqueue([encode_cellarea(*area_key)], json=False)
        update_cellarea.delay().get()

        created = self.session.query(CellArea).one()
        self.assertAlmostEqual(created.lat, cell.lat)
        self.assertAlmostEqual(created.lon, cell.lon)
        self.assertEqual(created.radius, 0)
        self.assertEqual(created.region, 'GB')
        self.assertEqual(created.num_cells, 1)
        self.assertEqual(created.avg_cell_radius, cell.radius)
Exemplo n.º 12
0
    def test_new(self, celery, session):
        """
        A single cell without an existing area: after the task has run
        there must be exactly one area, mirroring that cell's values.
        """
        cell = self.cell_factory()
        session.flush()

        area_key = (cell.radio, cell.mcc, cell.mnc, cell.lac)
        queue = self.area_queue(celery)
        queue.enqueue([encode_cellarea(*area_key)])
        self.task.delay().get()

        created = session.query(self.area_model).one()
        assert created.lat == cell.lat
        assert created.lon == cell.lon
        assert created.radius == 0
        assert created.region == "GB"
        assert created.avg_cell_radius == cell.radius
        assert created.num_cells == 1
        assert created.last_seen == cell.last_seen
Exemplo n.º 13
0
    def test_new(self):
        """
        Queue the area id derived from one freshly created cell and
        verify the single area row found after the task has run.
        """
        cell = self.cell_factory()
        self.session.flush()

        encoded = encode_cellarea(cell.radio, cell.mcc, cell.mnc, cell.lac)
        self.area_queue.enqueue([encoded])
        async_result = self.task.delay()
        async_result.get()

        new_area = self.session.query(self.area_model).one()
        self.assertAlmostEqual(new_area.lat, cell.lat)
        self.assertAlmostEqual(new_area.lon, cell.lon)
        self.assertEqual(new_area.radius, 0)
        self.assertEqual(new_area.region, 'GB')
        self.assertEqual(new_area.avg_cell_radius, cell.radius)
        self.assertEqual(new_area.num_cells, 1)
        self.assertEqual(new_area.last_seen, cell.last_seen)
Exemplo n.º 14
0
    def test_new(self, celery, session):
        """
        Process the queued area id of one new cell and compare the
        single resulting area row with that cell.
        """
        cell = self.cell_factory()
        session.flush()

        encoded = encode_cellarea(cell.radio, cell.mcc, cell.mnc, cell.lac)
        self.area_queue(celery).enqueue([encoded])
        task_result = self.task.delay()
        task_result.get()

        new_area = session.query(self.area_model).one()
        assert new_area.lat == cell.lat
        assert new_area.lon == cell.lon
        assert new_area.radius == 0
        assert new_area.region == 'GB'
        assert new_area.avg_cell_radius == cell.radius
        assert new_area.num_cells == 1
        assert new_area.last_seen == cell.last_seen
Exemplo n.º 15
0
    def test_update_incomplete_cell(self):
        """
        Update an area that has cells with missing position data.

        Three cells are created for the area: one offset in latitude,
        one without lat/lon, and one without ``max_lat``/``min_lon``.
        Only two cells are expected to be counted afterwards.
        """
        area = CellAreaFactory(radius=500)
        area_key = dict(
            radio=area.radio, mcc=area.mcc, mnc=area.mnc, lac=area.lac)
        shifted_lat = area.lat + 0.0002
        cell = CellFactory(lat=shifted_lat, lon=area.lon, **area_key)
        CellFactory(lat=None, lon=None, **area_key)
        CellFactory(
            lat=area.lat, lon=area.lon,
            max_lat=None, min_lon=None,
            **area_key)
        self.session.commit()

        encoded = encode_cellarea(*area.areaid)
        self.area_queue.enqueue([encoded], json=False)
        update_cellarea.delay().get()

        self.session.refresh(area)
        # The expected latitude lies halfway between the two positions used.
        expected_lat = cell.lat - 0.0001
        self.assertAlmostEqual(area.lat, expected_lat)
        self.assertAlmostEqual(area.lon, cell.lon)
        self.assertEqual(area.num_cells, 2)
Exemplo n.º 16
0
    def test_update(self):
        """
        An area with stale values and one cell is refreshed from that
        cell when ``update_cellarea`` processes its queued id.
        """
        area = CellAreaFactory(
            num_cells=2,
            radius=500,
            avg_cell_radius=100,
        )
        area_key = dict(
            radio=area.radio, mcc=area.mcc, mnc=area.mnc, lac=area.lac)
        cell = CellFactory(
            lat=area.lat, lon=area.lon, radius=200, **area_key)
        self.session.commit()

        encoded = encode_cellarea(*area.areaid)
        self.area_queue.enqueue([encoded], json=False)
        outcome = update_cellarea.delay()
        outcome.get()

        self.session.refresh(area)
        self.assertAlmostEqual(area.lat, cell.lat)
        self.assertAlmostEqual(area.lon, cell.lon)
        self.assertEqual(area.radius, 0)
        self.assertEqual(area.region, 'GB')
        self.assertEqual(area.num_cells, 1)
        self.assertEqual(area.avg_cell_radius, 200)
Exemplo n.º 17
0
def cluster_areas(areas, lookups):
    """
    Turn every area into its own single-row cluster array.

    The signal value of each row is taken from the lookup whose
    ``areaid`` equals the encoded id of the area.
    """
    now = util.utcnow()

    # Encoded area id -> signal strength, falling back to the minimum
    # when the lookup carries no (or a zero) signal.
    signals = {}
    for lookup in lookups:
        strength = lookup.signal or MIN_CELL_SIGNAL
        signals[lookup.areaid] = strength

    def to_cluster(area):
        # Wrap the values of a single area in a one-element array.
        row = (
            area.lat,
            area.lon,
            area.radius,
            signals[encode_cellarea(*area.areaid)],
            area.score(now),
        )
        return numpy.array([row], dtype=NETWORK_DTYPE)

    return [to_cluster(area) for area in areas]
Exemplo n.º 18
0
    def test_update_incomplete_cell(self):
        """
        Update an area that has cells with missing position data.

        Three cells are created for the area: one offset in latitude,
        one without lat/lon, and one without ``max_lat``/``min_lon``.
        Only two cells are expected to be counted afterwards.
        """
        area = self.area_factory(radius=500)
        area_key = dict(
            radio=area.radio, mcc=area.mcc, mnc=area.mnc, lac=area.lac)
        make_cell = self.cell_factory
        cell = make_cell(lat=area.lat + 0.0002, lon=area.lon, **area_key)
        make_cell(lat=None, lon=None, **area_key)
        make_cell(
            lat=area.lat, lon=area.lon,
            max_lat=None, min_lon=None,
            **area_key)
        self.session.commit()

        self.area_queue.enqueue([encode_cellarea(*area.areaid)])
        async_result = self.task.delay()
        async_result.get()

        self.session.refresh(area)
        expected_lat = cell.lat - 0.0001
        self.assertAlmostEqual(area.lat, expected_lat)
        self.assertAlmostEqual(area.lon, cell.lon)
        self.assertEqual(area.num_cells, 2)
Exemplo n.º 19
0
    def test_update_incomplete_cell(self, celery, session):
        """
        Update an area that has cells with missing position data.

        Three cells are created for the area: one offset in latitude,
        one without lat/lon, and one without ``max_lat``/``min_lon``.
        Only two cells are expected to be counted afterwards.
        """
        area = self.area_factory(radius=500)
        area_key = dict(
            radio=area.radio, mcc=area.mcc, mnc=area.mnc, lac=area.lac)
        shifted_lat = area.lat + 0.0002
        cell = self.cell_factory(lat=shifted_lat, lon=area.lon, **area_key)
        self.cell_factory(lat=None, lon=None, **area_key)
        self.cell_factory(
            lat=area.lat, lon=area.lon,
            max_lat=None, min_lon=None,
            **area_key)
        session.commit()

        queue = self.area_queue(celery)
        queue.enqueue([encode_cellarea(*area.areaid)])
        self.task.delay().get()

        session.refresh(area)
        # Positions are compared after rounding to seven decimal places.
        expected_lat = round(cell.lat - 0.0001, 7)
        expected_lon = round(cell.lon, 7)
        assert round(area.lat, 7) == expected_lat
        assert round(area.lon, 7) == expected_lon
        assert area.num_cells == 2
    def test_update_incomplete_cell(self, celery, session):
        """
        Cells lacking position data must not distort the updated area.

        One cell is shifted in latitude, one has no lat/lon at all and
        one lacks ``max_lat``/``min_lon``; the refreshed area is expected
        to report two cells.
        """
        area = self.area_factory(radius=500)
        area_key = dict(
            radio=area.radio, mcc=area.mcc, mnc=area.mnc, lac=area.lac)
        make_cell = self.cell_factory
        cell = make_cell(lat=area.lat + 0.0002, lon=area.lon, **area_key)
        make_cell(lat=None, lon=None, **area_key)
        make_cell(
            lat=area.lat, lon=area.lon,
            max_lat=None, min_lon=None,
            **area_key)
        session.commit()

        encoded = encode_cellarea(*area.areaid)
        self.area_queue(celery).enqueue([encoded])
        task_result = self.task.delay()
        task_result.get()

        session.refresh(area)
        # Positions are compared after rounding to seven decimal places.
        actual_lat = round(area.lat, 7)
        assert actual_lat == round(cell.lat - 0.0001, 7)
        actual_lon = round(area.lon, 7)
        assert actual_lon == round(cell.lon, 7)
        assert area.num_cells == 2
Exemplo n.º 21
0
def cluster_areas(areas, lookups):
    """
    Turn every area into its own single-row cluster array.

    The signal value of each row is taken from the lookup whose
    ``areaid`` equals the encoded id of the area.
    """
    now = util.utcnow()

    # Encoded area id -> signal strength, falling back to the minimum
    # for the lookup's radio when it carries no (or a zero) signal.
    signals = {}
    for lookup in lookups:
        strength = lookup.signal or MIN_CELL_SIGNAL[lookup.radio]
        signals[lookup.areaid] = strength

    def to_cluster(area):
        # Wrap the values of a single area in a one-element array.
        row = (
            area.lat,
            area.lon,
            area.radius,
            signals[encode_cellarea(*area.areaid)],
            area.score(now),
        )
        return numpy.array([row], dtype=NETWORK_DTYPE)

    return [to_cluster(area) for area in areas]
Exemplo n.º 22
0
 def add_area_update(self, key):
     """
     Record the area belonging to ``key`` as updated.

     The area id is encoded from the first four components of the
     decoded cell id and added to ``self.updated_areas``.
     """
     pending = self.updated_areas
     area_parts = decode_cellid(key)[:4]
     pending.add(encode_cellarea(*area_parts))
Exemplo n.º 23
0
 def check_areas(self, celery, obs):
     """
     Assert that the queued area ids match those derived from ``obs``.

     The items dequeued from the ``update_cellarea`` data queue are
     compared, as a set, with the area ids encoded from the first four
     components of each observation's decoded ``unique_key``.
     """
     pending = set(celery.data_queues["update_cellarea"].dequeue())
     expected = {
         encode_cellarea(*decode_cellid(ob.unique_key)[:4]) for ob in obs
     }
     assert pending == expected
Exemplo n.º 24
0
    def import_stations(self, session, pipe, filename):
        """
        Import cell rows from a gzipped CSV file into sharded tables.

        Each CSV row is converted through ``self.make_import_dict`` and,
        if the result is not ``None``, grouped by the shard id of its
        radio. Batches are inserted per shard with an
        "on duplicate key update" clause. The number of newly inserted
        rows is added to the stat counter for ``self.stat_key`` and the
        area ids of all imported rows are enqueued on ``self.area_queue``.

        :param session: Database session used to execute the inserts.
        :param pipe: Passed through to ``StatCounter.incr``.
        :param filename: Path of the gzipped CSV file to read.
        """
        today = util.utcnow().date()
        shards = self.cell_model.shards()

        # Columns that are overwritten when a row already exists.
        on_duplicate = ('`modified` = values(`modified`)'
                        ', `lat` = values(`lat`)'
                        ', `lon` = values(`lon`)'
                        ', `psc` = values(`psc`)'
                        ', `max_lat` = values(`max_lat`)'
                        ', `min_lat` = values(`min_lat`)'
                        ', `max_lon` = values(`max_lon`)'
                        ', `min_lon` = values(`min_lon`)'
                        ', `radius` = values(`radius`)'
                        ', `samples` = values(`samples`)')

        def commit_batch(rows):
            # ``rows`` maps a shard id to the list of row dicts for that
            # shard; one insert statement is executed per shard.
            all_inserted_rows = 0
            for shard_id, shard_rows in rows.items():
                table_insert = shards[shard_id].__table__.insert(
                    mysql_on_duplicate=on_duplicate)

                result = session.execute(table_insert, shard_rows)
                count = result.rowcount
                # apply trick to avoid querying for existing rows,
                # MySQL claims 1 row for an inserted row, 2 for an updated row
                inserted_rows = 2 * len(shard_rows) - count
                changed_rows = count - len(shard_rows)
                # NOTE(review): this sum is algebraically always
                # len(shard_rows), so the assert cannot fail.
                assert inserted_rows + changed_rows == len(shard_rows)
                all_inserted_rows += inserted_rows
            StatCounter(self.stat_key, today).incr(pipe, all_inserted_rows)

        # Unique (radio, mcc, mnc, lac) tuples of all imported rows.
        areaids = set()

        with util.gzip_open(filename, 'r') as gzip_wrapper:
            with gzip_wrapper as gzip_file:
                cell_model = self.cell_model
                csv_reader = csv.reader(gzip_file)
                parse_row = partial(self.make_import_dict,
                                    self.cell_model.validate, self.import_spec)

                rows = defaultdict(list)
                # Number of rows collected across all shards in this batch.
                row_count = 0
                for row in csv_reader:
                    # skip any header row
                    if (csv_reader.line_num == 1
                            and row[0] == 'radio'):  # pragma: no cover
                        continue

                    data = parse_row(row)
                    if data is not None:
                        rows[cell_model.shard_id(data['radio'])].append(data)
                        row_count += 1
                        areaids.add((int(data['radio']), data['mcc'],
                                     data['mnc'], data['lac']))

                    # Write out a full batch and start collecting a new one.
                    if row_count == self.batch_size:  # pragma: no cover
                        commit_batch(rows)
                        session.flush()
                        rows = defaultdict(list)
                        row_count = 0

                # Commit whatever is left over after the last full batch;
                # an empty defaultdict is falsy, so nothing runs otherwise.
                if rows:
                    commit_batch(rows)

        self.area_queue.enqueue([encode_cellarea(*id_) for id_ in areaids],
                                json=False)
Exemplo n.º 25
0
 def check_areas(self, celery, obs):
     """
     Verify that exactly the areas of the given observations are queued.

     Dequeues the ``update_cellarea`` data queue and compares the result,
     as a set, with the area ids encoded from the first four components
     of each observation's decoded ``unique_key``.
     """
     area_queue = celery.data_queues['update_cellarea']
     queued = set(area_queue.dequeue())
     expected = set()
     for ob in obs:
         cellid = decode_cellid(ob.unique_key)
         expected.add(encode_cellarea(*cellid[:4]))
     assert queued == expected
Exemplo n.º 26
0
 def add_area_update(self, updated_areas, key):
     """
     Add the area belonging to ``key`` to ``updated_areas``.

     The area id is encoded from the first four components of the
     decoded cell id.
     """
     area_parts = decode_cellid(key)[:4]
     encoded_area = encode_cellarea(*area_parts)
     updated_areas.add(encoded_area)