def blacklist_moving_wifis(ago=1, offset=0, batch=1000):
    """Blacklist wifi keys whose measures are spread too far apart.

    Selects up to ``batch`` distinct wifi keys (skipping ``offset``) from
    measures created inside the window returned by
    ``daily_task_days(ago)``.  For those keys the min/max latitude and
    longitude over their measures are compared; a key whose latitude or
    longitude spread reaches the threshold is treated as moving.  Moving
    keys without a ``WifiBlacklist`` row get one, and a ``remove_wifi``
    task is queued for them.

    Return value (note the mixed types):

    * ``[]`` if no keys were found in the window, if every moving key
      was already blacklisted, or if an ``IntegrityError`` was raised;
    * an empty ``set`` if none of the inspected keys moved;
    * the ``set`` of newly blacklisted keys otherwise.

    Any other exception is re-raised through the task's ``retry``.
    """
    # TODO: this doesn't take into account wifi AP's which have
    # permanently moved after a certain date

    # maximum difference of two decimal places, ~1km at equator
    # or 500m at 67 degrees north
    max_difference = 100000
    day, max_day = daily_task_days(ago)
    try:
        with blacklist_moving_wifis.db_session() as session:
            # one batch of distinct keys seen in the requested window
            window_query = session.query(distinct(WifiMeasure.key))
            window_query = window_query.filter(WifiMeasure.created < max_day)
            window_query = window_query.filter(WifiMeasure.created >= day)
            window_query = window_query.order_by(WifiMeasure.id)
            window_query = window_query.limit(batch).offset(offset)
            new_wifis = [row[0] for row in window_query.all()]
            if not new_wifis:  # pragma: no cover
                # nothing to be done
                return []

            # per-key bounding box of the measures for those keys
            bounds_query = session.query(
                WifiMeasure.key,
                func.max(WifiMeasure.lat),
                func.min(WifiMeasure.lat),
                func.max(WifiMeasure.lon),
                func.min(WifiMeasure.lon),
            ).filter(WifiMeasure.key.in_(new_wifis))
            bounds_query = bounds_query.group_by(WifiMeasure.key)

            moving_keys = set()
            for wifi_key, max_lat, min_lat, max_lon, min_lon in \
                    bounds_query.all():
                lat_spread = abs(max_lat - min_lat)
                lon_spread = abs(max_lon - min_lon)
                if max(lat_spread, lon_spread) >= max_difference:
                    moving_keys.add(wifi_key)

            if not moving_keys:
                # nothing moved; hand back the (empty) set as-is
                return moving_keys

            utcnow = datetime.utcnow()
            blocked_query = session.query(WifiBlacklist.key).filter(
                WifiBlacklist.key.in_(moving_keys))
            already_blocked = set(row[0] for row in blocked_query.all())
            moving_keys = moving_keys.difference(already_blocked)
            if not moving_keys:
                return []

            for wifi_key in moving_keys:
                # TODO: on duplicate key ignore
                session.add(WifiBlacklist(key=wifi_key, created=utcnow))
            # the removal task is queued before the blacklist rows are
            # committed
            remove_wifi.delay(list(moving_keys))
            session.commit()
            return moving_keys
    except IntegrityError:  # pragma: no cover
        # TODO log error
        return []
    except Exception as exc:  # pragma: no cover
        raise blacklist_moving_wifis.retry(exc=exc)
def test_wifi_blacklist(self):
    # Submit three wifi entries, one of them for a blacklisted key, and
    # check that only the two non-blacklisted entries are stored.
    from ichnaea.service.submit.tasks import insert_wifi_measure

    session = self.db_master_session
    blocked_key = "ab1234567890"
    allowed_key = "cd1234567890"

    session.add(WifiBlacklist(key=blocked_key))
    session.flush()

    measure = {"id": 0, "lat": 10000000, "lon": 20000000}
    entries = [
        {"key": allowed_key},
        {"key": allowed_key},
        {"key": blocked_key},
    ]

    result = insert_wifi_measure.delay(measure, entries)
    self.assertEqual(result.get(), 2)

    stored = session.query(WifiMeasure).all()
    self.assertEqual(len(stored), 2)
    self.assertEqual(set(row.key for row in stored), set([allowed_key]))
def test_blacklist_time_used_as_creation_time(self):
    # A wifi whose blacklist entry is older than the temporary blacklist
    # duration is accepted again, and the re-created Wifi row takes its
    # creation time from that blacklist entry.
    session = self.db_master_session
    wifi_key = "ab1234567890"

    now = util.utcnow()
    one_day = timedelta(days=1)
    last_week = now - TEMPORARY_BLACKLIST_DURATION - one_day

    blacklist_entry = WifiBlacklist(time=last_week, count=1, key=wifi_key)
    session.add(blacklist_entry)
    session.flush()

    # submit a fresh observation for the previously blacklisted wifi
    obs = {"lat": PARIS_LAT, "lon": PARIS_LON, "key": wifi_key}
    insert_measures_wifi.delay([obs]).get()

    # the wifi has been inserted again ...
    wifis = session.query(Wifi).all()
    self.assertEqual(len(wifis), 1)

    # ... with the blacklist entry's time as its creation date
    recreated = wifis[0]
    self.assertEqual(recreated.created, last_week)
def test_blacklist(self):
    # Entries for a blacklisted key are dropped by insert_measures_wifi:
    # neither a WifiMeasure nor a Wifi row is created for them.
    session = self.db_master_session
    blocked_key = "ab1234567890"
    allowed_key = "cd1234567890"

    session.add(WifiBlacklist(key=blocked_key))
    session.flush()

    position = {"lat": 1, "lon": 2}
    submitted_keys = (allowed_key, allowed_key, blocked_key)
    entries = [dict(position, key=key) for key in submitted_keys]

    result = insert_measures_wifi.delay(entries)
    self.assertEqual(result.get(), 2)

    stored_measures = session.query(WifiMeasure).all()
    self.assertEqual(len(stored_measures), 2)
    self.assertEqual(
        set(row.key for row in stored_measures), set([allowed_key]))

    stored_wifis = session.query(Wifi).all()
    self.assertEqual(len(stored_wifis), 1)
    self.assertEqual(
        set(row.key for row in stored_wifis), set([allowed_key]))
def test_blacklist_moving_wifis(self):
    # Run the blacklist_moving_wifis task over five wifis.  k2, k3 and k4
    # are expected to be newly blacklisted and have their measures
    # removed; k1 is left alone and k5 is already on the blacklist.
    from ichnaea.tasks import blacklist_moving_wifis

    session = self.db_master_session
    k1 = "ab1234567890"
    k2 = "cd1234567890"
    k3 = "ef1234567890"
    k4 = "b01234567890"
    k5 = "d21234567890"

    # (lat, lon, key) for every measure, in insertion order
    positions = [
        (10010000, 10010000, k1),
        (10020000, 10050000, k1),
        (10030000, 10090000, k1),
        (20100000, 20000000, k2),
        (20200000, 20000000, k2),
        (30000000, 30000000, k3),
        (-30000000, 30000000, k3),
        (-41000000, 40000000, k4),
        (-41100000, 40000000, k4),
        (50000000, 50000000, k5),
        (51000000, 50000000, k5),
    ]
    measures = [
        WifiMeasure(lat=lat, lon=lon, key=key)
        for lat, lon, key in positions
    ]
    session.add_all(measures)
    session.add(WifiBlacklist(key=k5))
    session.commit()

    result = blacklist_moving_wifis.delay(ago=0)
    self.assertEqual(sorted(result.get()), sorted([k2, k3, k4]))

    blacklisted = session.query(WifiBlacklist).all()
    self.assertEqual(len(blacklisted), 4)
    self.assertEqual(
        set(row.key for row in blacklisted), set([k2, k3, k4, k5]))

    remaining = session.query(WifiMeasure).all()
    self.assertEqual(len(remaining), 5)
    self.assertEqual(set(row.key for row in remaining), set([k1, k5]))

    # test duplicate call
    repeated = blacklist_moving_wifis.delay(ago=0)
    self.assertEqual(repeated.get(), [])
def test_blacklist(self):
    # Observations for a currently blacklisted key are dropped by
    # insert_measures_wifi: no WifiObservation and no Wifi row is
    # created for them.
    session = self.db_master_session
    blocked_key = "ab1234567890"
    allowed_key = "cd1234567890"

    utcnow = util.utcnow()
    session.add(WifiBlacklist(time=utcnow, count=1, key=blocked_key))
    session.flush()

    position = {"lat": 1, "lon": 2}
    submitted_keys = (allowed_key, allowed_key, blocked_key)
    entries = [dict(position, key=key) for key in submitted_keys]

    result = insert_measures_wifi.delay(entries)
    self.assertEqual(result.get(), 2)

    stored_obs = session.query(WifiObservation).all()
    self.assertEqual(len(stored_obs), 2)
    self.assertEqual(set(row.key for row in stored_obs), set([allowed_key]))

    stored_wifis = session.query(Wifi).all()
    self.assertEqual(len(stored_wifis), 1)
    self.assertEqual(
        set(row.key for row in stored_wifis), set([allowed_key]))
def _make_one(self, **kw):
    # Build a WifiBlacklist instance from the given keyword arguments.
    from ichnaea.models import WifiBlacklist as model_class

    instance = model_class(**kw)
    return instance
def test_blacklist_moving_wifis(self):
    # Run location_update_wifi over six wifis.  k2, k3 and k4 are
    # expected to be detected as moving and added to the blacklist,
    # next to the already blacklisted k5.
    session = self.db_master_session
    now = util.utcnow()
    long_ago = now - timedelta(days=40)

    k1, k2, k3 = "ab1234567890", "bc1234567890", "cd1234567890"
    k4, k5, k6 = "de1234567890", "ef1234567890", "fa1234567890"
    keys = set([k1, k2, k3, k4, k5, k6])

    # a wifi with an entry but no prior position
    no_position = [
        Wifi(key=k1, new_measures=3, total_measures=0),
        WifiMeasure(lat=1.001, lon=1.001, key=k1),
        WifiMeasure(lat=1.002, lon=1.005, key=k1),
        WifiMeasure(lat=1.003, lon=1.009, key=k1),
    ]
    # a wifi with a prior known position
    known_position = [
        Wifi(lat=2.0, lon=2.0, key=k2, new_measures=2, total_measures=1),
        WifiMeasure(lat=2.01, lon=2, key=k2),
        WifiMeasure(lat=2.07, lon=2, key=k2),
    ]
    # a wifi with a very different prior position
    far_from_prior = [
        Wifi(lat=1.0, lon=1.0, key=k3, new_measures=2, total_measures=1),
        WifiMeasure(lat=3.0, lon=3.0, key=k3),
        WifiMeasure(lat=-3.0, lon=3.0, key=k3),
    ]
    # another wifi with a prior known position (and negative lat)
    negative_lat = [
        Wifi(lat=-4.0, lon=4.0, key=k4, new_measures=2, total_measures=1),
        WifiMeasure(lat=-4.1, lon=4, key=k4),
        WifiMeasure(lat=-4.16, lon=4, key=k4),
    ]
    # an already blacklisted wifi
    already_blacklisted = [
        WifiBlacklist(key=k5),
        WifiMeasure(lat=5.0, lon=5.0, key=k5),
        WifiMeasure(lat=5.1, lon=5.0, key=k5),
    ]
    # a wifi with an old different record we ignore, position
    # estimate has been updated since
    old_outlier = [
        Wifi(lat=6.0, lon=6.0, key=k6, new_measures=2, total_measures=1),
        WifiMeasure(lat=6.9, lon=6.9, key=k6, time=long_ago),
        WifiMeasure(lat=6.0, lon=6.0, key=k6),
        WifiMeasure(lat=6.001, lon=6.0, key=k6),
    ]

    data = (no_position + known_position + far_from_prior +
            negative_lat + already_blacklisted + old_outlier)
    session.add_all(data)
    session.commit()

    result = location_update_wifi.delay(min_new=1)
    self.assertEqual(result.get(), (5, 3))

    blacklisted = session.query(WifiBlacklist).all()
    self.assertEqual(
        set(row.key for row in blacklisted), set([k2, k3, k4, k5]))

    measures = session.query(WifiMeasure).all()
    self.assertEqual(len(measures), 14)
    self.assertEqual(set(row.key for row in measures), keys)

    # test duplicate call
    repeated = location_update_wifi.delay(min_new=1)
    self.assertEqual(repeated.get(), 0)

    expected_timers = [
        # We made duplicate calls
        ('task.data.location_update_wifi', 2),
        # One of those would've scheduled a remove_wifi task
        ('task.data.remove_wifi', 1),
    ]
    expected_gauges = [
        ('task.data.location_update_wifi.new_measures_1_100', 2),
    ]
    self.check_stats(total=6, timer=expected_timers, gauge=expected_gauges)
def test_blacklist_moving_wifis(self):
    # Run wifi_location_update over six wifis.  k2, k3 and k4 are
    # expected to be detected as moving and added to the blacklist,
    # next to the already blacklisted k5.
    from ichnaea.tasks import wifi_location_update

    now = datetime.utcnow()
    long_ago = now - timedelta(days=40)
    session = self.db_master_session

    k1, k2, k3 = "ab1234567890", "bc1234567890", "cd1234567890"
    k4, k5, k6 = "de1234567890", "ef1234567890", "fa1234567890"
    keys = set([k1, k2, k3, k4, k5, k6])

    # a wifi with an entry but no prior position
    no_position = [
        Wifi(key=k1, new_measures=3, total_measures=0),
        WifiMeasure(lat=10010000, lon=10010000, key=k1),
        WifiMeasure(lat=10020000, lon=10050000, key=k1),
        WifiMeasure(lat=10030000, lon=10090000, key=k1),
    ]
    # a wifi with a prior known position
    known_position = [
        Wifi(lat=20000000, lon=20000000, key=k2,
             new_measures=2, total_measures=1),
        WifiMeasure(lat=20100000, lon=20000000, key=k2),
        WifiMeasure(lat=20700000, lon=20000000, key=k2),
    ]
    # a wifi with a very different prior position
    far_from_prior = [
        Wifi(lat=10000000, lon=10000000, key=k3,
             new_measures=2, total_measures=1),
        WifiMeasure(lat=30000000, lon=30000000, key=k3),
        WifiMeasure(lat=-30000000, lon=30000000, key=k3),
    ]
    # another wifi with a prior known position (and negative lat)
    negative_lat = [
        Wifi(lat=-40000000, lon=40000000, key=k4,
             new_measures=2, total_measures=1),
        WifiMeasure(lat=-41000000, lon=40000000, key=k4),
        WifiMeasure(lat=-41600000, lon=40000000, key=k4),
    ]
    # an already blacklisted wifi
    already_blacklisted = [
        WifiBlacklist(key=k5),
        WifiMeasure(lat=50000000, lon=50000000, key=k5),
        WifiMeasure(lat=51000000, lon=50000000, key=k5),
    ]
    # a wifi with an old different record we ignore, position
    # estimate has been updated since
    old_outlier = [
        Wifi(lat=60000000, lon=60000000, key=k6,
             new_measures=2, total_measures=1),
        WifiMeasure(lat=69000000, lon=69000000, key=k6, created=long_ago),
        WifiMeasure(lat=60000000, lon=60000000, key=k6),
        WifiMeasure(lat=60010000, lon=60000000, key=k6),
    ]

    data = (no_position + known_position + far_from_prior +
            negative_lat + already_blacklisted + old_outlier)
    session.add_all(data)
    session.commit()

    result = wifi_location_update.delay(min_new=1)
    self.assertEqual(result.get(), (5, 3))

    blacklisted = session.query(WifiBlacklist).all()
    self.assertEqual(
        set(row.key for row in blacklisted), set([k2, k3, k4, k5]))

    measures = session.query(WifiMeasure).all()
    self.assertEqual(len(measures), 14)
    self.assertEqual(set(row.key for row in measures), keys)

    # test duplicate call
    repeated = wifi_location_update.delay(min_new=1)
    self.assertEqual(repeated.get(), 0)

    expected_timers = [
        # We made duplicate calls
        ('task.wifi_location_update', 2),
        # One of those would've scheduled a remove_wifi task
        ('task.remove_wifi', 1),
    ]
    self.check_expected_heka_messages(total=4, timer=expected_timers)