def search_cell_lac(session, data):
    """
    Estimate a position from the location areas (LACs) of the given cells.

    Every cell in ``data['cell']`` is normalized, its cell id is replaced
    by ``CELLID_LAC`` and the resulting key is looked up in the ``Cell``
    table. Of all matching rows the one with the smallest range wins.

    :param session: A database session for queries.
    :param data: A dict with a ``radio`` entry and a ``cell`` list.
    :returns: A dict with ``lat``, ``lon`` and ``accuracy`` keys, or
              ``None`` if no location area was found.
    """
    default_radio = RADIO_TYPE.get(data['radio'], -1)

    matches = []
    for raw_cell in data['cell']:
        normalized = normalized_cell_dict(raw_cell,
                                          default_radio=default_radio)
        if not normalized:
            # The cell failed normalization; nothing to look up.
            continue

        # Swap the real cell id for the LAC marker so that the key
        # addresses the location area entry.
        normalized['cid'] = CELLID_LAC
        lac_key = to_cellkey(normalized)

        row = (
            session.query(Cell.lat, Cell.lon, Cell.range)
            .filter(*join_cellkey(Cell, lac_key))
            .filter(Cell.lat.isnot(None))
            .filter(Cell.lon.isnot(None))
            .first()
        )
        if row is not None:
            matches.append(Network(lac_key, *row))

    if not matches:
        return None

    # Take the smallest LAC of any the user is inside.
    matches.sort(key=operator.attrgetter('range'))
    smallest = matches[0]

    return {
        'lat': quantize(smallest.lat),
        'lon': quantize(smallest.lon),
        'accuracy': max(LAC_MIN_ACCURACY, smallest.range),
    }
def make_cell_import_dict(row):
    """
    Convert one raw cell import row into a normalized cell dict.

    Fields that are missing, empty strings or ``None`` fall back to a
    per-field default before being converted to their target type.

    :param row: A mapping of column name to raw value. Values are
                expected to be parseable by ``int`` / ``float``.
    :returns: The result of ``normalized_cell_dict`` for the converted
              values.
    :raises ValueError: If a numeric field holds text that cannot be
                        parsed as a number.
    """

    def val(key, default):
        # Treat absent, empty and None values alike: use the default.
        if key in row:
            value = row[key]
            if value != '' and value is not None:
                return value
        return default

    def timestamp(key):
        # Build the aware datetime directly in UTC. A naive
        # ``fromtimestamp`` result is expressed in the machine's local
        # time, so stamping UTC onto it afterwards would shift the
        # instant on any host that does not run in UTC.
        return datetime.fromtimestamp(int(val(key, 0)), UTC)

    d = {}

    d['created'] = timestamp('created')
    # The import column is called "updated", the model field "modified".
    d['modified'] = timestamp('updated')

    d['lat'] = float(val('lat', -255))
    d['lon'] = float(val('lon', -255))

    # Go through ``val`` like every other field, so a missing or None
    # radio maps to the unknown radio type (-1) instead of raising.
    d['radio'] = RADIO_TYPE.get(val('radio', '').lower(), -1)

    for k in ['mcc', 'mnc', 'lac', 'cid', 'psc']:
        d[k] = int(val(k, -1))

    # The range may be given with a fractional part; truncate it.
    d['range'] = int(float(val('range', 0)))
    d['total_measures'] = int(val('samples', -1))

    # Parse numeric text before taking the truth value: bool('0') is
    # True, which would flag every imported row as changeable.
    # Non-numeric values keep their plain truthiness.
    changeable = val('changeable', True)
    try:
        d['changeable'] = bool(float(changeable))
    except (TypeError, ValueError):
        d['changeable'] = bool(changeable)

    return normalized_cell_dict(d)
def search_cell(session, data):
    """
    Estimate a position by averaging the known positions of the given cells.

    Every cell in ``data['cell']`` is normalized and looked up in the
    ``Cell`` table; cells without a stored position are ignored.

    :param session: A database session for queries.
    :param data: A dict with a ``radio`` entry and a ``cell`` list.
    :returns: A dict with ``lat``, ``lon`` and ``accuracy`` keys, or
              ``None`` if none of the cells was found.
    """
    default_radio = RADIO_TYPE.get(data['radio'], -1)

    found = []
    for raw_cell in data['cell']:
        normalized = normalized_cell_dict(raw_cell,
                                          default_radio=default_radio)
        if not normalized:
            # The cell failed normalization; nothing to look up.
            continue

        cell_key = to_cellkey(normalized)
        row = (
            session.query(Cell.lat, Cell.lon, Cell.range)
            .filter(*join_cellkey(Cell, cell_key))
            .filter(Cell.lat.isnot(None))
            .filter(Cell.lon.isnot(None))
            .first()
        )
        if row is not None:
            found.append(Network(cell_key, *row))

    if not found:
        return None

    # Plain arithmetic mean over all matched cells.
    count = len(found)
    avg_lat = sum([network.lat for network in found]) / count
    avg_lon = sum([network.lon for network in found]) / count

    lat = quantize(avg_lat)
    lon = quantize(avg_lon)
    accuracy = estimate_accuracy(avg_lat, avg_lon, found, CELL_MIN_ACCURACY)

    return {
        'lat': lat,
        'lon': lon,
        'accuracy': accuracy,
    }
def search_all_sources(session, api_name, data,
                       client_addr=None, geoip_db=None):
    """
    Common code-path for all lookup APIs, using
    WiFi, cell, cell-lac and GeoIP data sources.

    Cell and cell-lac networks are fetched with a single query, then the
    cell-lac, cell and wifi searches run in that order. A search result
    replaces the current best estimate only if it lies in one of the
    candidate countries and within the accuracy radius of the current
    estimate. GeoIP is used only if none of the searches produced a
    result.

    :param session: A database session for queries.
    :param api_name: A string to use in metrics (for example "geolocate").
    :param data: A dict conforming to the search API.
    :param client_addr: The IP address the request came from.
    :param geoip_db: The geoip database.
    :returns: A dict with rounded ``lat``, ``lon`` and ``accuracy``
              values, or ``None`` if no source produced a result.
    """

    stats_client = get_stats_client()
    heka_client = get_heka_client()

    # Best estimate so far, and the name of the source that produced it.
    result = None
    result_metric = None

    # The *_network entries hold the database matches for the
    # corresponding key collections.
    validated = {
        'wifi': [],
        'cell': [],
        'cell_lac': set(),
        'cell_network': [],
        'cell_lac_network': [],
    }

    # Pass-through wifi data
    validated['wifi'] = data.get('wifi', [])

    # Pre-process cell data
    radio = RADIO_TYPE.get(data.get('radio', ''), -1)
    for cell in data.get('cell', ()):
        cell = normalized_cell_dict(cell, default_radio=radio)
        if cell:
            cell_key = to_cellkey(cell)
            validated['cell'].append(cell_key)
            # The set de-duplicates location areas shared by several cells.
            validated['cell_lac'].add(cell_key._replace(cid=CELLID_LAC))

    # Merge all possible cell and lac keys into one list
    all_cell_keys = []
    all_cell_keys.extend(validated['cell'])
    for key in validated['cell_lac']:
        all_cell_keys.append(key)

    # Do a single query for all cells and lacs at the same time
    try:
        all_networks = query_cell_networks(session, all_cell_keys)
    except Exception:
        # Best effort: report the failure and carry on without cell data.
        heka_client.raven(RAVEN_ERROR)
        all_networks = []
    for network in all_networks:
        # NOTE(review): this assumes query_cell_networks returns networks
        # whose ``key`` is the bare cell id. If ``key`` is a full cell key
        # this comparison never matches -- confirm.
        if network.key == CELLID_LAC:
            validated['cell_lac_network'].append(network)
        else:
            validated['cell_network'].append(network)

    # Always do a GeoIP lookup because we at _least_ want to use the
    # country estimate to filter out bogus requests. We may also use
    # the full GeoIP City-level estimate as well, if all else fails.
    (geoip_res, countries) = geoip_and_best_guess_country_codes(
        validated['cell'], api_name, client_addr, geoip_db)

    # First we attempt a "zoom-in" from cell-lac, to cell
    # to wifi, tightening our estimate each step only so
    # long as it doesn't contradict the existing best-estimate
    # nor the possible countries of origin.
    #
    # NOTE(review): each search function receives a list from
    # ``validated``. Definitions of search_cell_lac / search_cell that
    # index their second argument with 'radio' / 'cell' would raise here
    # and only be counted as an error -- confirm the signatures match.
    for (data_field, object_field, metric_name, search_fn) in [
            ('cell_lac', 'cell_lac_network', 'cell_lac', search_cell_lac),
            ('cell', 'cell_network', 'cell', search_cell),
            ('wifi', 'wifi', 'wifi', search_wifi)]:

        # Only search sources the request actually provided data for.
        if validated[data_field]:
            r = None
            try:
                r = search_fn(session, validated[object_field])
            except Exception:
                # A failing search is reported and then treated like a
                # search that found nothing.
                heka_client.raven(RAVEN_ERROR)
                stats_client.incr('%s.%s_error' %
                                  (api_name, metric_name))

            if r is None:
                stats_client.incr('%s.no_%s_found' %
                                  (api_name, metric_name))

            else:
                lat = float(r['lat'])
                lon = float(r['lon'])

                stats_client.incr('%s.%s_found' %
                                  (api_name, metric_name))

                # Skip any hit that matches none of the possible countries.
                country_match = False
                for country in countries:
                    if location_is_in_country(lat, lon, country, 1):
                        country_match = True
                        break

                # With no candidate countries at all the check is skipped.
                if countries and not country_match:
                    stats_client.incr('%s.anomaly.%s_country_mismatch' %
                                      (api_name, metric_name))

                # Otherwise at least accept the first result we get.
                elif result is None:
                    result = r
                    result_metric = metric_name

                # Or any result that appears to be an improvement over the
                # existing best guess.
                # The distance is scaled by 1000 before being compared to
                # the accuracy (presumably km to meters -- confirm units).
                elif (distance(float(result['lat']),
                               float(result['lon']), lat, lon) * 1000
                      <= result['accuracy']):
                    result = r
                    result_metric = metric_name

                else:
                    stats_client.incr('%s.anomaly.%s_%s_mismatch' %
                                      (api_name, metric_name, result_metric))

    # Fall back to GeoIP if nothing has worked yet. We do not
    # include this in the "zoom-in" loop because GeoIP is
    # frequently _wrong_ at the city level; we only want to
    # accept that estimate if we got nothing better from cell
    # or wifi.
    if not result and geoip_res:
        result = geoip_res
        result_metric = 'geoip'

    if not result:
        stats_client.incr('%s.miss' % api_name)
        return None

    # All three values, including the accuracy, are rounded to
    # DEGREE_DECIMAL_PLACES.
    rounded_result = {
        'lat': round(result['lat'], DEGREE_DECIMAL_PLACES),
        'lon': round(result['lon'], DEGREE_DECIMAL_PLACES),
        'accuracy': round(result['accuracy'], DEGREE_DECIMAL_PLACES),
    }

    # Record which source won and the accuracy it delivered.
    stats_client.incr('%s.%s_hit' % (api_name, result_metric))
    stats_client.timing('%s.accuracy.%s' % (api_name, result_metric),
                        rounded_result['accuracy'])

    return rounded_result