def process_measure(data, utcnow, session):
    """Persist one submitted measure and process its cell / wifi entries.

    A new ``Measure`` is filled from ``data``, added to ``session`` and
    flushed before the cell and wifi entries are handed to ``process_cell``
    and ``process_wifi``.

    :param data: mapping with the keys ``time``, ``lat``, ``lon``,
        ``accuracy``, ``altitude``, ``altitude_accuracy`` and ``radio``;
        ``cell`` and ``wifi`` are optional.
    :param utcnow: value stored as the creation time of the measure.
    :param session: database session the measure is added to and flushed on.
    :returns: ``(measure, session_objects)``; ``session_objects`` is a list
        holding the items of the first value returned by ``process_cell``
        (empty when there is no cell data).
    """
    record = Measure()
    record.created = utcnow
    record.time = data['time']
    record.lat = to_precise_int(data['lat'])
    record.lon = to_precise_int(data['lon'])
    record.accuracy = data['accuracy']
    record.altitude = data['altitude']
    record.altitude_accuracy = data['altitude_accuracy']
    record.radio = RADIO_TYPE.get(data['radio'], -1)

    # add + flush so that record.id gets set
    session.add(record)
    session.flush()

    pending_objects = []
    if data.get('cell'):
        cells, cell_data = process_cell(data['cell'], record)
        record.cell = dumps(cell_data)
        pending_objects.extend(cells)

    if data.get('wifi'):
        # filter out old-style sha1 hashes: keys are normalized in place and
        # a single key longer than 12 characters discards the wifi data of
        # the whole measure
        for entry in data['wifi']:
            entry['key'] = wifi_key = normalize_wifi_key(entry['key'])
            if len(wifi_key) > 12:
                break
        else:
            # only reached when no over-long key was found
            process_wifi(data['wifi'], record)
            record.wifi = dumps(data['wifi'])

    return (record, pending_objects)
def process_measures(items, session, userid=None):
    """Process a batch of submitted items and queue the insert tasks.

    One placeholder ``Measure`` per item is added to ``session`` and flushed
    so that each item has a measure id. Every item is then passed through
    ``process_time`` and ``process_measure``. The resulting cell measures are
    grouped by ``CellKey`` and the wifi measures by their ``key``; one
    ``insert_cell_measures`` / ``insert_wifi_measures`` task is queued per
    group.

    :param items: sequence of submitted item mappings (each needs ``lat``
        and ``lon``).
    :param session: database session used for the placeholder measures and
        passed on to the helper functions.
    :param userid: optional user id forwarded to the queued tasks and to
        ``process_mapstat``; when not ``None``, ``process_score`` is called
        with the number of items.
    :returns: ``None``.
    """
    utcnow = datetime.datetime.utcnow().replace(tzinfo=iso8601.UTC)
    # lower time bound handed to process_time: 60 days before now
    utcmin = utcnow - datetime.timedelta(days=60)

    # get enough auto-increment ids assigned
    measures = []
    for _ in items:
        placeholder = Measure()
        measures.append(placeholder)
        session.add(placeholder)
    # TODO switch unique measure id to a uuid, so we don't have to do
    # get these from a savepoint here
    session.flush()

    positions = []
    cell_measures = []
    wifi_measures = []
    for placeholder, item in zip(measures, items):
        item = process_time(item, utcnow, utcmin)
        cell, wifi = process_measure(placeholder.id, item, session)
        cell_measures.extend(cell)
        wifi_measures.extend(wifi)
        positions.append({
            'lat': to_precise_int(item['lat']),
            'lon': to_precise_int(item['lon']),
        })

    heka_client = get_heka_client()

    if cell_measures:
        # group by and create task per cell key
        heka_client.incr("items.uploaded.cell_measures",
                         len(cell_measures))
        by_cell = defaultdict(list)
        for entry in cell_measures:
            cell_key = CellKey(
                entry['radio'],
                entry['mcc'],
                entry['mnc'],
                entry['lac'],
                entry['cid'],
                entry['psc'],
            )
            by_cell[cell_key].append(entry)
        for batch in by_cell.values():
            insert_cell_measures.delay(batch, userid=userid)

    if wifi_measures:
        # group by and create task per wifi key
        heka_client.incr("items.uploaded.wifi_measures",
                         len(wifi_measures))
        by_wifi = defaultdict(list)
        for entry in wifi_measures:
            by_wifi[entry['key']].append(entry)
        for batch in by_wifi.values():
            insert_wifi_measures.delay(batch, userid=userid)

    if userid is not None:
        process_score(userid, len(items), session)
    if positions:
        process_mapstat(positions, session, userid=userid)
def process_measure(measure_id, data, session):
    """Flatten one measure into de-duplicated cell and wifi measure dicts.

    The measure-level fields (id, position, time, accuracy, altitude) are
    added to every cell / wifi entry that does not already carry them, and
    each entry is then normalized with ``normalized_cell_measure_dict`` or
    ``normalized_wifi_measure_dict``. Entries for which the normalizer
    returns ``None`` are dropped. When several entries share the same key,
    only one of them is kept.

    Note that the dicts inside ``data['cell']`` and ``data['wifi']`` are
    modified in place while the measure fields are added.

    :param measure_id: id of the measure the entries belong to.
    :param data: mapping with the keys ``lat``, ``lon``, ``time``,
        ``accuracy``, ``altitude``, ``altitude_accuracy`` and ``radio``;
        ``cell`` and ``wifi`` are optional.
    :param session: unused here; kept so the signature stays unchanged for
        existing callers.
    :returns: ``(cell_measures, wifi_measures)``, always two lists (empty
        when the respective data is missing or nothing survived
        normalization).
    """
    def add_missing_dict_entries(dst, src):
        # x.update(y) overwrites entries in x with those in y;
        # we want to only add those not already present
        for key, value in src.items():
            dst.setdefault(key, value)

    cell_measures = {}
    wifi_measures = {}
    measure_data = dict(
        measure_id=measure_id,
        lat=to_precise_int(data['lat']),
        lon=to_precise_int(data['lon']),
        time=encode_datetime(data['time']),
        accuracy=data['accuracy'],
        altitude=data['altitude'],
        altitude_accuracy=data['altitude_accuracy'],
    )
    measure_radio = RADIO_TYPE.get(data['radio'], -1)

    if data.get('cell'):
        # flatten measure / cell data into a single dict
        for c in data['cell']:
            add_missing_dict_entries(c, measure_data)
            c = normalized_cell_measure_dict(c, measure_radio)
            if c is None:
                continue
            key = to_cellkey_psc(c)
            if key not in cell_measures:
                cell_measures[key] = c
                continue
            # duplicate key: replace the stored entry when the new one has
            # a smaller ta, a larger signal (only if the stored signal is
            # non-zero) or a larger asu
            existing = cell_measures[key]
            if existing['ta'] > c['ta'] or \
               (existing['signal'] != 0 and
                existing['signal'] < c['signal']) or \
               existing['asu'] < c['asu']:
                cell_measures[key] = c

    # flatten measure / wifi data into a single dict
    if data.get('wifi'):
        for w in data['wifi']:
            add_missing_dict_entries(w, measure_data)
            w = normalized_wifi_measure_dict(w)
            if w is None:
                continue
            key = w['key']
            if key not in wifi_measures:
                wifi_measures[key] = w
                continue
            # duplicate key: replace only when the stored signal is non-zero
            # and smaller than the new one
            # NOTE(review): a stored entry with signal == 0 is never
            # replaced -- confirm that this is intended
            existing = wifi_measures[key]
            if existing['signal'] != 0 and \
               existing['signal'] < w['signal']:
                wifi_measures[key] = w

    # Always hand back real lists: previously an empty dict was returned
    # when no cell / wifi data was present and a ``dict.values()`` result
    # (a live view on Python 3) otherwise.
    return (list(cell_measures.values()), list(wifi_measures.values()))
def process_measures(items, session, userid=None):
    """Process a batch of submitted items and queue the insert tasks.

    One placeholder ``Measure`` per item is added to ``session`` and flushed
    so that each item has a measure id. Every item is then passed through
    ``process_time`` and ``process_measure``. The resulting cell measures are
    grouped by ``to_cellkey_psc`` and the wifi measures by their ``key``;
    one ``insert_cell_measures`` / ``insert_wifi_measures`` task is queued
    per group.

    :param items: sequence of submitted item mappings (each needs ``lat``
        and ``lon``).
    :param session: database session used for the placeholder measures and
        passed on to the helper functions.
    :param userid: optional user id forwarded to the queued tasks and to
        ``process_mapstat``; when not ``None``, ``process_score`` is called
        with the number of items.
    :returns: ``None``.
    """
    utcnow = datetime.datetime.utcnow().replace(tzinfo=iso8601.UTC)
    # lower time bound handed to process_time: 60 days before now
    utcmin = utcnow - datetime.timedelta(days=60)

    # get enough auto-increment ids assigned
    measures = []
    for _ in items:
        placeholder = Measure()
        measures.append(placeholder)
        session.add(placeholder)
    # TODO switch unique measure id to a uuid, so we don't have to do
    # get these from a savepoint here
    session.flush()

    positions = []
    cell_measures = []
    wifi_measures = []
    for placeholder, item in zip(measures, items):
        item = process_time(item, utcnow, utcmin)
        cell, wifi = process_measure(placeholder.id, item, session)
        cell_measures.extend(cell)
        wifi_measures.extend(wifi)
        positions.append({
            'lat': to_precise_int(item['lat']),
            'lon': to_precise_int(item['lon']),
        })

    heka_client = get_heka_client()

    if cell_measures:
        # group by and create task per cell key
        heka_client.incr("items.uploaded.cell_measures",
                         len(cell_measures))
        by_cell = defaultdict(list)
        for entry in cell_measures:
            by_cell[to_cellkey_psc(entry)].append(entry)
        for batch in by_cell.values():
            insert_cell_measures.delay(batch, userid=userid)

    if wifi_measures:
        # group by and create task per wifi key
        heka_client.incr("items.uploaded.wifi_measures",
                         len(wifi_measures))
        by_wifi = defaultdict(list)
        for entry in wifi_measures:
            by_wifi[entry['key']].append(entry)
        for batch in by_wifi.values():
            insert_wifi_measures.delay(batch, userid=userid)

    if userid is not None:
        process_score(userid, len(items), session)
    if positions:
        process_mapstat(positions, session, userid=userid)
def process_measure(measure_id, data, session):
    """Flatten one measure into per-cell and per-wifi measure dicts.

    The measure-level fields are written into every dict of ``data['cell']``
    and ``data['wifi']`` with ``dict.update``, so they overwrite same-named
    keys of the entries; the input dicts are modified in place and the very
    same lists are returned.

    :param measure_id: id of the measure the entries belong to.
    :param data: mapping with the keys ``lat``, ``lon``, ``time``,
        ``accuracy``, ``altitude``, ``altitude_accuracy`` and ``radio``;
        ``cell`` and ``wifi`` are optional.
    :param session: not used by this function.
    :returns: ``(cell_measures, wifi_measures)``; each is either the
        corresponding input list or an empty list. The wifi list is empty
        when any key fails ``valid_wifi_pattern``.
    """
    cell_measures = []
    wifi_measures = []
    measure_data = {
        'measure_id': measure_id,
        'lat': to_precise_int(data['lat']),
        'lon': to_precise_int(data['lon']),
        'time': encode_datetime(data['time']),
        'accuracy': data['accuracy'],
        'altitude': data['altitude'],
        'altitude_accuracy': data['altitude_accuracy'],
    }
    measure_radio = RADIO_TYPE.get(data['radio'], -1)

    if data.get('cell'):
        # flatten measure / cell data into a single dict
        for cell in data['cell']:
            cell.update(measure_data)
            # use more specific cell type or
            # fall back to less precise measure
            cell['radio'] = (
                measure_radio if cell['radio'] == ''
                else RADIO_TYPE.get(cell['radio'], -1)
            )
        cell_measures = data['cell']

    if data.get('wifi'):
        # filter out old-style sha1 hashes: keys are normalized in place and
        # the first invalid key discards all wifi data of the measure
        for wifi in data['wifi']:
            wifi['key'] = wifi_key = normalize_wifi_key(wifi['key'])
            if not valid_wifi_pattern(wifi_key):
                break
        else:
            # every key is valid:
            # flatten measure / wifi data into a single dict
            for wifi in data['wifi']:
                wifi.update(measure_data)
            wifi_measures = data['wifi']

    return (cell_measures, wifi_measures)
def process_measure(data, utcnow, session, userid=None):
    """Persist one submitted measure and queue its cell / wifi inserts.

    A new ``Measure`` is filled from ``data``, added to ``session`` and
    flushed. A plain dict copy of its fields is then passed, together with
    the raw cell / wifi entries, to ``insert_cell_measure.delay`` and
    ``insert_wifi_measure.delay``.

    :param data: mapping with the keys ``time``, ``lat``, ``lon``,
        ``accuracy``, ``altitude``, ``altitude_accuracy`` and ``radio``;
        ``cell`` and ``wifi`` are optional.
    :param utcnow: value stored as the creation time of the measure.
    :param session: database session the measure is added to and flushed on.
    :param userid: optional user id forwarded to the queued tasks.
    :returns: the ``Measure`` instance.
    """
    record = Measure()
    record.created = utcnow
    record.time = data["time"]
    record.lat = to_precise_int(data["lat"])
    record.lon = to_precise_int(data["lon"])
    record.accuracy = data["accuracy"]
    record.altitude = data["altitude"]
    record.altitude_accuracy = data["altitude_accuracy"]
    record.radio = RADIO_TYPE.get(data["radio"], -1)

    # add + flush so that record.id gets set
    session.add(record)
    session.flush()

    # plain-dict snapshot of the measure handed to the queued tasks
    measure_data = {
        "id": record.id,
        "created": encode_datetime(record.created),
        "lat": record.lat,
        "lon": record.lon,
        "time": encode_datetime(record.time),
        "accuracy": record.accuracy,
        "altitude": record.altitude,
        "altitude_accuracy": record.altitude_accuracy,
        "radio": record.radio,
    }

    if data.get("cell"):
        insert_cell_measure.delay(measure_data, data["cell"], userid=userid)
        record.cell = dumps(data["cell"])

    if data.get("wifi"):
        # filter out old-style sha1 hashes: keys are normalized in place and
        # a single key longer than 12 characters discards the wifi data of
        # the whole measure
        for entry in data["wifi"]:
            entry["key"] = wifi_key = normalize_wifi_key(entry["key"])
            if len(wifi_key) > 12:
                break
        else:
            # only reached when no over-long key was found
            insert_wifi_measure.delay(
                measure_data, data["wifi"], userid=userid)
            record.wifi = dumps(data["wifi"])

    return record