Пример #1
0
def process_mapstat(measures, session, userid=None):
    """Add the given measures to the per-tile ``MapStat`` counters.

    The ``lat``/``lon`` of every measure is divided by 10000 to pick a
    tile and the hits per tile are counted. Tiles for which a ``MapStat``
    row was found get their value bumped through a SQL expression; every
    other tile is written with an insert that carries an ``on_duplicate``
    clause.

    If ``userid`` is given, the number of tiles without a prior row is
    handed to ``process_score`` under the ``new_location`` key.
    """
    # aggregate to 100x100m tiles
    hits = defaultdict(int)
    for m in measures:
        tile = (m.lat / 10000, m.lon / 10000)
        hits[tile] += 1

    # Fetch candidate rows; the lat/lon filters are independent, so the
    # result may hold rows for tiles we do not care about. Those are never
    # looked up below.
    tile_lats = {lat for lat, _ in hits}
    tile_lons = {lon for _, lon in hits}
    query = session.query(MapStat)
    query = query.filter(MapStat.key == MAPSTAT_TYPE['location'])
    query = query.filter(MapStat.lat.in_(tile_lats))
    query = query.filter(MapStat.lon.in_(tile_lons))
    known = {(row.lat, row.lon): row for row in query.all()}

    new_tiles = 0
    for (lat, lon), hit_count in hits.items():
        row = known.get((lat, lon))
        if row:
            # let the database do the increment
            row.value = MapStat.value + hit_count
            continue
        new_tiles += 1
        insert = MapStat.__table__.insert(
            on_duplicate='value = value + %s' % int(hit_count))
        session.execute(insert.values(
            lat=lat, lon=lon, key=MAPSTAT_TYPE['location'],
            value=hit_count))

    if userid is not None and new_tiles > 0:
        process_score(userid, new_tiles, session, key='new_location')
Пример #2
0
def process_cell_measure(session, measure_data, entries, userid=None):
    """Create cell measures for ``entries`` and update per-cell counters.

    One cell measure is created per entry. The number of measures per
    unique ``CellKey`` is passed to ``update_cell_measure_count``, and the
    sum of its return values is credited to ``userid`` (if given) under
    the ``new_cell`` key.

    Returns the list of created cell measures, which are also added to
    ``session``.
    """
    per_cell = defaultdict(int)
    results = []
    created = decode_datetime(measure_data.get('created', ''))

    for entry in entries:
        item = create_cell_measure(measure_data, entry)
        # use more specific cell type or
        # fall back to less precise measure
        entry_radio = entry.get('radio')
        if entry_radio:
            item.radio = RADIO_TYPE.get(entry_radio, -1)
        else:
            item.radio = measure_data['radio']
        results.append(item)

        # group per unique cell
        key = CellKey(item.radio, item.mcc, item.mnc, item.lac, item.cid)
        per_cell[key] += 1

    # update new/total measure counts
    discovered = sum(
        update_cell_measure_count(key, count, created, session)
        for key, count in per_cell.items())

    # update user score
    if userid is not None and discovered > 0:
        process_score(userid, discovered, session, key='new_cell')

    session.add_all(results)
    return results
Пример #3
0
def process_mapstat(measures, session, userid=None):
    """Add the given measures to the per-tile ``MapStat`` counters.

    The ``lat``/``lon`` of every measure is divided by 10000 to pick a
    tile and the hits per tile are counted. Tiles with an existing row
    get their value bumped through a SQL expression; for all other tiles
    a new ``MapStat`` row is added to ``session``.

    If ``userid`` is given, ``process_score`` is called under the
    ``new_location`` key with the number of tiles that were either new or
    had a stored value below 2.
    """
    tiles = defaultdict(int)
    # aggregate to 100x100m tiles
    for measure in measures:
        tiles[(measure.lat / 10000, measure.lon / 10000)] += 1
    # TODO: on duplicate key update
    lats = set([k[0] for k in tiles.keys()])
    lons = set([k[1] for k in tiles.keys()])
    result = session.query(MapStat).filter(
        MapStat.key == MAPSTAT_TYPE['location']).filter(
        MapStat.lat.in_(lats)).filter(
        MapStat.lon.in_(lons)).all()
    prior = {}
    for r in result:
        prior[(r.lat, r.lon)] = r
    tile_count = 0
    for (lat, lon), value in tiles.items():
        old = prior.get((lat, lon), None)
        if old:
            if old.value < 2:
                # give points for the first two tile hits
                tile_count += 1
            # let the database do the increment
            old.value = MapStat.value + value
        else:
            tile_count += 1
            # Build the row only for unknown tiles; constructing it up
            # front created a throw-away object for every known tile.
            session.add(MapStat(lat=lat, lon=lon, value=value))
    if userid is not None and tile_count > 0:
        process_score(userid, tile_count, session, key='new_location')
0
def update_cell_measure_count(measure, session, userid=None):
    """Bump the measure counters of the cell that ``measure`` belongs to.

    Nothing happens for measures with ``radio == -1``, ``lac == 0`` or
    ``cid == 0``. Otherwise an insert with an ``on_duplicate`` clause is
    executed for the cell, and if no matching ``Cell`` row was found
    beforehand and ``userid`` is given, one point is passed to
    ``process_score`` under the ``new_cell`` key.
    """
    # only update data for complete records
    incomplete = (
        measure.radio == -1 or measure.lac == 0 or measure.cid == 0)
    if incomplete:
        return

    # do we already know about this cell?
    lookup = session.query(Cell)
    lookup = lookup.filter(Cell.radio == measure.radio)
    lookup = lookup.filter(Cell.mcc == measure.mcc)
    lookup = lookup.filter(Cell.mnc == measure.mnc)
    lookup = lookup.filter(Cell.lac == measure.lac)
    lookup = lookup.filter(Cell.cid == measure.cid)
    new_cell = 1 if lookup.first() is None else 0

    insert = Cell.__table__.insert(
        on_duplicate='new_measures = new_measures + 1, '
                     'total_measures = total_measures + 1')
    session.execute(insert.values(
        created=measure.created, radio=measure.radio,
        mcc=measure.mcc, mnc=measure.mnc, lac=measure.lac, cid=measure.cid,
        new_measures=1, total_measures=1))

    # update user score
    if userid is not None and new_cell > 0:
        process_score(userid, new_cell, session, key='new_cell')
Пример #5
0
def submit_post(request):
    """Handle a submit request and store the validated items.

    Every item in ``request.validated['items']`` is run through
    ``process_time`` and ``process_measure``. If ``process_user`` yields a
    user id, that user is credited one point per item, and the resulting
    measures are forwarded to ``process_mapstat``. The session is
    committed and an ``HTTPNoContent`` response is returned.
    """
    session = request.db_master_session
    # stays empty in this function; it is still passed to add_all below
    pending = []

    userid, nickname = process_user(
        request.headers.get('X-Nickname', ''), session)

    utcnow = datetime.datetime.utcnow().replace(tzinfo=iso8601.UTC)
    utcmin = utcnow - datetime.timedelta(60)

    measures = []
    for raw in request.validated['items']:
        timed = process_time(raw, utcnow, utcmin)
        measures.append(
            process_measure(timed, utcnow, session, userid=userid))
    # one point per submitted item
    points = len(measures)

    if userid is not None:
        process_score(userid, points, session)
    if measures:
        process_mapstat(measures, session, userid=userid)

    session.add_all(pending)
    session.commit()
    return HTTPNoContent()
Пример #6
0
def update_wifi_measure_count(wifi_key, wifis, session, userid=None):
    """Bump the measure counters of the wifi identified by ``wifi_key``.

    ``wifis`` maps keys to ``Wifi`` objects and is modified in place: an
    unknown key gets a fresh ``Wifi`` that is also added to ``session``.
    For a known wifi the counters are incremented; a counter that still
    holds a plain integer is replaced by a SQL expression, one that is
    already an expression is incremented directly.

    If ``userid`` is given, one point is passed to ``process_score`` under
    the ``new_wifi`` key for an unknown wifi, or for a known one whose
    plain ``total_measures`` is below 5.
    """
    new_wifi = 0
    if wifi_key not in wifis:
        wifi = Wifi(key=wifi_key, new_measures=1, total_measures=1)
        wifis[wifi_key] = wifi
        new_wifi += 1
        session.add(wifi)
    else:
        wifi = wifis[wifi_key]

        if not isinstance(wifi.new_measures, (int, long)):
            # already a sql expression
            wifi.new_measures += 1
        else:
            wifi.new_measures = Wifi.new_measures + 1

        if not isinstance(wifi.total_measures, (int, long)):
            # already a sql expression
            wifi.total_measures += 1
        else:
            if wifi.total_measures < 5:
                # count wifis as new until they show up in the search
                new_wifi += 1
            wifi.total_measures = Wifi.total_measures + 1

    # update user score
    if userid is not None and new_wifi > 0:
        process_score(userid, new_wifi, session, key='new_wifi')
Пример #7
0
def process_measures(items, session, userid=None):
    """Process submitted items and queue the per-station insert tasks.

    One ``Measure`` row is created per item and flushed so that its id is
    available. Each item is then run through ``process_time`` and
    ``process_measure``; the resulting cell and wifi measures are grouped
    by cell key / wifi key, and one ``insert_cell_measures`` or
    ``insert_wifi_measures`` task is queued per group.

    If ``userid`` is given, the user is credited one point per item. The
    item positions are forwarded to ``process_mapstat``.
    """
    utcnow = datetime.datetime.utcnow().replace(tzinfo=iso8601.UTC)
    utcmin = utcnow - datetime.timedelta(60)

    # get enough auto-increment ids assigned
    measures = []
    for _ in range(len(items)):
        placeholder = Measure()
        measures.append(placeholder)
        session.add(placeholder)
    # TODO switch unique measure id to a uuid, so we don't have to
    # get these from a savepoint here
    session.flush()

    positions = []
    cell_measures = []
    wifi_measures = []
    for placeholder, raw in zip(measures, items):
        item = process_time(raw, utcnow, utcmin)
        cell, wifi = process_measure(placeholder.id, item, session)
        cell_measures += cell
        wifi_measures += wifi
        positions.append(dict(
            lat=to_precise_int(item['lat']),
            lon=to_precise_int(item['lon'])))

    heka_client = get_heka_client()

    if cell_measures:
        # group by and create task per cell key
        heka_client.incr("items.uploaded.cell_measures",
                         len(cell_measures))
        by_cell = defaultdict(list)
        for m in cell_measures:
            key = CellKey(m['radio'], m['mcc'], m['mnc'],
                          m['lac'], m['cid'], m['psc'])
            by_cell[key].append(m)

        for group in by_cell.values():
            insert_cell_measures.delay(group, userid=userid)

    if wifi_measures:
        # group by and create task per wifi key
        heka_client.incr("items.uploaded.wifi_measures",
                         len(wifi_measures))
        by_wifi = defaultdict(list)
        for m in wifi_measures:
            by_wifi[m['key']].append(m)

        for group in by_wifi.values():
            insert_wifi_measures.delay(group, userid=userid)

    if userid is not None:
        process_score(userid, len(items), session)
    if positions:
        process_mapstat(positions, session, userid=userid)
Пример #8
0
def process_mapstat(positions, session, userid=None):
    """Update both map statistic resolutions for ``positions``.

    The value returned by ``process_mapstat_keyed`` for the ``location``
    key is credited to ``userid`` (if given and positive) under the
    ``new_location`` key. The ``location_100m`` key is updated afterwards
    without affecting the score.
    """
    # 10x10 meter tiles
    fine_key = MAPSTAT_TYPE['location']
    new_tiles = process_mapstat_keyed(1000, fine_key, positions, session)
    if userid is not None and new_tiles > 0:
        process_score(userid, new_tiles, session, key='new_location')

    # 100x100 m tiles
    coarse_key = MAPSTAT_TYPE['location_100m']
    process_mapstat_keyed(10000, coarse_key, positions, session)
Пример #9
0
def process_mapstat(positions, session, userid=None):
    """Record ``positions`` in the map statistics at two tile sizes.

    The result of the ``location`` update is passed to ``process_score``
    under the ``new_location`` key when ``userid`` is given and the result
    is positive. The ``location_100m`` update does not feed the score.
    """
    # 10x10 meter tiles
    discovered = process_mapstat_keyed(
        1000, MAPSTAT_TYPE['location'], positions, session)
    has_user = userid is not None
    if has_user and discovered > 0:
        process_score(userid, discovered, session, key='new_location')

    # 100x100 m tiles
    process_mapstat_keyed(
        10000, MAPSTAT_TYPE['location_100m'], positions, session)
Пример #10
0
def process_measures(items, session, userid=None):
    """Process submitted items and queue the per-station insert tasks.

    One ``Measure`` row is created per item and flushed so that its id is
    available. Each item is then run through ``process_time`` and
    ``process_measure``; the resulting cell measures are grouped by
    ``to_cellkey_psc`` and the wifi measures by their ``key``, and one
    insert task is queued per group.

    If ``userid`` is given, the user is credited one point per item. The
    item positions are forwarded to ``process_mapstat``.
    """
    utcnow = datetime.datetime.utcnow().replace(tzinfo=iso8601.UTC)
    utcmin = utcnow - datetime.timedelta(60)

    # get enough auto-increment ids assigned
    measures = []
    for _ in range(len(items)):
        placeholder = Measure()
        measures.append(placeholder)
        session.add(placeholder)
    # TODO switch unique measure id to a uuid, so we don't have to
    # get these from a savepoint here
    session.flush()

    positions = []
    cell_measures = []
    wifi_measures = []
    for placeholder, raw in zip(measures, items):
        item = process_time(raw, utcnow, utcmin)
        cell, wifi = process_measure(placeholder.id, item, session)
        cell_measures += cell
        wifi_measures += wifi
        positions.append(dict(
            lat=to_precise_int(item['lat']),
            lon=to_precise_int(item['lon'])))

    heka_client = get_heka_client()

    if cell_measures:
        # group by and create task per cell key
        heka_client.incr("items.uploaded.cell_measures",
                         len(cell_measures))
        by_cell = defaultdict(list)
        for m in cell_measures:
            by_cell[to_cellkey_psc(m)].append(m)

        for group in by_cell.values():
            insert_cell_measures.delay(group, userid=userid)

    if wifi_measures:
        # group by and create task per wifi key
        heka_client.incr("items.uploaded.wifi_measures",
                         len(wifi_measures))
        by_wifi = defaultdict(list)
        for m in wifi_measures:
            by_wifi[m['key']].append(m)

        for group in by_wifi.values():
            insert_wifi_measures.delay(group, userid=userid)

    if userid is not None:
        process_score(userid, len(items), session)
    if positions:
        process_mapstat(positions, session, userid=userid)
Пример #11
0
def update_wifi_measure_count(wifi_key, wifis, created, session, userid=None):
    """Bump the measure counters of the wifi identified by ``wifi_key``.

    ``wifis`` is a mapping of already seen keys and is modified in place:
    an unseen key is recorded with the value ``True``. An insert with an
    ``on_duplicate`` clause is executed for the wifi in every case.

    If the key was unseen and ``userid`` is given, one point is passed to
    ``process_score`` under the ``new_wifi`` key.
    """
    first_sighting = wifi_key not in wifis
    if first_sighting:
        wifis[wifi_key] = True
    new_wifi = 1 if first_sighting else 0

    insert = Wifi.__table__.insert(
        on_duplicate='new_measures = new_measures + 1, '
                     'total_measures = total_measures + 1')
    session.execute(insert.values(
        key=wifi_key, created=created,
        new_measures=1, total_measures=1))

    # update user score
    if userid is not None and new_wifi > 0:
        process_score(userid, new_wifi, session, key='new_wifi')
Пример #12
0
def process_wifi_measure(session, measure_data, entries, userid=None):
    """Create wifi measures for ``entries`` and update per-wifi counters.

    A wifi measure is created for every entry. Keys found in
    ``WifiBlacklist`` are left out of the counter updates and of the user
    score. If ``userid`` is given, the number of non-blacklisted keys
    without an existing ``Wifi`` row is passed to ``process_score`` under
    the ``new_wifi`` key.

    Returns the list of created wifi measures, which are also added to
    ``session``.
    """
    results = []
    per_wifi = defaultdict(int)
    wifi_keys = set([e['key'] for e in entries])
    created = decode_datetime(measure_data.get('created', ''))

    # did we get measures for blacklisted wifis?
    blacklist_rows = session.query(WifiBlacklist.key).filter(
        WifiBlacklist.key.in_(wifi_keys)).all()
    blacked = set([row[0] for row in blacklist_rows])

    # process entries
    for entry in entries:
        key = entry['key']
        # convert frequency into channel numbers and remove frequency
        convert_frequency(entry)
        results.append(create_wifi_measure(measure_data, created, entry))
        if key in blacked:
            # skip blacklisted wifi AP's
            continue
        per_wifi[key] += 1

    # update user score
    if userid is not None:
        # do we already know about any wifis?
        white_keys = wifi_keys - blacked
        if white_keys:
            known_rows = session.query(Wifi.key).filter(
                Wifi.key.in_(white_keys)).all()
            known = dict([(row[0], True) for row in known_rows])
        else:
            known = {}
        # subtract known wifis from all unique wifis
        new_wifis = len(per_wifi) - len(known)
        if new_wifis > 0:
            process_score(userid, new_wifis, session, key='new_wifi')

    # update new/total measure counts
    for key, num in per_wifi.items():
        insert = Wifi.__table__.insert(
            on_duplicate='new_measures = new_measures + %s, '
                         'total_measures = total_measures + %s' % (num, num)
        )
        session.execute(insert.values(
            key=key, created=created,
            new_measures=num, total_measures=num))

    session.add_all(results)
    return results
Пример #13
0
def update_cell_measure_count(measure, session, userid=None):
    """Bump the measure counters of the cell that ``measure`` belongs to.

    Nothing happens for measures with ``radio == -1``, ``lac == 0`` or
    ``cid == 0``. A matching ``Cell`` row gets both counters incremented;
    if none is found, a new ``Cell`` with both counters set to 1 is added
    to ``session``.

    If ``userid`` is given, one point is passed to ``process_score`` under
    the ``new_cell`` key for a new cell, or for an existing one whose
    ``total_measures`` is still below 5.
    """
    if (measure.radio == -1 or measure.lac == 0 or measure.cid == 0):
        # only update data for complete records
        return

    # do we already know about these cells?
    query = session.query(Cell).filter(
        Cell.radio == measure.radio).filter(
        Cell.mcc == measure.mcc).filter(
        Cell.mnc == measure.mnc).filter(
        Cell.lac == measure.lac).filter(
        Cell.cid == measure.cid
    )
    cell = query.first()
    new_cell = 0
    if cell:
        # A counter holds either a plain integer or a SQL expression
        # assigned by an earlier call; the two cases are handled apart.
        if isinstance(cell.new_measures, (int, long)):
            # switch to a SQL expression so the database increments
            cell.new_measures = Cell.new_measures + 1
        else:
            # already a sql expression
            cell.new_measures += 1
        if isinstance(cell.total_measures, (int, long)):
            if cell.total_measures < 5:
                # count cells as new until they show up in the search
                new_cell += 1
            cell.total_measures = Cell.total_measures + 1
        else:
            # already a sql expression
            cell.total_measures += 1
            # NOTE(review): ``.right.value`` presumably reads the literal
            # operand of the pending expression, not the stored total --
            # this relies on SQLAlchemy expression internals; confirm.
            if cell.total_measures.right.value < 5:
                # count cells as new until they show up in the search
                new_cell += 1
    else:
        cell = Cell(radio=measure.radio, mcc=measure.mcc, mnc=measure.mnc,
                    lac=measure.lac, cid=measure.cid,
                    new_measures=1, total_measures=1)
        new_cell += 1
        session.add(cell)
    if userid is not None and new_cell > 0:
        # update user score
        process_score(userid, new_cell, session, key='new_cell')
Пример #14
0
def process_wifi_measures(session, entries, userid=None,
                          max_measures_per_wifi=11000):
    """Create wifi measures from ``entries`` and update per-wifi counters.

    Entries are accepted per wifi key until ``max_measures_per_wifi``
    minus the stored ``Wifi.total_measures`` is used up; the rest are
    dropped and reported as ``items.dropped.wifi_ingress_overflow``.
    Measures of blacklisted keys are still created, but they are left out
    of the counter updates and of the user score.

    If ``userid`` is given, the number of accepted, non-blacklisted keys
    without an existing ``Wifi`` row is passed to ``process_score`` under
    the ``new_wifi`` key.

    Returns the list of created wifi measures, which are also added to
    ``session``.
    """
    wifi_measures = []
    wifi_count = defaultdict(int)
    wifi_keys = set([e['key'] for e in entries])

    utcnow = datetime.datetime.utcnow().replace(tzinfo=iso8601.UTC)

    # did we get measures for blacklisted wifis?
    blacked = session.query(WifiBlacklist.key).filter(
        WifiBlacklist.key.in_(wifi_keys)).all()
    blacked = set([b[0] for b in blacked])

    space_available = {}
    dropped_overflow = 0

    # process entries
    for entry in entries:
        wifi_key = entry['key']

        # check if there's space for new measurement within per-AP maximum
        # note: old measures gradually expire, so this is an intake-rate limit
        if wifi_key not in space_available:
            query = session.query(Wifi.total_measures).filter(
                Wifi.key == wifi_key)
            curr = query.first()
            if curr is not None:
                space_available[wifi_key] = max_measures_per_wifi - curr[0]
            else:
                space_available[wifi_key] = max_measures_per_wifi

        if space_available[wifi_key] > 0:
            space_available[wifi_key] -= 1
        else:
            dropped_overflow += 1
            continue

        # convert frequency into channel numbers and remove frequency
        convert_frequency(entry)
        wifi_measures.append(create_wifi_measure(utcnow, entry))
        if wifi_key not in blacked:
            # skip blacklisted wifi AP's
            wifi_count[wifi_key] += 1

    heka_client = get_heka_client()

    if dropped_overflow != 0:
        heka_client.incr("items.dropped.wifi_ingress_overflow",
                         count=dropped_overflow)

    # update user score
    if userid is not None:
        # do we already know about any wifis?
        white_keys = wifi_keys - blacked
        if white_keys:
            wifis = session.query(Wifi.key).filter(Wifi.key.in_(white_keys))
            wifis = dict([(w[0], True) for w in wifis.all()])
        else:
            wifis = {}
        # Count the accepted keys that have no row yet. Subtracting the
        # two lengths is wrong here: a known key whose entries were all
        # dropped as overflow is in ``wifis`` but not in ``wifi_count``
        # and would cancel out a genuinely new key.
        new_wifis = sum(1 for key in wifi_count if key not in wifis)
        if new_wifis > 0:
            process_score(userid, new_wifis, session, key='new_wifi')

    # update new/total measure counts
    for wifi_key, num in wifi_count.items():
        stmt = Wifi.__table__.insert(
            on_duplicate='new_measures = new_measures + %s, '
                         'total_measures = total_measures + %s' % (num, num)
        ).values(
            key=wifi_key, created=utcnow,
            new_measures=num, total_measures=num)
        session.execute(stmt)

    heka_client.incr("items.inserted.wifi_measures",
                     count=len(wifi_measures))
    session.add_all(wifi_measures)
    return wifi_measures
Пример #15
0
def process_cell_measures(session, entries, userid=None,
                          max_measures_per_cell=11000):
    """Create cell measures from ``entries`` and update per-cell counters.

    Entries for which ``create_cell_measure`` returns nothing are counted
    as malformed. The remaining ones are accepted per cell key until
    ``max_measures_per_cell`` minus the stored ``Cell.total_measures`` is
    used up; the rest are counted as overflow. Both drop counters and the
    number of inserted measures are reported to the heka client.

    The sum of the ``update_cell_measure_count`` results is credited to
    ``userid`` (if given) under the ``new_cell`` key.

    Returns the list of accepted cell measures, which are also added to
    ``session``.
    """
    per_cell = defaultdict(int)
    accepted = []
    utcnow = datetime.datetime.utcnow().replace(tzinfo=iso8601.UTC)

    malformed = 0
    overflow = 0
    remaining = {}

    # process entries
    for entry in entries:
        cell_measure = create_cell_measure(utcnow, entry)
        if not cell_measure:
            malformed += 1
            continue

        cell_key = to_cellkey_psc(cell_measure)

        # check if there's space for new measurement within per-cell maximum
        # note: old measures gradually expire, so this is an intake-rate limit
        if cell_key not in remaining:
            stored = session.query(Cell.total_measures).filter(
                Cell.radio == cell_key.radio,
                Cell.mcc == cell_key.mcc,
                Cell.mnc == cell_key.mnc,
                Cell.lac == cell_key.lac,
                Cell.cid == cell_key.cid,
                Cell.psc == cell_key.psc).first()
            if stored is None:
                remaining[cell_key] = max_measures_per_cell
            else:
                remaining[cell_key] = max_measures_per_cell - stored[0]

        if remaining[cell_key] <= 0:
            overflow += 1
            continue
        remaining[cell_key] -= 1

        # Possibly drop measure if we're receiving them too
        # quickly for this cell.
        # NOTE(review): this looks like a second version of the space
        # check above, at the cost of one more query per entry -- confirm
        # whether it is still needed.
        current = session.query(Cell.total_measures).filter(
            Cell.radio == cell_measure.radio,
            Cell.mcc == cell_measure.mcc,
            Cell.mnc == cell_measure.mnc,
            Cell.lac == cell_measure.lac,
            Cell.cid == cell_measure.cid,
            Cell.psc == cell_measure.psc).first()
        if current is not None and current[0] > max_measures_per_cell:
            overflow += 1
            continue

        accepted.append(cell_measure)
        # group per unique cell
        per_cell[cell_key] += 1

    heka_client = get_heka_client()

    if malformed != 0:
        heka_client.incr("items.dropped.cell_ingress_malformed",
                         count=malformed)

    if overflow != 0:
        heka_client.incr("items.dropped.cell_ingress_overflow",
                         count=overflow)

    # update new/total measure counts
    new_cells = sum(
        update_cell_measure_count(key, count, utcnow, session)
        for key, count in per_cell.items())

    # update user score
    if userid is not None and new_cells > 0:
        process_score(userid, new_cells, session, key='new_cell')

    heka_client.incr("items.inserted.cell_measures",
                     count=len(accepted))
    session.add_all(accepted)
    return accepted
Пример #16
0
def process_wifi_measures(session, entries, userid=None,
                          max_measures_per_wifi=11000):
    """Create wifi measures from ``entries`` and update per-wifi counters.

    Entries are accepted per wifi key until ``max_measures_per_wifi``
    minus the stored ``Wifi.total_measures`` is used up; the rest are
    dropped and reported as ``items.dropped.wifi_ingress_overflow``.
    Measures of blacklisted keys are still created, but they are left out
    of the counter updates and of the user score.

    If ``userid`` is given, the number of accepted, non-blacklisted keys
    without an existing ``Wifi`` row is passed to ``process_score`` under
    the ``new_wifi`` key.

    Returns the list of created wifi measures, which are also added to
    ``session``.
    """
    wifi_measures = []
    wifi_count = defaultdict(int)
    wifi_keys = set([e['key'] for e in entries])

    utcnow = datetime.datetime.utcnow().replace(tzinfo=iso8601.UTC)

    # did we get measures for blacklisted wifis?
    blacked = session.query(WifiBlacklist.key).filter(
        WifiBlacklist.key.in_(wifi_keys)).all()
    blacked = set([b[0] for b in blacked])

    space_available = {}
    dropped_overflow = 0

    # process entries
    for entry in entries:
        wifi_key = entry['key']

        # check if there's space for new measurement within per-AP maximum
        # note: old measures gradually expire, so this is an intake-rate limit
        if wifi_key not in space_available:
            query = session.query(Wifi.total_measures).filter(
                Wifi.key == wifi_key)
            curr = query.first()
            if curr is not None:
                space_available[wifi_key] = max_measures_per_wifi - curr[0]
            else:
                space_available[wifi_key] = max_measures_per_wifi

        if space_available[wifi_key] > 0:
            space_available[wifi_key] -= 1
        else:
            dropped_overflow += 1
            continue

        # convert frequency into channel numbers and remove frequency
        convert_frequency(entry)
        wifi_measures.append(create_wifi_measure(utcnow, entry))
        if wifi_key not in blacked:
            # skip blacklisted wifi AP's
            wifi_count[wifi_key] += 1

    heka_client = get_heka_client()

    if dropped_overflow != 0:
        heka_client.incr("items.dropped.wifi_ingress_overflow",
                         count=dropped_overflow)

    # update user score
    if userid is not None:
        # do we already know about any wifis?
        white_keys = wifi_keys - blacked
        if white_keys:
            wifis = session.query(Wifi.key).filter(Wifi.key.in_(white_keys))
            wifis = dict([(w[0], True) for w in wifis.all()])
        else:
            wifis = {}
        # Count the accepted keys that have no row yet. Subtracting the
        # two lengths is wrong here: a known key whose entries were all
        # dropped as overflow is in ``wifis`` but not in ``wifi_count``
        # and would cancel out a genuinely new key.
        new_wifis = sum(1 for key in wifi_count if key not in wifis)
        if new_wifis > 0:
            process_score(userid, new_wifis, session, key='new_wifi')

    # update new/total measure counts
    for wifi_key, num in wifi_count.items():
        stmt = Wifi.__table__.insert(
            on_duplicate='new_measures = new_measures + %s, '
                         'total_measures = total_measures + %s' % (num, num)
        ).values(
            key=wifi_key, created=utcnow,
            new_measures=num, total_measures=num)
        session.execute(stmt)

    heka_client.incr("items.inserted.wifi_measures",
                     count=len(wifi_measures))
    session.add_all(wifi_measures)
    return wifi_measures
Пример #17
0
def process_station_measures(session, entries, station_type,
                             station_model, measure_model, blacklist_model,
                             create_measure, create_key, join_key,
                             userid=None, max_measures_per_station=11000,
                             utcnow=None):
    """Create measures from ``entries`` and update the matching stations.

    Measures are built with ``create_measure`` and grouped by
    ``create_key``. Per station, measures are accepted until the space
    reported by ``available_station_space`` is used up. A station with no
    reported space is treated as new: its measures are dropped if it is
    blacklisted, and it counts as a discovery unless ``incomplete_measure``
    flags its key. Stations are created or updated only for complete keys
    with at least one accepted measure.

    Drop and insert counters are reported to the stats client, and new
    stations are credited to ``userid`` (if given) under the key
    ``'new_' + station_type``. ``utcnow`` defaults to ``util.utcnow()``
    and is decoded first when passed as a string.

    Returns the list of accepted measures, which are also added to
    ``session``.
    """
    all_measures = []
    dropped_blacklisted = 0
    dropped_malformed = 0
    dropped_overflow = 0
    stats_client = get_stats_client()
    new_stations = 0

    if utcnow is None:
        utcnow = util.utcnow()
    elif isinstance(utcnow, basestring):
        utcnow = decode_datetime(utcnow)

    # Process entries and group by validated station key
    station_measures = defaultdict(list)
    for entry in entries:
        measure = create_measure(utcnow, entry)
        if measure:
            station_measures[create_key(measure)].append(measure)
        else:
            dropped_malformed += 1

    # Process measures one station at a time
    for key, measures in station_measures.items():
        incomplete = False

        # Figure out how much space is left for this station.
        free = available_station_space(session, key, station_model,
                                       join_key, max_measures_per_station)
        if free is None:
            # No space information means the station is new.
            free = max_measures_per_station

            # Drop measures for blacklisted stations.
            if blacklisted_station(session, key, blacklist_model,
                                   join_key, utcnow):
                dropped_blacklisted += len(measures)
                continue

            incomplete = incomplete_measure(key)
            if not incomplete:
                # We discovered an actual new complete station.
                new_stations += 1

        # Accept measures up to input-throttling limit, then drop.
        num = 0
        for measure in measures:
            if free > 0:
                all_measures.append(measure)
                free -= 1
                num += 1
            else:
                dropped_overflow += 1

        # Accept incomplete measures, just don't make stations for them.
        # (station creation is a side effect of count-updating)
        if num > 0 and not incomplete:
            create_or_update_station(session, key, station_model,
                                     join_key, utcnow, num)

    # Credit the user with discovering any new stations.
    if userid is not None and new_stations > 0:
        process_score(userid, new_stations, session,
                      key='new_' + station_type)

    # Report each non-zero drop counter under its own metric name.
    drop_metrics = (
        ("items.dropped.%s_ingress_blacklisted", dropped_blacklisted),
        ("items.dropped.%s_ingress_malformed", dropped_malformed),
        ("items.dropped.%s_ingress_overflow", dropped_overflow),
    )
    for template, dropped in drop_metrics:
        if dropped != 0:
            stats_client.incr(template % station_type, count=dropped)

    stats_client.incr(
        "items.inserted.%s_measures" % station_type,
        count=len(all_measures))

    session.add_all(all_measures)
    return all_measures
Пример #18
0
def process_mapstat(measures, session, userid=None):
    """Record ``measures`` in the ``location`` map statistics.

    The value returned by ``process_mapstat_keyed`` is credited to
    ``userid`` (if given and positive) under the ``new_location`` key.
    """
    # 10x10 meter tiles
    location_key = MAPSTAT_TYPE["location"]
    new_tiles = process_mapstat_keyed(1000, location_key, measures, session)
    has_user = userid is not None
    if has_user and new_tiles > 0:
        process_score(userid, new_tiles, session, key="new_location")
Пример #19
0
def process_cell_measures(session, entries, userid=None,
                          max_measures_per_cell=11000):
    """Create cell measures from ``entries`` and update per-cell counters.

    Entries for which ``create_cell_measure`` returns nothing are counted
    as malformed. The remaining ones are accepted per cell key until
    ``max_measures_per_cell`` minus the stored ``Cell.total_measures`` is
    used up; the rest are counted as overflow. Both drop counters and the
    number of inserted measures are reported to the heka client.

    The sum of the ``update_cell_measure_count`` results is credited to
    ``userid`` (if given) under the ``new_cell`` key.

    Returns the list of accepted cell measures, which are also added to
    ``session``.
    """
    per_cell = defaultdict(int)
    accepted = []
    utcnow = datetime.datetime.utcnow().replace(tzinfo=iso8601.UTC)

    malformed = 0
    overflow = 0
    remaining = {}

    # process entries
    for entry in entries:
        cell_measure = create_cell_measure(utcnow, entry)
        if not cell_measure:
            malformed += 1
            continue

        cell_key = to_cellkey_psc(cell_measure)

        # check if there's space for new measurement within per-cell maximum
        # note: old measures gradually expire, so this is an intake-rate limit
        if cell_key not in remaining:
            stored = session.query(Cell.total_measures).filter(
                *join_cellkey(Cell, cell_key)).first()
            if stored is None:
                remaining[cell_key] = max_measures_per_cell
            else:
                remaining[cell_key] = max_measures_per_cell - stored[0]

        if remaining[cell_key] <= 0:
            overflow += 1
            continue
        remaining[cell_key] -= 1

        # Possibly drop measure if we're receiving them too
        # quickly for this cell.
        # NOTE(review): this looks like a second version of the space
        # check above, at the cost of one more query per entry -- confirm
        # whether it is still needed.
        current = session.query(Cell.total_measures).filter(
            *join_cellkey(Cell, cell_key)).first()
        if current is not None and current[0] > max_measures_per_cell:
            overflow += 1
            continue

        accepted.append(cell_measure)
        # group per unique cell
        per_cell[cell_key] += 1

    heka_client = get_heka_client()

    if malformed != 0:
        heka_client.incr("items.dropped.cell_ingress_malformed",
                         count=malformed)

    if overflow != 0:
        heka_client.incr("items.dropped.cell_ingress_overflow",
                         count=overflow)

    # update new/total measure counts
    new_cells = sum(
        update_cell_measure_count(key, count, utcnow, session)
        for key, count in per_cell.items())

    # update user score
    if userid is not None and new_cells > 0:
        process_score(userid, new_cells, session, key='new_cell')

    heka_client.incr("items.inserted.cell_measures",
                     count=len(accepted))
    session.add_all(accepted)
    return accepted
Пример #20
0
def process_measures(items, session, userid=None):
    """Process submitted items and queue the per-station insert tasks.

    Each item is run through ``process_time`` and ``process_measure``
    under a fresh report id. Items that yield at least one cell or wifi
    measure contribute a position. Cell measures are queued as one task
    per cell key, wifi measures as one task per batch of five wifi keys;
    all tasks expire after 7200 seconds.

    If ``userid`` is given, the user is credited one point per position.
    The positions are forwarded to ``process_mapstat``.
    """
    stats_client = get_stats_client()
    utcnow = util.utcnow()
    utcmin = utcnow - datetime.timedelta(60)

    positions = []
    cell_measures = []
    wifi_measures = []
    for raw in items:
        item = process_time(raw, utcnow, utcmin)
        report_id = uuid.uuid1().hex
        cell, wifi = process_measure(report_id, item, session)
        cell_measures += cell
        wifi_measures += wifi
        if cell or wifi:
            positions.append(dict(lat=item['lat'], lon=item['lon']))

    if cell_measures:
        # group by and create task per cell key
        stats_client.incr("items.uploaded.cell_measures",
                          len(cell_measures))
        by_cell = defaultdict(list)
        for m in cell_measures:
            by_cell[to_cellkey_psc(m)].append(m)

        for group in by_cell.values():
            # insert measures, expire the task if it wasn't processed
            # after two hours to avoid queue overload
            insert_cell_measures.apply_async(
                args=[group],
                kwargs={'userid': userid},
                expires=7200)

    if wifi_measures:
        # group by WiFi key
        stats_client.incr("items.uploaded.wifi_measures",
                          len(wifi_measures))
        by_wifi = defaultdict(list)
        for m in wifi_measures:
            by_wifi[m['key']].append(m)

        # Create a task per group of 5 WiFi keys at a time.
        # We tend to get a huge number of unique WiFi networks per
        # batch upload, with one to very few measures per WiFi.
        # Grouping them helps in avoiding per-task overhead.
        groups = list(by_wifi.values())
        batch_size = 5
        for start in range(0, len(groups), batch_size):
            batch = [m
                     for group in groups[start:start + batch_size]
                     for m in group]
            # insert measures, expire the task if it wasn't processed
            # after two hours to avoid queue overload
            insert_wifi_measures.apply_async(
                args=[batch],
                kwargs={'userid': userid},
                expires=7200)

    if userid is not None:
        process_score(userid, len(positions), session)
    if positions:
        process_mapstat(session, utcnow, positions)