Example #1
def main():
    counter = 0
    timer = statsd.timer('lisa.process_ip', rate=0.01)  # 1% sample rate

    while True:
        if KILLED:
            log.info('Shutdown successful')
            return 0

        try:
            ip_info = redis.brpop(rkeys.IPLOGS)
        except RedisError as e:
            log.error('Error with Redis: {}'.format(e))
            return 1

        # don't start the timer above the Redis call: brpop blocks while waiting for data
        timer.start()

        log.debug('Got log data: ' + ip_info[1])
        try:
            rtype, ip = ip_info[1].split(',')
        except ValueError:
            continue

        timestamp = get_epoch_minute()

        if rate_limit_ip(ip, timestamp):
            continue

        record = geo.get(ip)
        if record:
            # everything goes for total count and map
            process_map(record, timestamp)
            # only shares get more processing
            if rtype != data_types.DOWNLOAD:
                process_share(record, rtype)

        timer.stop()
        statsd.incr('lisa.process_ip', rate=0.01)  # 1% sample rate

        if args.verbose:
            sys.stdout.write('.')
            sys.stdout.flush()

        # Use a counter and an if statement here instead of the `rate`
        # param on the gauge to avoid fetching the length of the Redis
        # list on every iteration.
        counter += 1
        if counter >= 1000:
            counter = 0
            statsd.gauge('queue.geoip', redis.llen(rkeys.IPLOGS))
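
The loop above leans on a few helpers that are not shown in the listing. Below is a minimal, hypothetical sketch of what they might look like, assuming `redis` is the same module-level client the loop uses and that "rate limiting" means at most one counted event per IP per minute; the real implementations may differ.

import time

def get_epoch_minute():
    # Epoch seconds truncated to the start of the current minute.
    return int(time.time()) // 60 * 60

def rate_limit_ip(ip, timestamp):
    # Hypothetical dedupe: SETNX returns True only for the first caller
    # to create the key, so False means this IP was already seen during
    # this minute and the event should be skipped.
    key = 'ratelimit:{}:{}'.format(ip, timestamp)
    is_new = redis.setnx(key, 1)
    if is_new:
        redis.expire(key, 120)  # let stale keys clean themselves up
    return not is_new
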
Example #2
def get_data_for_timestamp(timestamp):
    """
    Return aggregate map and share data dict for a timestamp.
    """
    issue_continents = get_issue_dict()
    issue_countries = get_issue_dict()
    data = {
        'map_total': int(redis.get(rkeys.MAP_TOTAL) or 0),
        'map_previous_total': int(redis.get(rkeys.MAP_TOTAL_SNAPSHOT) or 0),
        'map_geo': [],
        'share_total': int(redis.get(rkeys.SHARE_TOTAL) or 0),
        'continent_issues': {},
        'issue_continents': issue_continents,
        'country_issues': {},
        'issue_countries': issue_countries,
    }
    statsd.gauge('milhouse.map_total', data['map_total'])
    redis.set(rkeys.MAP_TOTAL_SNAPSHOT, data['map_total'])
    map_geo_key = rkeys.MAP_GEO.format(timestamp)
    geo_data = redis.hgetall(map_geo_key)
    for latlon, count in geo_data.iteritems():
        lat, lon = latlon.split(':')
        data['map_geo'].append({
            'lat': float(lat),
            'lon': float(lon),
            'count': int(count),
        })

    # CONTINENTS #
    continent_totals = redis.hgetall(rkeys.SHARE_CONTINENTS)
    continent_issues = data['continent_issues']
    for continent, count in continent_totals.iteritems():
        count = int(count)
        issues = redis.hgetall(rkeys.SHARE_CONTINENT_ISSUES.format(continent))
        continent_issues[continent] = {}
        for issue, issue_count in issues.iteritems():
            issue_count = int(issue_count)
            issue = data_types.types_map[issue]
            percent = get_percent(issue_count, count)
            continent_issues[continent][issue] = percent
            issue_continents[issue].append({
                'continent': continent,
                'count': percent,
            })

    # COUNTRIES #
    country_totals = redis.hgetall(rkeys.SHARE_COUNTRIES)
    country_issues = data['country_issues']
    for country, count in country_totals.iteritems():
        count = int(count)
        if count < conf.COUNTRY_MIN_SHARE:
            continue
        issues = redis.hgetall(rkeys.SHARE_COUNTRY_ISSUES.format(country))
        country_issues[country] = {}
        for issue, issue_count in issues.iteritems():
            issue_count = int(issue_count)
            issue = data_types.types_map[issue]
            percent = get_percent(issue_count, count)
            country_issues[country][issue] = percent
            issue_countries[issue].append({
                'country': country,
                'count': percent,
            })

    # GLOBAL #
    share_issues = redis.hgetall(rkeys.SHARE_ISSUES)
    share_total = data['share_total']
    global_issues = country_issues['GLOBAL'] = {}
    for issue, count in share_issues.iteritems():
        count = int(count)
        issue = data_types.types_map[issue]
        global_issues[issue] = get_percent(count, share_total)

    return data
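
`get_issue_dict` and `get_percent` are also not shown. Here are hedged sketches consistent with how the aggregation code uses them (appending to per-issue lists and turning counts into share percentages); treat the exact rounding behavior as an assumption:

from collections import defaultdict

def get_issue_dict():
    # A dict that grows an empty list per issue on first access,
    # matching the issue_continents[issue].append(...) usage above.
    return defaultdict(list)

def get_percent(part, total):
    # Integer percentage of part in total, guarded against a zero
    # denominator (e.g. no shares recorded yet).
    if not total:
        return 0
    return int(round(part * 100.0 / total))
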
Example #3
def main():
    global counter
    timer = statsd.timer('lisa.process_ip', rate=0.01)  # 1% sample rate
    pipe = redis.pipeline()

    while True:
        if KILLED:
            pipe.execute()
            log.info('Shutdown successful')
            return 0

        try:
            if args.benchmark:
                ip_info = redis.rpop(rkeys.IPLOGS)
            else:
                ip_info = redis.brpop(rkeys.IPLOGS)[1]
        except RedisError as e:
            log.error('Error with Redis: {}'.format(e))
            pipe.execute()
            return 1

        if ip_info is None:
            # benchmark run is over
            pipe.execute()
            return 0

        # don't start the timer above the Redis call: brpop blocks while waiting for data
        timer.start()

        log.debug('Got log data: ' + ip_info)
        try:
            rtype, ip = ip_info.split(',')
        except ValueError:
            continue

        timestamp = get_epoch_minute()

        if rate_limit_ip(ip):
            continue

        record = geo.get(ip)
        if record:
            # everything goes for total count and map
            process_map(record, timestamp, pipe)
            # only shares get more processing
            if rtype != data_types.DOWNLOAD:
                process_share(record, rtype, pipe)

        timer.stop()
        statsd.incr('lisa.process_ip', rate=0.01)  # 1% sample rate

        if args.verbose:
            sys.stdout.write('.')
            sys.stdout.flush()

        # Use a counter and an if statement here instead of the `rate`
        # param on the gauge to avoid fetching the length of the Redis
        # list on every iteration.
        counter += 1
        if args.benchmark:
            if not counter % 1000:
                pipe.execute()
        else:
            if counter >= 1000:
                pipe.execute()
                counter = 0
                statsd.gauge('queue.geoip', redis.llen(rkeys.IPLOGS))
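
The main change from Example #1 is that `process_map` and `process_share` now receive the pipeline and queue their Redis commands on it, so round trips are batched until `pipe.execute()`. A hedged sketch of what the map half might look like, assuming `record` is a maxminddb-style dict with a `location` member (the real implementation is not shown):

def process_map(record, timestamp, pipe):
    # Queue the writes on the pipeline instead of sending them one at a
    # time; nothing hits Redis until pipe.execute() runs in the main loop.
    loc = record.get('location', {})
    latlon = '{}:{}'.format(loc.get('latitude'), loc.get('longitude'))
    pipe.incr(rkeys.MAP_TOTAL)
    pipe.hincrby(rkeys.MAP_GEO.format(timestamp), latlon, 1)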