Example #1
def task_expire_stale_prefs():
    """
    Every day, clear out preferences objects that haven't been touched in more than 30 days, to reduce abuse risk and the space consumed
    """
    min_last_updated = calendar.timegm((datetime.datetime.utcnow() - datetime.timedelta(days=30)).timetuple())

    num_stale_records = config.mongo_db.preferences.find({'last_touched': {'$lt': min_last_updated}}).count()
    config.mongo_db.preferences.remove({'last_touched': {'$lt': min_last_updated}})
    if num_stale_records:
        logger.warn("REMOVED %i stale preferences objects" % num_stale_records)

    start_task(task_expire_stale_prefs, delay=86400)  # call again in 1 day
Example #2
def start_api():
    logger.info("Starting up RPC API handler...")
    group = start_task(api.serve_api)
    group.join()  # block forever
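
All of these examples lean on a start_task() helper that runs a callable on a background greenlet or thread, optionally after a delay in seconds, and returns a handle that can be join()-ed (which is how Example #2 blocks forever). Below is a minimal sketch of such a helper, assuming gevent; it illustrates the pattern and is not the project's actual implementation.

import gevent

def start_task(func, *args, delay=None, **kwargs):
    # Hypothetical sketch: run func on a greenlet, optionally after `delay` seconds.
    # The returned Greenlet supports .join(), matching the blocking call in Example #2.
    if delay:
        return gevent.spawn_later(delay, func, *args, **kwargs)
    return gevent.spawn(func, *args, **kwargs)

Periodic jobs then reschedule themselves by calling start_task() on themselves with a delay at the end of each run, which is exactly what Examples #1, #5, #7, #9 and #11 do.
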
Example #3
def start_cp_blockfeed():
    logger.info("Starting up aspire block feed poller...")
    start_task(blockfeed.process_cp_blockfeed)
Example #4
def start_tasks():
    start_task(task_compile_asset_pair_market_info)
    start_task(task_compile_asset_market_info)
Example #5
def task_compile_asset_market_info():
    assets_trading.compile_asset_market_info()
    # all done for this run...call again in a bit
    start_task(task_compile_asset_market_info,
               delay=COMPILE_ASSET_MARKET_INFO_PERIOD)
Example #6
def start_tasks():
    start_task(task_expire_stale_prefs)
    start_task(task_generate_wallet_stats)
Example #7
def task_generate_wallet_stats():
    """
    Every 30 minutes, update and generate wallet stats from the login history
    """
    def gen_stats_for_network(network):
        assert network in ('mainnet', 'testnet')
        # get the latest date in the stats table present
        now = datetime.datetime.utcnow()
        latest_stat = config.mongo_db.wallet_stats.find({'network': network}).sort('when', pymongo.DESCENDING).limit(1)
        latest_stat = latest_stat[0] if latest_stat.count() else None
        new_entries = {}

        # the queries below work with data that happened on or after the date of the latest stat present
        # aggregate over the same period for new logins, adding the referrers to a set
        match_criteria = {'when': {"$gte": latest_stat['when']}, 'network': network, 'action': 'create'} \
            if latest_stat else {'when': {"$lte": now}, 'network': network, 'action': 'create'}
        new_wallets = config.mongo_db.login_history.aggregate([
            {"$match": match_criteria},
            {"$project": {
                "year": {"$year": "$when"},
                "month": {"$month": "$when"},
                "day": {"$dayOfMonth": "$when"}
            }},
            {"$group": {
                "_id": {"year": "$year", "month": "$month", "day": "$day"},
                "new_count": {"$sum": 1}
            }}
        ])
        for e in new_wallets:
            ts = calendar.timegm(datetime.datetime(e['_id']['year'], e['_id']['month'], e['_id']['day']).timetuple())
            new_entries[ts] = {  # a future wallet_stats entry
                'when': datetime.datetime(e['_id']['year'], e['_id']['month'], e['_id']['day']),
                'network': network,
                'new_count': e['new_count'],
            }

        referer_counts = config.mongo_db.login_history.aggregate([
            {"$match": match_criteria},
            {"$project": {
                "year": {"$year": "$when"},
                "month": {"$month": "$when"},
                "day": {"$dayOfMonth": "$when"},
                "referer": 1
            }},
            {"$group": {
                "_id": {"year": "$year", "month": "$month", "day": "$day", "referer": "$referer"},
                #"uniqueReferers": {"$addToSet": "$_id"},
                "count": {"$sum": 1}
            }}
        ])
        for e in referer_counts:
            ts = calendar.timegm(datetime.datetime(e['_id']['year'], e['_id']['month'], e['_id']['day']).timetuple())
            assert ts in new_entries
            if e['_id']['referer'] is None:
                continue
            referer_key = urllib.parse.quote(e['_id']['referer']).replace('.', '%2E')
            if 'referers' not in new_entries[ts]:
                new_entries[ts]['referers'] = {}
            if referer_key not in new_entries[ts]['referers']:
                new_entries[ts]['referers'][referer_key] = 0
            new_entries[ts]['referers'][referer_key] += 1

        # logins (not new wallets) - generate stats
        match_criteria = {'when': {"$gte": latest_stat['when']}, 'network': network, 'action': 'login'} \
            if latest_stat else {'when': {"$lte": now}, 'network': network, 'action': 'login'}
        logins = config.mongo_db.login_history.aggregate([
            {"$match": match_criteria},
            {"$project": {
                "year": {"$year": "$when"},
                "month": {"$month": "$when"},
                "day": {"$dayOfMonth": "$when"},
                "wallet_id": 1
            }},
            {"$group": {
                "_id": {"year": "$year", "month": "$month", "day": "$day"},
                "login_count": {"$sum": 1},
                "distinct_wallets": {"$addToSet": "$wallet_id"},
            }}
        ])
        for e in logins:
            ts = calendar.timegm(datetime.datetime(e['_id']['year'], e['_id']['month'], e['_id']['day']).timetuple())
            if ts not in new_entries:
                new_entries[ts] = {  # a future wallet_stats entry
                    'when': datetime.datetime(e['_id']['year'], e['_id']['month'], e['_id']['day']),
                    'network': network,
                    'new_count': 0,
                    'referers': {}
                }
            new_entries[ts]['login_count'] = e['login_count']
            new_entries[ts]['distinct_login_count'] = len(e['distinct_wallets'])

        # add/replace the wallet_stats data
        if latest_stat:
            updated_entry_ts = calendar.timegm(datetime.datetime(
                latest_stat['when'].year, latest_stat['when'].month, latest_stat['when'].day).timetuple())
            if updated_entry_ts in new_entries:
                updated_entry = new_entries[updated_entry_ts]
                del new_entries[updated_entry_ts]
                assert updated_entry['when'] == latest_stat['when']
                del updated_entry['when']  # not required for the upsert
                logger.info(
                    "Revised wallet statistics for partial day %s-%s-%s: %s"
                    % (latest_stat['when'].year, latest_stat['when'].month, latest_stat['when'].day, updated_entry))
                config.mongo_db.wallet_stats.update(
                    {'when': latest_stat['when']},
                    {"$set": updated_entry}, upsert=True)

        if new_entries:  # insert the rest
            #logger.info("Stats, new entries: %s" % new_entries.values())
            config.mongo_db.wallet_stats.insert(list(new_entries.values()))
            logger.info("Added wallet statistics for %i full days" % len(list(new_entries.values())))

    gen_stats_for_network('mainnet')
    gen_stats_for_network('testnet')
    start_task(task_generate_wallet_stats, delay=30 * 60)  # call again in 30 minutes
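
The aggregation pipelines in Example #7 all share one shape: $match on network, action and date range, $project the year/month/day parts of the when timestamp, then $group into per-day buckets. A condensed, standalone sketch of that day-bucketing pattern follows (illustrative only; the collection and field names are copied from the example, and the date operators are folded straight into the $group _id, which is equivalent to the $project/$group pair above).

def daily_login_counts(db, network, since):
    # Group login_history documents into one bucket per calendar day,
    # counting logins and collecting the distinct wallet ids seen that day.
    pipeline = [
        {"$match": {"network": network, "action": "login", "when": {"$gte": since}}},
        {"$group": {
            "_id": {
                "year": {"$year": "$when"},
                "month": {"$month": "$when"},
                "day": {"$dayOfMonth": "$when"},
            },
            "login_count": {"$sum": 1},
            "distinct_wallets": {"$addToSet": "$wallet_id"},
        }},
    ]
    return list(db.login_history.aggregate(pipeline))
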
Example #8
def start_tasks():
    start_task(task_compile_extended_feed_info)
Example #9
def task_compile_extended_feed_info():
    feeds = list(config.mongo_db.feeds.find({'info_status': 'needfetch'}))
    feed_info_urls = []

    def inc_fetch_retry(feed,
                        max_retry=FEED_MAX_RETRY,
                        new_status='error',
                        errors=None):
        # avoid a mutable default argument; a missing error list means no errors
        feed['fetch_info_retry'] += 1
        feed['errors'] = errors or []
        if feed['fetch_info_retry'] == max_retry:
            feed['info_status'] = new_status
        config.mongo_db.feeds.save(feed)

    def process_feed_info(feed, info_data):
        # sanity check
        assert feed['info_status'] == 'needfetch'
        assert 'info_url' in feed
        assert util.is_valid_url(
            feed['info_url'],
            allow_no_protocol=True)  # already validated in the fetch

        errors = util.is_valid_json(info_data, config.FEED_SCHEMA)

        if not isinstance(info_data, dict) or 'address' not in info_data:
            errors.append('Invalid data format')
        elif feed['source'] != info_data['address']:
            errors.append('Invalid address')

        if len(errors) > 0:
            inc_fetch_retry(feed, new_status='invalid', errors=errors)
            return (False, errors)

        feed['info_status'] = 'valid'

        # fetch any associated images...
        # TODO: parallelize this 2nd level feed image fetching (e.g. just compose a list here and process it later on)
        if 'image' in info_data:
            info_data['valid_image'] = util.fetch_image(
                info_data['image'],
                config.SUBDIR_FEED_IMAGES,
                feed['source'] + '_topic',
                fetch_timeout=5)
        if 'operator' in info_data and 'image' in info_data['operator']:
            info_data['operator']['valid_image'] = util.fetch_image(
                info_data['operator']['image'],
                config.SUBDIR_FEED_IMAGES,
                feed['source'] + '_owner',
                fetch_timeout=5)
        if 'targets' in info_data:
            for i in range(len(info_data['targets'])):
                if 'image' in info_data['targets'][i]:
                    image_name = feed['source'] + '_tv_' + str(
                        info_data['targets'][i]['value'])
                    info_data['targets'][i]['valid_image'] = util.fetch_image(
                        info_data['targets'][i]['image'],
                        config.SUBDIR_FEED_IMAGES,
                        image_name,
                        fetch_timeout=5)

        feed['info_data'] = sanitize_json_data(info_data)
        config.mongo_db.feeds.save(feed)
        return (True, None)

    def feed_fetch_complete_hook(urls_data):
        logger.info(
            "Enhanced feed info fetching complete. %s unique URLs fetched. Processing..."
            % len(urls_data))
        feeds = config.mongo_db.feeds.find({'info_status': 'needfetch'})
        for feed in feeds:
            # logger.debug("Looking at feed %s: %s" % (feed, feed['info_url']))
            if feed['info_url']:
                info_url = ('http://' + feed['info_url']) \
                    if not feed['info_url'].startswith('http://') and not feed['info_url'].startswith('https://') else feed['info_url']
                if info_url not in urls_data:
                    logger.warn(
                        "URL %s not properly fetched (not one of %i entries in urls_data), skipping..."
                        % (info_url, len(urls_data)))
                    continue
                assert info_url in urls_data
                if not urls_data[info_url][0]:  # request was not successful
                    inc_fetch_retry(feed,
                                    max_retry=FEED_MAX_RETRY,
                                    errors=[urls_data[info_url][1]])
                    logger.warn(
                        "Fetch for feed at %s not successful: %s (try %i of %i)"
                        % (info_url, urls_data[info_url][1],
                           feed['fetch_info_retry'], FEED_MAX_RETRY))
                else:
                    result = process_feed_info(feed, urls_data[info_url][1])
                    if not result[0]:
                        logger.info(
                            "Processing for feed at %s not successful: %s" %
                            (info_url, result[1]))
                    else:
                        logger.info("Processing for feed at %s successful" %
                                    info_url)

    # compose and fetch all info URLs for the feeds that have them
    for feed in feeds:
        assert feed['info_url']
        feed_info_urls.append(
            ('http://' + feed['info_url'])
            if not feed['info_url'].startswith('http://')
            and not feed['info_url'].startswith('https://')
            else feed['info_url'])
    feed_info_urls_str = ', '.join(feed_info_urls)
    if len(feed_info_urls_str) > 2000:  # truncate if necessary
        feed_info_urls_str = feed_info_urls_str[:2000] + ' ...'
    if len(feed_info_urls):
        logger.info('Fetching enhanced feed info for %i feeds: %s' %
                    (len(feed_info_urls), feed_info_urls_str))
        util.stream_fetch(feed_info_urls,
                          feed_fetch_complete_hook,
                          fetch_timeout=10,
                          max_fetch_size=4 * 1024,
                          urls_group_size=20,
                          urls_group_time_spacing=20,
                          per_request_complete_callback=lambda url, data:
                          logger.debug("Feed at %s retrieved, result: %s" %
                                       (url, data)))

    start_task(task_compile_extended_feed_info,
               delay=60 * 5)  # call again in 5 minutes
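
util.stream_fetch() is project-internal, but the hooks above pin down its contract: the completion hook receives a dict mapping each fetched URL to a (success, payload_or_error) pair, and the payload of a successful fetch is treated as parsed JSON by process_feed_info(). A hypothetical stand-in that satisfies that contract is sketched below (not the project's implementation; it ignores the grouping, spacing and size-limit options).

import concurrent.futures
import requests

def simple_stream_fetch(urls, complete_hook, fetch_timeout=10, max_workers=10):
    # Fetch every URL and hand the completion hook a dict of
    # url -> (True, parsed JSON) on success or (False, error message) on failure.
    def fetch(url):
        try:
            r = requests.get(url, timeout=fetch_timeout)
            r.raise_for_status()
            return True, r.json()
        except Exception as e:
            return False, str(e)

    urls = list(dict.fromkeys(urls))  # de-duplicate while preserving order
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool:
        results = dict(zip(urls, pool.map(fetch, urls)))
    complete_hook(results)
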
Example #10
def start_tasks():
    start_task(task_compile_extended_asset_info)
Example #11
def task_compile_extended_asset_info():
    assets = list(
        config.mongo_db.asset_extended_info.find({'info_status': 'needfetch'}))
    asset_info_urls = []

    def asset_fetch_complete_hook(urls_data):
        logger.info(
            "Enhanced asset info fetching complete. %s unique URLs fetched. Processing..."
            % len(urls_data))
        for asset in assets:
            logger.debug("Looking at asset %s: %s" %
                         (asset, asset['info_url']))
            if asset['info_url']:
                info_url = ('http://' + asset['info_url']) \
                    if not asset['info_url'].startswith('http://') and not asset['info_url'].startswith('https://') else asset['info_url']
                assert info_url in urls_data
                if not urls_data[info_url][0]:  # request was not successful
                    inc_fetch_retry(asset,
                                    max_retry=ASSET_MAX_RETRY,
                                    errors=[urls_data[info_url][1]])
                    logger.warn(
                        "Fetch for asset at %s not successful: %s (try %i of %i)"
                        % (info_url, urls_data[info_url][1],
                           asset['fetch_info_retry'], ASSET_MAX_RETRY))
                else:
                    result = process_asset_info(asset, urls_data[info_url][1])
                    if not result[0]:
                        logger.info(
                            "Processing for asset %s at %s not successful: %s"
                            % (asset['asset'], info_url, result[1]))
                    else:
                        logger.debug(
                            "Processing for asset %s at %s successful" %
                            (asset['asset'], info_url))

    # compose and fetch all info URLs for the assets that have them
    for asset in assets:
        if not asset['info_url']:
            continue

        if asset.get('disabled', False):
            logger.info("ExtendedAssetInfo: Skipping disabled asset %s" %
                        asset['asset'])
            continue

        # the URL may or may not end with .json, and may or may not start with http:// or https://
        asset_info_urls.append(
            ('http://' + asset['info_url'])
            if not asset['info_url'].startswith('http://')
            and not asset['info_url'].startswith('https://')
            else asset['info_url'])

    asset_info_urls_str = ', '.join(asset_info_urls)
    if len(asset_info_urls_str) > 2000:  # truncate if necessary
        asset_info_urls_str = asset_info_urls_str[:2000] + ' ...'
    if len(asset_info_urls):
        logger.info('Fetching enhanced asset info for %i assets: %s' %
                    (len(asset_info_urls), asset_info_urls_str))
        util.stream_fetch(
            asset_info_urls,
            asset_fetch_complete_hook,
            fetch_timeout=10,
            max_fetch_size=4 * 1024,
            urls_group_size=20,
            urls_group_time_spacing=20,
            per_request_complete_callback=lambda url, data: logger.debug(
                "Asset info URL %s retrieved, result: %s" % (url, data)))

    start_task(task_compile_extended_asset_info,
               delay=60 * 60)  # call again in 60 minutes