# NOTE: these background-task fragments assume the imports below, plus a number of
# application-level names supplied by the surrounding codebase (config, util,
# blockchain, blockfeed, api, assets_trading, start_task, inc_fetch_retry,
# process_asset_info, sanitize_json_data, task_compile_asset_pair_market_info, and
# constants such as ASSET_MAX_RETRY, FEED_MAX_RETRY and COMPILE_ASSET_MARKET_INFO_PERIOD).
import calendar
import datetime
import logging
import urllib.parse

import pymongo

logger = logging.getLogger(__name__)


def check_blockchain_service():
    try:
        blockchain.check()
    except Exception as e:
        raise Exception('Could not connect to blockchain service: %s' % e)
    finally:
        start_task(check_blockchain_service, delay=60 * 5)  # call again in 5 minutes
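# The real start_task(func, delay=None) helper is defined elsewhere in the codebase.
# A minimal sketch of the scheduling pattern it is used for here (run a callable in
# the background, optionally after a delay), assuming a gevent-based event loop; this
# is an illustration only, not the project's actual implementation:
import gevent

def _start_task_sketch(func, delay=None):
    """Spawn func in a greenlet, optionally `delay` seconds from now; return the greenlet."""
    if delay:
        return gevent.spawn_later(delay, func)
    return gevent.spawn(func)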
def task_expire_stale_prefs():
    """
    Every day, clear out preferences objects that haven't been touched in > 30 days,
    in order to reduce abuse risk/space consumed
    """
    min_last_updated = calendar.timegm(
        (datetime.datetime.utcnow() - datetime.timedelta(days=30)).timetuple())

    num_stale_records = config.mongo_db.preferences.find(
        {'last_touched': {'$lt': min_last_updated}}).count()
    config.mongo_db.preferences.remove({'last_touched': {'$lt': min_last_updated}})
    if num_stale_records:
        logger.warn("REMOVED %i stale preferences objects" % num_stale_records)

    start_task(task_expire_stale_prefs, delay=86400)  # call again in 1 day
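# Why calendar.timegm() rather than time.mktime() for the cutoff above: the datetimes
# come from utcnow(), and timegm() interprets the struct_time as UTC, whereas mktime()
# interprets it as local time, skewing the cutoff by the machine's UTC offset.
# A quick illustrative check (the time.mktime() value depends on the local timezone):
import time

_utc_tuple = datetime.datetime(2015, 1, 1).timetuple()
assert calendar.timegm(_utc_tuple) == 1420070400   # tuple treated as UTC
# time.mktime(_utc_tuple) would differ from 1420070400 by the local UTC offset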
def task_compile_extended_asset_info():
    assets = list(config.mongo_db.asset_extended_info.find({'info_status': 'needfetch'}))
    asset_info_urls = []

    def asset_fetch_complete_hook(urls_data):
        logger.info("Enhanced asset info fetching complete. %s unique URLs fetched. Processing..." % len(urls_data))
        for asset in assets:
            logger.debug("Looking at asset %s: %s" % (asset, asset['info_url']))
            if asset['info_url']:
                info_url = ('http://' + asset['info_url']) \
                    if not asset['info_url'].startswith('http://') and not asset['info_url'].startswith('https://') else asset['info_url']
                assert info_url in urls_data
                if not urls_data[info_url][0]:  # request was not successful
                    inc_fetch_retry(asset, max_retry=ASSET_MAX_RETRY, errors=[urls_data[info_url][1]])
                    logger.warn("Fetch for asset at %s not successful: %s (try %i of %i)" % (
                        info_url, urls_data[info_url][1], asset['fetch_info_retry'], ASSET_MAX_RETRY))
                else:
                    result = process_asset_info(asset, urls_data[info_url][1])
                    if not result[0]:
                        logger.info("Processing for asset %s at %s not successful: %s" % (
                            asset['asset'], info_url, result[1]))
                    else:
                        logger.debug("Processing for asset %s at %s successful" % (asset['asset'], info_url))

    # compose and fetch all info URLs in all assets with them
    for asset in assets:
        if not asset['info_url']:
            continue

        if asset.get('disabled', False):
            logger.info("ExtendedAssetInfo: Skipping disabled asset %s" % asset['asset'])
            continue

        # may or may not end with .json. may or may not start with http:// or https://
        asset_info_urls.append(
            ('http://' + asset['info_url'])
            if not asset['info_url'].startswith('http://') and not asset['info_url'].startswith('https://')
            else asset['info_url'])

    asset_info_urls_str = ', '.join(asset_info_urls)
    asset_info_urls_str = (
        (asset_info_urls_str[:2000] + ' ...') if len(asset_info_urls_str) > 2000
        else asset_info_urls_str)  # truncate if necessary
    if len(asset_info_urls):
        logger.info('Fetching enhanced asset info for %i assets: %s' % (len(asset_info_urls), asset_info_urls_str))
        util.stream_fetch(
            asset_info_urls, asset_fetch_complete_hook,
            fetch_timeout=10, max_fetch_size=4 * 1024, urls_group_size=20, urls_group_time_spacing=20,
            per_request_complete_callback=lambda url, data: logger.debug(
                "Asset info URL %s retrieved, result: %s" % (url, data)))

    start_task(task_compile_extended_asset_info, delay=60 * 60)  # call again in 60 minutes
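# The conditional expression above ("prefix with http:// unless a scheme is already
# present") recurs throughout these fetch tasks. A small hypothetical helper that
# expresses the same logic, shown only to make the intent explicit:
def _normalize_info_url(info_url):
    """Return info_url unchanged if it has a scheme, else prefix it with http://."""
    if info_url.startswith('http://') or info_url.startswith('https://'):
        return info_url
    return 'http://' + info_url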
def task_compile_asset_market_info():
    assets_trading.compile_asset_market_info()
    # all done for this run...call again in a bit
    start_task(task_compile_asset_market_info, delay=COMPILE_ASSET_MARKET_INFO_PERIOD)
def start_api():
    logger.info("Starting up RPC API handler...")
    group = start_task(api.serve_api)
    group.join()  # block forever
def start_tasks():
    start_task(task_compile_asset_pair_market_info)
    start_task(task_compile_asset_market_info)
def start_tasks():
    start_task(task_expire_stale_prefs)
    start_task(task_generate_wallet_stats)
def start_tasks():
    start_task(task_compile_extended_feed_info)
def start_cp_blockfeed():
    logger.info("Starting up counterparty block feed poller...")
    start_task(blockfeed.process_cp_blockfeed)
def start_tasks():
    start_task(task_compile_extended_asset_info)
def task_compile_extended_feed_info():
    feeds = list(config.mongo_db.feeds.find({'info_status': 'needfetch'}))
    feed_info_urls = []

    def inc_fetch_retry(feed, max_retry=FEED_MAX_RETRY, new_status='error', errors=[]):
        feed['fetch_info_retry'] += 1
        feed['errors'] = errors
        if feed['fetch_info_retry'] == max_retry:
            feed['info_status'] = new_status
        config.mongo_db.feeds.save(feed)

    def process_feed_info(feed, info_data):
        # sanity check
        assert feed['info_status'] == 'needfetch'
        assert 'info_url' in feed
        assert util.is_valid_url(feed['info_url'], allow_no_protocol=True)  # already validated in the fetch

        errors = util.is_valid_json(info_data, config.FEED_SCHEMA)

        if not isinstance(info_data, dict) or 'address' not in info_data:
            errors.append('Invalid data format')
        elif feed['source'] != info_data['address']:
            errors.append('Invalid address')

        if len(errors) > 0:
            inc_fetch_retry(feed, new_status='invalid', errors=errors)
            return (False, errors)

        feed['info_status'] = 'valid'

        # fetch any associated images...
        # TODO: parallelize this 2nd level feed image fetching ... (e.g. just compose a list here, and process it in later on)
        if 'image' in info_data:
            info_data['valid_image'] = util.fetch_image(
                info_data['image'], config.SUBDIR_FEED_IMAGES, feed['source'] + '_topic', fetch_timeout=5)
        if 'operator' in info_data and 'image' in info_data['operator']:
            info_data['operator']['valid_image'] = util.fetch_image(
                info_data['operator']['image'], config.SUBDIR_FEED_IMAGES, feed['source'] + '_owner', fetch_timeout=5)
        if 'targets' in info_data:
            for i in range(len(info_data['targets'])):
                if 'image' in info_data['targets'][i]:
                    image_name = feed['source'] + '_tv_' + str(info_data['targets'][i]['value'])
                    info_data['targets'][i]['valid_image'] = util.fetch_image(
                        info_data['targets'][i]['image'], config.SUBDIR_FEED_IMAGES, image_name, fetch_timeout=5)

        feed['info_data'] = sanitize_json_data(info_data)
        config.mongo_db.feeds.save(feed)
        return (True, None)

    def feed_fetch_complete_hook(urls_data):
        logger.info("Enhanced feed info fetching complete. %s unique URLs fetched. Processing..." % len(urls_data))
        feeds = config.mongo_db.feeds.find({'info_status': 'needfetch'})
        for feed in feeds:
            #logger.debug("Looking at feed %s: %s" % (feed, feed['info_url']))
            if feed['info_url']:
                info_url = ('http://' + feed['info_url']) \
                    if not feed['info_url'].startswith('http://') and not feed['info_url'].startswith('https://') else feed['info_url']
                if info_url not in urls_data:
                    logger.warn("URL %s not properly fetched (not one of %i entries in urls_data), skipping..." % (
                        info_url, len(urls_data)))
                    continue
                assert info_url in urls_data
                if not urls_data[info_url][0]:  # request was not successful
                    inc_fetch_retry(feed, max_retry=FEED_MAX_RETRY, errors=[urls_data[info_url][1]])
                    logger.warn("Fetch for feed at %s not successful: %s (try %i of %i)" % (
                        info_url, urls_data[info_url][1], feed['fetch_info_retry'], FEED_MAX_RETRY))
                else:
                    result = process_feed_info(feed, urls_data[info_url][1])
                    if not result[0]:
                        logger.info("Processing for feed at %s not successful: %s" % (info_url, result[1]))
                    else:
                        logger.info("Processing for feed at %s successful" % info_url)

    # compose and fetch all info URLs in all feeds with them
    for feed in feeds:
        assert feed['info_url']
        feed_info_urls.append(
            ('http://' + feed['info_url'])
            if not feed['info_url'].startswith('http://') and not feed['info_url'].startswith('https://')
            else feed['info_url'])

    feed_info_urls_str = ', '.join(feed_info_urls)
    feed_info_urls_str = (
        (feed_info_urls_str[:2000] + ' ...') if len(feed_info_urls_str) > 2000
        else feed_info_urls_str)  # truncate if necessary
    if len(feed_info_urls):
        logger.info('Fetching enhanced feed info for %i feeds: %s' % (len(feed_info_urls), feed_info_urls_str))
        util.stream_fetch(
            feed_info_urls, feed_fetch_complete_hook,
            fetch_timeout=10, max_fetch_size=4 * 1024, urls_group_size=20, urls_group_time_spacing=20,
            per_request_complete_callback=lambda url, data: logger.debug(
                "Feed at %s retrieved, result: %s" % (url, data)))

    start_task(task_compile_extended_feed_info, delay=60 * 5)  # call again in 5 minutes
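# As consumed by the completion hooks above, util.stream_fetch() appears to hand its
# hook a dict mapping each fetched URL to a (success, payload_or_error) pair. A
# hypothetical example of that shape, with made-up URLs and data:
_example_urls_data = {
    'http://example.com/feed.json': (True, {'address': '1ExampleSourceAddress'}),  # fetched and parsed OK
    'http://example.com/missing.json': (False, 'HTTP 404'),                        # fetch failed; error message
}
_ok, _payload_or_error = _example_urls_data['http://example.com/feed.json']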
def task_generate_wallet_stats():
    """
    Every 30 minutes, from the login history, update and generate wallet stats
    """
    def gen_stats_for_network(network):
        assert network in ('mainnet', 'testnet', 'regtest')

        # get the latest date in the stats table present
        now = datetime.datetime.utcnow()
        latest_stat = config.mongo_db.wallet_stats.find(
            {'network': network}).sort('when', pymongo.DESCENDING).limit(1)
        latest_stat = latest_stat[0] if latest_stat.count() else None
        new_entries = {}

        # the queries below work with data that happened on or after the date of the latest stat present
        # aggregate over the same period for new logins, adding the referrers to a set
        match_criteria = {'when': {"$gte": latest_stat['when']}, 'network': network, 'action': 'create'} \
            if latest_stat else {'when': {"$lte": now}, 'network': network, 'action': 'create'}
        new_wallets = config.mongo_db.login_history.aggregate([
            {"$match": match_criteria},
            {"$project": {
                "year": {"$year": "$when"},
                "month": {"$month": "$when"},
                "day": {"$dayOfMonth": "$when"}
            }},
            {"$group": {
                "_id": {"year": "$year", "month": "$month", "day": "$day"},
                "new_count": {"$sum": 1}
            }}
        ])
        for e in new_wallets:
            ts = calendar.timegm(
                datetime.datetime(e['_id']['year'], e['_id']['month'], e['_id']['day']).timetuple())
            new_entries[ts] = {  # a future wallet_stats entry
                'when': datetime.datetime(e['_id']['year'], e['_id']['month'], e['_id']['day']),
                'network': network,
                'new_count': e['new_count'],
            }

        referer_counts = config.mongo_db.login_history.aggregate([
            {"$match": match_criteria},
            {"$project": {
                "year": {"$year": "$when"},
                "month": {"$month": "$when"},
                "day": {"$dayOfMonth": "$when"},
                "referer": 1
            }},
            {"$group": {
                "_id": {"year": "$year", "month": "$month", "day": "$day", "referer": "$referer"},
                #"uniqueReferers": {"$addToSet": "$_id"},
                "count": {"$sum": 1}
            }}
        ])
        for e in referer_counts:
            ts = calendar.timegm(
                datetime.datetime(e['_id']['year'], e['_id']['month'], e['_id']['day']).timetuple())
            assert ts in new_entries
            if e['_id']['referer'] is None:
                continue
            referer_key = urllib.parse.quote(e['_id']['referer']).replace('.', '%2E')
            if 'referers' not in new_entries[ts]:
                new_entries[ts]['referers'] = {}
            if e['_id']['referer'] not in new_entries[ts]['referers']:
                new_entries[ts]['referers'][referer_key] = 0
            new_entries[ts]['referers'][referer_key] += 1

        # logins (not new wallets) - generate stats
        match_criteria = {'when': {"$gte": latest_stat['when']}, 'network': network, 'action': 'login'} \
            if latest_stat else {'when': {"$lte": now}, 'network': network, 'action': 'login'}
        logins = config.mongo_db.login_history.aggregate([
            {"$match": match_criteria},
            {"$project": {
                "year": {"$year": "$when"},
                "month": {"$month": "$when"},
                "day": {"$dayOfMonth": "$when"},
                "wallet_id": 1
            }},
            {"$group": {
                "_id": {"year": "$year", "month": "$month", "day": "$day"},
                "login_count": {"$sum": 1},
                "distinct_wallets": {"$addToSet": "$wallet_id"},
            }}
        ])
        for e in logins:
            ts = calendar.timegm(
                datetime.datetime(e['_id']['year'], e['_id']['month'], e['_id']['day']).timetuple())
            if ts not in new_entries:
                new_entries[ts] = {  # a future wallet_stats entry
                    'when': datetime.datetime(e['_id']['year'], e['_id']['month'], e['_id']['day']),
                    'network': network,
                    'new_count': 0,
                    'referers': []
                }
            new_entries[ts]['login_count'] = e['login_count']
            new_entries[ts]['distinct_login_count'] = len(e['distinct_wallets'])

        # add/replace the wallet_stats data
        if latest_stat:
            updated_entry_ts = calendar.timegm(datetime.datetime(
                latest_stat['when'].year, latest_stat['when'].month, latest_stat['when'].day).timetuple())
            if updated_entry_ts in new_entries:
                updated_entry = new_entries[updated_entry_ts]
                del new_entries[updated_entry_ts]
                assert updated_entry['when'] == latest_stat['when']
                del updated_entry['when']  # not required for the upsert
                logger.info("Revised wallet statistics for partial day %s-%s-%s: %s" % (
                    latest_stat['when'].year, latest_stat['when'].month, latest_stat['when'].day, updated_entry))
                config.mongo_db.wallet_stats.update(
                    {'when': latest_stat['when']}, {"$set": updated_entry}, upsert=True)

        if new_entries:  # insert the rest
            #logger.info("Stats, new entries: %s" % new_entries.values())
            config.mongo_db.wallet_stats.insert(list(new_entries.values()))
            logger.info("Added wallet statistics for %i full days" % len(list(new_entries.values())))

    gen_stats_for_network('mainnet')
    gen_stats_for_network('testnet')
    gen_stats_for_network('regtest')
    start_task(task_generate_wallet_stats, delay=30 * 60)  # call again in 30 minutes
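# For reference, a document written to wallet_stats by gen_stats_for_network() above
# carries roughly the following fields (the values here are purely illustrative):
_EXAMPLE_WALLET_STATS_DOC = {
    'when': datetime.datetime(2015, 1, 1),        # the UTC day being summarized
    'network': 'mainnet',
    'new_count': 12,                              # wallets created that day
    'referers': {'https%3A//example%2Ecom/': 3},  # URL-quoted, with '.' escaped as %2E
    'login_count': 40,                            # total logins that day
    'distinct_login_count': 25,                   # unique wallet_ids that logged in
}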