def test_get_content_batch():
    client = SteemClient.instance()
    tuples = [('test-safari', 'may-spam'), ('test-safari', 'june-spam')]
    posts = client.get_content_batch(tuples)
    assert len(posts) == 2
    assert posts[0]['author'] == 'test-safari'
    assert posts[1]['author'] == 'test-safari'

@classmethod
def verify_head(cls):
    """Perform a fork recovery check on startup."""
    hive_head = cls.head_num()
    if not hive_head:
        return

    # move backwards from head until hive/steem agree
    to_pop = []
    cursor = hive_head
    steemd = SteemClient.instance()
    while True:
        assert hive_head - cursor < 25, "fork too deep"
        hive_block = cls._get(cursor)
        steem_hash = steemd.get_block(cursor)['block_id']
        match = hive_block['hash'] == steem_hash
        log.info("[INIT] fork check. block %d: %s vs %s --- %s",
                 hive_block['num'], hive_block['hash'],
                 steem_hash, 'ok' if match else 'invalid')
        if match:
            break
        to_pop.append(hive_block)
        cursor -= 1

    if hive_head == cursor:
        return  # no fork!

    log.error("[FORK] depth is %d; popping blocks %d - %d",
              hive_head - cursor, cursor + 1, hive_head)

    # we should not attempt to recover from fork until it's safe
    fork_limit = steemd.last_irreversible()
    assert cursor < fork_limit, "not proceeding until head is irreversible"

    cls._pop(to_pop)

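# A minimal startup sketch (an assumption, not from this source): verify_head
# is expected to run once at startup, before any new blocks are processed, so
# that blocks written on an orphaned fork are popped first. The import path
# and the follow-up calls below are assumptions for illustration.
#
#     from hive.indexer.blocks import Blocks
#     Blocks.verify_head()            # pops forked blocks; asserts depth < 25
#     # ...then continue with from_steemd() and/or listen()
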
def _load_account(name):
    #account = load_accounts([name])[0]
    #for key in ['recent_replies', 'comments', 'feed', 'blog']:
    #    account[key] = []
    # need to audit all assumed condenser keys..
    from hive.steem.client import SteemClient
    account = SteemClient.instance().get_accounts([name])[0]
    return account

@classmethod
def from_steemd(cls, is_initial_sync=False, chunk_size=1000):
    """Fast sync strategy: read/process blocks in batches."""
    steemd = SteemClient.instance()
    lbound = Blocks.head_num() + 1
    ubound = steemd.last_irreversible()
    count = ubound - lbound
    if count < 1:
        return

    _abort = False
    try:
        print("[SYNC] start block %d, +%d to sync" % (lbound, count))
        timer = Timer(count, entity='block', laps=['rps', 'wps'])
        while lbound < ubound:
            timer.batch_start()

            # fetch blocks
            to = min(lbound + chunk_size, ubound)
            blocks = steemd.get_blocks_range(lbound, to)
            lbound = to
            timer.batch_lap()

            # process blocks
            Blocks.process_multi(blocks, is_initial_sync)
            timer.batch_finish(len(blocks))

            date = blocks[-1]['timestamp']
            print(timer.batch_status("[SYNC] Got block %d @ %s" % (to - 1, date)))
    except KeyboardInterrupt:
        traceback.print_exc()
        print("\n\n[SYNC] Aborted.. cleaning up..")
        _abort = True

    if not is_initial_sync:
        # This flush is low importance; accounts are swept regularly.
        if not _abort:
            Accounts.flush(trx=True)

        # If this flush fails, all that could potentially be lost here is
        # edits and pre-payout votes. If the post has not been paid out yet,
        # then the worst case is it will be synced upon payout. If the post
        # is already paid out, worst case is to lose an edit.
        CachedPost.flush(trx=True)

    if _abort:
        print("[SYNC] Aborted")
        exit()

@classmethod
def _update_batch(cls, tuples, trx=True, full_total=None):
    """Fetch, process, and write a batch of posts.

    Given a set of posts, fetch from steemd and write them to the
    db. The `tuples` arg is the form of `[(url, id, level)*]`
    representing posts which are to be fetched from steemd and
    updated in cache.

    Regarding _bump_last_id: there's a rare edge case when the last
    hive_post entry has been deleted "in the future" (ie, we haven't
    seen the delete op yet). So even when the post is not found
    (i.e. `not post['author']`), it's important to advance _last_id,
    because this cursor is used to deduce any missing cache entries.
    """
    steemd = SteemClient.instance()
    timer = Timer(total=len(tuples), entity='post',
                  laps=['rps', 'wps'], full_total=full_total)
    tuples = sorted(tuples, key=lambda x: x[1])  # enforce ASC id's

    for tups in partition_all(1000, tuples):
        timer.batch_start()
        buffer = []
        post_args = [tup[0].split('/') for tup in tups]
        posts = steemd.get_content_batch(post_args)
        post_ids = [tup[1] for tup in tups]
        post_levels = [tup[2] for tup in tups]
        for pid, post, level in zip(post_ids, posts, post_levels):
            if post['author']:
                buffer.extend(cls._sql(pid, post, level=level))
            else:
                # When a post has been deleted (or otherwise DNE),
                # steemd simply returns a blank post object w/ all
                # fields blank. While it's best to not try to cache
                # already-deleted posts, it can happen during missed
                # post sweep and while using `trail_blocks` > 0.
                pass
            cls._bump_last_id(pid)

        timer.batch_lap()
        DB.batch_queries(buffer, trx)

        timer.batch_finish(len(posts))
        if len(tuples) >= 1000:
            log.info(timer.batch_status())

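# Illustrative call (hypothetical values): each tuple is (url, id, level),
# where url is "author/permlink" and is split into get_content_batch args.
# The level strings are inferred from the flush counters printed in listen()
# ('insert', 'update', 'payout', 'upvote'); the ids and permlinks are made up.
#
#     CachedPost._update_batch([
#         ('test-safari/may-spam', 4001, 'insert'),
#         ('test-safari/june-spam', 4002, 'update'),
#     ], trx=True)
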
def test_stream_blocks():
    client = SteemClient.instance()
    start_at = client.last_irreversible()
    stop_at = client.head_block() + 2
    streamed = 0

    with pytest.raises(KeyboardInterrupt):
        for block in client.stream_blocks(start_at, trail_blocks=0, max_gap=100):
            assert 'block_id' in block
            # the first 4 bytes (8 hex chars) of a block_id encode the block num
            num = int(block['block_id'][:8], base=16)
            assert num == start_at + streamed
            streamed += 1
            if streamed >= 20 and num >= stop_at:
                raise KeyboardInterrupt

    assert streamed >= 20
    assert num >= stop_at

@classmethod
def _update_batch(cls, tuples, trx=True, full_total=None):
    """Fetch, process, and write a batch of posts.

    Given a set of posts, fetch from steemd and write them to the
    db. The `tuples` arg is the form of `[(url, id, level)*]`
    representing posts which are to be fetched from steemd and
    updated in cache.

    Regarding _bump_last_id: there's a rare edge case when the last
    hive_post entry has been deleted "in the future" (ie, we haven't
    seen the delete op yet). So even when the post is not found
    (i.e. `not post['author']`), it's important to advance _last_id,
    because this cursor is used to deduce any missing cache entries.
    """
    steemd = SteemClient.instance()
    timer = Timer(total=len(tuples), entity='post',
                  laps=['rps', 'wps'], full_total=full_total)
    tuples = sorted(tuples, key=lambda x: x[1])  # enforce ASC id's

    for tups in partition_all(1000, tuples):
        timer.batch_start()
        buffer = []
        post_args = [tup[0].split('/') for tup in tups]
        posts = steemd.get_content_batch(post_args)
        post_ids = [tup[1] for tup in tups]
        post_levels = [tup[2] for tup in tups]
        for pid, post, level in zip(post_ids, posts, post_levels):
            if post['author']:
                buffer.append(cls._sql(pid, post, level=level))
            else:
                # expected to happen when sweeping missed posts as
                # part of initial sync or crash recovery routine,
                # otherwise indicates potential bug. TODO: assert?
                if not cls._sweeping_missed:
                    print("WARNING: missing/deleted post %d" % pid)
            cls._bump_last_id(pid)

        timer.batch_lap()
        cls._batch_queries(buffer, trx)

        timer.batch_finish(len(posts))
        if len(tuples) >= 1000:
            print(timer.batch_status())

@classmethod
def _generate_cache_sqls(cls, accounts):
    """Prepare a SQL query from a steemd account."""
    cached_at = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
    sqls = []
    for account in SteemClient.instance().get_accounts(accounts):
        vote_weight = (vests_amount(account['vesting_shares'])
                       + vests_amount(account['received_vesting_shares'])
                       - vests_amount(account['delegated_vesting_shares']))

        # remove empty keys
        useless = ['transfer_history', 'market_history', 'post_history',
                   'vote_history', 'other_history', 'tags_usage',
                   'guest_bloggers']
        for key in useless:
            del account[key]

        # pull out valid profile md and delete the key
        profile = safe_profile_metadata(account)
        del account['json_metadata']

        values = {
            'name': account['name'],
            'proxy': account['proxy'],
            'post_count': account['post_count'],
            'reputation': rep_log10(account['reputation']),
            'proxy_weight': vests_amount(account['vesting_shares']),
            'vote_weight': vote_weight,
            'kb_used': int(account['lifetime_bandwidth']) / 1e6 / 1024,
            'active_at': account['last_bandwidth_update'],
            'cached_at': cached_at,

            'display_name': profile['name'],
            'about': profile['about'],
            'location': profile['location'],
            'website': profile['website'],
            'profile_image': profile['profile_image'],
            'cover_image': profile['cover_image'],
            'raw_json': json.dumps(account)}

        update = ', '.join([k + " = :" + k for k in list(values.keys())][1:])
        sql = "UPDATE hive_accounts SET %s WHERE name = :name" % (update)
        sqls.append((sql, values))
    return sqls

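# Shape of one generated (sql, values) pair, for illustration only; the SET
# column list follows the `values` dict above ('name' is skipped from SET
# since it is the WHERE key), and the account data shown is hypothetical:
#
#     ("UPDATE hive_accounts SET proxy = :proxy, post_count = :post_count, "
#      "..., raw_json = :raw_json WHERE name = :name",
#      {'name': 'test-safari', 'proxy': '', 'post_count': 42, ...})
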
@classmethod
def listen(cls):
    """Live (block following) mode."""
    trail_blocks = Conf.get('trail_blocks')
    assert trail_blocks >= 0
    assert trail_blocks <= 100

    # debug: no max gap if disable_sync in effect
    max_gap = None if Conf.get('disable_sync') else 100

    steemd = SteemClient.instance()
    hive_head = Blocks.head_num()

    for block in steemd.stream_blocks(hive_head + 1, trail_blocks, max_gap):
        start_time = time.perf_counter()

        query("START TRANSACTION")
        num = Blocks.process(block)
        follows = Follow.flush(trx=False)
        accts = Accounts.flush(trx=False, spread=8)
        CachedPost.dirty_paidouts(block['timestamp'])
        cnt = CachedPost.flush(trx=False)
        query("COMMIT")

        ms = (time.perf_counter() - start_time) * 1000
        print("[LIVE] Got block %d at %s --% 4d txs,% 3d posts,% 3d edits,"
              "% 3d payouts,% 3d votes,% 3d accounts,% 3d follows --% 5dms%s"
              % (num, block['timestamp'], len(block['transactions']),
                 cnt['insert'], cnt['update'], cnt['payout'], cnt['upvote'],
                 accts, follows, int(ms), ' SLOW' if ms > 1000 else ''))

        # once per hour, update accounts
        if num % 1200 == 0:
            Accounts.dirty_oldest(10000)
            Accounts.flush(trx=True)
            #Accounts.update_ranks()

        # once a minute, update chain props
        if num % 20 == 0:
            cls._update_chain_state(steemd)

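# Cadence check for the modulo triggers above, assuming Steem's fixed
# 3-second block interval:
#   num % 1200 == 0  ->  1200 blocks * 3s = 3600s, i.e. the hourly account sweep
#   num % 20   == 0  ->    20 blocks * 3s =   60s, i.e. per-minute chain props
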
@classmethod
def from_steemd(cls, is_initial_sync=False, chunk_size=1000):
    """Fast sync strategy: read/process blocks in batches."""
    steemd = SteemClient.instance()
    lbound = Blocks.head_num() + 1
    ubound = steemd.last_irreversible()
    count = ubound - lbound
    if count < 1:
        return

    log.info("[SYNC] start block %d, +%d to sync", lbound, count)
    timer = Timer(count, entity='block', laps=['rps', 'wps'])
    while lbound < ubound:
        timer.batch_start()

        # fetch blocks
        to = min(lbound + chunk_size, ubound)
        blocks = steemd.get_blocks_range(lbound, to)
        lbound = to
        timer.batch_lap()

        # process blocks
        Blocks.process_multi(blocks, is_initial_sync)
        timer.batch_finish(len(blocks))

        _prefix = ("[SYNC] Got block %d @ %s"
                   % (to - 1, blocks[-1]['timestamp']))
        log.info(timer.batch_status(_prefix))

    if not is_initial_sync:
        # This flush is low importance; accounts are swept regularly.
        Accounts.flush(trx=True)

        # If this flush fails, all that could potentially be lost here is
        # edits and pre-payout votes. If the post has not been paid out yet,
        # then the worst case is it will be synced upon payout. If the post
        # is already paid out, worst case is to lose an edit.
        CachedPost.flush(trx=True)

def test_instance():
    assert isinstance(SteemClient.instance(), SteemClient)

def test_get_blocks_range():
    client = SteemClient.instance()
    lbound = 23000000
    blocks = client.get_blocks_range(lbound, lbound + 5)
    assert len(blocks) == 5

def test_gdgp_extended():
    client = SteemClient.instance()
    ret = client.gdgp_extended()
    assert 'dgpo' in ret
    assert 'head_block_number' in ret['dgpo']
    assert 'usd_per_steem' in ret

def test_last_irreversible():
    client = SteemClient.instance()
    assert client.last_irreversible() > 23e6

def test_head_block():
    client = SteemClient.instance()
    assert client.head_block() > 23e6

def test_head_time():
    client = SteemClient.instance()
    head = parse_time(client.head_time())
    assert head > datetime.datetime.now() - datetime.timedelta(minutes=15)

def test_get_block():
    client = SteemClient.instance()
    block = client.get_block(23494494)
    assert block['block_id'] == '01667f5e194c421aa00eb02270d3219a5d9bf339'

def test_get_accounts():
    client = SteemClient.instance()
    accounts = client.get_accounts(['steemit', 'test-safari'])
    assert len(accounts) == 2
    assert accounts[0]['name'] == 'steemit'