def _process_legacy(cls, account, op_json, block_date):
    """Dispatch a legacy 'follow' plugin op (follow/mute/clear, reblog).

    Payload shapes, keyed by command:

    follow {follower: {type: 'account'},
            following: {type: 'account'},
            what: {type: 'list'}}
    reblog {account: {type: 'account'},
            author: {type: 'account'},
            permlink: {type: 'permlink'},
            delete: {type: 'str', optional: True}}

    `op_json` is only acted upon when it is a two-element list whose
    first item is 'follow' or 'reblog' and whose second item is a dict;
    any other input is ignored and the method returns None.
    """
    # The checks short-circuit in the same order as individual guards
    # would, so `first`/`second` only ever see a two-element list.
    malformed = (
        not isinstance(op_json, list)
        or len(op_json) != 2
        or first(op_json) not in ['follow', 'reblog']
        or not isinstance(second(op_json), dict)
    )
    if malformed:
        return

    command, payload = op_json  # ['follow', {data...}]
    if command == 'follow':
        Follow.follow_op(account, payload, block_date)
    elif command == 'reblog':
        cls.reblog(account, payload, block_date)
def close_own_db_access(cls):
    """Call `close_own_db_access()` on every indexer component, in order."""
    # Order matches the order in which the components are listed here;
    # each one is asked to close its own database access in turn.
    components = (
        PostDataCache,
        Reputations,
        Votes,
        Follow,
        Posts,
        Reblog,
        Notify,
        Accounts,
        PayoutStats,
        Mentions,
    )
    for component in components:
        component.close_own_db_access()
def setup_own_db_access(cls, sharedDbAdapter):
    """Call `setup_own_db_access()` on every indexer component, in order.

    Each component receives `sharedDbAdapter` together with a string
    label equal to the component's name as written below.
    """
    labelled_components = (
        (PostDataCache, "PostDataCache"),
        (Reputations, "Reputations"),
        (Votes, "Votes"),
        (Follow, "Follow"),
        (Posts, "Posts"),
        (Reblog, "Reblog"),
        (Notify, "Notify"),
        (Accounts, "Accounts"),
        (PayoutStats, "PayoutStats"),
        (Mentions, "Mentions"),
    )
    for component, label in labelled_components:
        component.setup_own_db_access(sharedDbAdapter, label)
def initial(self):
    """Run the initial sync: fast block sync, then the cache build.

    Asserts that `DbState.is_initial_sync()` is true before doing any
    work.
    """
    assert DbState.is_initial_sync(), "already synced"

    # Phase 1: load from checkpoints, then continue from steemd.
    log.info("[INIT] *** Initial fast sync ***")
    self.from_checkpoints()
    self.from_steemd(is_initial_sync=True)

    # Phase 2: build the caches derived from the synced data.
    log.info("[INIT] *** Initial cache build ***")
    CachedPost.recover_missing_posts(self._steem)
    FeedCache.rebuild()
    Follow.force_recount()
def process_multi(cls, blocks, is_initial_sync=False):
    """Process a batch of blocks between START TRANSACTION and COMMIT.

    Every block is passed to `cls._process` together with
    `is_initial_sync`; pending follow changes are flushed before the
    commit.
    """
    query("START TRANSACTION")

    for blk in blocks:
        cls._process(blk, is_initial_sync)

    # Follows flushing needs to be atomic because recounts are
    # expensive. So is tracking follows at all; hence we track
    # deltas in memory and update follow/er counts in bulk.
    Follow.flush(trx=False)

    query("COMMIT")
def initial(cls):
    """Run the initial sync: fast block sync, then the cache build.

    Asserts that `DbState.is_initial_sync()` is true before doing any
    work.
    """
    assert DbState.is_initial_sync(), "already synced"

    # Phase 1: load from checkpoints, then continue from steemd.
    print("[INIT] *** Initial fast sync ***")
    cls.from_checkpoints()
    cls.from_steemd(is_initial_sync=True)

    # Phase 2: build the caches derived from the synced data.
    # TODO: disable indexes during this process
    print("[INIT] *** Initial cache build ***")
    CachedPost.recover_missing_posts()
    FeedCache.rebuild()
    Follow.force_recount()
def sync_from_steemd():
    """Sync blocks from steemd, from the local head up to last irreversible.

    Blocks are fetched and processed in batches of at most 1000. A
    KeyboardInterrupt during the batch loop is caught: the traceback is
    printed, remaining in-memory state is flushed where that is still
    considered safe, and the process then exits.

    Returns None immediately when there is nothing to sync.
    """
    # Local import: `sys.exit()` replaces the `exit()` helper injected by
    # the `site` module, which is intended for interactive use and is not
    # available when the interpreter runs without `site` (e.g. `-S`).
    import sys

    is_initial_sync = DbState.is_initial_sync()
    steemd = get_adapter()

    lbound = Blocks.head_num() + 1
    ubound = steemd.last_irreversible()
    if ubound <= lbound:
        return

    _abort = False
    try:
        # NOTE(review): the loop below appears to treat `ubound` as an
        # exclusive bound (the timer is sized `ubound - lbound` and the
        # status line reports `to-1`), so the "+%d" count printed here
        # may be one too high -- confirm against get_blocks_range.
        print("[SYNC] start block %d, +%d to sync" % (lbound, ubound-lbound+1))
        timer = Timer(ubound - lbound, entity='block', laps=['rps', 'wps'])
        while lbound < ubound:
            to = min(lbound + 1000, ubound)
            timer.batch_start()
            blocks = steemd.get_blocks_range(lbound, to)
            timer.batch_lap()
            Blocks.process_multi(blocks, is_initial_sync)
            timer.batch_finish(len(blocks))
            date = blocks[-1]['timestamp']
            print(timer.batch_status("[SYNC] Got block %d @ %s" % (to-1, date)))
            lbound = to

    except KeyboardInterrupt:
        traceback.print_exc()
        print("\n\n[SYNC] Aborted.. cleaning up..")
        _abort = True

    if not is_initial_sync:
        # Follows flushing may need to be moved closer to core (i.e. moved
        # into main block transactions). Important to keep in sync since
        # we need to prevent expensive recounts. This will fail if we aborted
        # in the middle of a transaction, meaning data loss. Better than
        # forcing it, however, since in-memory cache will be out of sync
        # with db state.
        Follow.flush(trx=True)

        # This flush is low importance; accounts are swept regularly.
        if not _abort:
            Accounts.flush(trx=True)

        # If this flush fails, all that could potentially be lost here is
        # edits and pre-payout votes. If the post has not been paid out yet,
        # then the worst case is it will be synced upon payout. If the post
        # is already paid out, worst case is to lose an edit.
        CachedPost.flush(trx=True)

    if _abort:
        print("[SYNC] Aborted")
        sys.exit()
def _check_migrations(cls):
    """Load the stored schema version and apply pending migrations.

    Reads `db_version` from `hive_state` into `cls._ver` and then runs
    the version-specific steps below in sequence.
    """
    stored_version = cls.db().query_one(
        "SELECT db_version FROM hive_state LIMIT 1")
    cls._ver = stored_version
    assert cls._ver is not None, 'could not load state record'

    # Version 0 marks a first run: jump straight to version 2 (the
    # intermediate version 1 is intentionally skipped).
    if cls._ver == 0:
        cls._set_ver(2)

    # Version 1 -> 2: recount follows, then bump the version.
    # The import is local to this step, as in the rest of this routine.
    if cls._ver == 1:
        from hive.indexer.follow import Follow
        Follow.force_recount()
        cls._set_ver(2)
def _process_legacy(cls, account, op_json, block_date):
    """Dispatch a legacy 'follow' plugin op (follow/mute/clear, reblog).

    `op_json` is only acted upon when it is a two-element list whose
    first item is 'follow' or 'reblog' and whose second item is a dict;
    any other input is ignored and the method returns None.
    """
    # Shape check first, so `first`/`second` only see a 2-item list.
    is_pair = isinstance(op_json, list) and len(op_json) == 2
    if not is_pair:
        return
    if first(op_json) not in ['follow', 'reblog']:
        return
    if not isinstance(second(op_json), dict):
        return

    action, data = op_json  # ['follow', {data...}]
    if action == 'follow':
        Follow.follow_op(account, data, block_date)
    elif action == 'reblog':
        cls.reblog(account, data, block_date)
def listen(cls):
    """Live mode: process blocks one at a time as steemd streams them.

    Each block is processed and the follow, account and post caches are
    flushed between START TRANSACTION and COMMIT, after which a
    one-line status is printed. Periodic maintenance runs on fixed
    block-number intervals.
    """
    trail_blocks = Conf.get('trail_blocks')
    assert trail_blocks >= 0
    assert trail_blocks < 25

    steemd = SteemClient.instance()
    hive_head = Blocks.head_num()
    stream = steemd.stream_blocks(hive_head + 1, trail_blocks, max_gap=40)

    for block in stream:
        started = time.perf_counter()

        query("START TRANSACTION")
        num = Blocks.process(block)
        follows = Follow.flush(trx=False)
        accts = Accounts.flush(trx=False, period=8)
        CachedPost.dirty_paidouts(block['timestamp'])
        cnt = CachedPost.flush(trx=False)
        query("COMMIT")

        ms = (time.perf_counter() - started) * 1000
        slow_tag = ' SLOW' if ms > 1000 else ''
        status_fields = (
            num, block['timestamp'], len(block['transactions']),
            cnt['insert'], cnt['update'], cnt['payout'], cnt['upvote'],
            accts, follows, int(ms), slow_tag)
        print("[LIVE] Got block %d at %s --% 4d txs,% 3d posts,% 3d edits,"
              "% 3d payouts,% 3d votes,% 3d accounts,% 3d follows --% 5dms%s"
              % status_fields)

        # once per hour, update accounts
        if num % 1200 == 0:
            Accounts.dirty_oldest(10000)
            Accounts.flush(trx=True)
            #Accounts.update_ranks()

        # once a minute, update chain props
        if num % 20 == 0:
            cls._update_chain_state(steemd)
def process_multi(cls, blocks, is_initial_sync=False):
    """Batch-process blocks between START TRANSACTION and COMMIT.

    If processing any block raises, a "[FATAL]" line naming the block
    after the last successfully processed one is printed and the
    exception is re-raised; no COMMIT is issued in that case.
    """
    query("START TRANSACTION")

    # Number returned by the most recent successful `_process` call.
    prev_num = 0
    try:
        for blk in blocks:
            prev_num = cls._process(blk, is_initial_sync)
    except Exception as err:
        print("[FATAL] could not process block %d" % (prev_num + 1))
        raise err

    # Follows flushing needs to be atomic because recounts are
    # expensive. So is tracking follows at all; hence we track
    # deltas in memory and update follow/er counts in bulk.
    Follow.flush(trx=False)

    query("COMMIT")
def listen(self):
    """Live (block following) mode.

    Each streamed block is processed and the follow, account and post
    caches are flushed between START TRANSACTION and COMMIT, after
    which a one-line status is logged. Periodic maintenance runs on
    fixed block-number intervals.
    """
    trail_blocks = self._conf.get('trail_blocks')
    assert trail_blocks >= 0
    assert trail_blocks <= 100

    # debug: no max gap if disable_sync in effect
    if self._conf.get('test_disable_sync'):
        max_gap = None
    else:
        max_gap = 100

    steemd = self._steem
    hive_head = Blocks.head_num()

    for block in steemd.stream_blocks(hive_head + 1, trail_blocks, max_gap):
        started = perf()

        self._db.query("START TRANSACTION")
        num = Blocks.process(block)
        follows = Follow.flush(trx=False)
        accts = Accounts.flush(steemd, trx=False, spread=8)
        CachedPost.dirty_paidouts(block['timestamp'])
        cnt = CachedPost.flush(steemd, trx=False)
        self._db.query("COMMIT")

        ms = (perf() - started) * 1000
        slow_tag = ' SLOW' if ms > 1000 else ''
        log.info(
            "[LIVE] Got block %d at %s --% 4d txs,% 3d posts,% 3d edits,"
            "% 3d payouts,% 3d votes,% 3d counts,% 3d accts,% 3d follows"
            " --% 5dms%s",
            num, block['timestamp'], len(block['transactions']),
            cnt['insert'], cnt['update'], cnt['payout'], cnt['upvote'],
            cnt['recount'], accts, follows, ms, slow_tag)

        # every 1200 blocks (1hr)
        if num % 1200 == 0:
            log.warning("head block %d @ %s", num, block['timestamp'])
            log.info("[LIVE] hourly stats")
            Accounts.fetch_ranks()
            #Community.recalc_pending_payouts()

        # every 200 blocks (10min)
        if num % 200 == 0:
            Community.recalc_pending_payouts()

        # every 100 blocks (5min)
        if num % 100 == 0:
            log.info("[LIVE] 5-min stats")
            Accounts.dirty_oldest(500)

        # every 20 blocks (1min)
        if num % 20 == 0:
            self._update_chain_state()
def listen(cls):
    """Live (block following) mode.

    Each streamed block is processed and the follow, account and post
    caches are flushed between START TRANSACTION and COMMIT, after
    which a one-line status is logged. Periodic maintenance runs on
    fixed block-number intervals.
    """
    trail_blocks = Conf.get('trail_blocks')
    assert trail_blocks >= 0
    assert trail_blocks <= 100

    # debug: no max gap if disable_sync in effect
    if Conf.get('disable_sync'):
        max_gap = None
    else:
        max_gap = 100

    steemd = SteemClient.instance()
    hive_head = Blocks.head_num()

    for block in steemd.stream_blocks(hive_head + 1, trail_blocks, max_gap):
        started = perf()

        query("START TRANSACTION")
        num = Blocks.process(block)
        follows = Follow.flush(trx=False)
        accts = Accounts.flush(trx=False, spread=8)
        CachedPost.dirty_paidouts(block['timestamp'])
        cnt = CachedPost.flush(trx=False)
        query("COMMIT")

        ms = (perf() - started) * 1000
        slow_tag = ' SLOW' if ms > 1000 else ''
        log.info(
            "[LIVE] Got block %d at %s --% 4d txs,% 3d posts,% 3d edits,"
            "% 3d payouts,% 3d votes,% 3d accts,% 3d follows --% 5dms%s",
            num, block['timestamp'], len(block['transactions']),
            cnt['insert'], cnt['update'], cnt['payout'], cnt['upvote'],
            accts, follows, int(ms), slow_tag)

        # once per hour, update accounts
        if num % 1200 == 0:
            Accounts.dirty_oldest(10000)
            Accounts.flush(trx=True)
            #Accounts.update_ranks()

        # once a minute, update chain props
        if num % 20 == 0:
            cls._update_chain_state(steemd)
def _check_migrations(cls):
    """Check current migration version and perform updates as needed.

    Reads `db_version` from `hive_state` into `cls._ver`, then walks the
    `if cls._ver == N` steps below in ascending order. Each step applies
    its schema/data change and calls `cls._set_ver(N + 1)`.

    Raises:
        Exception: if the stored version is 0 (a reindex is required).
        AssertionError: if no state record could be loaded, or if the
            version reached at the end does not equal `DB_VERSION`.
    """
    #pylint: disable=line-too-long
    cls._ver = cls.db().query_one(
        "SELECT db_version FROM hive_state LIMIT 1")
    assert cls._ver is not None, 'could not load state record'

    # NOTE(review): the steps chain within a single call only if
    # `_set_ver` also updates `cls._ver` -- presumably it does; confirm.

    if cls._ver == 0:
        raise Exception("dbv cannot be 0; reindex required")

    # v1 -> v2 and v2 -> v3: version bumps only, no schema change here.
    if cls._ver == 1:
        cls._set_ver(2)

    if cls._ver == 2:
        cls._set_ver(3)

    # v3 -> v4: index on hive_accounts (vote_weight, name).
    if cls._ver == 3:
        cls.db().query(
            "CREATE INDEX hive_accounts_ix3 ON hive_accounts (vote_weight, name varchar_pattern_ops)"
        )
        cls._set_ver(4)

    # v4 -> v5: partial index on hive_follows for rows with state = 2.
    if cls._ver == 4:
        cls.db().query(
            "CREATE INDEX hive_follows_ix4 ON hive_follows (follower, following) WHERE state = 2"
        )
        cls._set_ver(5)

    if cls._ver == 5:
        # recover acct names lost to issue #151
        from hive.steem.client import SteemClient
        from hive.indexer.accounts import Accounts
        names = SteemClient().get_all_account_names()
        Accounts.load_ids()
        Accounts.register(names, '1970-01-01T00:00:00')
        Accounts.clear_ids()
        cls._set_ver(6)

    # v6 -> v7: replace hive_posts_cache ix6/ix7 with paired (a/b)
    # partial indexes over unpaid posts, for sc_trend and sc_hot.
    if cls._ver == 6:
        cls.db().query("DROP INDEX hive_posts_cache_ix6")
        cls.db().query(
            "CREATE INDEX hive_posts_cache_ix6a ON hive_posts_cache (sc_trend, post_id) WHERE is_paidout = '0'"
        )
        cls.db().query(
            "CREATE INDEX hive_posts_cache_ix6b ON hive_posts_cache (post_id, sc_trend) WHERE is_paidout = '0'"
        )
        cls.db().query("DROP INDEX hive_posts_cache_ix7")
        cls.db().query(
            "CREATE INDEX hive_posts_cache_ix7a ON hive_posts_cache (sc_hot, post_id) WHERE is_paidout = '0'"
        )
        cls.db().query(
            "CREATE INDEX hive_posts_cache_ix7b ON hive_posts_cache (post_id, sc_hot) WHERE is_paidout = '0'"
        )
        cls._set_ver(7)

    # v7 -> v8: two more hive_accounts indexes.
    if cls._ver == 7:
        cls.db().query(
            "CREATE INDEX hive_accounts_ix4 ON hive_accounts (id, name)")
        cls.db().query(
            "CREATE INDEX hive_accounts_ix5 ON hive_accounts (cached_at, name)"
        )
        cls._set_ver(8)

    # v8 -> v9: drop hive_follows ix2/ix3/ix4 (ix4 was created by the
    # v4 step above) and create the 5a/5b indexes in their place.
    if cls._ver == 8:
        cls.db().query("DROP INDEX hive_follows_ix2")
        cls.db().query("DROP INDEX hive_follows_ix3")
        cls.db().query("DROP INDEX hive_follows_ix4")
        cls.db().query(
            "CREATE INDEX hive_follows_5a ON hive_follows (following, state, created_at, follower)"
        )
        cls.db().query(
            "CREATE INDEX hive_follows_5b ON hive_follows (follower, state, created_at, following)"
        )
        cls._set_ver(9)

    # v9 -> v10: data-only step, recount follows.
    if cls._ver == 9:
        from hive.indexer.follow import Follow
        Follow.force_recount()
        cls._set_ver(10)

    # v10 -> v11: payout-oriented partial indexes on hive_posts_cache.
    if cls._ver == 10:
        cls.db().query(
            "CREATE INDEX hive_posts_cache_ix8 ON hive_posts_cache (category, payout, depth) WHERE is_paidout = '0'"
        )
        cls.db().query(
            "CREATE INDEX hive_posts_cache_ix9a ON hive_posts_cache (depth, payout, post_id) WHERE is_paidout = '0'"
        )
        cls.db().query(
            "CREATE INDEX hive_posts_cache_ix9b ON hive_posts_cache (category, depth, payout, post_id) WHERE is_paidout = '0'"
        )
        cls._set_ver(11)

    # v11 -> v12: replace hive_posts ix1/ix2 with partial indexes that
    # only cover non-deleted posts.
    if cls._ver == 11:
        cls.db().query("DROP INDEX hive_posts_ix1")
        cls.db().query("DROP INDEX hive_posts_ix2")
        cls.db().query(
            "CREATE INDEX hive_posts_ix3 ON hive_posts (author, depth, id) WHERE is_deleted = '0'"
        )
        cls.db().query(
            "CREATE INDEX hive_posts_ix4 ON hive_posts (parent_id, id) WHERE is_deleted = '0'"
        )
        cls._set_ver(12)

    # Runs on every call, whether or not a migration was applied.
    # NOTE(review): presumably resets autovacuum settings -- helper is
    # not visible here; confirm.
    reset_autovac(cls.db())

    log.info("[HIVE] db version: %d", cls._ver)
    # Guards against adding a step above without bumping DB_VERSION
    # (or vice versa).
    assert cls._ver == DB_VERSION, "migration missing or invalid DB_VERSION"
def _check_migrations(cls):
    """Check current migration version and perform updates as needed.

    Reads `db_version` from `hive_state` into `cls._ver`, then walks the
    `if cls._ver == N` steps below in ascending order. Each step applies
    its schema/data change and calls `cls._set_ver(N + 1)`.

    Raises:
        Exception: if the stored version is 0 (a reindex is required).
        AssertionError: if no state record could be loaded, if the
            database is at version 12 (that step is marked as not
            finalized), or if the version reached at the end does not
            equal `DB_VERSION`.
    """
    #pylint: disable=line-too-long,too-many-branches,too-many-statements
    cls._ver = cls.db().query_one(
        "SELECT db_version FROM hive_state LIMIT 1")
    assert cls._ver is not None, 'could not load state record'

    # NOTE(review): the steps chain within a single call only if
    # `_set_ver` also updates `cls._ver` -- presumably it does; confirm.

    if cls._ver == 0:
        raise Exception("dbv cannot be 0; reindex required")

    # v1 -> v2 and v2 -> v3: version bumps only, no schema change here.
    if cls._ver == 1:
        cls._set_ver(2)

    if cls._ver == 2:
        cls._set_ver(3)

    # v3 -> v4: index on hive_accounts (vote_weight, name).
    if cls._ver == 3:
        cls.db().query(
            "CREATE INDEX hive_accounts_ix3 ON hive_accounts (vote_weight, name varchar_pattern_ops)"
        )
        cls._set_ver(4)

    # v4 -> v5: partial index on hive_follows for rows with state = 2.
    if cls._ver == 4:
        cls.db().query(
            "CREATE INDEX hive_follows_ix4 ON hive_follows (follower, following) WHERE state = 2"
        )
        cls._set_ver(5)

    if cls._ver == 5:
        # recover acct names lost to issue #151
        from hive.steem.client import SteemClient
        from hive.indexer.accounts import Accounts
        names = SteemClient().get_all_account_names()
        Accounts.load_ids()
        Accounts.register(names, '1970-01-01T00:00:00')
        Accounts.clear_ids()
        cls._set_ver(6)

    # v6 -> v7: replace hive_posts_cache ix6/ix7 with paired (a/b)
    # partial indexes over unpaid posts, for sc_trend and sc_hot.
    if cls._ver == 6:
        cls.db().query("DROP INDEX hive_posts_cache_ix6")
        cls.db().query(
            "CREATE INDEX hive_posts_cache_ix6a ON hive_posts_cache (sc_trend, post_id) WHERE is_paidout = '0'"
        )
        cls.db().query(
            "CREATE INDEX hive_posts_cache_ix6b ON hive_posts_cache (post_id, sc_trend) WHERE is_paidout = '0'"
        )
        cls.db().query("DROP INDEX hive_posts_cache_ix7")
        cls.db().query(
            "CREATE INDEX hive_posts_cache_ix7a ON hive_posts_cache (sc_hot, post_id) WHERE is_paidout = '0'"
        )
        cls.db().query(
            "CREATE INDEX hive_posts_cache_ix7b ON hive_posts_cache (post_id, sc_hot) WHERE is_paidout = '0'"
        )
        cls._set_ver(7)

    # v7 -> v8: two more hive_accounts indexes.
    if cls._ver == 7:
        cls.db().query(
            "CREATE INDEX hive_accounts_ix4 ON hive_accounts (id, name)")
        cls.db().query(
            "CREATE INDEX hive_accounts_ix5 ON hive_accounts (cached_at, name)"
        )
        cls._set_ver(8)

    # v8 -> v9: drop hive_follows ix2/ix3/ix4 (ix4 was created by the
    # v4 step above) and create the 5a/5b indexes in their place.
    if cls._ver == 8:
        cls.db().query("DROP INDEX hive_follows_ix2")
        cls.db().query("DROP INDEX hive_follows_ix3")
        cls.db().query("DROP INDEX hive_follows_ix4")
        cls.db().query(
            "CREATE INDEX hive_follows_5a ON hive_follows (following, state, created_at, follower)"
        )
        cls.db().query(
            "CREATE INDEX hive_follows_5b ON hive_follows (follower, state, created_at, following)"
        )
        cls._set_ver(9)

    # v9 -> v10: data-only step, recount follows.
    if cls._ver == 9:
        from hive.indexer.follow import Follow
        Follow.force_recount()
        cls._set_ver(10)

    # v10 -> v11: payout-oriented partial indexes on hive_posts_cache.
    if cls._ver == 10:
        cls.db().query(
            "CREATE INDEX hive_posts_cache_ix8 ON hive_posts_cache (category, payout, depth) WHERE is_paidout = '0'"
        )
        cls.db().query(
            "CREATE INDEX hive_posts_cache_ix9a ON hive_posts_cache (depth, payout, post_id) WHERE is_paidout = '0'"
        )
        cls.db().query(
            "CREATE INDEX hive_posts_cache_ix9b ON hive_posts_cache (category, depth, payout, post_id) WHERE is_paidout = '0'"
        )
        cls._set_ver(11)

    # v11 -> v12: replace hive_posts ix1/ix2 with partial indexes that
    # only cover non-deleted posts.
    if cls._ver == 11:
        cls.db().query("DROP INDEX hive_posts_ix1")
        cls.db().query("DROP INDEX hive_posts_ix2")
        cls.db().query(
            "CREATE INDEX hive_posts_ix3 ON hive_posts (author, depth, id) WHERE is_deleted = '0'"
        )
        cls.db().query(
            "CREATE INDEX hive_posts_ix4 ON hive_posts (parent_id, id) WHERE is_deleted = '0'"
        )
        cls._set_ver(12)

    if cls._ver == 12:  # community schema
        # Deliberate stop: with assertions enabled nothing below this
        # line in the step runs, so a v12 database cannot be migrated.
        assert False, 'not finalized'
        # Drop any existing community tables, then recreate them from
        # the metadata returned by build_metadata_community().
        for table in [
                'hive_members', 'hive_flags', 'hive_modlog',
                'hive_communities', 'hive_subscriptions', 'hive_roles',
                'hive_notifs'
        ]:
            cls.db().query("DROP TABLE IF EXISTS %s" % table)
        build_metadata_community().create_all(cls.db().engine())

        cls.db().query(
            "ALTER TABLE hive_accounts ADD COLUMN lr_notif_id integer")
        # Replace hive_posts.community (and its fk2 constraint) with an
        # integer community_id; mirror the column on hive_posts_cache.
        cls.db().query(
            "ALTER TABLE hive_posts DROP CONSTRAINT hive_posts_fk2")
        cls.db().query("ALTER TABLE hive_posts DROP COLUMN community")
        cls.db().query(
            "ALTER TABLE hive_posts ADD COLUMN community_id integer")
        cls.db().query(
            "ALTER TABLE hive_posts_cache ADD COLUMN community_id integer")
        cls._set_ver(13)

    # v13 -> v14: indexes for pinned posts and per-community listings.
    if cls._ver == 13:
        sqls = (
            "CREATE INDEX hive_posts_ix5 ON hive_posts (id) WHERE is_pinned = '1' AND is_deleted = '0'",
            "CREATE INDEX hive_posts_ix6 ON hive_posts (community_id, id) WHERE community_id IS NOT NULL AND is_pinned = '1' AND is_deleted = '0'",
            "CREATE INDEX hive_posts_cache_ix10 ON hive_posts_cache (post_id, payout) WHERE is_grayed = '1' AND payout > 0",
            "CREATE INDEX hive_posts_cache_ix30 ON hive_posts_cache (community_id, sc_trend, post_id) WHERE community_id IS NOT NULL AND is_grayed = '0' AND depth = 0",
            "CREATE INDEX hive_posts_cache_ix31 ON hive_posts_cache (community_id, sc_hot, post_id) WHERE community_id IS NOT NULL AND is_grayed = '0' AND depth = 0",
            "CREATE INDEX hive_posts_cache_ix32 ON hive_posts_cache (community_id, created_at, post_id) WHERE community_id IS NOT NULL AND is_grayed = '0' AND depth = 0",
            "CREATE INDEX hive_posts_cache_ix33 ON hive_posts_cache (community_id, payout, post_id) WHERE community_id IS NOT NULL AND is_grayed = '0' AND is_paidout = '0'",
            "CREATE INDEX hive_posts_cache_ix34 ON hive_posts_cache (community_id, payout, post_id) WHERE community_id IS NOT NULL AND is_grayed = '1' AND is_paidout = '0'"
        )
        for sql in sqls:
            cls.db().query(sql)
        cls._set_ver(14)

    # v14 -> v15: extra hive_communities columns plus a per-community
    # author/payout index on hive_posts_cache.
    if cls._ver == 14:
        cls.db().query(
            "ALTER TABLE hive_communities ADD COLUMN primary_tag VARCHAR(32) NOT NULL DEFAULT ''"
        )
        cls.db().query(
            "ALTER TABLE hive_communities ADD COLUMN category VARCHAR(32) NOT NULL DEFAULT ''"
        )
        cls.db().query(
            "ALTER TABLE hive_communities ADD COLUMN avatar_url VARCHAR(1024) NOT NULL DEFAULT ''"
        )
        cls.db().query(
            "ALTER TABLE hive_communities ADD COLUMN num_authors INTEGER NOT NULL DEFAULT 0"
        )
        cls.db().query(
            "CREATE INDEX hive_posts_cache_ix20 ON hive_posts_cache (community_id, author, payout, post_id) WHERE is_paidout = '0'"
        )
        cls._set_ver(15)

    # v15 -> v16: replace hive_accounts.lr_notif_id (added in the v12
    # step) with a lastread_at timestamp; add a hive_notifs index.
    if cls._ver == 15:
        cls.db().query("ALTER TABLE hive_accounts DROP COLUMN lr_notif_id")
        cls.db().query(
            "ALTER TABLE hive_accounts ADD COLUMN lastread_at TIMESTAMP WITHOUT TIME ZONE DEFAULT '1970-01-01 00:00:00' NOT NULL"
        )
        cls.db().query(
            "CREATE INDEX hive_notifs_ix6 ON hive_notifs (dst_id, created_at, score, id) WHERE dst_id IS NOT NULL"
        )
        cls._set_ver(16)

    # Runs on every call, whether or not a migration was applied.
    # NOTE(review): presumably resets autovacuum settings -- helper is
    # not visible here; confirm.
    reset_autovac(cls.db())

    log.info("[HIVE] db version: %d", cls._ver)
    # Guards against adding a step above without bumping DB_VERSION
    # (or vice versa).
    assert cls._ver == DB_VERSION, "migration missing or invalid DB_VERSION"
def listen_steemd(trail_blocks=0, max_gap=50):
    """Follow the chain live, processing one block at a time.

    Starting from the last block recorded in the database, repeatedly
    fetches the next block from steemd, verifies that it links to the
    previous one, buffers it, and processes the oldest buffered block
    once more than `trail_blocks` blocks are queued.

    Args:
        trail_blocks: number of blocks to hold back in the buffer before
            processing; must satisfy 0 <= trail_blocks < 25.
        max_gap: if the estimated head is more than this many blocks
            ahead of the last fetched block, listen mode is abandoned.

    Returns:
        None, when the gap exceeds `max_gap` or when a fork is detected
        while blocks are still buffered. Otherwise loops indefinitely.

    Raises:
        AssertionError: on invalid `trail_blocks`, if the last block is
            ahead of the estimated head, or after 240 failed attempts
            to fetch the same block.
        Exception: if a fetched block does not link to the previous
            block and the buffer is empty.
    """
    assert trail_blocks >= 0
    assert trail_blocks < 25

    # db state
    db_last = Blocks.last()
    last_block = db_last['num']
    last_hash = db_last['hash']

    # chain state
    steemd = get_adapter()
    head_block = steemd.head_block()
    next_expected = time.time()

    # loop state
    tries = 0
    queue = []

    # TODO: detect missed blocks by looking at block timestamps.
    #       this would be an even more efficient way to track slots.
    while True:
        assert not last_block > head_block

        # fast fwd head block if slots missed
        # (the head is estimated locally: one new block per 3 seconds
        # of wall-clock time, rather than re-querying steemd each pass)
        curr_time = time.time()
        while curr_time >= next_expected:
            head_block += 1
            next_expected += 3

        # if gap too large, abort. if caught up, wait.
        gap = head_block - last_block
        if gap > max_gap:
            print("[LIVE] gap too large: %d -- abort listen mode" % gap)
            return
        elif gap > 0:
            print("[LIVE] %d blocks behind..." % gap)
        elif gap == 0:
            # The loop above guarantees next_expected > curr_time here,
            # so the sleep duration is positive.
            time.sleep(next_expected - curr_time)
            head_block += 1
            next_expected += 3

        # get the target block; if DNE, pause and retry
        block_num = last_block + 1
        block = steemd.get_block(block_num)
        if not block:
            tries += 1
            print("[LIVE] block %d unavailable (try %d). delay 1s. head: %d/%d."
                  % (block_num, tries, head_block, steemd.head_block()))
            #assert tries < 12, "could not fetch block %s" % block_num
            assert tries < 240, "could not fetch block %s" % block_num #74
            time.sleep(1)      # pause for 1s; and,
            next_expected += 1 # delay schedule 1s
            continue
        last_block = block_num
        tries = 0

        # ensure this block links to our last; otherwise, blow up. see #59
        if last_hash != block['previous']:
            if queue:
                # Unprocessed blocks are still buffered: returning
                # discards them so the caller can start over.
                print("[FORK] Fork encountered. Emptying queue to retry!")
                return
            raise Exception("Unlinkable block: have %s, got %s -> %s)"
                            % (last_hash, block['previous'], block['block_id']))
        last_hash = block['block_id']

        # buffer until queue full
        queue.append(block)
        if len(queue) <= trail_blocks:
            continue

        # buffer primed; process head of queue
        # ------------------------------------
        block = queue.pop(0)

        start_time = time.perf_counter()

        # Process the block and flush caches between START TRANSACTION
        # and COMMIT (each flush is told not to open its own: trx=False).
        query("START TRANSACTION")
        num = Blocks.process(block)
        follows = Follow.flush(trx=False)
        accts = Accounts.flush(trx=False, period=8)
        CachedPost.dirty_paidouts(block['timestamp'])
        cnt = CachedPost.flush(trx=False)
        query("COMMIT")

        ms = (time.perf_counter() - start_time) * 1000
        print("[LIVE] Got block %d at %s --% 4d txs,% 3d posts,% 3d edits,"
              "% 3d payouts,% 3d votes,% 3d accounts,% 3d follows --% 5dms%s"
              % (num, block['timestamp'], len(block['transactions']),
                 cnt['insert'], cnt['update'], cnt['payout'], cnt['upvote'],
                 accts, follows, int(ms), ' SLOW' if ms > 1000 else ''))

        # once per hour, update accounts
        if num % 1200 == 0:
            Accounts.dirty_oldest(10000)
            Accounts.flush(trx=True)
            #Accounts.update_ranks()

        # once a minute, update chain props
        if num % 20 == 0:
            update_chain_state()