def run(self):
    import sys
    max_block_limit = sys.maxsize
    do_stale_block_check = True
    if self._conf.get('test_max_block'):
        max_block_limit = self._conf.get('test_max_block')
        do_stale_block_check = False
        # Correct max_block_limit by trail_blocks
        max_block_limit = max_block_limit - trail_blocks
        log.info("max_block_limit corrected by specified trail_blocks number: %d is: %d",
                 trail_blocks, max_block_limit)

    if self._conf.get('test_disable_sync'):
        # debug mode: no sync, just stream
        result = self.listen(trail_blocks, max_block_limit, do_stale_block_check)
        restore_handlers()
        return result

    while True:
        # sync up to irreversible block
        self.from_steemd()
        if not can_continue_thread():
            break

        head = Blocks.head_num()
        if head >= max_block_limit:
            self.refresh_sparse_stats()
            log.info("Exiting [LIVE SYNC] because irreversible block sync reached specified block limit: %d",
                     max_block_limit)
            break

        try:
            # listen for new blocks
            self.listen(trail_blocks, max_block_limit, do_stale_block_check)
        except MicroForkException as e:
            # attempt to recover by restarting stream
            log.error("microfork: %s", repr(e))

        head = Blocks.head_num()
        if head >= max_block_limit:
            self.refresh_sparse_stats()
            log.info("Exiting [LIVE SYNC] because of specified block limit: %d", max_block_limit)
            break

        if not can_continue_thread():
            break

    restore_handlers()

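# Worked example of the block-limit correction above (the numbers are hypothetical,
# not from the original code): with --test_max_block=5000000 and trail_blocks=2,
# max_block_limit becomes 5000000 - 2 = 4999998, i.e. the limit that head blocks are
# checked against is lowered by the configured trailing distance.
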
def from_checkpoints(self, chunk_size=1000):
    """Initial sync strategy: read from blocks on disk.

    This method scans for files matching ./checkpoints/*.json.lst
    and uses them for hive's initial sync. Each line must contain
    exactly one block in JSON format.
    """
    # pylint: disable=no-self-use
    last_block = Blocks.head_num()

    tuplize = lambda path: [int(path.split('/')[-1].split('.')[0]), path]
    basedir = os.path.dirname(os.path.realpath(__file__ + "/../.."))
    files = glob.glob(basedir + "/checkpoints/*.json.lst")
    tuples = sorted(map(tuplize, files), key=lambda f: f[0])

    last_read = 0
    for (num, path) in tuples:
        if last_block < num:
            log.info("[SYNC] Load %s. Last block: %d", path, last_block)
            with open(path) as f:
                # each line in file represents one block
                # we can skip the blocks we already have
                skip_lines = last_block - last_read
                remaining = drop(skip_lines, f)
                for lines in partition_all(chunk_size, remaining):
                    Blocks.process_multi(map(json.loads, lines), True)
            last_block = num
        last_read = num

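# Illustrative sketch (an assumption, not part of the original code): from_checkpoints
# expects checkpoint files named after the last block they contain, one JSON block per
# line, e.g.:
#
#   checkpoints/1000000.json.lst   -> blocks       1 .. 1000000
#   checkpoints/2000000.json.lst   -> blocks 1000001 .. 2000000
#
# Resuming with Blocks.head_num() == 1500000 and last_read == 1000000 gives
# skip_lines == 500000, so the first half of checkpoints/2000000.json.lst is dropped
# before Blocks.process_multi() sees the remaining lines. The file names and block
# numbers here are hypothetical.
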
def from_steemd(self, is_initial_sync=False, chunk_size=1000):
    """Fast sync strategy: read/process blocks in batches."""
    steemd = self._steem

    lbound = Blocks.head_num() + 1
    ubound = steemd.last_irreversible()
    if self._conf.get('test_max_block') and self._conf.get('test_max_block') < ubound:
        ubound = self._conf.get('test_max_block')

    count = ubound - lbound
    if count < 1:
        return

    massive_blocks_data_provider = None
    databases = None
    if self._conf.get('hived_database_url'):
        databases = MassiveBlocksDataProviderHiveDb.Databases(self._conf)
        massive_blocks_data_provider = MassiveBlocksDataProviderHiveDb(
            databases,
            self._conf.get('max_batch'),
            lbound,
            ubound,
            can_continue_thread,
            set_exception_thrown)
    else:
        massive_blocks_data_provider = MassiveBlocksDataProviderHiveRpc(
            self._conf,
            self._steem,
            self._conf.get('max_workers'),
            self._conf.get('max_workers'),
            self._conf.get('max_batch'),
            lbound,
            ubound,
            can_continue_thread,
            set_exception_thrown)

    _process_blocks_from_provider(self, massive_blocks_data_provider, is_initial_sync, lbound, ubound)

    if databases:
        databases.close()

def listen(cls):
    trail_blocks = Conf.get('trail_blocks')
    assert trail_blocks >= 0
    assert trail_blocks < 25

    steemd = SteemClient.instance()
    hive_head = Blocks.head_num()
    for block in steemd.stream_blocks(hive_head + 1, trail_blocks, max_gap=40):
        start_time = time.perf_counter()

        query("START TRANSACTION")
        num = Blocks.process(block)
        follows = Follow.flush(trx=False)
        accts = Accounts.flush(trx=False, period=8)
        CachedPost.dirty_paidouts(block['timestamp'])
        cnt = CachedPost.flush(trx=False)
        query("COMMIT")

        ms = (time.perf_counter() - start_time) * 1000
        print("[LIVE] Got block %d at %s --% 4d txs,% 3d posts,% 3d edits,"
              "% 3d payouts,% 3d votes,% 3d accounts,% 3d follows --% 5dms%s"
              % (num, block['timestamp'], len(block['transactions']),
                 cnt['insert'], cnt['update'], cnt['payout'], cnt['upvote'],
                 accts, follows, int(ms), ' SLOW' if ms > 1000 else ''))

        # once per hour, update accounts
        if num % 1200 == 0:
            Accounts.dirty_oldest(10000)
            Accounts.flush(trx=True)
            #Accounts.update_ranks()

        # once a minute, update chain props
        if num % 20 == 0:
            cls._update_chain_state(steemd)

def show_info(_db):
    database_head_block = Blocks.head_num()

    sql = "SELECT level, patch_date, patched_to_revision FROM hive_db_patch_level ORDER BY level DESC LIMIT 1"
    patch_level_data = _db.query_row(sql)

    from hive.utils.misc import show_app_version
    show_app_version(log, database_head_block, patch_level_data)

def _update_chain_state(self):
    """Update basic state props (head block, feed price) in db."""
    state = self._steem.gdgp_extended()
    self._db.query("""UPDATE hive_state SET block_num = :block_num,
                   steem_per_mvest = :spm, usd_per_steem = :ups,
                   sbd_per_steem = :sps, dgpo = :dgpo""",
                   block_num=Blocks.head_num(),
                   spm=state['steem_per_mvest'],
                   ups=state['usd_per_steem'],
                   sps=state['sbd_per_steem'],
                   dgpo=json.dumps(state['dgpo']))
    return state['dgpo']['head_block_number']

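# Sketch of the hive_state columns implied by the UPDATE above (the bind parameters
# require them to exist); the exact types are assumptions, not the project's actual DDL:
#
#   hive_state(block_num, steem_per_mvest, usd_per_steem, sbd_per_steem, dgpo)
#
# dgpo is stored as JSON text (json.dumps of the dynamic global properties), and its
# head_block_number field is what the function returns.
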
def sync_from_steemd():
    is_initial_sync = DbState.is_initial_sync()
    steemd = get_adapter()

    lbound = Blocks.head_num() + 1
    ubound = steemd.last_irreversible()
    if ubound <= lbound:
        return

    _abort = False
    try:
        print("[SYNC] start block %d, +%d to sync" % (lbound, ubound - lbound + 1))
        timer = Timer(ubound - lbound, entity='block', laps=['rps', 'wps'])
        while lbound < ubound:
            to = min(lbound + 1000, ubound)
            timer.batch_start()
            blocks = steemd.get_blocks_range(lbound, to)
            timer.batch_lap()
            Blocks.process_multi(blocks, is_initial_sync)
            timer.batch_finish(len(blocks))
            date = blocks[-1]['timestamp']
            print(timer.batch_status("[SYNC] Got block %d @ %s" % (to - 1, date)))
            lbound = to

    except KeyboardInterrupt:
        traceback.print_exc()
        print("\n\n[SYNC] Aborted.. cleaning up..")
        _abort = True

    if not is_initial_sync:
        # Follows flushing may need to be moved closer to core (i.e. moved
        # into main block transactions). Important to keep in sync since
        # we need to prevent expensive recounts. This will fail if we aborted
        # in the middle of a transaction, meaning data loss. Better than
        # forcing it, however, since in-memory cache will be out of sync
        # with db state.
        Follow.flush(trx=True)

        # This flush is low importance; accounts are swept regularly.
        if not _abort:
            Accounts.flush(trx=True)

        # If this flush fails, all that could potentially be lost here is
        # edits and pre-payout votes. If the post has not been paid out yet,
        # then the worst case is it will be synced upon payout. If the post
        # is already paid out, worst case is to lose an edit.
        CachedPost.flush(trx=True)

    if _abort:
        print("[SYNC] Aborted")
        exit()

def from_steemd(cls, is_initial_sync=False, chunk_size=1000):
    """Fast sync strategy: read/process blocks in batches."""
    steemd = SteemClient.instance()
    lbound = Blocks.head_num() + 1
    ubound = steemd.last_irreversible()
    count = ubound - lbound
    if count < 1:
        return

    _abort = False
    try:
        print("[SYNC] start block %d, +%d to sync" % (lbound, count))
        timer = Timer(count, entity='block', laps=['rps', 'wps'])
        while lbound < ubound:
            timer.batch_start()

            # fetch blocks
            to = min(lbound + chunk_size, ubound)
            blocks = steemd.get_blocks_range(lbound, to)
            lbound = to
            timer.batch_lap()

            # process blocks
            Blocks.process_multi(blocks, is_initial_sync)
            timer.batch_finish(len(blocks))
            date = blocks[-1]['timestamp']
            print(timer.batch_status("[SYNC] Got block %d @ %s" % (to - 1, date)))

    except KeyboardInterrupt:
        traceback.print_exc()
        print("\n\n[SYNC] Aborted.. cleaning up..")
        _abort = True

    if not is_initial_sync:
        # This flush is low importance; accounts are swept regularly.
        if not _abort:
            Accounts.flush(trx=True)

        # If this flush fails, all that could potentially be lost here is
        # edits and pre-payout votes. If the post has not been paid out yet,
        # then the worst case is it will be synced upon payout. If the post
        # is already paid out, worst case is to lose an edit.
        CachedPost.flush(trx=True)

    if _abort:
        print("[SYNC] Aborted")
        exit()

def sync_from_checkpoints():
    last_block = Blocks.head_num()

    _fn = lambda f: [int(f.split('/')[-1].split('.')[0]), f]
    mydir = os.path.dirname(os.path.realpath(__file__ + "/../.."))
    files = map(_fn, glob.glob(mydir + "/checkpoints/*.json.lst"))
    files = sorted(files, key=lambda f: f[0])

    last_read = 0
    for (num, path) in files:
        if last_block < num:
            print("[SYNC] Load {} -- last block: {}".format(path, last_block))
            skip_lines = last_block - last_read
            sync_from_file(path, skip_lines, 250)
            last_block = num
        last_read = num

def listen(self): """Live (block following) mode.""" trail_blocks = self._conf.get('trail_blocks') assert trail_blocks >= 0 assert trail_blocks <= 100 # debug: no max gap if disable_sync in effect max_gap = None if self._conf.get('test_disable_sync') else 100 steemd = self._steem hive_head = Blocks.head_num() for block in steemd.stream_blocks(hive_head + 1, trail_blocks, max_gap): start_time = perf() self._db.query("START TRANSACTION") num = Blocks.process(block) follows = Follow.flush(trx=False) accts = Accounts.flush(steemd, trx=False, spread=8) CachedPost.dirty_paidouts(block['timestamp']) cnt = CachedPost.flush(steemd, trx=False) self._db.query("COMMIT") ms = (perf() - start_time) * 1000 log.info( "[LIVE] Got block %d at %s --% 4d txs,% 3d posts,% 3d edits," "% 3d payouts,% 3d votes,% 3d counts,% 3d accts,% 3d follows" " --% 5dms%s", num, block['timestamp'], len(block['transactions']), cnt['insert'], cnt['update'], cnt['payout'], cnt['upvote'], cnt['recount'], accts, follows, ms, ' SLOW' if ms > 1000 else '') if num % 1200 == 0: #1hr log.warning("head block %d @ %s", num, block['timestamp']) log.info("[LIVE] hourly stats") Accounts.fetch_ranks() #Community.recalc_pending_payouts() if num % 200 == 0: #10min Community.recalc_pending_payouts() if num % 100 == 0: #5min log.info("[LIVE] 5-min stats") Accounts.dirty_oldest(500) if num % 20 == 0: #1min self._update_chain_state()
def listen(cls): """Live (block following) mode.""" trail_blocks = Conf.get('trail_blocks') assert trail_blocks >= 0 assert trail_blocks <= 100 # debug: no max gap if disable_sync in effect max_gap = None if Conf.get('disable_sync') else 100 steemd = SteemClient.instance() hive_head = Blocks.head_num() for block in steemd.stream_blocks(hive_head + 1, trail_blocks, max_gap): start_time = perf() query("START TRANSACTION") num = Blocks.process(block) follows = Follow.flush(trx=False) accts = Accounts.flush(trx=False, spread=8) CachedPost.dirty_paidouts(block['timestamp']) cnt = CachedPost.flush(trx=False) query("COMMIT") ms = (perf() - start_time) * 1000 log.info( "[LIVE] Got block %d at %s --% 4d txs,% 3d posts,% 3d edits," "% 3d payouts,% 3d votes,% 3d accts,% 3d follows --% 5dms%s", num, block['timestamp'], len(block['transactions']), cnt['insert'], cnt['update'], cnt['payout'], cnt['upvote'], accts, follows, int(ms), ' SLOW' if ms > 1000 else '') # once per hour, update accounts if num % 1200 == 0: Accounts.dirty_oldest(10000) Accounts.flush(trx=True) #Accounts.update_ranks() # once a minute, update chain props if num % 20 == 0: cls._update_chain_state(steemd)
def from_checkpoints(cls, chunk_size=1000):
    last_block = Blocks.head_num()

    tuplize = lambda path: [int(path.split('/')[-1].split('.')[0]), path]
    basedir = os.path.dirname(os.path.realpath(__file__ + "/../.."))
    files = glob.glob(basedir + "/checkpoints/*.json.lst")
    tuples = sorted(map(tuplize, files), key=lambda f: f[0])

    last_read = 0
    for (num, path) in tuples:
        if last_block < num:
            print("[SYNC] Load %s -- last block: %d" % (path, last_block))
            with open(path) as f:
                # each line in file represents one block
                # we can skip the blocks we already have
                skip_lines = last_block - last_read
                remaining = drop(skip_lines, f)
                for lines in partition_all(chunk_size, remaining):
                    Blocks.process_multi(map(json.loads, lines), True)
            last_block = num
        last_read = num

def from_dpayd(self, is_initial_sync=False, chunk_size=1000):
    """Fast sync strategy: read/process blocks in batches."""
    # pylint: disable=no-self-use
    dpayd = self._dpay
    lbound = Blocks.head_num() + 1
    ubound = self._conf.get('test_max_block') or dpayd.last_irreversible()
    count = ubound - lbound
    if count < 1:
        return

    log.info("[SYNC] start block %d, +%d to sync", lbound, count)
    timer = Timer(count, entity='block', laps=['rps', 'wps'])
    while lbound < ubound:
        timer.batch_start()

        # fetch blocks
        to = min(lbound + chunk_size, ubound)
        blocks = dpayd.get_blocks_range(lbound, to)
        lbound = to
        timer.batch_lap()

        # process blocks
        Blocks.process_multi(blocks, is_initial_sync)
        timer.batch_finish(len(blocks))
        _prefix = ("[SYNC] Got block %d @ %s" % (to - 1, blocks[-1]['timestamp']))
        log.info(timer.batch_status(_prefix))

    if not is_initial_sync:
        # This flush is low importance; accounts are swept regularly.
        Accounts.flush(dpayd, trx=True)

        # If this flush fails, all that could potentially be lost here is
        # edits and pre-payout votes. If the post has not been paid out yet,
        # then the worst case is it will be synced upon payout. If the post
        # is already paid out, worst case is to lose an edit.
        CachedPost.flush(dpayd, trx=True)

def listen(self, trail_blocks, max_sync_block, do_stale_block_check):
    """Live (block following) mode.

    trail_blocks - how many blocks must be collected before the oldest one is
        processed (the delay, in blocks, between collecting and processing)
    max_sync_block - block limit for the sync; the function returns once it is reached
    do_stale_block_check - check that the last collected block is no older than 60s
    """

    # debug: no max gap if disable_sync in effect
    max_gap = None if self._conf.get('test_disable_sync') else 100

    steemd = self._steem
    hive_head = Blocks.head_num()

    log.info("[LIVE SYNC] Entering listen with HM head: %d", hive_head)

    if hive_head >= max_sync_block:
        self.refresh_sparse_stats()
        log.info("[LIVE SYNC] Exiting due to block limit exceeded: synced block number: %d, max_sync_block: %d",
                 hive_head, max_sync_block)
        return

    for block in self._stream_blocks(hive_head + 1, can_continue_thread, set_exception_thrown,
                                     trail_blocks, max_gap, do_stale_block_check):
        if not can_continue_thread():
            break

        num = block.get_num()
        log.info("[LIVE SYNC] =====> About to process block %d with timestamp %s",
                 num, block.get_date())

        start_time = perf()

        Blocks.process_multi([block], False)
        otm = OPSM.log_current("Operations present in the processed blocks")
        ftm = FSM.log_current("Flushing times")

        ms = (perf() - start_time) * 1000
        log.info("[LIVE SYNC] <===== Processed block %d at %s --% 4d txs"
                 " --% 5dms%s", num, block.get_date(),
                 block.get_number_of_transactions(), ms,
                 ' SLOW' if ms > 1000 else '')
        log.info("[LIVE SYNC] Current system time: %s", datetime.now().strftime("%H:%M:%S"))

        if num % 1200 == 0: #1hour
            log.warning("head block %d @ %s", num, block.get_date())
            log.info("[LIVE SYNC] hourly stats")

            log.info("[LIVE SYNC] filling payout_stats_view executed")
            with ThreadPoolExecutor(max_workers=2) as executor:
                executor.submit(PayoutStats.generate)
                executor.submit(Mentions.refresh)

        if num % 200 == 0: #10min
            update_communities_posts_and_rank(self._db)

        if num % 20 == 0: #1min
            self._update_chain_state()

        PC.broadcast(BroadcastObject('sync_current_block', num, 'blocks'))
        FSM.next_blocks()
        OPSM.next_blocks()

        if num >= max_sync_block:
            log.info("Stopping [LIVE SYNC] because of specified block limit: %d", max_sync_block)
            break

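# Worked example of the maintenance cadence above, assuming the chain's nominal
# 3-second block interval (which is what the in-code comments rely on):
#
#   num % 20   == 0  ->   20 blocks * 3 s =   60 s  (~1 minute:  chain-state refresh)
#   num % 200  == 0  ->  200 blocks * 3 s =  600 s  (~10 minutes: community posts/rank update)
#   num % 1200 == 0  -> 1200 blocks * 3 s = 3600 s  (~1 hour:    payout stats and mentions refresh)
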
def run(self):
    old_sig_int_handler = getsignal(SIGINT)
    old_sig_term_handler = getsignal(SIGTERM)

    set_handlers()

    Community.start_block = self._conf.get("community_start_block")

    # ensure db schema up to date, check app status
    DbState.initialize()
    if self._conf.get("log_explain_queries"):
        is_superuser = self._db.query_one("SELECT is_superuser()")
        assert is_superuser, 'The parameter --log_explain_queries=true can be used only when connecting to the database with SUPERUSER privileges'

    _is_consistency = Blocks.is_consistency()
    if not _is_consistency:
        raise RuntimeError("Fatal error related to `hive_blocks` consistency")

    show_info(self._db)

    paths = self._conf.get("mock_block_data_path") or []
    for path in paths:
        self.load_mock_data(path)

    mock_vops_data_path = self._conf.get("mock_vops_data_path")
    if mock_vops_data_path:
        MockVopsProvider.load_block_data(mock_vops_data_path)
        # MockVopsProvider.print_data()

    # prefetch id->name and id->rank memory maps
    Accounts.load_ids()

    # community stats
    update_communities_posts_and_rank(self._db)

    last_imported_block = Blocks.head_num()
    hived_head_block = self._conf.get('test_max_block') or self._steem.last_irreversible()

    log.info("target_head_block : %s", hived_head_block)

    if DbState.is_initial_sync():
        DbState.before_initial_sync(last_imported_block, hived_head_block)
        # resume initial sync
        self.initial()
        if not can_continue_thread():
            restore_handlers()
            return
        current_imported_block = Blocks.head_num()
        # because we cannot interrupt long sql operations, we restore the default CTRL+C
        # behavior for the duration of the post-initial-sync actions
        restore_handlers()
        try:
            DbState.finish_initial_sync(current_imported_block)
        except KeyboardInterrupt:
            log.info("Break finish initial sync")
            set_exception_thrown()
            return
        set_handlers()
    else:
        # recover from fork
        Blocks.verify_head(self._steem)

    self._update_chain_state()

    global trail_blocks
    trail_blocks = self._conf.get('trail_blocks')
    assert trail_blocks >= 0
    assert trail_blocks <= 100

def head_state(*args):
    _ = args  # JSONRPC injects 4 arguments here
    steemd_head = get_adapter().head_block()
    hive_head = Blocks.head_num()
    diff = steemd_head - hive_head
    return dict(steemd=steemd_head, hive=hive_head, diff=diff)

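# Hypothetical usage sketch (the block numbers are invented): head_state reports how far
# the hive index lags behind the node's last-known head block.
#
#   >>> head_state()
#   {'steemd': 45000010, 'hive': 45000007, 'diff': 3}
#
# A positive diff means hive is that many blocks behind steemd's head.
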