Example #1
def reset_autovac():
    """Initializes per-table autovacuum/autoanalyze params"""
    # consider using scale_factor = 0 with flat thresholds:
    #   autovacuum_vacuum_threshold, autovacuum_analyze_threshold

    autovac_config = {
        # default
        'hive_accounts': (0.2, 0.1),
        'hive_state': (0.2, 0.1),
        'hive_reblogs': (0.2, 0.1),
        'hive_payments': (0.2, 0.1),
        # more aggressive
        'hive_posts': (0.010, 0.005),
        'hive_post_tags': (0.010, 0.005),
        'hive_feed_cache': (0.010, 0.005),
        # very aggressive
        'hive_posts_cache': (0.0050, 0.0025),  # @36M, ~2/day, 3/day (~240k new tuples daily)
        'hive_blocks': (0.0100, 0.0014),       # @20M, ~1/week, 1/day
        'hive_follows': (0.0050, 0.0025)       # @47M, ~1/day, 3/day (~300k new tuples daily)
    }

    for table, (vacuum_sf, analyze_sf) in autovac_config.items():
        sql = """ALTER TABLE %s SET (autovacuum_vacuum_scale_factor = %s,
                                     autovacuum_analyze_scale_factor = %s)"""
        Db.instance().query(sql % (table, vacuum_sf, analyze_sf))
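The comment at the top of reset_autovac() points at an alternative: zero scale factors combined with flat tuple thresholds, so vacuum/analyze trigger after a fixed number of changed tuples regardless of table size. A minimal sketch of that variant (the threshold values are illustrative assumptions, not from the source):

def reset_autovac_flat():
    """Sketch: flat autovacuum/autoanalyze thresholds (scale_factor = 0)."""
    flat_config = {
        # table: (vacuum_threshold, analyze_threshold) -- illustrative values
        'hive_posts_cache': (50000, 25000),
        'hive_follows': (50000, 25000),
    }
    for table, (vac_thresh, an_thresh) in flat_config.items():
        sql = """ALTER TABLE %s SET (autovacuum_vacuum_scale_factor = 0,
                                     autovacuum_analyze_scale_factor = 0,
                                     autovacuum_vacuum_threshold = %s,
                                     autovacuum_analyze_threshold = %s)"""
        Db.instance().query(sql % (table, vac_thresh, an_thresh))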
Example #2
File: cli.py  Project: tiotdev/hivemind
def run():
    """Run the proper routine as indicated by hive --mode argument."""

    conf = Conf.init_argparse()
    Db.set_shared_instance(conf.db())
    mode = conf.mode()

    if mode == 'server':
        from hive.server.serve import run_server
        run_server(conf=conf)

    elif mode == 'sync':
        from hive.indexer.sync import Sync
        Sync(conf=conf).run()

    elif mode == 'status':
        from hive.db.db_state import DbState
        print(DbState.status())

    #elif mode == 'sync-profile':
    #    from hive.indexer.sync import Sync
    #    from hive.utils.profiler import Profiler
    #    with Profiler():
    #        Sync(conf=conf).run()

    else:
        raise Exception("unknown run mode %s" % mode)
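A minimal sketch of how this dispatcher is typically wired up as a script entry point (the __main__ guard is an assumption, not shown in the source):

if __name__ == '__main__':
    run()  # e.g. `hive sync` or `hive server`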
Example #3
    def db(self):
        """Get a configured instance of Db."""
        if self._db is None:
            url = self.get('database_url')
            enable_autoexplain = self.get('log_explain_queries')
            assert url, ('--database-url (or DATABASE_URL env) not specified; '
                         'e.g. postgresql://user:pass@localhost:5432/hive')
            self._db = Db(url, "root db creation", enable_autoexplain )
            log.info("The database created...")

        return self._db
Example #4
File: cli.py  Project: sshyran/hivemind
def run():
    """Run the service specified in the `--mode` argument."""

    conf = Conf.init_argparse()
    Db.set_shared_instance(conf.db())
    mode = conf.mode()

    if conf.get('test_profile'):
        from hive.utils.profiler import Profiler
        with Profiler():
            launch_mode(mode, conf)
    else:
        launch_mode(mode, conf)
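launch_mode() itself is not shown in this example; a plausible sketch, mirroring the dispatch from Example #2 (the body below is an assumption, not the project's actual implementation):

def launch_mode(mode, conf):
    """Sketch: dispatch on run mode, as in Example #2."""
    if mode == 'server':
        from hive.server.serve import run_server
        run_server(conf=conf)
    elif mode == 'sync':
        from hive.indexer.sync import Sync
        Sync(conf=conf).run()
    elif mode == 'status':
        from hive.db.db_state import DbState
        print(DbState.status())
    else:
        raise Exception("unknown run mode %s" % mode)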
Example #5
 def db(self):
     """Get a configured instance of Db."""
     if not self._db:
         url = self.get('database_url')
         assert url, ('--database-url (or DATABASE_URL env) not specified; '
                      'e.g. postgresql://user:pass@localhost:5432/hive')
         self._db = Db(url)
     return self._db
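Typical usage of this lazy accessor, as seen in Examples #2 and #4: the first call constructs the Db, later calls return the cached instance.

conf = Conf.init_argparse()
Db.set_shared_instance(conf.db())  # conf.db() is idempotent after the first call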
Example #6
def setup():
    # initialize schema
    engine = Db.create_engine(echo=False)
    build_metadata().create_all(engine)

    # tune auto vacuum/analyze
    reset_autovac()

    # default rows
    sqls = [
        "INSERT INTO hive_state (block_num, db_version, steem_per_mvest, usd_per_steem, sbd_per_steem, dgpo) VALUES (0, 3, 0, 0, 0, '')",
        "INSERT INTO hive_blocks (num, hash, created_at) VALUES (0, '0000000000000000000000000000000000000000', '2016-03-24 16:04:57')",
        "INSERT INTO hive_accounts (name, created_at) VALUES ('miners',    '2016-03-24 16:05:00')",
        "INSERT INTO hive_accounts (name, created_at) VALUES ('null',      '2016-03-24 16:05:00')",
        "INSERT INTO hive_accounts (name, created_at) VALUES ('temp',      '2016-03-24 16:05:00')",
        "INSERT INTO hive_accounts (name, created_at) VALUES ('initminer', '2016-03-24 16:05:00')"
    ]
    for sql in sqls:
        Db.instance().query(sql)
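A quick sanity check of the seeded rows (a sketch; query_one returning a scalar is inferred from Example #10 below):

db = Db.instance()
assert db.query_one("SELECT COUNT(*) FROM hive_accounts") == 4  # miners, null, temp, initminer
assert db.query_one("SELECT num FROM hive_blocks") == 0         # genesis block row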
Example #7
File: cli.py  Project: dsites/dsite-lab
def run():
    """Run the proper routine as indicated by hive --mode argument."""

    conf = Conf.init_argparse()
    Db.set_shared_instance(conf.db())
    mode = '/'.join(conf.get('mode'))

    if mode == 'server':
        from hive.server.serve import run_server
        run_server(conf=conf)

    elif mode == 'sync':
        from hive.indexer.sync import Sync
        Sync(conf=conf).run()

    elif mode == 'status':
        from hive.db.db_state import DbState
        print(DbState.status())

    else:
        raise Exception("unknown run mode %s" % mode)
Example #8
class Databases:
    def __init__(self, conf):
        self._db_root = Db(conf.get('hived_database_url'),
                           "MassiveBlocksProvider.Root",
                           conf.get('log_explain_queries'))
        self._db_operations = Db(conf.get('hived_database_url'),
                                 "MassiveBlocksProvider.OperationsData",
                                 conf.get('log_explain_queries'))
        self._db_blocks_data = Db(conf.get('hived_database_url'),
                                  "MassiveBlocksProvider.BlocksData",
                                  conf.get('log_explain_queries'))

        assert self._db_root
        assert self._db_operations
        assert self._db_blocks_data

    def close(self):
        self._db_root.close()
        self._db_operations.close()
        self._db_blocks_data.close()

    def get_root(self):
        return self._db_root

    def get_operations(self):
        return self._db_operations

    def get_blocks_data(self):
        return self._db_blocks_data
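Databases opens three connections to the same hived database so root, operations, and block-data queries do not contend for one connection. A sketch of safe usage that guarantees close() runs (the try/finally wrapper is an assumption):

dbs = Databases(conf)
try:
    root_db = dbs.get_root()
    # ... fetch blocks via dbs.get_blocks_data() and dbs.get_operations() ...
finally:
    dbs.close()  # closes all three connections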
Example #10
import logging
import collections

from hive.db.adapter import Db
from hive.db.db_state import DbState

from hive.utils.normalize import load_json_key
from hive.indexer.accounts import Accounts
from hive.indexer.cached_post import CachedPost
from hive.indexer.feed_cache import FeedCache

from hive.community.roles import is_community_post_valid

log = logging.getLogger(__name__)
DB = Db.instance()

class Posts:
    """Handles critical/core post ops and data."""

    # LRU cache for (author-permlink -> id) lookup (~400mb per 1M entries)
    CACHE_SIZE = 2000000
    _ids = collections.OrderedDict()
    _hits = 0
    _miss = 0

    @classmethod
    def last_id(cls):
        """Get the last indexed post id."""
        sql = "SELECT MAX(id) FROM hive_posts WHERE is_deleted = '0'"
        return DB.query_one(sql) or 0
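The cache attributes above (_ids, _hits, _miss, CACHE_SIZE) outline an LRU keyed by author/permlink. A minimal sketch of how such a lookup could work with OrderedDict; the _get_id method is hypothetical, and bind-parameter support in query_one is an assumption:

    @classmethod
    def _get_id(cls, author, permlink):
        """Hypothetical LRU lookup: (author, permlink) -> post id."""
        url = author + '/' + permlink
        if url in cls._ids:
            cls._hits += 1
            cls._ids.move_to_end(url)  # mark as most recently used
            return cls._ids[url]
        cls._miss += 1
        sql = "SELECT id FROM hive_posts WHERE author = :a AND permlink = :p"
        _id = DB.query_one(sql, a=author, p=permlink)  # bind params assumed
        cls._ids[url] = _id
        if len(cls._ids) > cls.CACHE_SIZE:
            cls._ids.popitem(last=False)  # evict the least recently used entry
        return _id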
Example #11
"""Hive server and API tests."""
from hive.conf import Conf
from hive.db.adapter import Db

Db.set_shared_instance(Conf.init_test().db())
Example #12
 def db(cls):
     """Get a db adapter instance."""
     if not cls._db:
         cls._db = Db.instance()
     return cls._db
Example #13
def teardown():
    """Drop all tables"""
    engine = Db.create_engine(echo=True)
    metadata = build_metadata()
    metadata.drop_all(engine)
Example #14
def teardown():
    engine = Db.create_engine(echo=True)
    metadata = build_metadata()
    metadata.drop_all(engine)
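teardown() is the inverse of setup() from Example #6; a sketch of a full rebuild pairing the two (the ordering is the only assumption):

def rebuild():
    """Sketch: drop and recreate the schema from scratch."""
    teardown()  # drop all tables
    setup()     # recreate schema, tune autovacuum, insert genesis rows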
Example #15
class Conf():
    """ Manages sync/server configuration via args, ENVs, and hive.conf. """

    def __init__(self):
        self._args = None
        self._env = None
        self._db = None
        self._steem = None
        self.arguments = None

    def init_argparse(self, strict=True, **kwargs):
        """Read hive config (CLI arg > ENV var > config)"""

        #pylint: disable=line-too-long
        parser = configargparse.get_arg_parser(
            default_config_files=['./hive.conf'],
            **kwargs)
        add = parser.add

        # runmodes: sync, server, status
        add('mode', nargs='*', default=['sync'])

        # common
        add('--database-url', env_var='DATABASE_URL', required=False, help='database connection url', default='')
        add('--steemd-url', env_var='STEEMD_URL', required=False, help='steemd/jussi endpoint', default='{"default" : "https://api.hive.blog"}')
        add('--muted-accounts-url', env_var='MUTED_ACCOUNTS_URL', required=False, help='url to flat list of muted accounts', default='https://raw.githubusercontent.com/hivevectordefense/irredeemables/master/full.txt')
        add('--blacklist-api-url', env_var='BLACKLIST_API_URL', required=False, help='url to access blacklist api', default='https://blacklist.usehive.com')

        # server
        add('--http-server-port', type=int, env_var='HTTP_SERVER_PORT', default=8080)
        add('--prometheus-port', type=int, env_var='PROMETHEUS_PORT', required=False, help='if specified, runs a prometheus daemon on the given port, exposing statistics and performance data')

        # sync
        add('--max-workers', type=int, env_var='MAX_WORKERS', help='max workers for batch requests', default=6)
        add('--max-batch', type=int, env_var='MAX_BATCH', help='max chunk size for batch requests', default=35)
        add('--max-retries', type=int, env_var='MAX_RETRIES', help='max number of retries after a request failure; -1 (default) means no limit', default=-1)
        add('--trail-blocks', type=int, env_var='TRAIL_BLOCKS', help='number of blocks to trail head by', default=2)
        add('--sync-to-s3', type=strtobool, env_var='SYNC_TO_S3', help='alternative healthcheck for background sync service', default=False)
        add('--hived-database-url', env_var='HIVED_DATABASE_URL', required=False, help='Hived blocks database connection url', default='')

        # test/debug
        add('--log-level', env_var='LOG_LEVEL', default='INFO')
        add('--test-disable-sync', type=strtobool, env_var='TEST_DISABLE_SYNC', help='(debug) skip sync and sweep; jump to block streaming', default=False)
        add('--test-max-block', type=int, env_var='TEST_MAX_BLOCK', help='(debug) only sync to given block, for running sync test', default=None)
        add('--test-skip-ais-phase', env_var='TEST_SKIP_AIS_PHASE', help='(debug) skip the After-Initial-Sync phase; useful to go straight into live sync, or to exit when TEST_MAX_BLOCK is used', action='store_true')
        add('--test-profile', type=strtobool, env_var='TEST_PROFILE', help='(debug) profile execution', default=False)
        add('--log-request-times', env_var='LOG_REQUEST_TIMES', help='(debug) generate a log of request processing times', action='store_true')
        add('--log-virtual-op-calls', env_var='LOG_VIRTUAL_OP_CALLS', help='(debug) log virtual op calls and responses', default=False)
        add('--mock-block-data-path', type=str, nargs='+', env_var='MOCK_BLOCK_DATA_PATH', help='(debug/testing) load additional data from block data file')
        add('--mock-vops-data-path', type=str, env_var='MOCK_VOPS_DATA_PATH', help='(debug/testing) load additional data from virtual operations data file')
        add('--community-start-block', type=int, env_var='COMMUNITY_START_BLOCK', default=37500000)
        add('--log_explain_queries', type=strtobool, env_var='LOG_EXPLAIN_QUERIES', help='(debug) log EXPLAIN ANALYZE output for specific queries; requires a db superuser', default=False)

        # logging
        add('--log-timestamp', help='Output timestamp in log', action='store_true')
        add('--log-epoch', help='Output unix epoch in log', action='store_true')
        add('--log-mask-sensitive-data', help='Mask sensitive data, e.g. passwords', action='store_true')

        add('--pid-file', type=str, env_var='PID_FILE', help='dump the current process pid into the specified file', default=None)

        add('--auto-http-server-port', nargs='+', type=int, help='Hivemind will listen on first available port from this range')

        # needed for e.g. tests - other args may be present
        args = (parser.parse_args() if strict
                else parser.parse_known_args()[0])

        self._args = vars(args)
        self.arguments = parser._actions

        # configure logger and print config
        root = logging.getLogger()
        root.setLevel(self.log_level())

        try:
            if 'auto_http_server_port' in vars(args) and vars(args)['auto_http_server_port'] is not None:
                port_range = vars(args)['auto_http_server_port']
                port_range_len = len(port_range)
                if port_range_len == 0 or port_range_len > 2:
                    raise ValueError("auto-http-server-port expect maximum two values, minimum one")
                if port_range_len == 2 and port_range[0] > port_range[1]:
                    raise ValueError("port min value is greater than port max value")
        except Exception as ex:
            root.error("Value error: {}".format(ex))
            exit(1)

        # Print command line args, hiding the db connection string
        # (useful e.g. on continuous integration servers).
        from sys import argv
        if self.get('log_mask_sensitive_data'):
            my_args = []
            upcoming_connection_string = False
            for elem in argv[1:]:
                if upcoming_connection_string:
                    upcoming_connection_string = False
                    my_args.append('MASKED')
                    continue
                if elem == '--database-url':
                    upcoming_connection_string = True
                my_args.append(elem)
            root.info("Used command line args: %s", " ".join(my_args))
        else:
            root.info("Used command line args: %s", " ".join(argv[1:]))

        # uncomment for full list of program args
        #args_list = ["--" + k + " " + str(v) for k,v in vars(args).items()]
        #root.info("Full command line args: %s", " ".join(args_list))

        if self.mode() == 'server':
            #DbStats.SLOW_QUERY_MS = 750
            DbStats.SLOW_QUERY_MS = 200 # TODO

    def __enter__(self):
        return self

    def __exit__(self, exc_type, value, traceback):
        self.disconnect()

    def args(self):
        """Get the raw Namespace object as generated by configargparse"""
        return self._args

    def steem(self):
        """Get a SteemClient instance, lazily initialized"""
        if not self._steem:
            from json import loads
            self._steem = SteemClient(
                url=loads(self.get('steemd_url')),
                max_batch=self.get('max_batch'),
                max_workers=self.get('max_workers'),
                max_retries=self.get('max_retries'))
        return self._steem

    def db(self):
        """Get a configured instance of Db."""
        if self._db is None:
            url = self.get('database_url')
            enable_autoexplain = self.get('log_explain_queries')
            assert url, ('--database-url (or DATABASE_URL env) not specified; '
                         'e.g. postgresql://user:pass@localhost:5432/hive')
            self._db = Db(url, "root db creation", enable_autoexplain)
            log.info("Database instance created...")

        return self._db

    def get(self, param):
        """Reads a single property, e.g. `database_url`."""
        assert self._args, "run init_argparse()"
        return self._args[param]

    def mode(self):
        """Get the CLI runmode.

        - `server`: API server
        - `sync`: db sync process
        - `status`: status info dump
        """
        return '/'.join(self.get('mode'))

    def log_level(self):
        """Get `logger`s internal int level from config string."""
        return int_log_level(self.get('log_level'))

    def pid_file(self):
        """Get optional pid_file name to put current process pid in"""
        return self._args.get("pid_file", None)

    def generate_completion(self):
        arguments = []
        for arg in self.arguments:
            arguments.extend(arg.option_strings)
        arguments = " ".join(arguments)
        with open('hive-completion.bash', 'w') as file:
            file.writelines([
                "#!/bin/bash\n",
                "# to run type: source hive-completion.bash\n\n",
                "# if you want to have completion everywhere, execute theese commands\n",
                "# ln $PWD/hive-completion.bash $HOME/.local/\n",
                '# echo "source $HOME/.local/hive-completion.bash" >> $HOME/.bashrc\n',
                "# source $HOME/.bashrc\n\n"
                f'complete -f -W "{arguments}" hive\n',
                "\n"
            ])

    def disconnect(self):
        if self._db is not None:
            self._db.close()
            self._db.close_engine()
            self._db = None
            log.info("The database is disconnected...")