def __init__(self, conf, logger):
    super(TimescaleListenStore, self).__init__(logger)

    timescale.init_db_connection(conf['SQLALCHEMY_TIMESCALE_URI'])

    # Initialize the brainzutils cache
    self.ns = conf['REDIS_NAMESPACE']
    init_cache(host=conf['REDIS_HOST'], port=conf['REDIS_PORT'], namespace=conf['REDIS_NAMESPACE'])
    self.dump_temp_dir_root = conf.get('LISTEN_DUMP_TEMP_DIR_ROOT', tempfile.mkdtemp())
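
A minimal usage sketch for the constructor above. The config keys match those read in __init__, but every value here is a placeholder, not a project default:

import logging

conf = {
    'SQLALCHEMY_TIMESCALE_URI': 'postgresql://lb_ts:lb_ts@localhost:5432/listenbrainz_ts',  # placeholder URI
    'REDIS_HOST': 'localhost',
    'REDIS_PORT': 6379,
    'REDIS_NAMESPACE': 'listenbrainz',
    # 'LISTEN_DUMP_TEMP_DIR_ROOT' is optional; a temp dir is created if absent
}
store = TimescaleListenStore(conf, logging.getLogger(__name__))
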
def refresh_listen_count_aggregate():
    """
        Manually refresh the listen_count continuous aggregate.

        Starting one year in the past, refresh the aggregate in chunks of
        NUM_YEARS_TO_PROCESS_FOR_CONTINUOUS_AGGREGATE_REFRESH years, working
        backwards until DATA_START_YEAR_IN_SECONDS is reached.

        Example:

           Assuming today is 2022-01-01 and the chunk size is 1 year, the first
           iteration refreshes all of 2021, the next all of 2020, and so on,
           back to the start of the data.
    """

    timescale.init_db_connection(config.SQLALCHEMY_TIMESCALE_URI)

    end_ts = int(datetime.now().timestamp()) - SECONDS_IN_A_YEAR
    start_ts = end_ts - \
        (NUM_YEARS_TO_PROCESS_FOR_CONTINUOUS_AGGREGATE_REFRESH * SECONDS_IN_A_YEAR) + 1

    while True:
        query = "call refresh_continuous_aggregate('listen_count_30day', :start_ts, :end_ts)"
        t0 = time.monotonic()
        try:
            with timescale.engine.connect() as connection:
                connection.connection.set_isolation_level(0)
                connection.execute(sqlalchemy.text(query), {
                    "start_ts": start_ts,
                    "end_ts": end_ts
                })
        except psycopg2.OperationalError as e:
            logger.error("Cannot refresh listen_count_30day continuous aggregate: %s" %
                         str(e), exc_info=True)
            raise

        t1 = time.monotonic()
        logger.info("Refreshed continuous aggregate for: %s to %s in %.2fs" % (str(
            datetime.fromtimestamp(start_ts)), str(datetime.fromtimestamp(end_ts)), t1-t0))

        end_ts -= (NUM_YEARS_TO_PROCESS_FOR_CONTINUOUS_AGGREGATE_REFRESH * SECONDS_IN_A_YEAR)
        start_ts -= (NUM_YEARS_TO_PROCESS_FOR_CONTINUOUS_AGGREGATE_REFRESH * SECONDS_IN_A_YEAR)
        if end_ts < DATA_START_YEAR_IN_SECONDS:
            break
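
The loop above depends on three module-level constants. A sketch of plausible definitions follows; the exact values are assumptions, not taken from the source:

from datetime import datetime

SECONDS_IN_A_YEAR = 365 * 24 * 60 * 60  # assumed value; ignores leap years
NUM_YEARS_TO_PROCESS_FOR_CONTINUOUS_AGGREGATE_REFRESH = 1  # assumed chunk size
DATA_START_YEAR_IN_SECONDS = int(datetime(2002, 1, 1).timestamp())  # hypothetical oldest listen
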
    def reset_timescale_db(self):

        ts.init_db_connection(config.TIMESCALE_ADMIN_URI)
        ts.run_sql_script_without_transaction(
            os.path.join(TIMESCALE_SQL_DIR, 'drop_db.sql'))
        ts.run_sql_script_without_transaction(
            os.path.join(TIMESCALE_SQL_DIR, 'create_db.sql'))
        ts.engine.dispose()

        ts.init_db_connection(config.TIMESCALE_ADMIN_LB_URI)
        ts.run_sql_script_without_transaction(
            os.path.join(TIMESCALE_SQL_DIR, 'create_extensions.sql'))
        ts.engine.dispose()

        ts.init_db_connection(config.SQLALCHEMY_TIMESCALE_URI)
        ts.run_sql_script(os.path.join(TIMESCALE_SQL_DIR,
                                       'create_schemas.sql'))
        ts.run_sql_script(os.path.join(TIMESCALE_SQL_DIR, 'create_types.sql'))
        ts.run_sql_script(os.path.join(TIMESCALE_SQL_DIR, 'create_tables.sql'))
        ts.run_sql_script(
            os.path.join(TIMESCALE_SQL_DIR, 'create_functions.sql'))
        ts.run_sql_script_without_transaction(
            os.path.join(TIMESCALE_SQL_DIR, 'create_views.sql'))
        ts.run_sql_script(os.path.join(TIMESCALE_SQL_DIR,
                                       'create_indexes.sql'))
        ts.create_view_indexes()
        ts.run_sql_script(
            os.path.join(TIMESCALE_SQL_DIR, 'create_primary_keys.sql'))
        ts.run_sql_script(
            os.path.join(TIMESCALE_SQL_DIR, 'create_foreign_keys.sql'))
        ts.engine.dispose()
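
create_views.sql is run via run_sql_script_without_transaction because TimescaleDB refuses to create continuous aggregates inside a transaction block. A sketch of what such a helper might look like, given an engine; this is an illustration, not the project's actual implementation:

import sqlalchemy

def run_sql_script_without_transaction_sketch(engine, sql_file_path):
    # Hypothetical helper: run each statement in autocommit mode so DDL that
    # cannot run inside a transaction block (e.g. CREATE MATERIALIZED VIEW
    # ... WITH (timescaledb.continuous)) can succeed.
    with open(sql_file_path) as f:
        statements = [s.strip() for s in f.read().split(';') if s.strip()]
    with engine.connect() as connection:
        connection.connection.set_isolation_level(0)  # psycopg2 autocommit
        for statement in statements:
            connection.execute(sqlalchemy.text(statement))
    return True
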
Example #4
def init_db(force, create_db):
    """Initializes database.
    This process involves several steps:
1. Table structure is created.
2. Views are created.
3. Indexes are created.
    """
    from listenbrainz import config
    ts.init_db_connection(config.TIMESCALE_ADMIN_URI)
    if force:
        res = ts.run_sql_script_without_transaction(
            os.path.join(TIMESCALE_SQL_DIR, 'drop_db.sql'))
        if not res:
            raise Exception(
                'Failed to drop existing database and user! Exit code: %i' %
                res)

    if create_db or force:
        print('TS: Creating user and a database...')
        res = ts.run_sql_script_without_transaction(
            os.path.join(TIMESCALE_SQL_DIR, 'create_db.sql'))
        if not res:
            raise Exception(
                'Failed to create new database and user! Exit code: %i' % res)

        ts.init_db_connection(config.TIMESCALE_ADMIN_LB_URI)
        print('TS: Creating database extensions...')
        res = ts.run_sql_script_without_transaction(
            os.path.join(TIMESCALE_SQL_DIR, 'create_extensions.sql'))
    # Don't raise an exception if the extension already exists

    ts.init_db_connection(config.SQLALCHEMY_TIMESCALE_URI)
    application = webserver.create_app()
    with application.app_context():
        print('TS: Creating tables...')
        ts.run_sql_script(os.path.join(TIMESCALE_SQL_DIR, 'create_tables.sql'))

        print('TS: Creating Functions...')
        ts.run_sql_script(
            os.path.join(TIMESCALE_SQL_DIR, 'create_functions.sql'))

        print('TS: Creating views...')
        ts.run_sql_script(os.path.join(TIMESCALE_SQL_DIR, 'create_views.sql'))

        print('TS: Creating indexes...')
        ts.run_sql_script(os.path.join(TIMESCALE_SQL_DIR,
                                       'create_indexes.sql'))

        print("Done!")
def gen_app(config_path=None, debug=None):
    """ Generate a Flask app for LB with all configurations done and connections established.

    In the Flask app returned, blueprints are not registered.
    """
    app = CustomFlask(
        import_name=__name__,
        use_flask_uuid=True,
    )

    load_config(app)
    if debug is not None:
        app.debug = debug

    # initialize Flask-DebugToolbar if the debug option is True
    if app.debug and app.config['SECRET_KEY']:
        app.init_debug_toolbar()

    # Logging
    app.init_loggers(file_config=app.config.get('LOG_FILE'),
                     email_config=app.config.get('LOG_EMAIL'),
                     sentry_config=app.config.get('LOG_SENTRY'))

    # Redis connection
    create_redis(app)

    # Timescale connection
    create_timescale(app)

    # RabbitMQ connection
    try:
        create_rabbitmq(app)
    except ConnectionError:
        app.logger.critical("RabbitMQ service is not up!", exc_info=True)

    # Database connections
    from listenbrainz import db
    from listenbrainz.db import timescale as ts
    db.init_db_connection(app.config['SQLALCHEMY_DATABASE_URI'])
    ts.init_db_connection(app.config['SQLALCHEMY_TIMESCALE_URI'])
    from listenbrainz.webserver.external import messybrainz
    messybrainz.init_db_connection(
        app.config['MESSYBRAINZ_SQLALCHEMY_DATABASE_URI'])

    if app.config['MB_DATABASE_URI']:
        from brainzutils import musicbrainz_db
        musicbrainz_db.init_db_engine(app.config['MB_DATABASE_URI'])

    # OAuth
    from listenbrainz.webserver.login import login_manager, provider
    login_manager.init_app(app)
    provider.init(app.config['MUSICBRAINZ_CLIENT_ID'],
                  app.config['MUSICBRAINZ_CLIENT_SECRET'])

    # Error handling
    from listenbrainz.webserver.errors import init_error_handlers
    init_error_handlers(app)

    from listenbrainz.webserver import rate_limiter

    @app.after_request
    def after_request_callbacks(response):
        return rate_limiter.inject_x_rate_headers(response)

    # Template utilities
    app.jinja_env.add_extension('jinja2.ext.do')
    from listenbrainz.webserver import utils
    app.jinja_env.filters['date'] = utils.reformat_date
    app.jinja_env.filters['datetime'] = utils.reformat_datetime

    return app
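
Since gen_app returns the app with no blueprints registered, the caller is expected to attach its own before serving. A minimal sketch; the blueprint and port are placeholders:

from flask import Blueprint

bp = Blueprint('example', __name__)  # placeholder blueprint

app = gen_app(debug=True)
app.register_blueprint(bp)
app.run(host='0.0.0.0', port=8100)
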
Example #6
def init_ts_db(force, create_db):
    """Initializes database.
    This process involves several steps:
1. Table structure is created.
2. Views are created.
3. Indexes are created.
    """
    from listenbrainz import config
    ts.init_db_connection(config.TIMESCALE_ADMIN_URI)
    if force:
        res = ts.run_sql_script_without_transaction(
            os.path.join(TIMESCALE_SQL_DIR, 'drop_db.sql'))
        if not res:
            raise Exception(
                'Failed to drop existing database and user! Exit code: %i' %
                res)

    if create_db or force:
        print('TS: Creating user and a database...')
        retries = 0
        while True:
            try:
                res = ts.run_sql_script_without_transaction(
                    os.path.join(TIMESCALE_SQL_DIR, 'create_db.sql'))
                break
            except sqlalchemy.exc.OperationalError:
                print(
                    "Trapped template1 access error, FFS! Sleeping, trying again."
                )
                retries += 1
                if retries == 5:
                    raise
                sleep(1)
                continue

        if not res:
            raise Exception(
                'Failed to create new database and user! Exit code: %i' % res)

        ts.init_db_connection(config.TIMESCALE_ADMIN_LB_URI)
        print('TS: Creating database extensions...')
        res = ts.run_sql_script_without_transaction(
            os.path.join(TIMESCALE_SQL_DIR, 'create_extensions.sql'))
    # Don't raise an exception if the extension already exists

    ts.init_db_connection(config.SQLALCHEMY_TIMESCALE_URI)
    application = webserver.create_app()
    with application.app_context():
        print('TS: Creating Schemas...')
        ts.run_sql_script(os.path.join(TIMESCALE_SQL_DIR,
                                       'create_schemas.sql'))

        print('TS: Creating Types...')
        ts.run_sql_script(os.path.join(TIMESCALE_SQL_DIR, 'create_types.sql'))

        print('TS: Creating tables...')
        ts.run_sql_script(os.path.join(TIMESCALE_SQL_DIR, 'create_tables.sql'))

        print('TS: Creating Functions...')
        ts.run_sql_script(
            os.path.join(TIMESCALE_SQL_DIR, 'create_functions.sql'))

        print('TS: Creating views...')
        ts.run_sql_script_without_transaction(
            os.path.join(TIMESCALE_SQL_DIR, 'create_views.sql'))

        print('TS: Creating indexes...')
        ts.run_sql_script(os.path.join(TIMESCALE_SQL_DIR,
                                       'create_indexes.sql'))
        ts.create_view_indexes()

        print('TS: Creating Primary and Foreign Keys...')
        ts.run_sql_script(
            os.path.join(TIMESCALE_SQL_DIR, 'create_primary_keys.sql'))
        ts.run_sql_script(
            os.path.join(TIMESCALE_SQL_DIR, 'create_foreign_keys.sql'))

        print("Done!")
Example #7
def create_app(debug=None):
    """ Generate a Flask app for LB with all configurations done and connections established.

    In the Flask app returned, blueprints are not registered.
    """

    app = CustomFlask(
        import_name=__name__,
        use_flask_uuid=True,
    )

    load_config(app)
    if debug is not None:
        app.debug = debug
    # As early as possible, if debug is True, set the log level of our 'listenbrainz' logger to DEBUG
    # to prevent flask from creating a new log handler
    if app.debug:
        logger = logging.getLogger('listenbrainz')
        logger.setLevel(logging.DEBUG)

    # initialize Flask-DebugToolbar if the debug option is True
    if app.debug and app.config['SECRET_KEY']:
        app.init_debug_toolbar()

    sentry_config = app.config.get('LOG_SENTRY')
    if sentry_config:
        sentry.init_sentry(**sentry_config)

    # Initialize BU cache and metrics
    cache.init(host=app.config['REDIS_HOST'],
               port=app.config['REDIS_PORT'],
               namespace=app.config['REDIS_NAMESPACE'])
    metrics.init("listenbrainz")

    # Database connections
    from listenbrainz import db
    from listenbrainz.db import timescale as ts
    from listenbrainz import messybrainz as msb
    db.init_db_connection(app.config['SQLALCHEMY_DATABASE_URI'])
    ts.init_db_connection(app.config['SQLALCHEMY_TIMESCALE_URI'])
    msb.init_db_connection(app.config['MESSYBRAINZ_SQLALCHEMY_DATABASE_URI'])

    # Redis connection
    from listenbrainz.webserver.redis_connection import init_redis_connection
    init_redis_connection(app.logger)

    # Timescale connection
    from listenbrainz.webserver.timescale_connection import init_timescale_connection
    init_timescale_connection(app)

    # RabbitMQ connection
    from listenbrainz.webserver.rabbitmq_connection import init_rabbitmq_connection
    try:
        init_rabbitmq_connection(app)
    except ConnectionError:
        app.logger.critical("RabbitMQ service is not up!", exc_info=True)

    if app.config['MB_DATABASE_URI']:
        from brainzutils import musicbrainz_db
        musicbrainz_db.init_db_engine(app.config['MB_DATABASE_URI'])

    # OAuth
    from listenbrainz.webserver.login import login_manager, provider
    login_manager.init_app(app)
    provider.init(app.config['MUSICBRAINZ_CLIENT_ID'],
                  app.config['MUSICBRAINZ_CLIENT_SECRET'])

    # Error handling
    from listenbrainz.webserver.errors import init_error_handlers
    init_error_handlers(app)

    from brainzutils.ratelimit import inject_x_rate_headers, set_user_validation_function
    set_user_validation_function(check_ratelimit_token_whitelist)

    @app.after_request
    def after_request_callbacks(response):
        return inject_x_rate_headers(response)

    # Template utilities
    app.jinja_env.add_extension('jinja2.ext.do')
    from listenbrainz.webserver import utils
    app.jinja_env.filters['date'] = utils.reformat_date
    app.jinja_env.filters['datetime'] = utils.reformat_datetime

    return app
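
A quick smoke test of the fully wired app, assuming all of the configuration values above are in place; the header inspection is illustrative only:

app = create_app(debug=False)
with app.test_client() as client:
    response = client.get('/')
    # inject_x_rate_headers adds the X-RateLimit-* headers to every response
    print(response.status_code, dict(response.headers))
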
Example #8
def recalculate_all_user_data():

    timescale.init_db_connection(config.SQLALCHEMY_TIMESCALE_URI)
    db.init_db_connection(config.SQLALCHEMY_DATABASE_URI)
    init_cache(host=config.REDIS_HOST,
               port=config.REDIS_PORT,
               namespace=config.REDIS_NAMESPACE)

    # Find the created timestamp of the last listen
    query = "SELECT max(created) FROM listen WHERE created > :date"
    try:
        with timescale.engine.connect() as connection:
            result = connection.execute(sqlalchemy.text(query),
                                        {"date": datetime.now() - timedelta(weeks=4)})
            row = result.fetchone()
            last_created_ts = row[0]
    except psycopg2.OperationalError as e:
        logger.error("Cannot query ts to fetch latest listen." % str(e),
                     exc_info=True)
        raise

    logger.info("Last created timestamp: " + str(last_created_ts))

    # Select a list of users
    user_list = []
    query = 'SELECT musicbrainz_id FROM "user"'
    try:
        with db.engine.connect() as connection:
            result = connection.execute(sqlalchemy.text(query))
            for row in result:
                user_list.append(row[0])
    except psycopg2.OperationalError as e:
        logger.error("Cannot query db to fetch user list." % str(e),
                     exc_info=True)
        raise

    logger.info("Fetched %d users. Setting empty cache entries." %
                len(user_list))

    # Reset the timestamps and listen counts to 0 for all users
    for user_name in user_list:
        cache.set(REDIS_USER_LISTEN_COUNT + user_name,
                  0,
                  expirein=0,
                  encode=False)
        cache.set(REDIS_USER_TIMESTAMPS + user_name, "0,0", expirein=0)

    # Tabulate all of the listen counts/timestamps for all users
    logger.info("Scan the whole listen table...")
    listen_counts = defaultdict(int)
    user_timestamps = {}
    query = "SELECT listened_at, user_name FROM listen where created <= :ts"
    try:
        with timescale.engine.connect() as connection:
            result = connection.execute(sqlalchemy.text(query),
                                        {"ts": last_created_ts})
            for row in result:
                ts = row[0]
                user_name = row[1]
                if user_name not in user_timestamps:
                    user_timestamps[user_name] = [ts, ts]
                else:
                    if ts > user_timestamps[user_name][1]:
                        user_timestamps[user_name][1] = ts
                    if ts < user_timestamps[user_name][0]:
                        user_timestamps[user_name][0] = ts

                listen_counts[user_name] += 1

    except psycopg2.OperationalError as e:
        logger.error("Cannot query db to fetch user list." % str(e),
                     exc_info=True)
        raise

    logger.info("Setting updated cache entries.")
    # Set the timestamps and listen counts for all users
    for user_name in user_list:
        # listen_counts is a defaultdict, so users with no listens increment by 0
        cache.increment(REDIS_USER_LISTEN_COUNT + user_name,
                        amount=listen_counts[user_name])

        try:
            tss = cache.get(REDIS_USER_TIMESTAMPS + user_name)
            (min_ts, max_ts) = tss.split(",")
            min_ts = int(min_ts)
            max_ts = int(max_ts)
            if min_ts and min_ts < user_timestamps[user_name][0]:
                user_timestamps[user_name][0] = min_ts
            if max_ts and max_ts > user_timestamps[user_name][1]:
                user_timestamps[user_name][1] = max_ts
            cache.set(
                REDIS_USER_TIMESTAMPS + user_name,
                "%d,%d" %
                (user_timestamps[user_name][0], user_timestamps[user_name][1]),
                expirein=0)
        except KeyError:
            pass
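
The script assumes the brainzutils cache is initialised and that two key-prefix constants exist. Plausible definitions, with placeholder values:

# Assumed key prefixes; the real module may use different strings
REDIS_USER_LISTEN_COUNT = 'lc.'
REDIS_USER_TIMESTAMPS = 'ts.'
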