    def increment_listen_count_for_day(self, day: datetime, count: int):
        """ Increment the number of listens submitted on the day `day`
            by `count`.
        """
        key = self.LISTEN_COUNT_PER_DAY_KEY + day.strftime('%Y%m%d')
        cache.increment(key, amount=count)
        cache.expire(key, self.LISTEN_COUNT_PER_DAY_EXPIRY_TIME)
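
For context, here is a minimal sketch of how the per-day key scheme behaves, using a plain dict as a stand-in for the brainzutils cache (the stand-in and the sample constant value below are assumptions, not the real API):

from datetime import datetime

LISTEN_COUNT_PER_DAY_KEY = "lc-day-"   # assumed value; the real constant lives on the class

counts = {}                            # plain-dict stand-in for the brainzutils cache

def increment_listen_count_for_day(day: datetime, count: int):
    # One counter key per calendar day, e.g. "lc-day-20240115".
    key = LISTEN_COUNT_PER_DAY_KEY + day.strftime('%Y%m%d')
    counts[key] = counts.get(key, 0) + count   # stands in for cache.increment(key, amount=count)
    # cache.expire(key, ...) would refresh the TTL on each call, so a day's
    # counter only expires once submissions for that day stop arriving.

increment_listen_count_for_day(datetime(2024, 1, 15), 3)
increment_listen_count_for_day(datetime(2024, 1, 15), 2)
assert counts["lc-day-20240115"] == 5
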
    def insert(self, listens):
        """ Insert a batch of listens. Returns a list of (listened_at, track_name,
            user_name) tuples for the rows that were inserted into the DB; any row
            missing from the return value was a duplicate and was skipped.
        """

        submit = []
        for listen in listens:
            submit.append(listen.to_timescale())

        query = """INSERT INTO listen (listened_at, track_name, user_name, data)
                        VALUES %s
                   ON CONFLICT (listened_at, track_name, user_name)
                    DO NOTHING
                     RETURNING listened_at, track_name, user_name"""

        inserted_rows = []
        conn = timescale.engine.raw_connection()
        with conn.cursor() as curs:
            try:
                # fetch=True gathers the RETURNING rows from every page of the
                # batch, not just the last page that execute_values executed.
                inserted_rows = execute_values(curs, query, submit,
                                               template=None, fetch=True)
            except UntranslatableCharacter:
                # The batch contained a character the database encoding cannot
                # represent; roll back and drop the whole batch.
                conn.rollback()
                return

        conn.commit()

        # update the listen counts and timestamps for the users
        user_timestamps = {}
        user_counts = defaultdict(int)
        for ts, _, user_name in inserted_rows:
            if user_name in user_timestamps:
                if ts < user_timestamps[user_name][0]:
                    user_timestamps[user_name][0] = ts
                if ts > user_timestamps[user_name][1]:
                    user_timestamps[user_name][1] = ts
            else:
                user_timestamps[user_name] = [ts, ts]

            user_counts[user_name] += 1

        for user_name in user_counts:
            cache.increment(REDIS_USER_LISTEN_COUNT + user_name,
                            amount=user_counts[user_name])

        for user in user_timestamps:
            self.update_timestamps_for_user(user, user_timestamps[user][0],
                                            user_timestamps[user][1])

        return inserted_rows
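
The per-user bookkeeping above reduces the RETURNING rows to a (min_ts, max_ts) range and a count per user. The same reduction as a self-contained sketch (the helper name is illustrative, not from the source):

from collections import defaultdict

def summarize_inserted_rows(rows):
    # Reduce (listened_at, track_name, user_name) rows to
    # {user_name: (min_ts, max_ts, count)}; illustrative helper.
    timestamps = {}
    counts = defaultdict(int)
    for ts, _, user_name in rows:
        lo, hi = timestamps.get(user_name, (ts, ts))
        timestamps[user_name] = (min(lo, ts), max(hi, ts))
        counts[user_name] += 1
    return {u: timestamps[u] + (counts[u],) for u in counts}

rows = [(100, "a", "rob"), (50, "b", "rob"), (75, "c", "lucifer")]
assert summarize_inserted_rows(rows) == {"rob": (50, 100, 2), "lucifer": (75, 75, 1)}
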
    def insert(self, listens):
        """ Insert a batch of listens.
        """

        submit = []
        user_names = {}
        for listen in listens:
            user_names[listen.user_name] = 1
            submit.append(listen.to_influx(quote(listen.user_name)))

        if not self.influx.write_points(submit, time_precision='s'):
            self.log.error(
                "Cannot write data to influx. (write_points returned False), data=%s",
                json.dumps(submit, indent=3))

        # A write_points failure above is only logged, not raised, so we carry
        # on and update the listen counts of the users cached in brainzutils cache.
        for data in submit:
            user_key = "{}{}".format(REDIS_INFLUX_USER_LISTEN_COUNT,
                                     data['fields']['user_name'])

            cached_count = cache.get(user_key, decode=False)
            if cached_count:
                cache.increment(user_key)

        # Invalidate cached data for user
        for user_name in user_names.keys():
            cache.delete(REDIS_USER_TIMESTAMPS % user_name)

        if listens:
            # Enter a measurement to count items inserted
            submit = [{
                'measurement': TEMP_COUNT_MEASUREMENT,
                'tags': {
                    COUNT_MEASUREMENT_NAME: len(listens)
                },
                'fields': {
                    COUNT_MEASUREMENT_NAME: len(listens)
                }
            }]
            try:
                if not self.influx.write_points(submit):
                    self.log.error(
                        "Cannot write listen count to influx. (write_points returned False)"
                    )
            except (InfluxDBServerError, InfluxDBClientError,
                    ValueError) as err:
                self.log.error("Cannot write data to influx: %s, data: %s",
                               str(err),
                               json.dumps(submit, indent=3),
                               exc_info=True)
                raise
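
Note the guard around cache.increment above: a user's count is bumped only when a cached value already exists, so a missing entry stays missing and gets recomputed in full later instead of being seeded with a partial count. A dict-based sketch of the idea (stand-in cache, not the brainzutils API):

cache = {}   # dict stand-in; the real code uses brainzutils cache.get/cache.increment

def bump_if_cached(key, amount=1):
    # Only a fully computed cached count is incremented; a missing entry is
    # left absent so the next reader recomputes it from the database.
    if key in cache:
        cache[key] += amount

bump_if_cached("listen_count.rob")    # nothing cached yet: stays absent
cache["listen_count.rob"] = 10        # suppose a full recount stored 10
bump_if_cached("listen_count.rob")
assert cache["listen_count.rob"] == 11
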
    def insert(self, listens):
        """ Insert a batch of listens. Returns a list of (listened_at, track_name,
            user_name) tuples for the rows that were inserted into the DB; any row
            missing from the return value was a duplicate and was skipped.
        """

        submit = []
        user_names = {}
        for listen in listens:
            user_names[listen.user_name] = 1
            submit.append(listen.to_timescale())

        query = """INSERT INTO listen (listened_at, track_name, user_name, data)
                        VALUES %s
                   ON CONFLICT (listened_at, track_name, user_name)
                    DO NOTHING
                     RETURNING listened_at, track_name, user_name"""

        inserted_rows = []
        conn = timescale.engine.raw_connection()
        with conn.cursor() as curs:
            try:
                # fetch=True gathers the RETURNING rows from every page of the
                # batch, not just the last page that execute_values executed.
                inserted_rows = execute_values(curs, query, submit,
                                               template=None, fetch=True)
            except UntranslatableCharacter:
                # The batch contained a character the database encoding cannot
                # represent; roll back and drop the whole batch.
                conn.rollback()
                return

        conn.commit()

        # Update the listen counts of the users cached in brainzutils cache.
        for _, _, user_name in inserted_rows:
            user_key = "{}{}".format(
                self.ns + REDIS_TIMESCALE_USER_LISTEN_COUNT, user_name)
            cached_count = cache.get(user_key, decode=False)
            if cached_count:
                cache.increment(user_key)

        # Invalidate cached data for user
        for user_name in user_names:
            cache.delete(self.ns + REDIS_USER_TIMESTAMPS % user_name)

        return inserted_rows
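
The deduplication itself is done entirely in SQL: ON CONFLICT ... DO NOTHING drops rows that collide on the (listened_at, track_name, user_name) key, and RETURNING reports only the rows that survived. A hedged sketch of the same pattern against a bare psycopg2 connection (the table layout matches the query above; the DSN and sample row are placeholders):

import psycopg2
from psycopg2.extras import execute_values

QUERY = """INSERT INTO listen (listened_at, track_name, user_name, data)
                VALUES %s
           ON CONFLICT (listened_at, track_name, user_name) DO NOTHING
           RETURNING listened_at, track_name, user_name"""

def insert_listens(conn, rows):
    with conn.cursor() as curs:
        # fetch=True collects the RETURNING rows across every page of the batch.
        inserted = execute_values(curs, QUERY, rows, template=None, fetch=True)
    conn.commit()
    return inserted

# conn = psycopg2.connect("dbname=listenbrainz_ts")   # placeholder DSN
# insert_listens(conn, [(1234567890, "Song", "rob", '{}')])
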
    def __init__(self, key_prefix, limit, per):
        current_time = int(time.time())
        # End of the current fixed window; every request inside the same
        # `per`-second window maps to the same counter key.
        self.reset = (current_time // per) * per + per
        self.seconds_before_reset = self.reset - current_time
        self.key = key_prefix + str(self.reset)
        self.limit = limit
        self.per = per
        self.current = cache.increment(self.key,
                                       namespace=ratelimit_cache_namespace)
        # Expire the key a little after the window ends so stale counters
        # do not pile up in the cache.
        cache.expireat(self.key,
                       self.reset + self.expiration_window,
                       namespace=ratelimit_cache_namespace)
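
The reset arithmetic pins every request in the same `per`-second window to one counter key. A worked example of the arithmetic on its own (plain functions, no cache involved):

def window_bounds(current_time: int, per: int):
    reset = (current_time // per) * per + per   # end of the current window
    return reset, reset - current_time          # (reset, seconds_before_reset)

# With per=60: any time in [960, 1020) maps to the same reset of 1020,
# so all those requests increment the same key.
assert window_bounds(1000, 60) == (1020, 20)
assert window_bounds(1019, 60) == (1020, 1)
assert window_bounds(1020, 60) == (1080, 60)    # next window starts a new key
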
    def test_increment_invalid_value(self):
        cache.set("a", "not a number")
        with self.assertRaises(redis.exceptions.ResponseError):
            cache.increment("a")
    def test_increment(self):
        cache.set("a", 1, encode=False)
        self.assertEqual(cache.increment("a"), 2)
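
Both tests hinge on how Redis stores counters: INCR only works when the key holds an integer string, which is why the happy path stores with encode=False (no serialization) and the other test expects a ResponseError. The same behaviour reproduced with redis-py directly (assumes a local Redis at the default port):

import redis

r = redis.Redis()                 # assumes redis://localhost:6379
r.set("a", 1)                     # stored as the bytes b"1"
assert r.incr("a") == 2           # INCR parses the value as an integer

r.set("b", "not a number")
try:
    r.incr("b")                   # non-numeric value: Redis refuses
except redis.exceptions.ResponseError:
    pass
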
def recalculate_all_user_data():

    timescale.init_db_connection(config.SQLALCHEMY_TIMESCALE_URI)
    db.init_db_connection(config.SQLALCHEMY_DATABASE_URI)
    init_cache(host=config.REDIS_HOST,
               port=config.REDIS_PORT,
               namespace=config.REDIS_NAMESPACE)

    # Find the created timestamp of the last listen
    query = "SELECT max(created) FROM listen WHERE created > :date"
    try:
        with timescale.engine.connect() as connection:
            result = connection.execute(sqlalchemy.text(query),
                                        date=datetime.now() -
                                        timedelta(weeks=4))
            row = result.fetchone()
            last_created_ts = row[0]
    except psycopg2.OperationalError as e:
        logger.error("Cannot query ts to fetch latest listen." % str(e),
                     exc_info=True)
        raise

    logger.info("Last created timestamp: " + str(last_created_ts))

    # Select a list of users
    user_list = []
    query = 'SELECT musicbrainz_id FROM "user"'
    try:
        with db.engine.connect() as connection:
            result = connection.execute(sqlalchemy.text(query))
            for row in result:
                user_list.append(row[0])
    except psycopg2.OperationalError as e:
        logger.error("Cannot query db to fetch user list." % str(e),
                     exc_info=True)
        raise

    logger.info("Fetched %d users. Setting empty cache entries." %
                len(user_list))

    # Reset the timestamps and listen counts to 0 for all users
    for user_name in user_list:
        cache.set(REDIS_USER_LISTEN_COUNT + user_name,
                  0,
                  expirein=0,
                  encode=False)
        cache.set(REDIS_USER_TIMESTAMPS + user_name, "0,0", expirein=0)

    # Tabulate all of the listen counts/timestamps for all users
    logger.info("Scan the whole listen table...")
    listen_counts = defaultdict(int)
    user_timestamps = {}
    query = "SELECT listened_at, user_name FROM listen where created <= :ts"
    try:
        with timescale.engine.connect() as connection:
            result = connection.execute(sqlalchemy.text(query),
                                        ts=last_created_ts)
            for row in result:
                ts = row[0]
                user_name = row[1]
                if user_name not in user_timestamps:
                    user_timestamps[user_name] = [ts, ts]
                else:
                    if ts > user_timestamps[user_name][1]:
                        user_timestamps[user_name][1] = ts
                    if ts < user_timestamps[user_name][0]:
                        user_timestamps[user_name][0] = ts

                listen_counts[user_name] += 1

    except psycopg2.OperationalError as e:
        logger.error("Cannot query db to fetch user list." % str(e),
                     exc_info=True)
        raise

    logger.info("Setting updated cache entries.")
    # Set the timestamps and listen counts for all users
    for user_name in user_list:
        try:
            cache.increment(REDIS_USER_LISTEN_COUNT + user_name,
                            amount=listen_counts[user_name])
        except KeyError:
            pass

        try:
            tss = cache.get(REDIS_USER_TIMESTAMPS + user_name)
            (min_ts, max_ts) = tss.split(",")
            min_ts = int(min_ts)
            max_ts = int(max_ts)
            if min_ts and min_ts < user_timestamps[user_name][0]:
                user_timestamps[user_name][0] = min_ts
            if max_ts and max_ts > user_timestamps[user_name][1]:
                user_timestamps[user_name][1] = max_ts
            cache.set(
                REDIS_USER_TIMESTAMPS + user_name,
                "%d,%d" %
                (user_timestamps[user_name][0], user_timestamps[user_name][1]),
                expirein=0)
        except KeyError:
            pass
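
The timestamps are cached as a single "min,max" string; the merge step above widens the freshly computed range with whatever range is already cached, treating 0 as "unset". An illustrative pure-Python version of that merge (the helper name is not from the source):

def merge_timestamp_ranges(cached: str, computed):
    # Widen the computed [min_ts, max_ts] pair with a cached "min,max" string.
    min_ts, max_ts = (int(x) for x in cached.split(","))
    lo, hi = computed
    if min_ts and min_ts < lo:
        lo = min_ts
    if max_ts and max_ts > hi:
        hi = max_ts
    return "%d,%d" % (lo, hi)

assert merge_timestamp_ranges("0,0", [100, 200]) == "100,200"
assert merge_timestamp_ranges("50,300", [100, 200]) == "50,300"
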