def increment_listen_count_for_day(self, day: datetime, count: int):
    """ Increment the number of listens submitted on the day `day`
        by `count`. """
    key = self.LISTEN_COUNT_PER_DAY_KEY + day.strftime('%Y%m%d')
    cache.increment(key, amount=count)
    cache.expire(key, self.LISTEN_COUNT_PER_DAY_EXPIRY_TIME)

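For context, a minimal usage sketch of the pattern above, reading the per-day counter back out of the cache. The key prefix and expiry constant here are assumed values for illustration, not the store's real configuration:

    from datetime import datetime

    from brainzutils import cache

    LISTEN_COUNT_PER_DAY_KEY = "lc-day-"          # assumed prefix for illustration
    LISTEN_COUNT_PER_DAY_EXPIRY_TIME = 3 * 86400  # assumed expiry of three days

    # Record 5 listens submitted today, then read the raw counter back.
    key = LISTEN_COUNT_PER_DAY_KEY + datetime.utcnow().strftime('%Y%m%d')
    cache.increment(key, amount=5)
    cache.expire(key, LISTEN_COUNT_PER_DAY_EXPIRY_TIME)
    count = int(cache.get(key, decode=False))  # e.g. 5
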
def insert(self, listens):
    """ Insert a batch of listens. Returns a list of (listened_at, track_name, user_name)
        that indicates which rows were inserted into the DB. If the row is not listed in the
        return values, it was a duplicate.
    """

    submit = []
    for listen in listens:
        submit.append(listen.to_timescale())

    query = """INSERT INTO listen (listened_at, track_name, user_name, data)
                    VALUES %s
               ON CONFLICT (listened_at, track_name, user_name)
                DO NOTHING
                 RETURNING listened_at, track_name, user_name"""

    inserted_rows = []
    conn = timescale.engine.raw_connection()
    with conn.cursor() as curs:
        try:
            execute_values(curs, query, submit, template=None)
            while True:
                result = curs.fetchone()
                if not result:
                    break
                inserted_rows.append((result[0], result[1], result[2]))
        except UntranslatableCharacter:
            conn.rollback()
            return []

    conn.commit()

    # Update the listen counts and min/max timestamps for the affected users
    user_timestamps = {}
    user_counts = defaultdict(int)
    for ts, _, user_name in inserted_rows:
        if user_name in user_timestamps:
            if ts < user_timestamps[user_name][0]:
                user_timestamps[user_name][0] = ts
            if ts > user_timestamps[user_name][1]:
                user_timestamps[user_name][1] = ts
        else:
            user_timestamps[user_name] = [ts, ts]

        user_counts[user_name] += 1

    for user_name in user_counts:
        cache.increment(REDIS_USER_LISTEN_COUNT + user_name, amount=user_counts[user_name])

    for user in user_timestamps:
        self.update_timestamps_for_user(user, user_timestamps[user][0], user_timestamps[user][1])

    return inserted_rows

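A hedged usage sketch of the contract described in the docstring: duplicates are detected by their absence from the return value. The `store` object, the batch, and the Listen attribute names are assumptions for illustration:

    # Assumed: `store` is the listen store above and `listens` is a batch of
    # Listen objects with listened_at / track_name / user_name attributes.
    inserted = set(store.insert(listens))

    # Rows missing from the return value hit ON CONFLICT ... DO NOTHING.
    duplicates = [listen for listen in listens
                  if (listen.listened_at, listen.track_name, listen.user_name)
                  not in inserted]
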
def insert(self, listens):
    """ Insert a batch of listens. """

    submit = []
    user_names = {}
    for listen in listens:
        user_names[listen.user_name] = 1
        submit.append(listen.to_influx(quote(listen.user_name)))

    if not self.influx.write_points(submit, time_precision='s'):
        self.log.error("Cannot write data to influx. (write_points returned False), data=%s",
                       json.dumps(submit, indent=3))

    # If we reach this point, we were able to write the listens to the InfluxListenStore.
    # So update the listen counts of the users cached in brainzutils cache.
    for data in submit:
        user_key = "{}{}".format(REDIS_INFLUX_USER_LISTEN_COUNT, data['fields']['user_name'])
        cached_count = cache.get(user_key, decode=False)
        if cached_count:
            cache.increment(user_key)

    # Invalidate cached data for user
    for user_name in user_names:
        cache.delete(REDIS_USER_TIMESTAMPS % user_name)

    if len(listens):
        # Enter a measurement to count items inserted
        submit = [{
            'measurement': TEMP_COUNT_MEASUREMENT,
            'tags': {
                COUNT_MEASUREMENT_NAME: len(listens)
            },
            'fields': {
                COUNT_MEASUREMENT_NAME: len(listens)
            }
        }]

        try:
            if not self.influx.write_points(submit):
                self.log.error("Cannot write listen count to influx. (write_points returned False)")
        except (InfluxDBServerError, InfluxDBClientError, ValueError) as err:
            self.log.error("Cannot write data to influx: %s, data: %s", str(err),
                           json.dumps(submit, indent=3), exc_info=True)
            raise

def insert(self, listens):
    """ Insert a batch of listens. Returns a list of (listened_at, track_name, user_name)
        that indicates which rows were inserted into the DB. If the row is not listed in the
        return values, it was a duplicate.
    """

    submit = []
    user_names = {}
    for listen in listens:
        user_names[listen.user_name] = 1
        submit.append(listen.to_timescale())

    query = """INSERT INTO listen (listened_at, track_name, user_name, data)
                    VALUES %s
               ON CONFLICT (listened_at, track_name, user_name)
                DO NOTHING
                 RETURNING listened_at, track_name, user_name"""

    inserted_rows = []
    conn = timescale.engine.raw_connection()
    with conn.cursor() as curs:
        try:
            execute_values(curs, query, submit, template=None)
            while True:
                result = curs.fetchone()
                if not result:
                    break
                inserted_rows.append((result[0], result[1], result[2]))
        except UntranslatableCharacter:
            conn.rollback()
            return []

    conn.commit()

    # Update the listen counts of the users cached in brainzutils cache.
    for _, _, user_name in inserted_rows:
        user_key = "{}{}".format(self.ns + REDIS_TIMESCALE_USER_LISTEN_COUNT, user_name)
        cached_count = cache.get(user_key, decode=False)
        if cached_count:
            cache.increment(user_key)

    # Invalidate cached data for user
    for user_name in user_names:
        cache.delete(self.ns + REDIS_USER_TIMESTAMPS % user_name)

    return inserted_rows

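The `if cached_count` guard above is worth spelling out: incrementing a missing key would create it at 0 and permanently undercount, because listens inserted before the key existed were never added to it. A minimal sketch of the check-then-increment pattern, with an assumed key name:

    from brainzutils import cache

    user_key = "ls.listen_count.alice"  # assumed key for illustration

    # Only bump the counter when a full count is already cached; otherwise
    # leave the key absent so the next reader recomputes it from the database.
    cached_count = cache.get(user_key, decode=False)
    if cached_count:
        cache.increment(user_key)
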
def __init__(self, key_prefix, limit, per):
    current_time = int(time.time())
    self.reset = (current_time // per) * per + per
    self.seconds_before_reset = self.reset - current_time
    self.key = key_prefix + str(self.reset)
    self.limit = limit
    self.per = per
    self.current = cache.increment(self.key, namespace=ratelimit_cache_namespace)
    cache.expireat(self.key, self.reset + self.expiration_window,
                   namespace=ratelimit_cache_namespace)

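A worked example of the window arithmetic, with assumed numbers: every request inside one `per`-second window computes the same `reset` boundary and therefore shares one counter key:

    per = 300                # assumed five-minute rate-limit window
    current_time = 1000      # assumed "now" for illustration
    reset = (current_time // per) * per + per
    assert reset == 1200                  # boundary shared by the whole window
    assert reset - current_time == 200    # seconds before the counter resets
    key = "rate-limit-" + str(reset)      # each window increments its own key
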
def test_increment_invalid_value(self):
    cache.set("a", "not a number")
    with self.assertRaises(redis.exceptions.ResponseError):
        cache.increment("a")

def test_increment(self):
    cache.set("a", 1, encode=False)
    self.assertEqual(cache.increment("a"), 2)

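These two tests hinge on how the value is stored: Redis INCR only works on raw integer strings, which is why the counter is written with encode=False (and read back with decode=False) instead of being serialized. A small sketch under that assumption:

    from brainzutils import cache

    cache.set("plays", 10, encode=False)    # stored as the raw string "10"
    cache.increment("plays", amount=5)      # Redis INCRBY on the raw value
    raw = cache.get("plays", decode=False)  # raw bytes back, e.g. b'15'
    assert int(raw) == 15
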
def recalculate_all_user_data():

    timescale.init_db_connection(config.SQLALCHEMY_TIMESCALE_URI)
    db.init_db_connection(config.SQLALCHEMY_DATABASE_URI)
    init_cache(host=config.REDIS_HOST, port=config.REDIS_PORT,
               namespace=config.REDIS_NAMESPACE)

    # Find the created timestamp of the last listen
    query = "SELECT max(created) FROM listen WHERE created > :date"
    try:
        with timescale.engine.connect() as connection:
            result = connection.execute(sqlalchemy.text(query),
                                        date=datetime.now() - timedelta(weeks=4))
            row = result.fetchone()
            last_created_ts = row[0]
    except psycopg2.OperationalError as e:
        logger.error("Cannot query ts to fetch latest listen: %s", str(e), exc_info=True)
        raise

    logger.info("Last created timestamp: " + str(last_created_ts))

    # Select a list of users
    user_list = []
    query = 'SELECT musicbrainz_id FROM "user"'
    try:
        with db.engine.connect() as connection:
            result = connection.execute(sqlalchemy.text(query))
            for row in result:
                user_list.append(row[0])
    except psycopg2.OperationalError as e:
        logger.error("Cannot query db to fetch user list: %s", str(e), exc_info=True)
        raise

    logger.info("Fetched %d users. Setting empty cache entries." % len(user_list))

    # Reset the timestamps and listen counts to 0 for all users
    for user_name in user_list:
        cache.set(REDIS_USER_LISTEN_COUNT + user_name, 0, expirein=0, encode=False)
        cache.set(REDIS_USER_TIMESTAMPS + user_name, "0,0", expirein=0)

    # Tabulate all of the listen counts/timestamps for all users
    logger.info("Scan the whole listen table...")
    listen_counts = defaultdict(int)
    user_timestamps = {}
    query = "SELECT listened_at, user_name FROM listen WHERE created <= :ts"
    try:
        with timescale.engine.connect() as connection:
            result = connection.execute(sqlalchemy.text(query), ts=last_created_ts)
            for row in result:
                ts = row[0]
                user_name = row[1]
                if user_name not in user_timestamps:
                    user_timestamps[user_name] = [ts, ts]
                else:
                    if ts > user_timestamps[user_name][1]:
                        user_timestamps[user_name][1] = ts
                    if ts < user_timestamps[user_name][0]:
                        user_timestamps[user_name][0] = ts

                listen_counts[user_name] += 1
    except psycopg2.OperationalError as e:
        logger.error("Cannot query ts to scan the listen table: %s", str(e), exc_info=True)
        raise

    logger.info("Setting updated cache entries.")
    # Set the timestamps and listen counts for all users
    for user_name in user_list:
        try:
            cache.increment(REDIS_USER_LISTEN_COUNT + user_name,
                            amount=listen_counts[user_name])
        except KeyError:
            pass

        try:
            tss = cache.get(REDIS_USER_TIMESTAMPS + user_name)
            (min_ts, max_ts) = tss.split(",")
            min_ts = int(min_ts)
            max_ts = int(max_ts)
            if min_ts and min_ts < user_timestamps[user_name][0]:
                user_timestamps[user_name][0] = min_ts
            if max_ts and max_ts > user_timestamps[user_name][1]:
                user_timestamps[user_name][1] = max_ts
            cache.set(REDIS_USER_TIMESTAMPS + user_name,
                      "%d,%d" % (user_timestamps[user_name][0], user_timestamps[user_name][1]),
                      expirein=0)
        except KeyError:
            pass
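Why the script increments the final counts instead of setting them: inserts that run concurrently with the table scan bump the zeroed counter via insert(), and a final set would overwrite their contribution. An illustrative timeline with an assumed user and counts:

    from brainzutils import cache

    REDIS_USER_LISTEN_COUNT = "ls.listen_count."  # assumed prefix
    key = REDIS_USER_LISTEN_COUNT + "alice"       # assumed user

    cache.set(key, 0, expirein=0, encode=False)  # script zeroes the counter
    cache.increment(key)                         # a concurrent insert() lands
    cache.increment(key, amount=1000)            # script adds its tabulated total
    assert int(cache.get(key, decode=False)) == 1001
    # cache.set(key, 1000) at the last step would have lost the concurrent listen.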