def bulk_get_streams(realm, stream_names):
    """Fetch multiple active streams by name, case-insensitively, going
    through the per-stream cache.

    `realm` may be either a Realm object or a realm id.  Returns a dict
    mapping the lowercased stream name to the Stream object (missing
    names are simply absent from the result).
    """
    if isinstance(realm, Realm):
        realm_id = realm.id
    else:
        realm_id = realm

    def fetch_streams_by_name(stream_names):
        # This should be just
        #
        #   Stream.objects.select_related("realm").filter(
        #       name__iexact__in=stream_names, realm_id=realm_id)
        #
        # But chaining __in and __iexact doesn't work with Django's
        # ORM, so we have the following hack to construct the relevant
        # where clause.  We pass the names as a single array parameter
        # and compare case-insensitively via unnest(), matching the
        # approach used by bulk_get_users; this keeps the query shape
        # fixed regardless of how many names are requested.
        if len(stream_names) == 0:
            return []
        where_clause = (
            "upper(zerver_stream.name::text) IN "
            "(SELECT upper(name) FROM unnest(%s) AS name)")
        return get_active_streams(realm_id).select_related("realm").extra(
            where=[where_clause], params=(stream_names,))

    return generic_bulk_cached_fetch(
        lambda stream_name: get_stream_cache_key(stream_name, realm),
        fetch_streams_by_name,
        [stream_name.lower() for stream_name in stream_names],
        id_fetcher=lambda stream: stream.name.lower())
def user_ids_to_users(user_ids: List[int], realm: Realm) -> List[UserProfile]:
    """Resolve a list of user ids to UserProfile objects, raising
    JsonableError if any id does not exist or belongs to another realm."""
    # TODO: Consider adding a flag to control whether deactivated
    # users should be included.

    def fetch_users_by_id(user_ids: List[int]) -> List[UserProfile]:
        if len(user_ids) == 0:
            return []
        return list(UserProfile.objects.filter(id__in=user_ids).select_related())

    user_profiles_by_id = generic_bulk_cached_fetch(
        cache_key_function=user_profile_by_id_cache_key,
        query_function=fetch_users_by_id,
        object_ids=user_ids
    )  # type: Dict[int, UserProfile]

    found_user_ids = user_profiles_by_id.keys()
    missed_user_ids = [user_id for user_id in user_ids if user_id not in found_user_ids]
    if missed_user_ids:
        # Interpolate *after* calling _(), so gettext looks up the
        # format string, not an already-interpolated message that has
        # no translation entry.
        raise JsonableError(_("Invalid user ID: %s") % (missed_user_ids[0],))

    user_profiles = list(user_profiles_by_id.values())
    for user_profile in user_profiles:
        if user_profile.realm != realm:
            raise JsonableError(_("Invalid user ID: %s") % (user_profile.id,))
    return user_profiles
def messages_for_ids(message_ids: List[int],
                     user_message_flags: Dict[int, List[str]],
                     search_fields: Dict[int, Dict[str, str]],
                     apply_markdown: bool,
                     client_gravatar: bool,
                     allow_edit_history: bool) -> List[Dict[str, Any]]:
    """Build client-facing message dicts for the given ids, pulling
    rendered rows from the cache where possible, attaching the per-user
    flags and any search-highlighting fields, and stripping edit
    history when the realm disallows exposing it."""
    message_dicts = generic_bulk_cached_fetch(
        to_dict_cache_key_id,
        MessageDict.get_raw_db_rows,
        message_ids,
        id_fetcher=lambda row: row['id'],
        cache_transformer=MessageDict.build_dict_from_raw_db_row,
        extractor=extract_message_dict,
        setter=stringify_message_dict)

    message_list = []  # type: List[Dict[str, Any]]
    for message_id in message_ids:
        msg_dict = message_dicts[message_id]
        msg_dict["flags"] = user_message_flags[message_id]
        if message_id in search_fields:
            msg_dict.update(search_fields[message_id])
        # Never send message edit history to clients in realms with
        # allow_edit_history disabled.
        if not allow_edit_history and "edit_history" in msg_dict:
            del msg_dict["edit_history"]
        message_list.append(msg_dict)

    MessageDict.post_process_dicts(message_list, apply_markdown, client_gravatar)
    return message_list
def bulk_get_recipients(type, type_ids):
    """Fetch Recipient rows of the given type for many type_ids at once,
    going through the per-recipient cache; returns a dict keyed by
    type_id."""
    return generic_bulk_cached_fetch(
        lambda type_id: get_recipient_cache_key(type, type_id),
        lambda type_ids: Recipient.objects.filter(type=type, type_id__in=type_ids),
        type_ids,
        id_fetcher=lambda recipient: recipient.type_id)
def bulk_get_users(emails: List[str], realm: Optional[Realm],
                   base_query: 'QuerySet[UserProfile]'=None) -> Dict[str, UserProfile]:
    """Fetch UserProfiles by email, case-insensitively, through a
    dedicated cache; returns a dict keyed by lowercased email."""
    if base_query is None:
        assert realm is not None
        query = UserProfile.objects.filter(realm=realm, is_active=True)
        realm_id = realm.id
    else:
        # WARNING: Currently, this code path only really supports one
        # version of `base_query` being used (because otherwise,
        # they'll share the cache, which can screw up the filtering).
        # If you're using this flow, you'll need to re-do any filters
        # in base_query in the code itself; base_query is just a perf
        # optimization.
        query = base_query
        realm_id = 0

    def fetch_users_by_email(emails: List[str]) -> List[UserProfile]:
        # This should be just
        #
        #   UserProfile.objects.select_related("realm").filter(
        #       email__iexact__in=emails, realm=realm)
        #
        # But chaining __in and __iexact doesn't work with Django's
        # ORM, so we have the following hack to construct the relevant
        # where clause.  We pass the emails as a single array parameter
        # and compare case-insensitively via unnest(), rather than
        # generating one UPPER(%s) placeholder per email, so the query
        # shape stays fixed regardless of how many emails are missed.
        if len(emails) == 0:
            return []
        where_clause = (
            "upper(zerver_userprofile.email::text) IN "
            "(SELECT upper(email) FROM unnest(%s) AS email)")
        return query.select_related("realm").extra(
            where=[where_clause], params=(emails,))

    return generic_bulk_cached_fetch(
        # Use a separate cache key to protect us from conflicts with
        # the get_user cache.
        lambda email: 'bulk_get_users:' + user_profile_cache_key_id(email, realm_id),
        fetch_users_by_email,
        [email.lower() for email in emails],
        id_fetcher=lambda user_profile: user_profile.email.lower()
    )
def bulk_get_users(emails: List[str], realm: Optional[Realm],
                   base_query: 'QuerySet[UserProfile]'=None) -> Dict[str, UserProfile]:
    """Fetch UserProfiles by email, case-insensitively, through a
    dedicated cache; returns a dict keyed by lowercased email."""
    if base_query is not None:
        # WARNING: Currently, this code path only really supports one
        # version of `base_query` being used (because otherwise,
        # they'll share the cache, which can screw up the filtering).
        # If you're using this flow, you'll need to re-do any filters
        # in base_query in the code itself; base_query is just a perf
        # optimization.
        query = base_query
        realm_id = 0
    else:
        assert realm is not None
        query = UserProfile.objects.filter(realm=realm, is_active=True)
        realm_id = realm.id

    def fetch_users_by_email(emails: List[str]) -> List[UserProfile]:
        # This should be just
        #
        #   UserProfile.objects.select_related("realm").filter(
        #       email__iexact__in=emails, realm=realm)
        #
        # But chaining __in and __iexact doesn't work with Django's
        # ORM, so we have the following hack to construct the relevant
        # where clause: compare against the lowered emails via a
        # single array parameter expanded with unnest().
        where_clause = (
            "upper(zerver_userprofile.email::text) IN "
            "(SELECT upper(email) FROM unnest(%s) AS email)")
        return query.select_related("realm").extra(
            where=[where_clause], params=(emails,))

    return generic_bulk_cached_fetch(
        # Use a separate cache key to protect us from conflicts with
        # the get_user cache.
        lambda email: 'bulk_get_users:' + user_profile_cache_key_id(email, realm_id),
        fetch_users_by_email,
        [email.lower() for email in emails],
        id_fetcher=lambda user_profile: user_profile.email.lower(),
    )
def test_empty_object_ids_list(self) -> None:
    """generic_bulk_cached_fetch must short-circuit on an empty
    object_ids list without touching the cache-key or query helpers."""
    class CustomException(Exception):
        pass

    def cache_key_function(
        email: str,
    ) -> str:  # nocoverage -- this is just here to make sure it's not called
        raise CustomException("The cache key function was called")

    def query_function(
        emails: List[str],
    ) -> List[UserProfile]:  # nocoverage -- this is just here to make sure it's not called
        raise CustomException("The query function was called")

    # query_function and cache_key_function shouldn't be called, because
    # objects_ids is empty, so there's nothing to do.
    result: Dict[str, UserProfile] = generic_bulk_cached_fetch(
        cache_key_function=cache_key_function,
        query_function=query_function,
        object_ids=[])

    self.assertEqual(result, {})
def messages_for_ids(
    message_ids: List[int],
    user_message_flags: Dict[int, List[str]],
    search_fields: Dict[int, Dict[str, str]],
    apply_markdown: bool,
    client_gravatar: bool,
    allow_edit_history: bool,
) -> List[Dict[str, Any]]:
    """Assemble client-facing message dicts for the given ids, pulling
    rendered rows from the cache where available, attaching per-user
    flags and any search-highlighting fields."""
    message_dicts = generic_bulk_cached_fetch(
        to_dict_cache_key_id,
        MessageDict.get_raw_db_rows,
        message_ids,
        id_fetcher=lambda row: row["id"],
        cache_transformer=MessageDict.build_dict_from_raw_db_row,
        extractor=extract_message_dict,
        setter=stringify_message_dict,
    )

    message_list: List[Dict[str, Any]] = []

    for message_id in message_ids:
        msg_dict = message_dicts[message_id]
        msg_dict["flags"] = user_message_flags[message_id]
        if message_id in search_fields:
            msg_dict.update(search_fields[message_id])
        # Make sure that we never send message edit history to clients
        # in realms with allow_edit_history disabled.
        if not allow_edit_history and "edit_history" in msg_dict:
            del msg_dict["edit_history"]
        message_list.append(msg_dict)

    MessageDict.post_process_dicts(message_list, apply_markdown, client_gravatar)

    return message_list
def user_ids_to_users(user_ids: Sequence[int], realm: Realm) -> List[UserProfile]:
    """Resolve user ids to UserProfile objects, raising JsonableError if
    any id is unknown or belongs to a different realm."""
    # TODO: Consider adding a flag to control whether deactivated
    # users should be included.

    def fetch_users_by_id(user_ids: List[int]) -> List[UserProfile]:
        return list(UserProfile.objects.filter(id__in=user_ids).select_related())

    user_profiles_by_id: Dict[int, UserProfile] = generic_bulk_cached_fetch(
        cache_key_function=user_profile_by_id_cache_key,
        query_function=fetch_users_by_id,
        object_ids=user_ids,
    )

    # Reject on the first requested id that didn't come back.
    for user_id in user_ids:
        if user_id not in user_profiles_by_id:
            raise JsonableError(_("Invalid user ID: %s") % (user_id,))

    user_profiles = list(user_profiles_by_id.values())
    for user_profile in user_profiles:
        if user_profile.realm != realm:
            raise JsonableError(_("Invalid user ID: %s") % (user_profile.id,))

    return user_profiles
def bulk_fetch_display_recipients(
        recipient_tuples: Set[Tuple[int, int, int]]) -> Dict[int, DisplayRecipientT]:
    """
    Takes a set of tuples of the form (recipient_id, recipient_type,
    recipient_type_id).

    Returns a dict mapping recipient_id to the corresponding
    display_recipient: a stream name (str) for stream recipients, or a
    list of user dicts for personal/huddle recipients.
    """

    # Build dict mapping recipient id to (type, type_id) of the corresponding recipient:
    recipient_id_to_type_pair_dict = {
        recipient[0]: (recipient[1], recipient[2])
        for recipient in recipient_tuples
    }
    # And the inverse mapping (needed so id_fetchers below can recover
    # the recipient_id from a fetched row):
    type_pair_to_recipient_id_dict = {
        (recipient[1], recipient[2]): recipient[0]
        for recipient in recipient_tuples
    }

    # Split the input: streams are handled separately from
    # personal/huddle recipients, since their display_recipient is just
    # the stream name.
    stream_recipients = {
        recipient for recipient in recipient_tuples if recipient[1] == Recipient.STREAM
    }
    personal_and_huddle_recipients = recipient_tuples - stream_recipients

    def stream_query_function(recipient_ids: List[int]) -> List[TinyStreamResult]:
        # Map recipient ids back to stream ids (the type_id), then
        # fetch just the (name, id) pairs we need.
        stream_ids = [
            recipient_id_to_type_pair_dict[recipient_id][1]
            for recipient_id in recipient_ids
        ]
        return Stream.objects.filter(id__in=stream_ids).values('name', 'id')

    def stream_id_fetcher(stream: TinyStreamResult) -> int:
        # Recover the recipient_id for a fetched stream row.
        return type_pair_to_recipient_id_dict[(Recipient.STREAM, stream['id'])]

    def stream_cache_transformer(stream: TinyStreamResult) -> str:
        # A stream's display_recipient is simply its name.
        return stream['name']

    # ItemT = Stream, CacheItemT = str (name), ObjKT = int (recipient_id)
    stream_display_recipients: Dict[int, str] = generic_bulk_cached_fetch(
        cache_key_function=display_recipient_cache_key,
        query_function=stream_query_function,
        object_ids=[recipient[0] for recipient in stream_recipients],
        id_fetcher=stream_id_fetcher,
        cache_transformer=stream_cache_transformer,
    )

    # Now we have to create display_recipients for personal and huddle messages.
    # We do this via generic_bulk_cached_fetch, supplying appropriate functions to it.

    def personal_and_huddle_query_function(
            recipient_ids: List[int]
    ) -> List[Tuple[int, List[UserDisplayRecipient]]]:
        """
        Return a list of tuples of the form (recipient_id, [list of
        UserProfiles]) where [list of UserProfiles] has the users
        corresponding to the recipient: the receiving user in the
        Recipient.PERSONAL case, or the users in the huddle in the
        Recipient.HUDDLE case.

        This is a pretty hacky return value, but it needs to be in this
        form, for this function to work as the query_function in
        generic_bulk_cached_fetch.
        """
        # Reconstruct Recipient objects (unsaved, in-memory only) from
        # the type pairs we recorded earlier.
        recipients = [
            Recipient(id=recipient_id,
                      type=recipient_id_to_type_pair_dict[recipient_id][0],
                      type_id=recipient_id_to_type_pair_dict[recipient_id][1])
            for recipient_id in recipient_ids
        ]

        # Find all user ids whose UserProfiles we will need to fetch:
        user_ids_to_fetch: Set[int] = set()
        huddle_user_ids: Dict[int, List[int]] = {}
        huddle_user_ids = bulk_get_huddle_user_ids([
            recipient for recipient in recipients
            if recipient.type == Recipient.HUDDLE
        ])
        for recipient in recipients:
            if recipient.type == Recipient.PERSONAL:
                user_ids_to_fetch.add(recipient.type_id)
            else:
                user_ids_to_fetch = user_ids_to_fetch.union(
                    huddle_user_ids[recipient.id])

        # Fetch the needed UserProfiles:
        user_profiles: Dict[int, UserDisplayRecipient] = bulk_get_user_profile_by_id(
            list(user_ids_to_fetch))

        # Build the return value:
        result: List[Tuple[int, List[UserDisplayRecipient]]] = []
        for recipient in recipients:
            if recipient.type == Recipient.PERSONAL:
                result.append(
                    (recipient.id, [user_profiles[recipient.type_id]]))
            else:
                result.append((recipient.id, [
                    user_profiles[user_id]
                    for user_id in huddle_user_ids[recipient.id]
                ]))

        return result

    def personal_and_huddle_cache_transformer(
            db_object: Tuple[int, List[UserDisplayRecipient]]
    ) -> List[UserDisplayRecipient]:
        """
        Takes an element of the list returned by the query_function and
        maps it to the final display_recipient list (just the user list;
        the recipient_id half is consumed by the id_fetcher).
        """
        user_profile_list = db_object[1]
        display_recipient = user_profile_list

        return display_recipient

    def personal_and_huddle_id_fetcher(
            db_object: Tuple[int, List[UserDisplayRecipient]]) -> int:
        # db_object is a tuple, with recipient_id in the first position
        return db_object[0]

    # ItemT = Tuple[int, List[UserDisplayRecipient]] (recipient_id, list of corresponding users)
    # CacheItemT = List[UserDisplayRecipient] (display_recipient list)
    # ObjKT = int (recipient_id)
    personal_and_huddle_display_recipients = generic_bulk_cached_fetch(
        cache_key_function=display_recipient_cache_key,
        query_function=personal_and_huddle_query_function,
        object_ids=[
            recipient[0] for recipient in personal_and_huddle_recipients
        ],
        id_fetcher=personal_and_huddle_id_fetcher,
        cache_transformer=personal_and_huddle_cache_transformer)

    # Glue the dicts together and return:
    return {
        **stream_display_recipients,
        **personal_and_huddle_display_recipients
    }
def get_old_messages_backend(request, user_profile,
                             anchor = REQ(converter=int),
                             num_before = REQ(converter=to_non_negative_int),
                             num_after = REQ(converter=to_non_negative_int),
                             narrow = REQ('narrow', converter=narrow_parameter, default=None),
                             use_first_unread_anchor = REQ(default=False, converter=ujson.loads),
                             apply_markdown=REQ(default=True, converter=ujson.loads)):
    # type: (HttpRequest, UserProfile, int, int, int, Optional[List[Dict[str, Any]]], bool, bool) -> HttpResponse
    """Fetch a window of messages around `anchor`: up to `num_before`
    messages with smaller ids and `num_after` with larger ids, optionally
    restricted by a `narrow` (a list of operator/operand terms).  Builds
    the query with SQLAlchemy, then renders the matching messages
    (bulk-fetching rendered dicts from the cache) and returns them as a
    JSON response.
    """
    include_history = ok_to_include_history(narrow, user_profile.realm)

    # Pick the base table and the column the anchor/ordering logic
    # operates on.  With history included we query zerver_message
    # directly; otherwise we go through the user's UserMessage rows so
    # we can also return per-user flags.
    if include_history and not use_first_unread_anchor:
        query = select([column("id").label("message_id")], None, table("zerver_message"))
        inner_msg_id_col = literal_column("zerver_message.id")
    elif narrow is None:
        query = select([column("message_id"), column("flags")],
                       column("user_profile_id") == literal(user_profile.id),
                       table("zerver_usermessage"))
        inner_msg_id_col = column("message_id")
    else:
        # TODO: Don't do this join if we're not doing a search
        query = select([column("message_id"), column("flags")],
                       column("user_profile_id") == literal(user_profile.id),
                       join(table("zerver_usermessage"), table("zerver_message"),
                            literal_column("zerver_usermessage.message_id") ==
                            literal_column("zerver_message.id")))
        inner_msg_id_col = column("message_id")

    num_extra_messages = 1
    is_search = False

    if narrow is not None:
        # Add some metadata to our logging data for narrows
        verbose_operators = []
        for term in narrow:
            if term['operator'] == "is":
                verbose_operators.append("is:" + term['operand'])
            else:
                verbose_operators.append(term['operator'])
        request._log_data['extra'] = "[%s]" % (",".join(verbose_operators),)

        # Build the query for the narrow
        num_extra_messages = 0
        builder = NarrowBuilder(user_profile, inner_msg_id_col)
        search_term = None  # type: Optional[Dict[str, Any]]
        for term in narrow:
            if term['operator'] == 'search':
                if not is_search:
                    # First search term: add the columns the search
                    # highlighting code will need.
                    search_term = term
                    query = query.column(column("subject")).column(column("rendered_content"))
                    is_search = True
                else:
                    # Join the search operators if there are multiple of them
                    search_term['operand'] += ' ' + term['operand']
            else:
                query = builder.add_term(query, term)
        if is_search:
            query = builder.add_term(query, search_term)

    # We add 1 to the number of messages requested if no narrow was
    # specified to ensure that the resulting list always contains the
    # anchor message.  If a narrow was specified, the anchor message
    # might not match the narrow anyway.
    if num_after != 0:
        num_after += num_extra_messages
    else:
        num_before += num_extra_messages

    sa_conn = get_sqlalchemy_connection()
    if use_first_unread_anchor:
        # Replace the client-supplied anchor with the id of the user's
        # first unread message matching the narrow (or a sentinel larger
        # than any message id if everything is read).
        condition = column("flags").op("&")(UserMessage.flags.read.mask) == 0

        # We exclude messages on muted topics when finding the first unread
        # message in this narrow
        muting_conditions = exclude_muting_conditions(user_profile, narrow)
        if muting_conditions:
            condition = and_(condition, *muting_conditions)

        first_unread_query = query.where(condition)
        first_unread_query = first_unread_query.order_by(inner_msg_id_col.asc()).limit(1)
        first_unread_result = list(sa_conn.execute(first_unread_query).fetchall())
        if len(first_unread_result) > 0:
            anchor = first_unread_result[0][0]
        else:
            anchor = LARGER_THAN_MAX_MESSAGE_ID

    # Build (up to) two limited sub-queries around the anchor and union
    # them; ids <= anchor ordered descending, ids >= anchor ascending.
    before_query = None
    after_query = None
    if num_before != 0:
        before_anchor = anchor
        if num_after != 0:
            # Don't include the anchor in both the before query and the after query
            before_anchor = anchor - 1
        before_query = query.where(inner_msg_id_col <= before_anchor) \
                            .order_by(inner_msg_id_col.desc()).limit(num_before)
    if num_after != 0:
        after_query = query.where(inner_msg_id_col >= anchor) \
                           .order_by(inner_msg_id_col.asc()).limit(num_after)

    if anchor == LARGER_THAN_MAX_MESSAGE_ID:
        # There's no need for an after_query if we're targeting just the target message.
        after_query = None

    if before_query is not None:
        if after_query is not None:
            query = union_all(before_query.self_group(), after_query.self_group())
        else:
            query = before_query
    elif after_query is not None:
        query = after_query
    else:
        # This can happen when a narrow is specified.
        query = query.where(inner_msg_id_col == anchor)

    # Wrap the union so the combined result can be re-sorted by id.
    main_query = alias(query)
    query = select(main_query.c, None, main_query).order_by(column("message_id").asc())
    # This is a hack to tag the query we use for testing
    query = query.prefix_with("/* get_old_messages */")
    query_result = list(sa_conn.execute(query).fetchall())

    # The following is a little messy, but ensures that the code paths
    # are similar regardless of the value of include_history.  The
    # 'user_messages' dictionary maps each message to the user's
    # UserMessage object for that message, which we will attach to the
    # rendered message dict before returning it.  We attempt to
    # bulk-fetch rendered message dicts from remote cache using the
    # 'messages' list.
    search_fields = dict()  # type: Dict[int, Dict[str, Text]]
    message_ids = []  # type: List[int]
    user_message_flags = {}  # type: Dict[int, List[str]]
    if include_history:
        message_ids = [row[0] for row in query_result]

        # TODO: This could be done with an outer join instead of two queries
        user_message_flags = dict((user_message.message_id, user_message.flags_list()) for user_message in
                                  UserMessage.objects.filter(user_profile=user_profile,
                                                             message__id__in=message_ids))
        for row in query_result:
            message_id = row[0]
            if user_message_flags.get(message_id) is None:
                # The user has no UserMessage row for this message: it
                # is visible only via history, so synthesize its flags.
                user_message_flags[message_id] = ["read", "historical"]
            if is_search:
                # Row shape when searching: (id, subject,
                # rendered_content, content_matches, subject_matches).
                (_, subject, rendered_content, content_matches, subject_matches) = row
                search_fields[message_id] = get_search_fields(rendered_content, subject,
                                                              content_matches, subject_matches)
    else:
        for row in query_result:
            message_id = row[0]
            flags = row[1]
            user_message_flags[message_id] = parse_usermessage_flags(flags)

            message_ids.append(message_id)

            if is_search:
                # Row shape when searching: (message_id, flags, subject,
                # rendered_content, content_matches, subject_matches).
                (_, _, subject, rendered_content, content_matches, subject_matches) = row
                search_fields[message_id] = get_search_fields(rendered_content, subject,
                                                              content_matches, subject_matches)

    cache_transformer = lambda row: MessageDict.build_dict_from_raw_db_row(row, apply_markdown)
    id_fetcher = lambda row: row['id']

    message_dicts = generic_bulk_cached_fetch(lambda message_id: to_dict_cache_key_id(message_id, apply_markdown),
                                              Message.get_raw_db_rows,
                                              message_ids,
                                              id_fetcher=id_fetcher,
                                              cache_transformer=cache_transformer,
                                              extractor=extract_message_dict,
                                              setter=stringify_message_dict)

    message_list = []
    for message_id in message_ids:
        msg_dict = message_dicts[message_id]
        msg_dict.update({"flags": user_message_flags[message_id]})
        msg_dict.update(search_fields.get(message_id, {}))
        message_list.append(msg_dict)

    statsd.incr('loaded_old_messages', len(message_list))
    ret = {'messages': message_list,
           "result": "success",
           "msg": ""}
    return json_success(ret)