def _delete_url_cache_media_txn(txn: LoggingTransaction) -> None:
    sql = "DELETE FROM local_media_repository WHERE media_id = ?"
    txn.execute_batch(sql, [(media_id,) for media_id in media_ids])

    sql = "DELETE FROM local_media_repository_thumbnails WHERE media_id = ?"
    txn.execute_batch(sql, [(media_id,) for media_id in media_ids])
def _devices_last_seen_update_txn(txn: LoggingTransaction) -> int:
    # This consists of two queries:
    #
    #   1. The sub-query searches for the next N devices and joins
    #      against user_ips to find the max last_seen associated with
    #      that device.
    #   2. The outer query then joins again against user_ips on
    #      user/device/last_seen. This *should* hopefully only
    #      return one row, but if it does return more than one then
    #      we'll just end up updating the same device row multiple
    #      times, which is fine.

    where_args: List[Union[str, int]]
    where_clause, where_args = make_tuple_comparison_clause(
        [("user_id", last_user_id), ("device_id", last_device_id)],
    )

    sql = """
        SELECT
            last_seen, ip, user_agent, user_id, device_id
        FROM (
            SELECT
                user_id, device_id, MAX(u.last_seen) AS last_seen
            FROM devices
            INNER JOIN user_ips AS u USING (user_id, device_id)
            WHERE %(where_clause)s
            GROUP BY user_id, device_id
            ORDER BY user_id ASC, device_id ASC
            LIMIT ?
        ) c
        INNER JOIN user_ips AS u USING (user_id, device_id, last_seen)
    """ % {
        "where_clause": where_clause
    }
    txn.execute(sql, where_args + [batch_size])

    rows = cast(List[Tuple[int, str, str, str, str]], txn.fetchall())
    if not rows:
        return 0

    sql = """
        UPDATE devices
        SET last_seen = ?, ip = ?, user_agent = ?
        WHERE user_id = ? AND device_id = ?
    """
    txn.execute_batch(sql, rows)

    _, _, _, user_id, device_id = rows[-1]
    self.db_pool.updates._background_update_progress_txn(
        txn,
        "devices_last_seen",
        {"last_user_id": user_id, "last_device_id": device_id},
    )

    return len(rows)
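# For reference: `make_tuple_comparison_clause` is defined elsewhere in the
# codebase and is not shown here. Based on how its result is used above, it is
# expected to produce a tuple-comparison WHERE fragment plus its bind
# arguments, roughly as in the sketch below (an illustrative assumption, not
# the real helper), so each batch resumes strictly after the last
# (user_id, device_id) pair processed by the previous batch.
from typing import List, Tuple, Union

def make_tuple_comparison_clause_sketch(
    keys: List[Tuple[str, Union[str, int]]],
) -> Tuple[str, List[Union[str, int]]]:
    # e.g. [("user_id", last_user_id), ("device_id", last_device_id)]
    #   -> ("(user_id, device_id) > (?, ?)", [last_user_id, last_device_id])
    return (
        "(%s) > (%s)"
        % (", ".join(k for k, _ in keys), ", ".join("?" for _ in keys)),
        [v for _, v in keys],
    )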
def reindex_search_txn(txn: LoggingTransaction) -> int:
    sql = (
        "SELECT stream_ordering, event_id FROM events"
        " WHERE ? <= stream_ordering AND stream_ordering < ?"
        " ORDER BY stream_ordering DESC"
        " LIMIT ?"
    )

    txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size))

    rows = txn.fetchall()
    if not rows:
        return 0

    min_stream_id = rows[-1][0]
    event_ids = [row[1] for row in rows]

    rows_to_update = []

    chunks = [event_ids[i : i + 100] for i in range(0, len(event_ids), 100)]
    for chunk in chunks:
        ev_rows = self.db_pool.simple_select_many_txn(
            txn,
            table="event_json",
            column="event_id",
            iterable=chunk,
            retcols=["event_id", "json"],
            keyvalues={},
        )

        for row in ev_rows:
            event_id = row["event_id"]
            event_json = db_to_json(row["json"])
            try:
                origin_server_ts = event_json["origin_server_ts"]
            except (KeyError, AttributeError):
                # If the event is missing a necessary field then
                # skip over it.
                continue

            rows_to_update.append((origin_server_ts, event_id))

    sql = "UPDATE events SET origin_server_ts = ? WHERE event_id = ?"
    txn.execute_batch(sql, rows_to_update)

    progress = {
        "target_min_stream_id_inclusive": target_min_stream_id,
        "max_stream_id_exclusive": min_stream_id,
        "rows_inserted": rows_inserted + len(rows_to_update),
    }

    self.db_pool.updates._background_update_progress_txn(
        txn, _BackgroundUpdates.EVENT_ORIGIN_SERVER_TS_NAME, progress
    )

    return len(rows_to_update)
def update_cache_txn(txn: LoggingTransaction) -> None:
    sql = (
        "UPDATE remote_media_cache SET last_access_ts = ?"
        " WHERE media_origin = ? AND media_id = ?"
    )

    txn.execute_batch(
        sql,
        (
            (time_ms, media_origin, media_id)
            for media_origin, media_id in remote_media
        ),
    )

    sql = (
        "UPDATE local_media_repository SET last_access_ts = ?"
        " WHERE media_id = ?"
    )

    txn.execute_batch(sql, ((time_ms, media_id) for media_id in local_media))
def _add_push_actions_to_staging_txn(txn: LoggingTransaction) -> None:
    # We don't use simple_insert_many here to avoid the overhead
    # of generating lists of dicts.

    sql = """
        INSERT INTO event_push_actions_staging
            (event_id, user_id, actions, notif, highlight, unread)
        VALUES (?, ?, ?, ?, ?, ?)
    """

    txn.execute_batch(
        sql,
        (
            _gen_entry(user_id, actions)
            for user_id, actions in user_id_actions.items()
        ),
    )
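# `_gen_entry` is defined in the enclosing function and is not shown here. A
# minimal sketch of the tuple it needs to produce, matching the column order
# of the INSERT above; the highlight detection, the JSON serialisation and
# the constant notif/unread values are illustrative assumptions, not the real
# helper.
import json
from typing import List, Tuple, Union

def _gen_entry_sketch(
    event_id: str, user_id: str, actions: List[Union[dict, str]]
) -> Tuple[str, str, str, int, int, int]:
    # An action list looks like ["notify", {"set_tweak": "highlight"}, ...]
    is_highlight = any(
        isinstance(a, dict) and a.get("set_tweak") == "highlight" for a in actions
    )
    return (
        event_id,
        user_id,
        json.dumps(actions),        # actions column: serialised action list
        1,                          # notif column
        1 if is_highlight else 0,   # highlight column
        1,                          # unread column (assumed)
    )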
def reindex_txn(txn: LoggingTransaction) -> int:
    sql = (
        "SELECT stream_ordering, event_id, json FROM events"
        " INNER JOIN event_json USING (event_id)"
        " WHERE ? <= stream_ordering AND stream_ordering < ?"
        " ORDER BY stream_ordering DESC"
        " LIMIT ?"
    )

    txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size))

    rows = txn.fetchall()
    if not rows:
        return 0

    min_stream_id = rows[-1][0]

    update_rows = []
    for row in rows:
        try:
            event_id = row[1]
            event_json = db_to_json(row[2])
            sender = event_json["sender"]
            content = event_json["content"]

            contains_url = "url" in content
            if contains_url:
                contains_url &= isinstance(content["url"], str)
        except (KeyError, AttributeError):
            # If the event is missing a necessary field then
            # skip over it.
            continue

        update_rows.append((sender, contains_url, event_id))

    sql = "UPDATE events SET sender = ?, contains_url = ? WHERE event_id = ?"
    txn.execute_batch(sql, update_rows)

    progress = {
        "target_min_stream_id_inclusive": target_min_stream_id,
        "max_stream_id_exclusive": min_stream_id,
        "rows_inserted": rows_inserted + len(rows),
    }

    self.db_pool.updates._background_update_progress_txn(
        txn, _BackgroundUpdates.EVENT_FIELDS_SENDER_URL_UPDATE_NAME, progress
    )

    return len(rows)
def store_search_entries_txn(
    self, txn: LoggingTransaction, entries: Iterable[SearchEntry]
) -> None:
    """Add entries to the search table

    Args:
        txn:
        entries: entries to be added to the table
    """
    if not self.hs.config.server.enable_search:
        return
    if isinstance(self.database_engine, PostgresEngine):
        sql = (
            "INSERT INTO event_search"
            " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)"
            " VALUES (?,?,?,to_tsvector('english', ?),?,?)"
        )

        args1 = (
            (
                entry.event_id,
                entry.room_id,
                entry.key,
                _clean_value_for_search(entry.value),
                entry.stream_ordering,
                entry.origin_server_ts,
            )
            for entry in entries
        )

        txn.execute_batch(sql, args1)

    elif isinstance(self.database_engine, Sqlite3Engine):
        sql = (
            "INSERT INTO event_search (event_id, room_id, key, value)"
            " VALUES (?,?,?,?)"
        )
        args2 = (
            (
                entry.event_id,
                entry.room_id,
                entry.key,
                _clean_value_for_search(entry.value),
            )
            for entry in entries
        )
        txn.execute_batch(sql, args2)

    else:
        # This should be unreachable.
        raise Exception("Unrecognized database engine")
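# `_clean_value_for_search` is referenced above but not shown. A minimal
# sketch of the kind of sanitisation it is expected to perform (an assumption
# for illustration, not the real implementation): Postgres rejects strings
# containing NUL codepoints, so they have to be stripped before the value is
# handed to to_tsvector or stored for SQLite matching.
def _clean_value_for_search_sketch(value: str) -> str:
    return value.replace("\u0000", "")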
def _store_destination_rooms_entries_txn(
    self,
    txn: LoggingTransaction,
    destinations: Iterable[str],
    room_id: str,
    stream_ordering: int,
) -> None:
    # ensure we have a `destinations` row for this destination, as there is
    # a foreign key constraint.
    if isinstance(self.database_engine, PostgresEngine):
        q = """
            INSERT INTO destinations (destination)
                VALUES (?)
                ON CONFLICT DO NOTHING;
        """
    elif isinstance(self.database_engine, Sqlite3Engine):
        q = """
            INSERT OR IGNORE INTO destinations (destination)
                VALUES (?);
        """
    else:
        raise RuntimeError("Unknown database engine")

    txn.execute_batch(q, ((destination,) for destination in destinations))

    rows = [(destination, room_id) for destination in destinations]

    self.db_pool.simple_upsert_many_txn(
        txn,
        "destination_rooms",
        ["destination", "room_id"],
        rows,
        ["stream_ordering"],
        [(stream_ordering,)] * len(rows),
    )
def _delete_url_cache_txn(txn: LoggingTransaction) -> None:
    txn.execute_batch(sql, [(media_id,) for media_id in media_ids])
def _purge_unreferenced_state_groups(
    self,
    txn: LoggingTransaction,
    room_id: str,
    state_groups_to_delete: Collection[int],
) -> None:
    logger.info(
        "[purge] found %i state groups to delete", len(state_groups_to_delete)
    )

    rows = self.db_pool.simple_select_many_txn(
        txn,
        table="state_group_edges",
        column="prev_state_group",
        iterable=state_groups_to_delete,
        keyvalues={},
        retcols=("state_group",),
    )

    remaining_state_groups = {
        row["state_group"]
        for row in rows
        if row["state_group"] not in state_groups_to_delete
    }

    logger.info(
        "[purge] de-delta-ing %i remaining state groups",
        len(remaining_state_groups),
    )

    # Now we turn the state groups that reference to-be-deleted state
    # groups to non delta versions.
    for sg in remaining_state_groups:
        logger.info("[purge] de-delta-ing remaining state group %s", sg)
        curr_state_by_group = self._get_state_groups_from_groups_txn(txn, [sg])
        curr_state = curr_state_by_group[sg]

        self.db_pool.simple_delete_txn(
            txn, table="state_groups_state", keyvalues={"state_group": sg}
        )

        self.db_pool.simple_delete_txn(
            txn, table="state_group_edges", keyvalues={"state_group": sg}
        )

        self.db_pool.simple_insert_many_txn(
            txn,
            table="state_groups_state",
            values=[
                {
                    "state_group": sg,
                    "room_id": room_id,
                    "type": key[0],
                    "state_key": key[1],
                    "event_id": state_id,
                }
                for key, state_id in curr_state.items()
            ],
        )

    logger.info("[purge] removing redundant state groups")
    txn.execute_batch(
        "DELETE FROM state_groups_state WHERE state_group = ?",
        ((sg,) for sg in state_groups_to_delete),
    )
    txn.execute_batch(
        "DELETE FROM state_groups WHERE id = ?",
        ((sg,) for sg in state_groups_to_delete),
    )
def _rotate_notifs_before_txn(
    self, txn: LoggingTransaction, rotate_to_stream_ordering: int
) -> None:
    old_rotate_stream_ordering = self.db_pool.simple_select_one_onecol_txn(
        txn,
        table="event_push_summary_stream_ordering",
        keyvalues={},
        retcol="stream_ordering",
    )

    # Calculate the new counts that should be upserted into event_push_summary
    sql = """
        SELECT user_id, room_id,
            coalesce(old.%s, 0) + upd.cnt,
            upd.stream_ordering,
            old.user_id
        FROM (
            SELECT user_id, room_id, count(*) as cnt,
                max(stream_ordering) as stream_ordering
            FROM event_push_actions
            WHERE ? <= stream_ordering AND stream_ordering < ?
                AND highlight = 0
                AND %s = 1
            GROUP BY user_id, room_id
        ) AS upd
        LEFT JOIN event_push_summary AS old USING (user_id, room_id)
    """

    # First get the count of unread messages.
    txn.execute(
        sql % ("unread_count", "unread"),
        (old_rotate_stream_ordering, rotate_to_stream_ordering),
    )

    # We need to merge results from the two requests (the one that retrieves the
    # unread count and the one that retrieves the notifications count) into a single
    # object because we might not have the same amount of rows in each of them. To do
    # this, we use a dict indexed on the user ID and room ID to make it easier to
    # populate.
    summaries: Dict[Tuple[str, str], _EventPushSummary] = {}
    for row in txn:
        summaries[(row[0], row[1])] = _EventPushSummary(
            unread_count=row[2],
            stream_ordering=row[3],
            old_user_id=row[4],
            notif_count=0,
        )

    # Then get the count of notifications.
    txn.execute(
        sql % ("notif_count", "notif"),
        (old_rotate_stream_ordering, rotate_to_stream_ordering),
    )

    for row in txn:
        if (row[0], row[1]) in summaries:
            summaries[(row[0], row[1])].notif_count = row[2]
        else:
            # Because the rules on notifying are different than the rules on marking
            # a message unread, we might end up with messages that notify but aren't
            # marked unread, so we might not have a summary for this (user, room)
            # tuple to complete.
            summaries[(row[0], row[1])] = _EventPushSummary(
                unread_count=0,
                stream_ordering=row[3],
                old_user_id=row[4],
                notif_count=row[2],
            )

    logger.info("Rotating notifications, handling %d rows", len(summaries))

    # If the `old.user_id` above is NULL then we know there isn't already an
    # entry in the table, so we simply insert it. Otherwise we update the
    # existing table.
    self.db_pool.simple_insert_many_txn(
        txn,
        table="event_push_summary",
        keys=(
            "user_id",
            "room_id",
            "notif_count",
            "unread_count",
            "stream_ordering",
        ),
        values=[
            (
                user_id,
                room_id,
                summary.notif_count,
                summary.unread_count,
                summary.stream_ordering,
            )
            for ((user_id, room_id), summary) in summaries.items()
            if summary.old_user_id is None
        ],
    )

    txn.execute_batch(
        """
            UPDATE event_push_summary
            SET notif_count = ?, unread_count = ?, stream_ordering = ?
            WHERE user_id = ? AND room_id = ?
        """,
        (
            (
                summary.notif_count,
                summary.unread_count,
                summary.stream_ordering,
                user_id,
                room_id,
            )
            for ((user_id, room_id), summary) in summaries.items()
            if summary.old_user_id is not None
        ),
    )

    txn.execute(
        "DELETE FROM event_push_actions"
        " WHERE ? <= stream_ordering AND stream_ordering < ? AND highlight = 0",
        (old_rotate_stream_ordering, rotate_to_stream_ordering),
    )

    logger.info("Rotating notifications, deleted %s push actions", txn.rowcount)

    txn.execute(
        "UPDATE event_push_summary_stream_ordering SET stream_ordering = ?",
        (rotate_to_stream_ordering,),
    )
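# `_EventPushSummary` is constructed above but defined elsewhere. A minimal
# sketch of the shape implied by its usage in this function: the field names
# come from the keyword arguments above, and it must be mutable because
# `notif_count` is assigned after construction. Modelling it as an attrs
# class is an assumption for illustration.
from typing import Optional

import attr

@attr.s(slots=True, auto_attribs=True)
class _EventPushSummarySketch:
    """In-memory summary of push counts for one (user, room) pair being rotated."""

    unread_count: int
    stream_ordering: int
    old_user_id: Optional[str]
    notif_count: int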
def _purge_history_txn(
    self,
    txn: LoggingTransaction,
    room_id: str,
    token: RoomStreamToken,
    delete_local_events: bool,
) -> Set[int]:
    # Tables that should be pruned:
    #     event_auth
    #     event_backward_extremities
    #     event_edges
    #     event_forward_extremities
    #     event_json
    #     event_push_actions
    #     event_relations
    #     event_search
    #     event_to_state_groups
    #     events
    #     rejections
    #     room_depth
    #     state_groups
    #     state_groups_state
    #     destination_rooms

    # we will build a temporary table listing the events so that we don't
    # have to keep shovelling the list back and forth across the
    # connection. Annoyingly the python sqlite driver commits the
    # transaction on CREATE, so let's do this first.
    #
    # furthermore, we might already have the table from a previous (failed)
    # purge attempt, so let's drop the table first.

    txn.execute("DROP TABLE IF EXISTS events_to_purge")

    txn.execute(
        "CREATE TEMPORARY TABLE events_to_purge ("
        "    event_id TEXT NOT NULL,"
        "    should_delete BOOLEAN NOT NULL"
        ")"
    )

    # First ensure that we're not about to delete all the forward extremities
    txn.execute(
        "SELECT e.event_id, e.depth FROM events as e "
        "INNER JOIN event_forward_extremities as f "
        "ON e.event_id = f.event_id "
        "AND e.room_id = f.room_id "
        "WHERE f.room_id = ?",
        (room_id,),
    )
    rows = txn.fetchall()
    # if we already have no forward extremities (for example because they were
    # cleared out by the `delete_old_current_state_events` background database
    # update), then we may as well carry on.
    if rows:
        max_depth = max(row[1] for row in rows)

        if max_depth < token.topological:
            # We need to ensure we don't delete all the events from the database
            # otherwise we wouldn't be able to send any events (due to not
            # having any backwards extremities)
            raise SynapseError(
                400, "topological_ordering is greater than forward extremities"
            )

    logger.info("[purge] looking for events to delete")

    should_delete_expr = "state_events.state_key IS NULL"
    should_delete_params: Tuple[Any, ...] = ()
    if not delete_local_events:
        should_delete_expr += " AND event_id NOT LIKE ?"

        # We include the parameter twice since we use the expression twice
        should_delete_params += (
            "%:" + self.hs.hostname,
            "%:" + self.hs.hostname,
        )

    should_delete_params += (room_id, token.topological)

    # Note that we insert events that are outliers and aren't going to be
    # deleted, as nothing will happen to them.
    txn.execute(
        "INSERT INTO events_to_purge"
        " SELECT event_id, %s"
        " FROM events AS e LEFT JOIN state_events USING (event_id)"
        " WHERE (NOT outlier OR (%s)) AND e.room_id = ? AND topological_ordering < ?"
        % (should_delete_expr, should_delete_expr),
        should_delete_params,
    )

    # We create the indices *after* insertion as that's a lot faster.

    # create an index on should_delete because later we'll be looking for
    # the should_delete / shouldn't_delete subsets
    txn.execute(
        "CREATE INDEX events_to_purge_should_delete"
        " ON events_to_purge(should_delete)"
    )

    # We do joins against events_to_purge for e.g. calculating state
    # groups to purge, etc., so let's make an index.
    txn.execute("CREATE INDEX events_to_purge_id ON events_to_purge(event_id)")

    txn.execute("SELECT event_id, should_delete FROM events_to_purge")
    event_rows = txn.fetchall()
    logger.info(
        "[purge] found %i events before cutoff, of which %i can be deleted",
        len(event_rows),
        sum(1 for e in event_rows if e[1]),
    )

    logger.info("[purge] Finding new backward extremities")

    # We calculate the new entries for the backward extremities by finding
    # events to be purged that are pointed to by events we're not going to
    # purge.
    txn.execute(
        "SELECT DISTINCT e.event_id FROM events_to_purge AS e"
        " INNER JOIN event_edges AS ed ON e.event_id = ed.prev_event_id"
        " LEFT JOIN events_to_purge AS ep2 ON ed.event_id = ep2.event_id"
        " WHERE ep2.event_id IS NULL"
    )
    new_backwards_extrems = txn.fetchall()

    logger.info(
        "[purge] replacing backward extremities: %r", new_backwards_extrems
    )

    txn.execute(
        "DELETE FROM event_backward_extremities WHERE room_id = ?", (room_id,)
    )

    # Update backward extremities
    txn.execute_batch(
        "INSERT INTO event_backward_extremities (room_id, event_id)"
        " VALUES (?, ?)",
        [(room_id, event_id) for (event_id,) in new_backwards_extrems],
    )

    logger.info("[purge] finding state groups referenced by deleted events")

    # Get all state groups that are referenced by events that are to be
    # deleted.
    txn.execute(
        """
        SELECT DISTINCT state_group FROM events_to_purge
        INNER JOIN event_to_state_groups USING (event_id)
        """
    )

    referenced_state_groups = {sg for (sg,) in txn}
    logger.info(
        "[purge] found %i referenced state groups", len(referenced_state_groups)
    )

    logger.info("[purge] removing events from event_to_state_groups")
    txn.execute(
        "DELETE FROM event_to_state_groups "
        "WHERE event_id IN (SELECT event_id from events_to_purge)"
    )

    # Delete all remote non-state events
    for table in (
        "events",
        "event_json",
        "event_auth",
        "event_edges",
        "event_forward_extremities",
        "event_relations",
        "event_search",
        "rejections",
        "redactions",
    ):
        logger.info("[purge] removing events from %s", table)

        txn.execute(
            "DELETE FROM %s WHERE event_id IN ("
            "    SELECT event_id FROM events_to_purge WHERE should_delete"
            ")" % (table,)
        )

    # event_push_actions lacks an index on event_id, and has one on
    # (room_id, event_id) instead.
    for table in ("event_push_actions",):
        logger.info("[purge] removing events from %s", table)

        txn.execute(
            "DELETE FROM %s WHERE room_id = ? AND event_id IN ("
            "    SELECT event_id FROM events_to_purge WHERE should_delete"
            ")" % (table,),
            (room_id,),
        )

    # Mark all state and own events as outliers
    logger.info("[purge] marking remaining events as outliers")
    txn.execute(
        "UPDATE events SET outlier = ?"
        " WHERE event_id IN ("
        "    SELECT event_id FROM events_to_purge "
        "    WHERE NOT should_delete"
        ")",
        (True,),
    )

    # synapse tries to take out an exclusive lock on room_depth whenever it
    # persists events (because upsert), and once we run this update, we
    # will block that for the rest of our transaction.
    #
    # So, let's stick it at the end so that we don't block event
    # persistence.
    #
    # We do this by calculating the minimum depth of the backwards
    # extremities. However, the events in event_backward_extremities
    # are ones we don't have yet so we need to look at the events that
    # point to it via event_edges table.
    txn.execute(
        """
        SELECT COALESCE(MIN(depth), 0)
        FROM event_backward_extremities AS eb
        INNER JOIN event_edges AS eg ON eg.prev_event_id = eb.event_id
        INNER JOIN events AS e ON e.event_id = eg.event_id
        WHERE eb.room_id = ?
        """,
        (room_id,),
    )

    (min_depth,) = cast(Tuple[int], txn.fetchone())

    logger.info("[purge] updating room_depth to %d", min_depth)

    txn.execute(
        "UPDATE room_depth SET min_depth = ? WHERE room_id = ?",
        (min_depth, room_id),
    )

    # finally, drop the temp table. this will commit the txn in sqlite,
    # so make sure to keep this actually last.
    txn.execute("DROP TABLE events_to_purge")

    for event_id, should_delete in event_rows:
        self._invalidate_cache_and_stream(
            txn, self._get_state_group_for_event, (event_id,)
        )

        # XXX: This is racy, since have_seen_events could be called between the
        # transaction completing and the invalidation running. On the other hand,
        # that's no different to calling `have_seen_events` just before the
        # event is deleted from the database.
        if should_delete:
            self._invalidate_cache_and_stream(
                txn, self.have_seen_event, (room_id, event_id)
            )
            self._invalidate_get_event_cache(event_id)

    logger.info("[purge] done")

    return referenced_state_groups