Example No. 1
        def _delete_url_cache_media_txn(txn: LoggingTransaction) -> None:
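            # Remove the local media rows and their thumbnails for each of the
            # given media_ids, one batched DELETE per table.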
            sql = "DELETE FROM local_media_repository WHERE media_id = ?"

            txn.execute_batch(sql, [(media_id,) for media_id in media_ids])

            sql = "DELETE FROM local_media_repository_thumbnails WHERE media_id = ?"

            txn.execute_batch(sql, [(media_id,) for media_id in media_ids])
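
For reference, the same batched-delete pattern can be sketched outside Synapse with the standard library's sqlite3 module, whose executemany plays the role that txn.execute_batch plays here; the table names are simply reused from the example for illustration, and this is not Synapse's own code.

    import sqlite3
    from typing import Iterable

    def delete_media_rows(conn: sqlite3.Connection, media_ids: Iterable[str]) -> None:
        # One parameter tuple per media_id; executemany runs the DELETE once per
        # tuple, mirroring txn.execute_batch(sql, [(media_id,) for media_id in media_ids]).
        params = [(media_id,) for media_id in media_ids]
        conn.executemany("DELETE FROM local_media_repository WHERE media_id = ?", params)
        conn.executemany(
            "DELETE FROM local_media_repository_thumbnails WHERE media_id = ?", params
        )
        conn.commit()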
Example No. 2
        def _devices_last_seen_update_txn(txn: LoggingTransaction) -> int:
            # This consists of two queries:
            #
            #   1. The sub-query searches for the next N devices and joins
            #      against user_ips to find the max last_seen associated with
            #      that device.
            #   2. The outer query then joins again against user_ips on
            #      user/device/last_seen. This *should* hopefully only
            #      return one row, but if it does return more than one then
            #      we'll just end up updating the same device row multiple
            #      times, which is fine.

            where_args: List[Union[str, int]]
            where_clause, where_args = make_tuple_comparison_clause(
                [("user_id", last_user_id), ("device_id", last_device_id)]
            )

            sql = """
                SELECT
                    last_seen, ip, user_agent, user_id, device_id
                FROM (
                    SELECT
                        user_id, device_id, MAX(u.last_seen) AS last_seen
                    FROM devices
                    INNER JOIN user_ips AS u USING (user_id, device_id)
                    WHERE %(where_clause)s
                    GROUP BY user_id, device_id
                    ORDER BY user_id ASC, device_id ASC
                    LIMIT ?
                ) c
                INNER JOIN user_ips AS u USING (user_id, device_id, last_seen)
            """ % {
                "where_clause": where_clause
            }
            txn.execute(sql, where_args + [batch_size])

            rows = cast(List[Tuple[int, str, str, str, str]], txn.fetchall())
            if not rows:
                return 0

            sql = """
                UPDATE devices
                SET last_seen = ?, ip = ?, user_agent = ?
                WHERE user_id = ? AND device_id = ?
            """
            txn.execute_batch(sql, rows)

            _, _, _, user_id, device_id = rows[-1]
            self.db_pool.updates._background_update_progress_txn(
                txn,
                "devices_last_seen",
                {
                    "last_user_id": user_id,
                    "last_device_id": device_id
                },
            )

            return len(rows)
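
make_tuple_comparison_clause is a Synapse helper for keyset pagination: the WHERE clause it returns lets the next batch resume strictly after the last (user_id, device_id) pair already processed. As an illustration only, a hypothetical stand-in could build a row-value comparison like this (the generated SQL shape is an assumption, not the helper's actual output):

    from typing import List, Tuple, Union

    def tuple_comparison_clause(
        keys: List[Tuple[str, Union[str, int]]]
    ) -> Tuple[str, List[Union[str, int]]]:
        # Hypothetical stand-in: build "(col1, col2) > (?, ?)" plus its bind values.
        columns = ", ".join(name for name, _ in keys)
        placeholders = ", ".join("?" for _ in keys)
        return f"({columns}) > ({placeholders})", [value for _, value in keys]

    clause, args = tuple_comparison_clause(
        [("user_id", "@alice:example.com"), ("device_id", "ABCDEF")]
    )
    # clause == "(user_id, device_id) > (?, ?)"
    # args == ["@alice:example.com", "ABCDEF"]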
Example No. 3
        def reindex_search_txn(txn: LoggingTransaction) -> int:
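            # Back-fill events.origin_server_ts from the stored event JSON,
            # walking backwards through stream_ordering one batch at a time.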
            sql = ("SELECT stream_ordering, event_id FROM events"
                   " WHERE ? <= stream_ordering AND stream_ordering < ?"
                   " ORDER BY stream_ordering DESC"
                   " LIMIT ?")

            txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size))

            rows = txn.fetchall()
            if not rows:
                return 0

            min_stream_id = rows[-1][0]
            event_ids = [row[1] for row in rows]

            rows_to_update = []

            chunks = [
                event_ids[i:i + 100] for i in range(0, len(event_ids), 100)
            ]
            for chunk in chunks:
                ev_rows = self.db_pool.simple_select_many_txn(
                    txn,
                    table="event_json",
                    column="event_id",
                    iterable=chunk,
                    retcols=["event_id", "json"],
                    keyvalues={},
                )

                for row in ev_rows:
                    event_id = row["event_id"]
                    event_json = db_to_json(row["json"])
                    try:
                        origin_server_ts = event_json["origin_server_ts"]
                    except (KeyError, AttributeError):
                        # If the event is missing a necessary field then
                        # skip over it.
                        continue

                    rows_to_update.append((origin_server_ts, event_id))

            sql = "UPDATE events SET origin_server_ts = ? WHERE event_id = ?"

            txn.execute_batch(sql, rows_to_update)

            progress = {
                "target_min_stream_id_inclusive": target_min_stream_id,
                "max_stream_id_exclusive": min_stream_id,
                "rows_inserted": rows_inserted + len(rows_to_update),
            }

            self.db_pool.updates._background_update_progress_txn(
                txn, _BackgroundUpdates.EVENT_ORIGIN_SERVER_TS_NAME, progress)

            return len(rows_to_update)
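
Both reindex helpers follow the same background-update contract: process at most batch_size rows, persist progress, and return the number of rows handled, where zero signals completion. A minimal sketch of a loop that could drive such a function (the runner below is an assumption for illustration, not Synapse's background-update machinery):

    from typing import Callable

    def run_in_batches(process_batch: Callable[[int], int], batch_size: int = 100) -> None:
        # Keep invoking the batch function until it reports nothing left to do.
        while process_batch(batch_size) > 0:
            pass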
Example No. 4
        def update_cache_txn(txn: LoggingTransaction) -> None:
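            # Bump last_access_ts for the given remote and local media, one
            # batched UPDATE per table.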
            sql = ("UPDATE remote_media_cache SET last_access_ts = ?"
                   " WHERE media_origin = ? AND media_id = ?")

            txn.execute_batch(
                sql,
                ((time_ms, media_origin, media_id)
                 for media_origin, media_id in remote_media),
            )

            sql = ("UPDATE local_media_repository SET last_access_ts = ?"
                   " WHERE media_id = ?")

            txn.execute_batch(sql, ((time_ms, media_id)
                                    for media_id in local_media))
Example No. 5
        def _add_push_actions_to_staging_txn(txn: LoggingTransaction) -> None:
            # We don't use simple_insert_many here to avoid the overhead
            # of generating lists of dicts.

            sql = """
                INSERT INTO event_push_actions_staging
                    (event_id, user_id, actions, notif, highlight, unread)
                VALUES (?, ?, ?, ?, ?, ?)
            """

            txn.execute_batch(
                sql,
                (_gen_entry(user_id, actions)
                 for user_id, actions in user_id_actions.items()),
            )
Example No. 6
        def reindex_txn(txn: LoggingTransaction) -> int:
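            # Back-fill events.sender and events.contains_url from the stored
            # event JSON for one batch of rows.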
            sql = ("SELECT stream_ordering, event_id, json FROM events"
                   " INNER JOIN event_json USING (event_id)"
                   " WHERE ? <= stream_ordering AND stream_ordering < ?"
                   " ORDER BY stream_ordering DESC"
                   " LIMIT ?")

            txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size))

            rows = txn.fetchall()
            if not rows:
                return 0

            min_stream_id = rows[-1][0]

            update_rows = []
            for row in rows:
                try:
                    event_id = row[1]
                    event_json = db_to_json(row[2])
                    sender = event_json["sender"]
                    content = event_json["content"]

                    contains_url = "url" in content
                    if contains_url:
                        contains_url &= isinstance(content["url"], str)
                except (KeyError, AttributeError):
                    # If the event is missing a necessary field then
                    # skip over it.
                    continue

                update_rows.append((sender, contains_url, event_id))

            sql = "UPDATE events SET sender = ?, contains_url = ? WHERE event_id = ?"

            txn.execute_batch(sql, update_rows)

            progress = {
                "target_min_stream_id_inclusive": target_min_stream_id,
                "max_stream_id_exclusive": min_stream_id,
                "rows_inserted": rows_inserted + len(rows),
            }

            self.db_pool.updates._background_update_progress_txn(
                txn, _BackgroundUpdates.EVENT_FIELDS_SENDER_URL_UPDATE_NAME,
                progress)

            return len(rows)
Example No. 7
    def store_search_entries_txn(self, txn: LoggingTransaction,
                                 entries: Iterable[SearchEntry]) -> None:
        """Add entries to the search table

        Args:
            txn: The database transaction.
            entries: entries to be added to the table
        """
        if not self.hs.config.server.enable_search:
            return
        if isinstance(self.database_engine, PostgresEngine):
            sql = (
                "INSERT INTO event_search"
                " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)"
                " VALUES (?,?,?,to_tsvector('english', ?),?,?)")

            args1 = ((
                entry.event_id,
                entry.room_id,
                entry.key,
                _clean_value_for_search(entry.value),
                entry.stream_ordering,
                entry.origin_server_ts,
            ) for entry in entries)

            txn.execute_batch(sql, args1)

        elif isinstance(self.database_engine, Sqlite3Engine):
            sql = ("INSERT INTO event_search (event_id, room_id, key, value)"
                   " VALUES (?,?,?,?)")
            args2 = ((
                entry.event_id,
                entry.room_id,
                entry.key,
                _clean_value_for_search(entry.value),
            ) for entry in entries)
            txn.execute_batch(sql, args2)

        else:
            # This should be unreachable.
            raise Exception("Unrecognized database engine")
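
On the PostgreSQL path the vector column is populated via to_tsvector('english', ...). Purely as an illustration of how such rows can later be matched with PostgreSQL full-text search (this is not Synapse's actual search query):

    # Illustration only: query the vector column populated above; with psycopg2
    # this could be run as cur.execute(SEARCH_SQL, ("search terms",)).
    SEARCH_SQL = """
        SELECT event_id, room_id
        FROM event_search
        WHERE vector @@ plainto_tsquery('english', %s)
        ORDER BY origin_server_ts DESC
    """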
Example No. 8
    def _store_destination_rooms_entries_txn(
        self,
        txn: LoggingTransaction,
        destinations: Iterable[str],
        room_id: str,
        stream_ordering: int,
    ) -> None:

        # ensure we have a `destinations` row for each destination, as there is
        # a foreign key constraint.
        if isinstance(self.database_engine, PostgresEngine):
            q = """
                INSERT INTO destinations (destination)
                    VALUES (?)
                    ON CONFLICT DO NOTHING;
            """
        elif isinstance(self.database_engine, Sqlite3Engine):
            q = """
                INSERT OR IGNORE INTO destinations (destination)
                    VALUES (?);
            """
        else:
            raise RuntimeError("Unknown database engine")

        txn.execute_batch(q, ((destination, ) for destination in destinations))

        rows = [(destination, room_id) for destination in destinations]

        self.db_pool.simple_upsert_many_txn(
            txn,
            "destination_rooms",
            ["destination", "room_id"],
            rows,
            ["stream_ordering"],
            [(stream_ordering, )] * len(rows),
        )
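
Conceptually, the upsert performed by simple_upsert_many_txn here sets stream_ordering for each (destination, room_id) pair, inserting the row if it is missing. A rough PostgreSQL-flavoured sketch of such a statement, assuming (destination, room_id) is the table's unique key (an illustration, not the helper's generated SQL):

    # Assumed shape of the per-row upsert; EXCLUDED refers to the values that
    # failed to insert because the row already existed.
    UPSERT_SQL = """
        INSERT INTO destination_rooms (destination, room_id, stream_ordering)
        VALUES (?, ?, ?)
        ON CONFLICT (destination, room_id) DO UPDATE
            SET stream_ordering = EXCLUDED.stream_ordering
    """
    # e.g. txn.execute_batch(
    #     UPSERT_SQL, ((d, room_id, stream_ordering) for d in destinations)
    # )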
Example No. 9
        def _delete_url_cache_txn(txn: LoggingTransaction) -> None:
            txn.execute_batch(sql, [(media_id,) for media_id in media_ids])
Example No. 10
    def _purge_unreferenced_state_groups(
        self,
        txn: LoggingTransaction,
        room_id: str,
        state_groups_to_delete: Collection[int],
    ) -> None:
        logger.info("[purge] found %i state groups to delete",
                    len(state_groups_to_delete))

        rows = self.db_pool.simple_select_many_txn(
            txn,
            table="state_group_edges",
            column="prev_state_group",
            iterable=state_groups_to_delete,
            keyvalues={},
            retcols=("state_group", ),
        )

        remaining_state_groups = {
            row["state_group"]
            for row in rows if row["state_group"] not in state_groups_to_delete
        }

        logger.info(
            "[purge] de-delta-ing %i remaining state groups",
            len(remaining_state_groups),
        )

        # Now we turn the state groups that reference to-be-deleted state
        # groups into non-delta versions.
        for sg in remaining_state_groups:
            logger.info("[purge] de-delta-ing remaining state group %s", sg)
            curr_state_by_group = self._get_state_groups_from_groups_txn(
                txn, [sg])
            curr_state = curr_state_by_group[sg]

            self.db_pool.simple_delete_txn(txn,
                                           table="state_groups_state",
                                           keyvalues={"state_group": sg})

            self.db_pool.simple_delete_txn(txn,
                                           table="state_group_edges",
                                           keyvalues={"state_group": sg})

            self.db_pool.simple_insert_many_txn(
                txn,
                table="state_groups_state",
                values=[{
                    "state_group": sg,
                    "room_id": room_id,
                    "type": key[0],
                    "state_key": key[1],
                    "event_id": state_id,
                } for key, state_id in curr_state.items()],
            )

        logger.info("[purge] removing redundant state groups")
        txn.execute_batch(
            "DELETE FROM state_groups_state WHERE state_group = ?",
            ((sg, ) for sg in state_groups_to_delete),
        )
        txn.execute_batch(
            "DELETE FROM state_groups WHERE id = ?",
            ((sg, ) for sg in state_groups_to_delete),
        )
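
State groups are stored as deltas on top of a prev_state_group recorded in state_group_edges, so "de-delta-ing" a group means materialising its full state so that its soon-to-be-purged ancestors are no longer needed. A toy sketch of that resolution over plain dicts (the data structures are assumptions for illustration, not the store's representation):

    from typing import Dict, Optional, Tuple

    # Per-group delta: {(event type, state_key): event_id}; edges map each group
    # to its prev_state_group (None for a root group).
    deltas: Dict[int, Dict[Tuple[str, str], str]] = {
        1: {("m.room.create", ""): "$create"},
        2: {("m.room.member", "@alice:example.com"): "$join"},
    }
    edges: Dict[int, Optional[int]] = {1: None, 2: 1}

    def resolve_state_group(sg: int) -> Dict[Tuple[str, str], str]:
        # Walk the prev_state_group chain, then merge deltas root-first so that
        # entries from descendant groups win.
        chain = []
        current: Optional[int] = sg
        while current is not None:
            chain.append(current)
            current = edges[current]
        state: Dict[Tuple[str, str], str] = {}
        for group in reversed(chain):
            state.update(deltas[group])
        return state

    # resolve_state_group(2) contains both the create event and Alice's join.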
Example No. 11
    def _rotate_notifs_before_txn(self, txn: LoggingTransaction,
                                  rotate_to_stream_ordering: int) -> None:
        old_rotate_stream_ordering = self.db_pool.simple_select_one_onecol_txn(
            txn,
            table="event_push_summary_stream_ordering",
            keyvalues={},
            retcol="stream_ordering",
        )

        # Calculate the new counts that should be upserted into event_push_summary
        sql = """
            SELECT user_id, room_id,
                coalesce(old.%s, 0) + upd.cnt,
                upd.stream_ordering,
                old.user_id
            FROM (
                SELECT user_id, room_id, count(*) as cnt,
                    max(stream_ordering) as stream_ordering
                FROM event_push_actions
                WHERE ? <= stream_ordering AND stream_ordering < ?
                    AND highlight = 0
                    AND %s = 1
                GROUP BY user_id, room_id
            ) AS upd
            LEFT JOIN event_push_summary AS old USING (user_id, room_id)
        """

        # First get the count of unread messages.
        txn.execute(
            sql % ("unread_count", "unread"),
            (old_rotate_stream_ordering, rotate_to_stream_ordering),
        )

        # We need to merge results from the two requests (the one that retrieves the
        # unread count and the one that retrieves the notifications count) into a single
        # object because we might not have the same number of rows in each of them. To do
        # this, we use a dict indexed on the user ID and room ID to make it easier to
        # populate.
        summaries: Dict[Tuple[str, str], _EventPushSummary] = {}
        for row in txn:
            summaries[(row[0], row[1])] = _EventPushSummary(
                unread_count=row[2],
                stream_ordering=row[3],
                old_user_id=row[4],
                notif_count=0,
            )

        # Then get the count of notifications.
        txn.execute(
            sql % ("notif_count", "notif"),
            (old_rotate_stream_ordering, rotate_to_stream_ordering),
        )

        for row in txn:
            if (row[0], row[1]) in summaries:
                summaries[(row[0], row[1])].notif_count = row[2]
            else:
                # Because the rules on notifying are different from the rules on marking
                # a message unread, we might end up with messages that notify but aren't
                # marked unread, so we might not have a summary for this (user, room)
                # tuple to complete.
                summaries[(row[0], row[1])] = _EventPushSummary(
                    unread_count=0,
                    stream_ordering=row[3],
                    old_user_id=row[4],
                    notif_count=row[2],
                )

        logger.info("Rotating notifications, handling %d rows", len(summaries))

        # If the `old.user_id` above is NULL then we know there isn't already an
        # entry in the table, so we simply insert it. Otherwise we update the
        # existing table.
        self.db_pool.simple_insert_many_txn(
            txn,
            table="event_push_summary",
            keys=(
                "user_id",
                "room_id",
                "notif_count",
                "unread_count",
                "stream_ordering",
            ),
            values=[(
                user_id,
                room_id,
                summary.notif_count,
                summary.unread_count,
                summary.stream_ordering,
            ) for ((user_id, room_id), summary) in summaries.items()
                    if summary.old_user_id is None],
        )

        txn.execute_batch(
            """
                UPDATE event_push_summary
                SET notif_count = ?, unread_count = ?, stream_ordering = ?
                WHERE user_id = ? AND room_id = ?
            """,
            ((
                summary.notif_count,
                summary.unread_count,
                summary.stream_ordering,
                user_id,
                room_id,
            ) for ((user_id, room_id), summary) in summaries.items()
             if summary.old_user_id is not None),
        )

        txn.execute(
            "DELETE FROM event_push_actions"
            " WHERE ? <= stream_ordering AND stream_ordering < ? AND highlight = 0",
            (old_rotate_stream_ordering, rotate_to_stream_ordering),
        )

        logger.info("Rotating notifications, deleted %s push actions",
                    txn.rowcount)

        txn.execute(
            "UPDATE event_push_summary_stream_ordering SET stream_ordering = ?",
            (rotate_to_stream_ordering, ),
        )
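
The split between simple_insert_many_txn and the batched UPDATE above hinges on whether old.user_id came back NULL, i.e. whether an event_push_summary row already existed. The same partitioning, sketched with a stand-in dataclass mirroring the fields used in the example (not the real _EventPushSummary class):

    from dataclasses import dataclass
    from typing import Dict, Optional, Tuple

    @dataclass
    class PushSummary:
        unread_count: int
        stream_ordering: int
        old_user_id: Optional[str]
        notif_count: int

    summaries: Dict[Tuple[str, str], PushSummary] = {
        ("@alice:example.com", "!room:example.com"): PushSummary(2, 10, None, 1),
        ("@bob:example.com", "!room:example.com"): PushSummary(0, 12, "@bob:example.com", 3),
    }

    # Rows with no pre-existing summary are inserted ...
    to_insert = {k: s for k, s in summaries.items() if s.old_user_id is None}
    # ... while rows that joined an existing summary get the batched UPDATE.
    to_update = {k: s for k, s in summaries.items() if s.old_user_id is not None}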
Example No. 12
    def _purge_history_txn(
        self,
        txn: LoggingTransaction,
        room_id: str,
        token: RoomStreamToken,
        delete_local_events: bool,
    ) -> Set[int]:
        # Tables that should be pruned:
        #     event_auth
        #     event_backward_extremities
        #     event_edges
        #     event_forward_extremities
        #     event_json
        #     event_push_actions
        #     event_relations
        #     event_search
        #     event_to_state_groups
        #     events
        #     rejections
        #     room_depth
        #     state_groups
        #     state_groups_state
        #     destination_rooms

        # we will build a temporary table listing the events so that we don't
        # have to keep shovelling the list back and forth across the
        # connection. Annoyingly the python sqlite driver commits the
        # transaction on CREATE, so let's do this first.
        #
        # furthermore, we might already have the table from a previous (failed)
        # purge attempt, so let's drop the table first.

        txn.execute("DROP TABLE IF EXISTS events_to_purge")

        txn.execute("CREATE TEMPORARY TABLE events_to_purge ("
                    "    event_id TEXT NOT NULL,"
                    "    should_delete BOOLEAN NOT NULL"
                    ")")

        # First ensure that we're not about to delete all the forward extremities
        txn.execute(
            "SELECT e.event_id, e.depth FROM events as e "
            "INNER JOIN event_forward_extremities as f "
            "ON e.event_id = f.event_id "
            "AND e.room_id = f.room_id "
            "WHERE f.room_id = ?",
            (room_id, ),
        )
        rows = txn.fetchall()
        # if we already have no forward extremities (for example because they were
        # cleared out by the `delete_old_current_state_events` background database
        # update), then we may as well carry on.
        if rows:
            max_depth = max(row[1] for row in rows)

            if max_depth < token.topological:
                # We need to ensure we don't delete all the events from the database
                # otherwise we wouldn't be able to send any events (due to not
                # having any backwards extremities)
                raise SynapseError(
                    400,
                    "topological_ordering is greater than forward extremities")

        logger.info("[purge] looking for events to delete")

        should_delete_expr = "state_events.state_key IS NULL"
        should_delete_params: Tuple[Any, ...] = ()
        if not delete_local_events:
            should_delete_expr += " AND event_id NOT LIKE ?"

            # We include the parameter twice since we use the expression twice
            should_delete_params += ("%:" + self.hs.hostname,
                                     "%:" + self.hs.hostname)

        should_delete_params += (room_id, token.topological)

        # Note that we insert events that are outliers and aren't going to be
        # deleted, as nothing will happen to them.
        txn.execute(
            "INSERT INTO events_to_purge"
            " SELECT event_id, %s"
            " FROM events AS e LEFT JOIN state_events USING (event_id)"
            " WHERE (NOT outlier OR (%s)) AND e.room_id = ? AND topological_ordering < ?"
            % (should_delete_expr, should_delete_expr),
            should_delete_params,
        )

        # We create the indices *after* insertion as that's a lot faster.

        # create an index on should_delete because later we'll be looking for
        # the should_delete / shouldn't_delete subsets
        txn.execute("CREATE INDEX events_to_purge_should_delete"
                    " ON events_to_purge(should_delete)")

        # We do joins against events_to_purge for e.g. calculating state
        # groups to purge, etc., so let's make an index.
        txn.execute(
            "CREATE INDEX events_to_purge_id ON events_to_purge(event_id)")

        txn.execute("SELECT event_id, should_delete FROM events_to_purge")
        event_rows = txn.fetchall()
        logger.info(
            "[purge] found %i events before cutoff, of which %i can be deleted",
            len(event_rows),
            sum(1 for e in event_rows if e[1]),
        )

        logger.info("[purge] Finding new backward extremities")

        # We calculate the new entries for the backward extremities by finding
        # events to be purged that are pointed to by events we're not going to
        # purge.
        txn.execute(
            "SELECT DISTINCT e.event_id FROM events_to_purge AS e"
            " INNER JOIN event_edges AS ed ON e.event_id = ed.prev_event_id"
            " LEFT JOIN events_to_purge AS ep2 ON ed.event_id = ep2.event_id"
            " WHERE ep2.event_id IS NULL")
        new_backwards_extrems = txn.fetchall()

        logger.info("[purge] replacing backward extremities: %r",
                    new_backwards_extrems)

        txn.execute("DELETE FROM event_backward_extremities WHERE room_id = ?",
                    (room_id, ))

        # Update backward extremities
        txn.execute_batch(
            "INSERT INTO event_backward_extremities (room_id, event_id)"
            " VALUES (?, ?)",
            [(room_id, event_id) for event_id, in new_backwards_extrems],
        )

        logger.info(
            "[purge] finding state groups referenced by deleted events")

        # Get all state groups that are referenced by events that are to be
        # deleted.
        txn.execute("""
            SELECT DISTINCT state_group FROM events_to_purge
            INNER JOIN event_to_state_groups USING (event_id)
        """)

        referenced_state_groups = {sg for sg, in txn}
        logger.info("[purge] found %i referenced state groups",
                    len(referenced_state_groups))

        logger.info("[purge] removing events from event_to_state_groups")
        txn.execute("DELETE FROM event_to_state_groups "
                    "WHERE event_id IN (SELECT event_id from events_to_purge)")

        # Delete all remote non-state events
        for table in (
                "events",
                "event_json",
                "event_auth",
                "event_edges",
                "event_forward_extremities",
                "event_relations",
                "event_search",
                "rejections",
                "redactions",
        ):
            logger.info("[purge] removing events from %s", table)

            txn.execute(
                "DELETE FROM %s WHERE event_id IN ("
                "    SELECT event_id FROM events_to_purge WHERE should_delete"
                ")" % (table, ))

        # event_push_actions lacks an index on event_id, and has one on
        # (room_id, event_id) instead.
        for table in ("event_push_actions", ):
            logger.info("[purge] removing events from %s", table)

            txn.execute(
                "DELETE FROM %s WHERE room_id = ? AND event_id IN ("
                "    SELECT event_id FROM events_to_purge WHERE should_delete"
                ")" % (table, ),
                (room_id, ),
            )

        # Mark all state events and our own local events as outliers
        logger.info("[purge] marking remaining events as outliers")
        txn.execute(
            "UPDATE events SET outlier = ?"
            " WHERE event_id IN ("
            "    SELECT event_id FROM events_to_purge "
            "    WHERE NOT should_delete"
            ")",
            (True, ),
        )

        # synapse tries to take out an exclusive lock on room_depth whenever it
        # persists events (because of the upsert), and once we run this update, we
        # will block that for the rest of our transaction.
        #
        # So, let's stick it at the end so that we don't block event
        # persistence.
        #
        # We do this by calculating the minimum depth of the backwards
        # extremities. However, the events in event_backward_extremities
        # are ones we don't have yet so we need to look at the events that
        # point to it via event_edges table.
        txn.execute(
            """
            SELECT COALESCE(MIN(depth), 0)
            FROM event_backward_extremities AS eb
            INNER JOIN event_edges AS eg ON eg.prev_event_id = eb.event_id
            INNER JOIN events AS e ON e.event_id = eg.event_id
            WHERE eb.room_id = ?
        """,
            (room_id, ),
        )
        (min_depth, ) = cast(Tuple[int], txn.fetchone())

        logger.info("[purge] updating room_depth to %d", min_depth)

        txn.execute(
            "UPDATE room_depth SET min_depth = ? WHERE room_id = ?",
            (min_depth, room_id),
        )

        # finally, drop the temp table. this will commit the txn in sqlite,
        # so make sure to keep this actually last.
        txn.execute("DROP TABLE events_to_purge")

        for event_id, should_delete in event_rows:
            self._invalidate_cache_and_stream(txn,
                                              self._get_state_group_for_event,
                                              (event_id, ))

            # XXX: This is racy, since have_seen_events could be called between the
            #    transaction completing and the invalidation running. On the other hand,
            #    that's no different to calling `have_seen_events` just before the
            #    event is deleted from the database.
            if should_delete:
                self._invalidate_cache_and_stream(txn, self.have_seen_event,
                                                  (room_id, event_id))
                self._invalidate_get_event_cache(event_id)

        logger.info("[purge] done")

        return referenced_state_groups