Пример #1
0
    def _add_device_change_to_stream_txn(
        self,
        txn: LoggingTransaction,
        user_id: str,
        device_ids: Collection[str],
        stream_ids: List[str],
    ):
        txn.call_after(
            self._device_list_stream_cache.entity_has_changed, user_id, stream_ids[-1],
        )

        min_stream_id = stream_ids[0]

        # Delete older entries in the table, as we really only care about
        # when the latest change happened.
        txn.executemany(
            """
            DELETE FROM device_lists_stream
            WHERE user_id = ? AND device_id = ? AND stream_id < ?
            """,
            [(user_id, device_id, min_stream_id) for device_id in device_ids],
        )

        self.db.simple_insert_many_txn(
            txn,
            table="device_lists_stream",
            values=[
                {"stream_id": stream_id, "user_id": user_id, "device_id": device_id}
                for stream_id, device_id in zip(stream_ids, device_ids)
            ],
        )
        def purged_chain_cover_txn(txn: LoggingTransaction) -> int:
            """Process one batch of ``event_auth_chains`` rows, removing purged ones.

            Reads up to ``batch_size`` rows with an event ID greater than
            ``current_event_id`` (both taken from the enclosing scope), deletes
            the chain and chain-link rows whose event no longer exists in
            ``events``, and stores the last event ID seen as the progress of
            the "purged_chain_cover" background update.

            Returns:
                The number of rows examined in this batch; 0 when there was
                nothing left to read.
            """
            # The LEFT JOIN leaves the events side NULL when the chain ID /
            # sequence number refers to an event that has been purged, which
            # is what the last selected column reports.
            query = """
                SELECT event_id, chain_id, sequence_number, e.event_id IS NOT NULL
                FROM event_auth_chains
                LEFT JOIN events AS e USING (event_id)
                WHERE event_id > ? ORDER BY event_auth_chains.event_id ASC LIMIT ?
            """
            txn.execute(query, (current_event_id, batch_size))

            batch = txn.fetchall()
            if not batch:
                return 0

            # Rows whose event is gone, split into the argument shapes the two
            # DELETE statements below expect.
            purged = [row for row in batch if not row[3]]
            purged_event_ids = [(row[0], ) for row in purged]
            purged_chain_positions = [(row[1], row[2]) for row in purged]

            # Remove the unreferenced auth chains from event_auth_chains ...
            txn.executemany(
                """
                DELETE FROM event_auth_chains WHERE event_id = ?
                """,
                purged_event_ids,
            )
            # ... and from event_auth_chain_links. Matching target_* rows
            # should go too, but there is no index on target_chain_id.
            # Hopefully any purged events are due to a room being fully purged
            # and they will be removed from the origin_* searches.
            txn.executemany(
                """
                DELETE FROM event_auth_chain_links WHERE
                origin_chain_id = ? AND origin_sequence_number = ?
                """,
                purged_chain_positions,
            )

            # The batch is non-empty here, so its last row carries the event
            # ID to persist as progress.
            last_event_id = batch[-1][0]
            self.db_pool.updates._background_update_progress_txn(
                txn,
                "purged_chain_cover",
                {"current_event_id": last_event_id},
            )

            return len(batch)
Пример #3
0
    def insert_many_txn(self, txn: LoggingTransaction, table: str,
                        headers: List[str], rows: List[Tuple]) -> None:
        """Insert ``rows`` into ``table`` with a single ``executemany`` call.

        Args:
            txn: The transaction to run the insert on.
            table: Name of the destination table.
            headers: Column names, in the order the values appear in each row.
            rows: The value tuples to insert.

        Raises:
            Exception: Whatever ``txn.executemany`` raises is logged together
                with the table name and then re-raised unchanged.
        """
        column_list = ", ".join(headers)
        # One "%s" bind placeholder per column.
        placeholders = ", ".join(["%s"] * len(headers))
        statement = f"INSERT INTO {table} ({column_list}) VALUES ({placeholders})"

        try:
            txn.executemany(statement, rows)
        except Exception:
            logger.exception("Failed to insert: %s", table)
            raise
Пример #4
0
                def insert(txn: LoggingTransaction) -> None:
                    """Write one chunk of search rows into ``event_search``.

                    ``rows``, ``headers``, ``forward_chunk`` and
                    ``backward_chunk`` come from the enclosing scope. Rows whose
                    "value" contains a NUL character are logged and skipped;
                    the rest are inserted, after which the ``event_search``
                    entry of ``port_from_sqlite3`` is updated with the new
                    forward/backward row IDs.
                    """
                    statement = (
                        "INSERT INTO event_search (event_id, room_id, key,"
                        " sender, vector, origin_server_ts, stream_ordering)"
                        " VALUES (?,?,?,?,to_tsvector('english', ?),?,?)")

                    # Order of the bind parameters in the statement above.
                    bind_order = (
                        "event_id",
                        "room_id",
                        "key",
                        "sender",
                        "value",
                        "origin_server_ts",
                        "stream_ordering",
                    )

                    insertable = []
                    for raw in rows:
                        entry = dict(zip(headers, raw))
                        if "\0" in entry["value"]:
                            logger.warning("dropping search row %s", entry)
                            continue
                        insertable.append(entry)

                    bind_args = [
                        tuple(entry[column] for column in bind_order)
                        for entry in insertable
                    ]
                    txn.executemany(statement, bind_args)

                    self.postgres_store.db_pool.simple_update_one_txn(
                        txn,
                        table="port_from_sqlite3",
                        keyvalues={"table_name": "event_search"},
                        updatevalues={
                            "forward_rowid": forward_chunk,
                            "backward_rowid": backward_chunk,
                        },
                    )
Пример #5
0
    def _purge_room_txn(self, txn: LoggingTransaction,
                        room_id: str) -> List[int]:
        """Delete the rows stored for a room, all on the given transaction.

        The state groups referenced by the room's events are read first, then
        the auth chain links, the per-event tables and finally the per-room
        tables are cleared, and the ``have_seen_event`` cache is invalidated
        for the room.

        Args:
            txn: The transaction to run the statements on.
            room_id: The room to purge.

        Returns:
            The state groups that were referenced by the room's events, as
            read before anything was deleted.
        """
        room_args = (room_id, )

        # Tables which lack an index on room_id but have one on event_id.
        tables_keyed_by_event = (
            "event_auth",
            "event_edges",
            "event_json",
            "event_push_actions_staging",
            "event_relations",
            "event_to_state_groups",
            "event_auth_chains",
            "event_auth_chain_to_calculate",
            "redactions",
            "rejections",
            "state_events",
        )

        # Tables with an index on room_id (or no useful index).
        tables_keyed_by_room = (
            "current_state_events",
            "destination_rooms",
            "event_backward_extremities",
            "event_forward_extremities",
            "event_push_actions",
            "event_search",
            "partial_state_events",
            "events",
            "federation_inbound_events_staging",
            "local_current_membership",
            "partial_state_rooms_servers",
            "partial_state_rooms",
            "receipts_graph",
            "receipts_linearized",
            "room_aliases",
            "room_depth",
            "room_memberships",
            "room_stats_state",
            "room_stats_current",
            "room_stats_earliest_token",
            "stream_ordering_to_exterm",
            "users_in_public_rooms",
            "users_who_share_private_rooms",
            # no useful index, but let's clear them anyway
            "appservice_room_list",
            "e2e_room_keys",
            "event_push_summary",
            "pusher_throttle",
            "room_account_data",
            "room_tags",
            # "rooms" happens last, to keep the foreign keys in the other tables
            # happy
            "rooms",
        )

        delete_by_event_sql = """
                DELETE FROM %s WHERE event_id IN (
                  SELECT event_id FROM events WHERE room_id=?
                )
                """

        # Collect the state groups that should be deleted up front, before the
        # rows that let us find them are removed.
        txn.execute(
            """
                SELECT DISTINCT state_group FROM events
                INNER JOIN event_to_state_groups USING(event_id)
                WHERE events.room_id = ?
            """,
            room_args,
        )
        groups_to_delete = [row[0] for row in txn]

        # Likewise collect every auth chain referenced by the events that are
        # about to go.
        txn.execute(
            """
            SELECT chain_id, sequence_number FROM events
            LEFT JOIN event_auth_chains USING (event_id)
            WHERE room_id = ?
            """,
            room_args,
        )
        chain_positions = list(txn)

        logger.info("[purge] removing events from event_auth_chain_links")
        txn.executemany(
            """
            DELETE FROM event_auth_chain_links WHERE
            origin_chain_id = ? AND origin_sequence_number = ?
            """,
            chain_positions,
        )

        # First the event_id-indexed tables; these still need the room's rows
        # in "events" for the sub-select.
        for table in tables_keyed_by_event:
            logger.info("[purge] removing %s from %s", room_id, table)
            txn.execute(delete_by_event_sql % (table, ), room_args)

        # Then the tables that can be filtered on room_id directly.
        for table in tables_keyed_by_room:
            logger.info("[purge] removing %s from %s", room_id, table)
            txn.execute("DELETE FROM %s WHERE room_id=?" % (table, ),
                        room_args)

        # Other tables we do NOT need to clear out:
        #
        #  - blocked_rooms
        #    This is important, to make sure that we don't accidentally rejoin a blocked
        #    room after it was purged
        #
        #  - user_directory
        #    This has a room_id column, but it is unused
        #

        # Other tables that we might want to consider clearing out include:
        #
        #  - event_reports
        #       Given that these are intended for abuse management my initial
        #       inclination is to leave them in place.
        #
        #  - current_state_delta_stream
        #  - ex_outlier_stream
        #  - room_tags_revisions
        #       The problem with these is that they are largeish and there is no room_id
        #       index on them. In any case we should be clearing out 'stream' tables
        #       periodically anyway (#5888)

        # TODO: we could probably usefully do a bunch more cache invalidation here

        # XXX: as with purge_history, this is racy, but no worse than other races
        #   that already exist.
        self._invalidate_cache_and_stream(txn, self.have_seen_event,
                                          room_args)

        logger.info("[purge] done")

        return groups_to_delete