Exemplo n.º 1
0
    def process_message(
        self,
        message: Tuple[int, str, InsertEvent, Any],
        metadata: KafkaMessageMetadata,
    ) -> Optional[ProcessedMessage]:
        """\
        Turn a raw version 2 message, shaped `(2, type, data, [state])`, into
        an insertion or replacement batch.

        * "insert" messages are run through `self.process_insert` and the
          resulting row is wrapped in an `InsertBatch`.
        * Types listed in `REPLACEMENT_EVENT_TYPES` are forwarded untouched in
          a `ReplacementBatch` keyed by the stringified project id.

        Returns `None` if the event is too old to be written or if
        `process_insert` produces no row. Raises `InvalidMessageVersion` for
        any version other than 2 and `InvalidMessageType` for any other type.
        """
        schema_version = message[0]
        if schema_version != 2:
            raise InvalidMessageVersion(
                f"Unsupported message version: {schema_version}")

        kind, payload = message[1:3]

        if kind != "insert":
            if kind not in REPLACEMENT_EVENT_TYPES:
                raise InvalidMessageType(f"Invalid message type: {kind}")
            # The raw message is passed along so it can be republished.
            return ReplacementBatch(str(payload["project_id"]), [message])

        try:
            inserted = self.process_insert(payload, metadata)
        except EventTooOld:
            return None

        # No row means the processor cannot/does not handle this input.
        return None if inserted is None else InsertBatch([inserted], None)
Exemplo n.º 2
0
    def process_message(self, message: ReplacementMessage) -> Optional[Replacement]:
        """
        Build the replacement described by a replacement message.

        Every "start_*" action yields `None`. Each "end_*" action hands the
        message data to its matching `process_*` helper and returns the
        helper's result. Any other action type raises `InvalidMessageType`.
        """
        action = message.action_type
        payload = message.data

        # Start markers never produce a replacement.
        if action in (
            "start_delete_groups",
            "start_merge",
            "start_unmerge",
            "start_delete_tag",
        ):
            return None

        if action == "end_delete_groups":
            return process_delete_groups(payload, self.__required_columns)

        if action == "end_merge":
            return process_merge(payload, self.__all_column_names)

        if action == "end_unmerge":
            return process_unmerge(payload, self.__all_column_names)

        if action == "end_delete_tag":
            return process_delete_tag(
                payload,
                self.get_write_schema(),
                self.__tag_column_map,
                self.__promoted_tags,
            )

        raise InvalidMessageType("Invalid message type: {}".format(action))
Exemplo n.º 3
0
    def process_message(self, message: KafkaMessage) -> Optional[Replacement]:
        """
        Decode the JSON in `message.value` and build the replacement for it.

        The decoded payload is indexed as `(version, type, data, ...)`. Only
        version 2 is accepted; anything else raises `InvalidMessageVersion`.
        "start_*" types yield `None`, "end_*" types are handed to their
        matching `process_*` helper, and any other type raises
        `InvalidMessageType`.
        """
        decoded = json.loads(message.value)

        if decoded[0] != 2:
            raise InvalidMessageVersion("Unknown message format: " +
                                        str(decoded))

        kind, payload = decoded[1:3]

        # Start markers never produce a replacement.
        if kind in ('start_delete_groups', 'start_merge', 'start_unmerge',
                    'start_delete_tag'):
            return None

        if kind == 'end_delete_groups':
            return process_delete_groups(payload, self.__required_columns)

        if kind == 'end_merge':
            return process_merge(payload, self.__all_column_names)

        if kind == 'end_unmerge':
            return process_unmerge(payload, self.__all_column_names)

        if kind == 'end_delete_tag':
            return process_delete_tag(payload, self.dataset)

        raise InvalidMessageType("Invalid message type: {}".format(kind))
Exemplo n.º 4
0
    def process_message(self, message: ReplacementMessage) -> Optional[Replacement]:
        """
        Build the replacement described by a replacement message.

        The action type merged with the message data is logged as JSON at
        info level, and the "process" metric is incremented for types listed
        in `REPLACEMENT_EVENT_TYPES`. The `START_*` types then yield `None`;
        every other known `ReplacementType` is handed to its parser or
        `process_*` helper. An unknown type raises `InvalidMessageType`.
        """
        action = message.action_type

        logger.info(json.dumps({"message_type": action, **message.data}))

        if action in REPLACEMENT_EVENT_TYPES:
            metric_tags = {
                "type": action,
                "consumer_group": message.metadata.consumer_group,
            }
            metrics.increment("process", 1, tags=metric_tags)

        # Start markers never produce a replacement.
        if action in (
            ReplacementType.START_DELETE_GROUPS,
            ReplacementType.START_MERGE,
            ReplacementType.START_UNMERGE,
            ReplacementType.START_UNMERGE_HIERARCHICAL,
            ReplacementType.START_DELETE_TAG,
        ):
            return None

        if action == ReplacementType.END_DELETE_GROUPS:
            return process_delete_groups(message, self.__required_columns)

        if action == ReplacementType.END_MERGE:
            return process_merge(message, self.__all_columns)

        if action == ReplacementType.END_UNMERGE:
            return UnmergeGroupsReplacement.parse_message(
                message, self.__replacement_context
            )

        if action == ReplacementType.END_UNMERGE_HIERARCHICAL:
            return process_unmerge_hierarchical(
                message, self.__all_columns, self.__state_name
            )

        if action == ReplacementType.END_DELETE_TAG:
            return process_delete_tag(
                message,
                self.__all_columns,
                self.__tag_column_map,
                self.__promoted_tags,
                self.__use_promoted_prewhere,
                self.__schema,
            )

        if action == ReplacementType.TOMBSTONE_EVENTS:
            return process_tombstone_events(
                message, self.__required_columns, self.__state_name
            )

        if action == ReplacementType.REPLACE_GROUP:
            return process_replace_group(
                message, self.__all_columns, self.__state_name
            )

        if action == ReplacementType.EXCLUDE_GROUPS:
            return ExcludeGroupsReplacement.parse_message(
                message, self.__replacement_context
            )

        raise InvalidMessageType("Invalid message type: {}".format(action))
Exemplo n.º 5
0
    def process_message(self,
                        message: ReplacementMessage) -> Optional[Replacement]:
        """
        Build the replacement described by a replacement message.

        The "process" metric is incremented for action types listed in
        `REPLACEMENT_EVENT_TYPES`. "start_*" actions then yield `None`; every
        other known action hands the message data to its parser or
        `process_*` helper. An unknown action raises `InvalidMessageType`.
        """
        action = message.action_type
        payload = message.data

        if action in REPLACEMENT_EVENT_TYPES:
            metrics.increment("process", 1, tags={"type": action})

        # Start markers never produce a replacement.
        if action in (
                "start_delete_groups",
                "start_merge",
                "start_unmerge",
                "start_unmerge_hierarchical",
                "start_delete_tag",
        ):
            return None

        if action == "end_delete_groups":
            return process_delete_groups(payload, self.__required_columns)

        if action == "end_merge":
            return process_merge(payload, self.__all_columns)

        if action == "end_unmerge":
            return UnmergeGroupsReplacement.parse_message(
                payload, self.__replacement_context)

        if action == "end_unmerge_hierarchical":
            return process_unmerge_hierarchical(payload, self.__all_columns,
                                                self.__state_name)

        if action == "end_delete_tag":
            return process_delete_tag(
                payload,
                self.__all_columns,
                self.__tag_column_map,
                self.__promoted_tags,
                self.__use_promoted_prewhere,
                self.__schema,
            )

        if action == "tombstone_events":
            return process_tombstone_events(payload, self.__required_columns,
                                            self.__state_name)

        if action == "replace_group":
            return process_replace_group(payload, self.__all_columns,
                                         self.__state_name)

        if action == "exclude_groups":
            return ExcludeGroupsReplacement.parse_message(
                payload, self.__replacement_context)

        raise InvalidMessageType("Invalid message type: {}".format(action))
Exemplo n.º 6
0
    def process_message(self,
                        message: ReplacementMessage) -> Optional[Replacement]:
        """
        Build the replacement described by a replacement message.

        "start_*" actions yield `None`; every other known action hands the
        message data to its matching `process_*` helper and returns the
        helper's result. An unknown action raises `InvalidMessageType`.
        """
        action = message.action_type
        payload = message.data

        # Start markers never produce a replacement.
        if action in (
                "start_delete_groups",
                "start_merge",
                "start_unmerge",
                "start_delete_tag",
        ):
            return None

        if action == "end_delete_groups":
            return process_delete_groups(payload, self.__required_columns)

        if action == "end_merge":
            return process_merge(payload, self.__all_columns)

        if action == "end_unmerge":
            return process_unmerge(payload, self.__all_columns,
                                   self.__state_name)

        if action == "end_delete_tag":
            return process_delete_tag(
                payload,
                self.__all_columns,
                self.__tag_column_map,
                self.__promoted_tags,
                self.__use_promoted_prewhere,
                self.__schema,
            )

        if action == "tombstone_events":
            return process_tombstone_events(payload, self.__required_columns)

        if action == "replace_group":
            return process_replace_group(payload, self.__all_columns)

        if action == "exclude_groups":
            return process_exclude_groups(payload)

        raise InvalidMessageType("Invalid message type: {}".format(action))
Exemplo n.º 7
0
    def process_message(self,
                        message,
                        metadata=None) -> Optional[ProcessedMessage]:
        """\
        Process a raw message into a `ProcessedMessage` whose `action` is
        `ProcessorAction.INSERT` or `ProcessorAction.REPLACE` and whose `data`
        is a one-element list holding the processed payload.

        Accepted message shapes:
        * a bare dict: deprecated unwrapped event, handled as an insert
        * version 0: (0, 'insert', data)
        * version 1: (1, type, data, [state])
        * version 2: (2, type, data, [state])

        Returns `None` if the event is too old to be written, if
        `process_insert` returns `None`, or if the message is one of the
        legacy types that are deliberately ignored.

        Raises `InvalidMessageType` for an unsupported type within a known
        version and `InvalidMessageVersion` for any other message, including
        list/tuple messages with fewer than three elements.
        """
        action_type = None

        if isinstance(message, dict):
            # deprecated unwrapped event message == insert
            action_type = ProcessorAction.INSERT
            try:
                processed = self.process_insert(message, metadata)
            except EventTooOld:
                return None
        elif (isinstance(message, (list, tuple)) and len(message) >= 3
              and message[0] in (0, 1, 2)):
            # Every supported version carries at least (version, type, data).
            # Shorter sequences fall through to InvalidMessageVersion below
            # instead of failing the unpacking with a bare ValueError.
            version = message[0]
            type_, event = message[1:3]

            if type_ == 'insert':
                action_type = ProcessorAction.INSERT
                try:
                    processed = self.process_insert(event, metadata)
                except EventTooOld:
                    return None
            elif version == 1 and type_ in ('delete_groups', 'merge',
                                            'unmerge'):
                # these didn't contain the necessary data to handle replacements
                return None
            elif version == 2 and type_ in ('delete_groups', 'merge'):
                # we temporarily sent these invalid message types from Sentry
                return None
            elif version == 2 and type_ in ('start_delete_groups',
                                            'start_merge', 'start_unmerge',
                                            'start_delete_tag',
                                            'end_delete_groups', 'end_merge',
                                            'end_unmerge', 'end_delete_tag'):
                # pass raw events along to republish
                action_type = ProcessorAction.REPLACE
                processed = (str(event['project_id']), message)
            else:
                # Version 0 only supports 'insert'; versions 1 and 2 reject
                # every type not handled above.
                raise InvalidMessageType(
                    "Invalid message type: {}".format(type_))

        if action_type is None:
            raise InvalidMessageVersion("Unknown message format: " +
                                        str(message))

        if processed is None:
            return None

        return ProcessedMessage(
            action=action_type,
            data=[processed],
        )