Exemplo n.º 1
0
    def process_message(
        self,
        message: Tuple[int, str, InsertEvent, Any],
        metadata: KafkaMessageMetadata,
    ) -> Optional[ProcessedMessage]:
        """\
        Turn a raw version-2 message into an insertion or replacement batch.

        The message is laid out as `(version, type, data, [state])`. An
        "insert" message is run through `self.process_insert` and wrapped in a
        single-row `InsertBatch`; a message whose type is listed in
        `REPLACEMENT_EVENT_TYPES` is forwarded untouched inside a
        `ReplacementBatch` keyed by the stringified `project_id` of its data.

        Returns `None` when `process_insert` raises `EventTooOld` or yields no
        row. Raises `InvalidMessageVersion` for any version other than 2 and
        `InvalidMessageType` for a type that is neither an insert nor a
        replacement.
        """
        protocol_version = message[0]
        if protocol_version != 2:
            raise InvalidMessageVersion(
                f"Unsupported message version: {protocol_version}")

        # Only the type and the data are needed here; any trailing state
        # element is left alone.
        kind, payload = message[1:3]

        if kind == "insert":
            try:
                inserted_row = self.process_insert(payload, metadata)
            except EventTooOld:
                return None

            if inserted_row is not None:
                return InsertBatch([inserted_row], None)
            # No row: the processor cannot or does not handle this input.
            return None

        if kind in REPLACEMENT_EVENT_TYPES:
            # The raw message is passed along as-is so it can be republished.
            return ReplacementBatch(str(payload["project_id"]), [message])

        raise InvalidMessageType(f"Invalid message type: {kind}")
Exemplo n.º 2
0
    def process_message(self, message: KafkaMessage) -> Optional[Replacement]:
        """\
        Decode a JSON-encoded message and dispatch it on its type.

        `message.value` is parsed as JSON into a sequence whose first item is
        the version, followed by the type and the event data.

        Returns `None` for the four `start_*` types; for the `end_*` types,
        returns whatever the matching `process_*` helper produces.

        Raises `InvalidMessageVersion` when the version is not 2 and
        `InvalidMessageType` when the type is not one of the known ones.
        """
        decoded = json.loads(message.value)

        if decoded[0] != 2:
            raise InvalidMessageVersion("Unknown message format: " +
                                        str(decoded))

        kind, payload = decoded[1:3]

        # The start_* types are accepted but produce no replacement.
        if kind in ('start_delete_groups', 'start_merge', 'start_unmerge',
                    'start_delete_tag'):
            return None

        if kind == 'end_delete_groups':
            return process_delete_groups(payload, self.__required_columns)
        if kind == 'end_merge':
            return process_merge(payload, self.__all_column_names)
        if kind == 'end_unmerge':
            return process_unmerge(payload, self.__all_column_names)
        if kind == 'end_delete_tag':
            return process_delete_tag(payload, self.dataset)

        raise InvalidMessageType("Invalid message type: {}".format(kind))
Exemplo n.º 3
0
    def process_message(
            self, message: Message[KafkaPayload]) -> Optional[Replacement]:
        """\
        Decode a replacement message and hand it to the replacer processor.

        Metadata (partition index, offset, consumer group) is built for every
        message. When `_message_already_processed` reports the message as
        seen, a warning is logged; the message is then dropped (`None` is
        returned) only if the `skip_seen_offsets` config value is truthy,
        otherwise processing continues as usual.

        The payload must be a JSON array of exactly three items,
        `[version, action_type, data]`. Raises `InvalidMessageVersion` when
        the version is not 2.
        """
        metadata = ReplacementMessageMetadata(
            partition_index=message.partition.index,
            offset=message.offset,
            consumer_group=self.__consumer_group,
        )

        already_seen = self._message_already_processed(metadata)
        if already_seen:
            warning_text = (
                f"Replacer ignored a message, consumer group: {self.__consumer_group}"
            )
            logger.warning(
                warning_text,
                extra={
                    "partition": metadata.partition_index,
                    "offset": metadata.offset,
                },
            )
        # The config flag is only consulted for messages that were seen before.
        if already_seen and get_config("skip_seen_offsets", False):
            return None

        decoded = json.loads(message.payload.value)
        version, action_type, data = decoded

        if version != 2:
            raise InvalidMessageVersion("Unknown message format: " +
                                        str(decoded))

        handler = self.__replacer_processor.process_message
        return handler(
            ReplacementMessage(
                action_type=action_type,
                data=data,
                metadata=metadata,
            ))
Exemplo n.º 4
0
    def process_message(
            self, message: Message[KafkaPayload]) -> Optional[Replacement]:
        """\
        Decode a replacement message and hand it to the replacer processor.

        The payload is parsed as JSON into a sequence whose first item is the
        version; items 1 and 2 are passed positionally to
        `ReplacementMessage`. Returns whatever the replacer processor's
        `process_message` returns.

        Raises `InvalidMessageVersion` when the version is not 2.
        """
        decoded = json.loads(message.payload.value)

        if decoded[0] != 2:
            raise InvalidMessageVersion("Unknown message format: " +
                                        str(decoded))

        handler = self.__replacer_processor.process_message
        return handler(ReplacementMessage(decoded[1], decoded[2]))
Exemplo n.º 5
0
    def process_message(self,
                        message,
                        metadata=None) -> Optional[ProcessedMessage]:
        """\
        Process a raw message into a `ProcessedMessage` carrying:
        * action: one of the sentinel values INSERT or REPLACE
        * data: a one-element list holding the processed payload

        Accepted message shapes:
        * a bare dict (deprecated unwrapped event), treated as an insert
        * version 0: (0, 'insert', data)
        * version 1: (1, type, data, [state])
        * version 2: (2, type, data, [state])

        Returns `None` if the event is too old to be written, if
        `process_insert` produces nothing, or if the message type is one of
        the deliberately ignored ones.

        Raises `InvalidMessageVersion` when the message is not one of the
        shapes above (this includes sequences too short to carry both a type
        and data) and `InvalidMessageType` when the type is not valid for the
        message's version.
        """
        if isinstance(message, dict):
            # deprecated unwrapped event message == insert
            version, type_, event = None, 'insert', message
        elif (isinstance(message, (list, tuple)) and len(message) >= 3
              and message[0] in (0, 1, 2)):
            # Three items are required: unpacking type and data out of a
            # shorter sequence would fail with a bare ValueError instead of
            # reporting the message as malformed.
            version = message[0]
            type_, event = message[1:3]
        else:
            raise InvalidMessageVersion("Unknown message format: " +
                                        str(message))

        if type_ == 'insert':
            try:
                processed = self.process_insert(event, metadata)
            except EventTooOld:
                return None

            if processed is None:
                return None

            return ProcessedMessage(
                action=ProcessorAction.INSERT,
                data=[processed],
            )

        # Version 0 only ever carried inserts, so it falls straight through
        # to the InvalidMessageType error below.
        if version == 1 and type_ in ('delete_groups', 'merge', 'unmerge'):
            # these didn't contain the necessary data to handle replacements
            return None

        if version == 2:
            # we temporarily sent these invalid message types from Sentry
            if type_ in ('delete_groups', 'merge'):
                return None

            if type_ in ('start_delete_groups', 'start_merge',
                         'start_unmerge', 'start_delete_tag',
                         'end_delete_groups', 'end_merge',
                         'end_unmerge', 'end_delete_tag'):
                # pass raw events along to republish
                return ProcessedMessage(
                    action=ProcessorAction.REPLACE,
                    data=[(str(event['project_id']), message)],
                )

        raise InvalidMessageType("Invalid message type: {}".format(type_))