Exemplo n.º 1
0
    def extract_custom(
        self,
        output: MutableMapping[str, Any],
        event: InsertEvent,
        metadata: KafkaMessageMetadata,
    ) -> None:
        """Fill ``output`` with the event-specific columns read from ``event``.

        Writes the message, the HTTP method/referer, the primary and
        hierarchical hashes, and the culprit/type/title fields. ``output`` is
        also handed to ``extract_user`` and ``self.extract_geo`` so they can
        add their own entries. ``metadata`` is not used here.

        Raises ``KeyError`` if ``event`` has no ``"message"`` or
        ``"primary_hash"`` entry.
        """
        body = event.get("data", {})

        output["message"] = _unicodify(event["message"])

        # The legacy interface key is only a fallback for a *missing* "user"
        # key; a "user" entry that is present but falsy yields an empty dict.
        legacy_user = body.get("sentry.interfaces.User")
        user_info = body.get("user", legacy_user) or {}
        extract_user(output, user_info)
        self.extract_geo(output, user_info.get("geo") or {})

        # Same fallback rule for the request payload.
        legacy_request = body.get("sentry.interfaces.Http")
        request_info = body.get("request", legacy_request) or {}
        http_fields: MutableMapping[str, Any] = {}
        extract_http(http_fields, request_info)
        # Only these two of the extracted HTTP fields are copied over.
        for http_key in ("http_method", "http_referer"):
            output[http_key] = http_fields[http_key]

        output["primary_hash"] = _hashify(event["primary_hash"])
        output["hierarchical_hashes"] = [
            _hashify(raw_hash)
            for raw_hash in body.get("hierarchical_hashes") or ()
        ]

        for field in ("culprit", "type", "title"):
            output[field] = _unicodify(body.get(field))
Exemplo n.º 2
0
def process_unmerge(
    message: Mapping[str, Any],
    all_columns: Sequence[FlattenedColumn],
    state_name: ReplacerState,
) -> Optional[Replacement]:
    """Build the ``Replacement`` that re-inserts unmerged rows under a new group.

    Args:
        message: Unmerge payload. Reads ``hashes``, ``datetime``,
            ``new_group_id``, ``previous_group_id`` and ``project_id``.
        all_columns: Columns of the target table; their ``escaped`` names
            form the INSERT column list.
        state_name: When it equals ``ReplacerState.ERRORS`` the hashes are
            rendered in UUID form, otherwise as plain hash strings.

    Returns:
        ``None`` when ``message["hashes"]`` is empty/falsy, otherwise a
        ``Replacement`` carrying the count query template, the insert query
        template, the template arguments and a ``(NEEDS_FINAL, project_id)``
        tuple.
    """
    hashes = message["hashes"]
    if not hashes:
        # Nothing to move.
        return None

    assert all(isinstance(h, str) for h in hashes)

    cutoff = datetime.strptime(message["datetime"],
                               settings.PAYLOAD_DATETIME_FORMAT)
    column_names = [column.escaped for column in all_columns]
    # Same columns as the source rows, except that group_id is replaced by
    # the literal new group id. Kept lazy; consumed once when joined below.
    projection = (
        str(message["new_group_id"]) if name == "group_id" else name
        for name in column_names
    )

    row_filter = """\
        PREWHERE group_id = %(previous_group_id)s
        WHERE project_id = %(project_id)s
        AND primary_hash IN (%(hashes)s)
        AND received <= CAST('%(timestamp)s' AS DateTime)
        AND NOT deleted
    """

    # NOTE: %(table_name)s is not among the arguments built in this
    # function; presumably it is supplied when the templates are rendered.
    count_query_template = """\
        SELECT count()
        FROM %(table_name)s FINAL
    """ + row_filter

    insert_query_template = """\
        INSERT INTO %(table_name)s (%(all_columns)s)
        SELECT %(select_columns)s
        FROM %(table_name)s FINAL
    """ + row_filter

    query_args = {
        "all_columns": ", ".join(column_names),
        "select_columns": ", ".join(projection),
        "previous_group_id": message["previous_group_id"],
        "project_id": message["project_id"],
        "timestamp": cutoff.strftime(DATETIME_FORMAT),
    }

    if state_name == ReplacerState.ERRORS:
        quoted_hashes = [
            "'%s'" % str(uuid.UUID(_hashify(h))) for h in hashes
        ]
    else:
        quoted_hashes = ["'%s'" % _hashify(h) for h in hashes]
    query_args["hashes"] = ", ".join(quoted_hashes)

    query_time_flags = (NEEDS_FINAL, message["project_id"])

    return Replacement(
        count_query_template,
        insert_query_template,
        query_args,
        query_time_flags,
    )
Exemplo n.º 3
0
    def _where_clause(self) -> str:
        if self.state_name == ReplacerState.ERRORS:
            hashes = ", ".join(
                ["'%s'" % str(uuid.UUID(_hashify(h))) for h in self.hashes])
        else:
            hashes = ", ".join("'%s'" % _hashify(h) for h in self.hashes)

        timestamp = self.timestamp.strftime(DATETIME_FORMAT)

        return f"""\
Exemplo n.º 4
0
def _convert_hash(
    hash: str, state_name: ReplacerState, convert_types: bool = False
) -> str:
    """Render ``hash`` as a quoted SQL literal for the given replacer state.

    For ``ReplacerState.ERRORS`` and ``ReplacerState.ERRORS_V2`` the hashified
    value is formatted as a UUID string; for any other state the hashified
    value is used as is. With ``convert_types`` the literal is additionally
    wrapped in ``toUUID(...)`` or ``toFixedString(..., 32)`` respectively.
    """
    uses_uuid = (
        state_name == ReplacerState.ERRORS
        or state_name == ReplacerState.ERRORS_V2
    )

    if uses_uuid:
        value = str(uuid.UUID(_hashify(hash)))
        typed_template = "toUUID('%s')"
    else:
        value = _hashify(hash)
        typed_template = "toFixedString('%s', 32)"

    template = typed_template if convert_types else "'%s'"
    return template % value
Exemplo n.º 5
0
    def extract_custom(
        self,
        output: MutableMapping[str, Any],
        event: InsertEvent,
        metadata: KafkaMessageMetadata,
    ) -> None:
        """Fill ``output`` with the event-specific columns read from ``event``.

        Writes the user columns, an IPv4 or IPv6 address column (when the
        user's address validates), the HTTP method/referer, the message, the
        UUID-formatted primary and hierarchical hashes, and the
        culprit/type/title fields.

        Side effect: ``data["contexts"]`` of the event body is replaced by
        the normalised contexts dict, which gains a ``"geo"`` entry taken
        from the user payload when it had none. ``metadata`` is not used.
        """
        body = event.get("data", {})
        # The legacy interface key is only consulted when "user" is missing.
        legacy_user = body.get("sentry.interfaces.User")
        user_payload = body.get("user", legacy_user) or {}

        user_fields: MutableMapping[str, Any] = {}
        extract_user(user_fields, user_payload)
        for column, source_key in (
            ("user_name", "username"),
            ("user_id", "user_id"),
            ("user_email", "email"),
        ):
            output[column] = user_fields[source_key]

        ip = _ensure_valid_ip(user_fields["ip_address"])
        if ip:
            # One column per IP version; any other version is ignored.
            ip_column = {4: "ip_address_v4", 6: "ip_address_v6"}.get(ip.version)
            if ip_column is not None:
                output[ip_column] = str(ip)

        contexts: MutableMapping[str, Any] = _as_dict_safe(
            body.get("contexts"))
        user_geo = user_payload.get("geo", {})
        if isinstance(user_geo, dict):
            # Never overrides a "geo" context that is already present.
            contexts.setdefault("geo", user_geo)

        legacy_request = body.get("sentry.interfaces.Http")
        request_payload = body.get("request", legacy_request) or {}
        http_fields: MutableMapping[str, Any] = {}
        extract_http(http_fields, request_payload)
        for http_key in ("http_method", "http_referer"):
            output[http_key] = http_fields[http_key]

        # _as_dict_safe may hand back a dict that is not the one stored in
        # the event body, so write the result back explicitly.
        body["contexts"] = contexts

        output["message"] = _unicodify(event["message"])

        primary = _hashify(event["primary_hash"])
        output["primary_hash"] = str(uuid.UUID(primary))
        output["hierarchical_hashes"] = [
            str(uuid.UUID(_hashify(raw_hash)))
            for raw_hash in body.get("hierarchical_hashes") or ()
        ]

        for field in ("culprit", "type", "title"):
            output[field] = _unicodify(body.get(field, ""))
Exemplo n.º 6
0
    def extract_common(
        self,
        output: MutableMapping[str, Any],
        event: InsertEvent,
        metadata: KafkaMessageMetadata,
    ) -> None:
        """Fill ``output`` with the columns shared by all event types.

        Reads ``platform`` and ``primary_hash`` from the top level of
        ``event`` and everything else from its ``"data"`` dict. ``metadata``
        is not used here.

        Raises ``KeyError`` if ``platform``, ``primary_hash`` or the body's
        ``received`` entry is missing.
        """
        # Top-level properties of the message payload.
        output["platform"] = _unicodify(event["platform"])
        output["primary_hash"] = _hashify(event["primary_hash"])

        # Everything below comes from the event body.
        body = event.get("data", {})

        received_ts = _collapse_uint32(int(body["received"]))
        if received_ts is None:
            output["received"] = None
        else:
            output["received"] = datetime.utcfromtimestamp(received_ts)

        for field in ("culprit", "type", "version", "title", "location"):
            output[field] = _unicodify(body.get(field))

        # Modules are stored as two parallel lists; anything that is not a
        # dict is treated as "no modules".
        raw_modules = body.get("modules", {})
        module_map = raw_modules if isinstance(raw_modules, dict) else {}

        names = []
        versions = []
        for module_name, module_version in module_map.items():
            names.append(_unicodify(module_name))
            # A stray `null` version (incorrect by spec) must not blow up
            # the write, so it is stored as an empty string.
            versions.append(_unicodify(module_version) or "")

        output["modules.name"] = names
        output["modules.version"] = versions
Exemplo n.º 7
0
    def extract_common(self, output, message, data):
        """Fill ``output`` with the columns shared by all event types.

        ``message`` is the top-level payload and ``data`` the event body.
        Raises ``KeyError`` if ``platform``, ``primary_hash`` or
        ``data['received']`` is missing, or if neither a ``search_message``
        nor a top-level ``message`` is present.
        """
        # Top-level properties of the message payload.
        output['platform'] = _unicodify(message['platform'])
        output['primary_hash'] = _hashify(message['primary_hash'])

        # Properties of the event body.
        received_ts = _collapse_uint32(int(data['received']))
        if received_ts is None:
            output['received'] = None
        else:
            output['received'] = datetime.utcfromtimestamp(received_ts)

        for field in ('culprit', 'type', 'version', 'title', 'location'):
            output[field] = _unicodify(data.get(field))

        # message / search_message rename. Two payload shapes exist:
        #   Pre-rename:
        #     - top level carries "message": "a long search message"
        #     - there is no `search_message` property
        #     - the value goes to the `message` column; `search_message`
        #       stays None
        #   Post-rename:
        #     - top level carries "search_message": "a long search message"
        #     - the event body may carry "message": "short message"
        #     - each value goes to the column of the same name
        search_message = _unicodify(message.get('search_message'))
        output['search_message'] = search_message
        if search_message is None:
            # Pre-rename: "message" is required at the top level.
            short_message = message['message']
        else:
            # Post-rename: "message" in the event body is optional.
            short_message = data.get('message')
        output['message'] = _unicodify(short_message)

        # Modules are stored as two parallel lists; anything that is not a
        # dict is treated as "no modules".
        raw_modules = data.get('modules', {})
        module_map = raw_modules if isinstance(raw_modules, dict) else {}

        names = []
        versions = []
        for module_name, module_version in module_map.items():
            names.append(_unicodify(module_name))
            # A stray `null` version (incorrect by spec) must not blow up
            # the write, so it is stored as an empty string.
            versions.append(_unicodify(module_version) or '')

        output['modules.name'] = names
        output['modules.version'] = versions
Exemplo n.º 8
0
def process_unmerge(message, all_column_names) -> Optional[Replacement]:
    """Build the ``Replacement`` that re-inserts unmerged rows under a new group.

    Args:
        message: Unmerge payload. Reads ``hashes``, ``datetime``,
            ``new_group_id``, ``previous_group_id`` and ``project_id``.
        all_column_names: Column names of the table; used for the INSERT
            column list and, with ``group_id`` substituted, for the SELECT.

    Returns:
        ``None`` when ``message["hashes"]`` is empty/falsy, otherwise a
        ``Replacement`` carrying the count query template, the insert query
        template, the template arguments and a ``(NEEDS_FINAL, project_id)``
        tuple.
    """
    hashes = message["hashes"]
    if not hashes:
        # Nothing to move.
        return None

    assert all(isinstance(h, str) for h in hashes)
    cutoff = datetime.strptime(message["datetime"], settings.PAYLOAD_DATETIME_FORMAT)
    # Same columns as the source rows, except that group_id is replaced by
    # the literal new group id. Kept lazy; consumed once when joined below.
    projection = (
        str(message["new_group_id"]) if name == "group_id" else name
        for name in all_column_names
    )

    row_filter = """\
        PREWHERE group_id = %(previous_group_id)s
        WHERE project_id = %(project_id)s
        AND primary_hash IN (%(hashes)s)
        AND received <= CAST('%(timestamp)s' AS DateTime)
        AND NOT deleted
    """

    # NOTE: the dist_read/dist_write table names are not among the arguments
    # built here; presumably they are supplied when the templates are rendered.
    count_query_template = """\
        SELECT count()
        FROM %(dist_read_table_name)s FINAL
    """ + row_filter

    insert_query_template = """\
        INSERT INTO %(dist_write_table_name)s (%(all_columns)s)
        SELECT %(select_columns)s
        FROM %(dist_read_table_name)s FINAL
    """ + row_filter

    query_args = {
        "all_columns": ", ".join(all_column_names),
        "select_columns": ", ".join(projection),
        "previous_group_id": message["previous_group_id"],
        "project_id": message["project_id"],
        "hashes": ", ".join(["'%s'" % _hashify(h) for h in hashes]),
        "timestamp": cutoff.strftime(DATETIME_FORMAT),
    }

    query_time_flags = (NEEDS_FINAL, message["project_id"])

    return Replacement(
        count_query_template,
        insert_query_template,
        query_args,
        query_time_flags,
    )
Exemplo n.º 9
0
def process_unmerge(message, all_column_names):
    """Build the queries that re-insert unmerged rows under a new group.

    ``message`` supplies ``hashes``, ``datetime``, ``new_group_id``,
    ``previous_group_id`` and ``project_id``; ``all_column_names`` lists the
    table columns.

    Returns ``None`` when ``message['hashes']`` is empty/falsy, otherwise a
    4-tuple ``(count_query_template, insert_query_template, query_args,
    query_time_flags)`` where the flags are ``(NEEDS_FINAL, project_id)``.
    """
    hashes = message['hashes']
    if not hashes:
        # Nothing to move.
        return None

    assert all(isinstance(h, str) for h in hashes)
    cutoff = datetime.strptime(message['datetime'],
                               settings.PAYLOAD_DATETIME_FORMAT)
    # Same columns as the source rows, except that group_id is replaced by
    # the literal new group id. Kept lazy; consumed once when joined below.
    projection = (
        str(message['new_group_id']) if name == 'group_id' else name
        for name in all_column_names
    )

    row_filter = """\
        WHERE project_id = %(project_id)s
        AND group_id = %(previous_group_id)s
        AND primary_hash IN (%(hashes)s)
        AND received <= CAST('%(timestamp)s' AS DateTime)
        AND NOT deleted
    """

    # NOTE: the dist_read/dist_write table names are not among the arguments
    # built here; presumably they are supplied when the templates are rendered.
    count_query_template = """\
        SELECT count()
        FROM %(dist_read_table_name)s FINAL
    """ + row_filter

    insert_query_template = """\
        INSERT INTO %(dist_write_table_name)s (%(all_columns)s)
        SELECT %(select_columns)s
        FROM %(dist_read_table_name)s FINAL
    """ + row_filter

    quoted_hashes = ["'%s'" % _hashify(h) for h in hashes]
    query_args = {
        'all_columns': ', '.join(all_column_names),
        'select_columns': ', '.join(projection),
        'previous_group_id': message['previous_group_id'],
        'project_id': message['project_id'],
        'hashes': ", ".join(quoted_hashes),
        'timestamp': cutoff.strftime(DATETIME_FORMAT),
    }

    query_time_flags = (NEEDS_FINAL, message['project_id'])

    return (
        count_query_template,
        insert_query_template,
        query_args,
        query_time_flags,
    )