Example No. 1
    def process_message(self, message, metadata=None) -> Optional[ProcessedMessage]:
        # some old relays accidentally emit rows without release
        if message["release"] is None:
            return None
        if message["duration"] is None:
            duration = None
        else:
            duration = _collapse_uint32(int(message["duration"] * 1000))

        # duration is not nullable, so MAX_UINT32 serves as a "no duration" sentinel
        if duration is None:
            duration = MAX_UINT32

        processed = {
            "session_id": str(uuid.UUID(message["session_id"])),
            "distinct_id": str(uuid.UUID(message.get("distinct_id") or NIL_UUID)),
            "seq": message["seq"],
            "org_id": message["org_id"],
            "project_id": message["project_id"],
            "retention_days": message["retention_days"],
            "duration": duration,
            "status": STATUS_MAPPING[message["status"]],
            "errors": _collapse_uint16(message["errors"]) or 0,
            "received": _ensure_valid_date(
                datetime.utcfromtimestamp(message["received"])
            ),
            "started": _ensure_valid_date(
                datetime.utcfromtimestamp(message["started"])
            ),
            "release": message["release"],
            "environment": message.get("environment") or "",
        }
        return ProcessedMessage(action=ProcessorAction.INSERT, data=[processed])
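
The _collapse_uint16/_collapse_uint32 helpers and the MAX_UINT32 constant are used throughout these examples but never shown. Below is a minimal sketch consistent with how they are called (values outside the unsigned range, or None, collapse to None); the real Snuba implementations may differ.

from typing import Optional

MAX_UINT16 = 2**16 - 1
MAX_UINT32 = 2**32 - 1


def _collapse_uint16(n: Optional[int]) -> Optional[int]:
    # Collapse None or anything outside the uint16 range to None.
    if n is None or n < 0 or n > MAX_UINT16:
        return None
    return n


def _collapse_uint32(n: Optional[int]) -> Optional[int]:
    # Collapse None or anything outside the uint32 range to None.
    if n is None or n < 0 or n > MAX_UINT32:
        return None
    return n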
Example No. 2
    def process_message(
            self, message: Mapping[str, Any],
            metadata: KafkaMessageMetadata) -> Optional[ProcessedMessage]:
        # some old relays accidentally emit rows without release
        if message["release"] is None:
            return None
        if message["duration"] is None:
            duration = None
        else:
            duration = _collapse_uint32(int(message["duration"] * 1000))

        # since duration is not nullable, the max duration means no duration
        if duration is None:
            duration = MAX_UINT32

        errors = _collapse_uint16(message["errors"]) or 0
        quantity = _collapse_uint32(message.get("quantity")) or 1

        # If a session ends as crashed or abnormal, make sure it also counts
        # as errored, so that the numbers of healthy and errored sessions
        # come out correctly.
        if message["status"] in ("crashed", "abnormal"):
            errors = max(errors, 1)

        received = _ensure_valid_date(
            datetime.utcfromtimestamp(message["received"]))
        started = _ensure_valid_date(
            datetime.utcfromtimestamp(message["started"]))

        if started is None:
            metrics.increment("empty_started_date")
        if received is None:
            metrics.increment("empty_received_date")

        processed = {
            "session_id": str(uuid.UUID(message["session_id"])),
            "distinct_id":
            str(uuid.UUID(message.get("distinct_id") or NIL_UUID)),
            "quantity": quantity,
            "seq": message["seq"],
            "org_id": message["org_id"],
            "project_id": message["project_id"],
            "retention_days": message["retention_days"],
            "duration": duration,
            "status": STATUS_MAPPING[message["status"]],
            "errors": errors,
            "received": received if received is not None else datetime.now(),
            "started": started if started is not None else datetime.now(),
            "release": message["release"],
            "environment": message.get("environment") or "",
        }
        return InsertBatch([processed], None)
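
_ensure_valid_date returns None for timestamps the storage cannot hold, which is what the empty_started_date/empty_received_date metrics above count. A plausible sketch, reusing MAX_UINT32 from the sketch after Example No. 1; the exact bounds are an assumption based on a uint32-seconds ClickHouse DateTime range.

from datetime import datetime
from typing import Optional


def _ensure_valid_date(dt: Optional[datetime]) -> Optional[datetime]:
    # Reject dates outside what a uint32-seconds DateTime column can
    # represent (before the epoch or after 2106); bounds are a guess.
    if dt is None:
        return None
    seconds = (dt - datetime(1970, 1, 1)).total_seconds()
    if seconds < 0 or seconds > MAX_UINT32:
        return None
    return dt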
Example No. 3
    def process_message(
            self, value: Mapping[str, Any],
            metadata: KafkaMessageMetadata) -> Optional[ProcessedMessage]:
        assert isinstance(value, dict)
        v_uuid = value.get("event_id")
        reason = value.get("reason")

        # Relays let arbitrary outcome reasons through to the topic. We
        # reject undesired values only in the processor so that new reasons
        # can be added without having to update every relay in the chain.
        if value["outcome"] == OUTCOME_CLIENT_DISCARD:
            if reason is not None and reason not in CLIENT_DISCARD_REASONS:
                reason = None

        if (value["outcome"] != OUTCOME_ABUSE
            ):  # we dont care about abuse outcomes for these metrics
            if "category" not in value:
                metrics.increment("missing_category")
            if "quantity" not in value:
                metrics.increment("missing_quantity")

        message = None
        try:
            timestamp = _ensure_valid_date(
                datetime.strptime(value["timestamp"], settings.PAYLOAD_DATETIME_FORMAT)
            )
        except Exception:
            metrics.increment("bad_outcome_timestamp")
            timestamp = _ensure_valid_date(datetime.utcnow())

        try:
            message = {
                "org_id": value.get("org_id", 0),
                "project_id": value.get("project_id", 0),
                "key_id": value.get("key_id"),
                "timestamp": timestamp,
                "outcome": value["outcome"],
                "category": value.get("category", DataCategory.ERROR),
                "quantity": value.get("quantity", 1),
                "reason": _unicodify(reason),
                "event_id":
                str(uuid.UUID(v_uuid)) if v_uuid is not None else None,
            }
        except Exception:
            metrics.increment("bad_outcome")
            return None

        return InsertBatch([message], None)
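
The outcome constants and _unicodify are defined elsewhere. Example No. 14 below performs the same abuse check against the literal 4, which pins down OUTCOME_ABUSE; the other value and the _unicodify sketch are unverified assumptions. The contents of the CLIENT_DISCARD_REASONS allow-list are not shown anywhere in these examples, so it is left out.

OUTCOME_ABUSE = 4  # implied by the literal 4 in Example No. 14
OUTCOME_CLIENT_DISCARD = 5  # assumption: not confirmed by these examples


def _unicodify(s):
    # Best guess at the contract: coerce the reason to text, pass None through.
    if s is None:
        return None
    return str(s)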
Example No. 4
def enforce_retention(
    retention_days: Optional[int], timestamp: Optional[datetime]
) -> int:
    if not isinstance(retention_days, int):
        retention_days = settings.DEFAULT_RETENTION_DAYS

    if settings.ENFORCE_RETENTION:
        retention_days = (
            settings.LOWER_RETENTION_DAYS
            if retention_days <= settings.LOWER_RETENTION_DAYS
            else settings.DEFAULT_RETENTION_DAYS
        )

    # This is not ideal, but it should never happen anyway
    timestamp = _ensure_valid_date(timestamp)
    if timestamp is None:
        timestamp = datetime.utcnow()
    # TODO: We may need to allow for older events in the future when post
    # processing triggers are based off of Snuba. Or this branch could be put
    # behind a "backfill-only" optional switch.
    if settings.DISCARD_OLD_EVENTS and timestamp < (
        datetime.utcnow() - timedelta(days=retention_days)
    ):
        raise EventTooOld
    return retention_days
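
EventTooOld only ever appears being raised here and caught at the call sites, so a bare exception class is enough to make these snippets hang together; the typical call pattern (mirroring Examples No. 10 and No. 11) is sketched in the comments.

class EventTooOld(Exception):
    # Raised when an event's timestamp falls outside its retention window.
    pass

# Typical call site:
#     try:
#         retention_days = enforce_retention(message["retention_days"], timestamp)
#     except EventTooOld:
#         return None  # drop events that are past retention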
Example No. 5
    def process_message(
        self, value: Mapping[str, Any], metadata: KafkaMessageMetadata
    ) -> Optional[ProcessedMessage]:
        assert isinstance(value, dict)

        # Only record outcomes from traditional error tracking events, which
        # excludes transactions, attachments and sessions. Once TSDB defines
        # models for these, we can start recording again.
        category = value.get("category")
        if category is not None and category not in DataCategory.error_categories():
            return None

        v_uuid = value.get("event_id")
        message = {
            "org_id": value.get("org_id", 0),
            "project_id": value.get("project_id", 0),
            "key_id": value.get("key_id"),
            "timestamp": _ensure_valid_date(
                datetime.strptime(value["timestamp"], settings.PAYLOAD_DATETIME_FORMAT),
            ),
            "outcome": value["outcome"],
            "reason": _unicodify(value.get("reason")),
            "event_id": str(uuid.UUID(v_uuid)) if v_uuid is not None else None,
        }

        return InsertBatch([message], None)
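
DataCategory is referenced both as a default (DataCategory.ERROR in Examples No. 3 and No. 14) and as a filter (error_categories() above). A rough sketch of such an enum; the member values and the exact contents of error_categories() are assumptions, not confirmed by the examples.

from enum import IntEnum
from typing import List


class DataCategory(IntEnum):
    # Illustrative values only.
    DEFAULT = 0
    ERROR = 1
    TRANSACTION = 2
    ATTACHMENT = 4
    SESSION = 5

    @classmethod
    def error_categories(cls) -> List["DataCategory"]:
        # Categories treated as traditional error tracking events;
        # Example No. 5 drops everything outside this set.
        return [cls.DEFAULT, cls.ERROR]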
Example No. 6
    def process_message(self, value, metadata=None) -> Optional[ProcessedMessage]:
        assert isinstance(value, dict)
        v_uuid = value.get("event_id")
        message = {
            "org_id": value.get("org_id", 0),
            "project_id": value.get("project_id", 0),
            "key_id": value.get("key_id"),
            "timestamp": _ensure_valid_date(
                datetime.strptime(value["timestamp"], settings.PAYLOAD_DATETIME_FORMAT),
            ),
            "outcome": value["outcome"],
            "reason": _unicodify(value.get("reason")),
            "event_id": str(uuid.UUID(v_uuid)) if v_uuid is not None else None,
        }

        return ProcessedMessage(action=ProcessorAction.INSERT, data=[message])
Example No. 7
    def __extract_timestamp(self, field: int) -> Tuple[datetime, int]:
        # We are purposely using a naive datetime here to work with the rest
        # of the codebase. We can be confident that clients are only sending
        # UTC dates.
        timestamp = _ensure_valid_date(datetime.utcfromtimestamp(field))
        if timestamp is None:
            timestamp = datetime.utcnow()
        milliseconds = int(timestamp.microsecond / 1000)
        return (timestamp, milliseconds)
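
The milliseconds component is just the microsecond field scaled down; a quick standalone check of that arithmetic:

from datetime import datetime

ts = datetime(2020, 9, 13, 12, 26, 40, 123456)  # microsecond component = 123456
assert int(ts.microsecond / 1000) == 123        # milliseconds, as computed above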
Example No. 8
    def extract_required(self, output, message):
        output['group_id'] = message['group_id'] or 0

        # This is not ideal, but it should never happen anyway
        timestamp = _ensure_valid_date(
            datetime.strptime(message['datetime'],
                              settings.PAYLOAD_DATETIME_FORMAT))
        if timestamp is None:
            timestamp = datetime.utcnow()

        output['timestamp'] = timestamp
Example No. 9
    def extract_required(self, output: MutableMapping[str, Any],
                         event: Mapping[str, Any]) -> None:
        output["group_id"] = event["group_id"] or 0

        # This is not ideal, but it should never happen anyway
        timestamp = _ensure_valid_date(
            datetime.strptime(event["datetime"],
                              settings.PAYLOAD_DATETIME_FORMAT))
        if timestamp is None:
            timestamp = datetime.utcnow()

        output["timestamp"] = timestamp
Example No. 10
    def process_message(
            self, message: Mapping[str, Any],
            metadata: KafkaMessageMetadata) -> Optional[ProcessedMessage]:
        if not self._should_process(message):
            return None

        timestamp = _ensure_valid_date(
            datetime.utcfromtimestamp(message["timestamp"]))
        assert timestamp is not None

        keys = []
        values = []
        tags = message["tags"]
        assert isinstance(tags, Mapping)
        for key, value in sorted(tags.items()):
            assert key.isdigit()
            keys.append(int(key))
            assert isinstance(value, int)
            values.append(value)

        try:
            retention_days = enforce_retention(message["retention_days"],
                                               timestamp)
        except EventTooOld:
            return None

        processed = [
            {
                "org_id": _literal(message["org_id"]),
                "project_id": _literal(message["project_id"]),
                "metric_id": _literal(message["metric_id"]),
                "timestamp": _call(
                    "toDateTime",
                    (_literal(timestamp_to_bucket(timestamp, granularity).isoformat()),),
                ),
                "tags.key": _array_literal(keys),
                "tags.value": _array_literal(values),
                **self._process_values(message),
                "retention_days": _literal(retention_days),
                "granularity": _literal(granularity),
            }
            for granularity in self.GRANULARITIES_SECONDS
        ]
        return AggregateInsertBatch(processed, None)
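
Unlike the plain-dict processors, this variant renders ClickHouse expressions with _literal, _call, and _array_literal before handing them to AggregateInsertBatch. The helpers below are a guess at those builders, emitting SQL fragments as strings, plus an assumed timestamp_to_bucket that snaps a timestamp to its granularity bucket; the real Snuba versions may build AST nodes instead.

from datetime import datetime, timedelta
from typing import Sequence, Union


def _literal(value: Union[int, float, str]) -> str:
    # Render a Python scalar as a ClickHouse literal.
    if isinstance(value, str):
        return "'" + value.replace("'", "\\'") + "'"
    return str(value)


def _call(name: str, args: Sequence[str]) -> str:
    # Render a function call fragment, e.g. toDateTime('2021-01-01T00:00:00').
    return "{}({})".format(name, ", ".join(args))


def _array_literal(values: Sequence[int]) -> str:
    # Render a ClickHouse array literal, e.g. [1, 2, 3].
    return "[{}]".format(", ".join(str(v) for v in values))


def timestamp_to_bucket(timestamp: datetime, granularity: int) -> datetime:
    # Snap a naive UTC timestamp down to the start of its granularity bucket.
    epoch = datetime(1970, 1, 1)
    seconds = int((timestamp - epoch).total_seconds())
    return epoch + timedelta(seconds=seconds - seconds % granularity)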
Example No. 11
    def process_message(
            self, message: Mapping[str, Any],
            metadata: KafkaMessageMetadata) -> Optional[ProcessedMessage]:
        # TODO: Support messages with multiple buckets

        if not self._should_process(message):
            return None

        timestamp = _ensure_valid_date(
            datetime.utcfromtimestamp(message["timestamp"]))
        assert timestamp is not None, "Invalid timestamp"

        keys = []
        values = []
        tags = message["tags"]
        assert isinstance(tags, Mapping), "Invalid tags type"
        for key, value in sorted(tags.items()):
            assert key.isdigit() and isinstance(value, int), "Tag key/value invalid"
            keys.append(int(key))
            values.append(value)

        mat_version = (
            DISABLED_MATERIALIZATION_VERSION
            if settings.WRITE_METRICS_AGG_DIRECTLY
            else settings.ENABLED_MATERIALIZATION_VERSION
        )

        try:
            retention_days = enforce_retention(message["retention_days"],
                                               timestamp)
        except EventTooOld:
            return None

        processed = {
            "org_id": message["org_id"],
            "project_id": message["project_id"],
            "metric_id": message["metric_id"],
            "timestamp": timestamp,
            "tags.key": keys,
            "tags.value": values,
            **self._process_values(message),
            "materialization_version": mat_version,
            "retention_days": retention_days,
            "partition": metadata.partition,
            "offset": metadata.offset,
        }
        return InsertBatch([processed], None)
Example No. 12
def enforce_retention(message, timestamp):
    project_id = message['project_id']
    retention_days = settings.RETENTION_OVERRIDES.get(project_id)
    if retention_days is None:
        retention_days = int(
            message.get('retention_days') or settings.DEFAULT_RETENTION_DAYS)

    # This is not ideal, but it should never happen anyway
    timestamp = _ensure_valid_date(timestamp)
    if timestamp is None:
        timestamp = datetime.utcnow()
    # TODO: We may need to allow for older events in the future when post
    # processing triggers are based off of Snuba. Or this branch could be put
    # behind a "backfill-only" optional switch.
    if settings.DISCARD_OLD_EVENTS and timestamp < (
            datetime.utcnow() - timedelta(days=retention_days)):
        raise EventTooOld
    return retention_days
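
For reference, these are the settings knobs the two enforce_retention variants consult (Examples No. 4 and No. 12), collected as a hypothetical settings module. Only the names come from the code above; every value is a placeholder.

# settings.py sketch -- names from the examples, values illustrative only.
DEFAULT_RETENTION_DAYS = 90
LOWER_RETENTION_DAYS = 30
ENFORCE_RETENTION = False      # Example No. 4: clamp to one of two tiers when True
DISCARD_OLD_EVENTS = True      # both variants: raise EventTooOld for stale events
RETENTION_OVERRIDES = {}       # Example No. 12 only: per-project overrides
PAYLOAD_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"  # format string is an assumption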
Example No. 13
    def process_message(self, value, metadata):
        assert isinstance(value, dict)
        v_uuid = value.get('event_id')
        message = {
            'org_id': value.get('org_id', 0),
            'project_id': value.get('project_id', 0),
            'key_id': value.get('key_id'),
            'timestamp': _ensure_valid_date(
                datetime.strptime(value['timestamp'], settings.PAYLOAD_DATETIME_FORMAT),
            ),
            'outcome': value['outcome'],
            'reason': _unicodify(value.get('reason')),
            'event_id': str(uuid.UUID(v_uuid)) if v_uuid is not None else None,
        }

        return (self.INSERT, message)
Example No. 14
    def process_message(
            self, value: Mapping[str, Any],
            metadata: KafkaMessageMetadata) -> Optional[ProcessedMessage]:
        assert isinstance(value, dict)
        v_uuid = value.get("event_id")

        if value["outcome"] != 4:  # we dont care about abuse outcomes for these metrics
            if "category" not in value:
                metrics.increment("missing_category")
            if "quantity" not in value:
                metrics.increment("missing_quantity")

        message = {
            "org_id": value.get("org_id", 0),
            "project_id": value.get("project_id", 0),
            "key_id": value.get("key_id"),
            "timestamp": _ensure_valid_date(
                datetime.strptime(value["timestamp"], settings.PAYLOAD_DATETIME_FORMAT),
            ),
            "outcome": value["outcome"],
            "category": value.get("category", DataCategory.ERROR),
            "quantity": value.get("quantity", 1),
            "reason": _unicodify(value.get("reason")),
            "event_id": str(uuid.UUID(v_uuid)) if v_uuid is not None else None,
        }

        return InsertBatch([message], None)
Example No. 15
    def process_message(
            self, message: Mapping[str, Any],
            metadata: KafkaMessageMetadata) -> Optional[ProcessedMessage]:
        # TODO: Support messages with multiple buckets

        if not self._should_process(message):
            return None

        timestamp = _ensure_valid_date(
            datetime.utcfromtimestamp(message["timestamp"]))
        assert timestamp is not None

        keys = []
        values = []
        tags = message["tags"]
        assert isinstance(tags, Mapping)
        for key, value in sorted(tags.items()):
            assert key.isdigit()
            keys.append(int(key))
            assert isinstance(value, int)
            values.append(value)

        processed = {
            "org_id": message["org_id"],
            "project_id": message["project_id"],
            "metric_id": message["metric_id"],
            "timestamp": timestamp,
            "tags.key": keys,
            "tags.value": values,
            **self._process_values(message),
            "materialization_version": 0,
            "retention_days": message["retention_days"],
            "partition": metadata.partition,
            "offset": metadata.offset,
        }
        return InsertBatch([processed], None)
Example No. 16
    def __extract_timestamp(self, field: float) -> Tuple[datetime, int]:
        # Note: fromtimestamp() interprets the value in the server's local
        # timezone, unlike utcfromtimestamp() in Examples No. 7 and No. 18.
        timestamp = _ensure_valid_date(datetime.fromtimestamp(field))
        if timestamp is None:
            timestamp = datetime.utcnow()
        nanoseconds = int(timestamp.microsecond * 1000)
        return (timestamp, nanoseconds)
Example No. 17
    def __extract_timestamp(self, field):
        timestamp = _ensure_valid_date(datetime.fromtimestamp(field))
        if timestamp is None:
            timestamp = datetime.utcnow()
        milliseconds = int(timestamp.microsecond / 1000)
        return (timestamp, milliseconds)
Example No. 18
    def __extract_timestamp(self, field: int) -> datetime:
        timestamp = _ensure_valid_date(datetime.utcfromtimestamp(field))
        if timestamp is None:
            timestamp = datetime.utcnow()
        return timestamp
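
Examples No. 16 and No. 17 parse the field with datetime.fromtimestamp, which interprets the value in the server's local timezone, while Examples No. 7 and No. 18 use datetime.utcfromtimestamp; given the comment in Example No. 7 that clients only send UTC dates, the two approaches disagree by the server's UTC offset. A quick illustration:

from datetime import datetime

unix = 1_600_000_000
local = datetime.fromtimestamp(unix)    # interpreted in the server's local timezone
utc = datetime.utcfromtimestamp(unix)   # naive datetime holding the UTC wall time
print(local, utc, local - utc)          # differ unless the server runs in UTC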