Example #1
    flatten_nested_field,
)
from snuba.processor import (
    InsertBatch,
    MessageProcessor,
    ProcessedMessage,
    _as_dict_safe,
    _ensure_valid_date,
    _ensure_valid_ip,
    _unicodify,
)
from snuba.utils.metrics.wrapper import MetricsWrapper

logger = logging.getLogger(__name__)

metrics = MetricsWrapper(environment.metrics, "transactions.processor")

UNKNOWN_SPAN_STATUS = 2


class TransactionsMessageProcessor(MessageProcessor):
    PROMOTED_TAGS = {
        "environment",
        "sentry:release",
        "sentry:user",
        "sentry:dist",
    }

    def __extract_timestamp(self, field):
        timestamp = _ensure_valid_date(datetime.fromtimestamp(field))
        if timestamp is None:
Example #2
from snuba import environment
from snuba.datasets.events_format import enforce_retention, extract_extra_tags
from snuba.processor import (
    InsertBatch,
    MessageProcessor,
    ProcessedMessage,
    _as_dict_safe,
    _ensure_valid_date,
    _unicodify,
)
from snuba.utils.metrics.wrapper import MetricsWrapper

UNKNOWN_SPAN_STATUS = 2

metrics = MetricsWrapper(environment.metrics, "spans.processor")


class SpansMessageProcessor(MessageProcessor):
    def __extract_timestamp(self, field: float) -> Tuple[datetime, int]:
        timestamp = _ensure_valid_date(datetime.fromtimestamp(field))
        if timestamp is None:
            timestamp = datetime.utcnow()
        nanoseconds = int(timestamp.microsecond * 1000)
        return (timestamp, nanoseconds)

    def __init_span(self, event: Mapping[str, Any]) -> MutableMapping[str, Any]:
        """
        Initializes the fields that are the same for all spans within a transaction.
        """
Example #3
from snuba.request.request_settings import HTTPRequestSettings, RequestSettings
from snuba.request.schema import RequestParts, RequestSchema
from snuba.request.validation import build_request, parse_legacy_query, parse_snql_query
from snuba.state.rate_limit import RateLimitExceeded
from snuba.subscriptions.codecs import SubscriptionDataCodec
from snuba.subscriptions.data import InvalidSubscriptionError, PartitionId
from snuba.subscriptions.subscription import SubscriptionCreator, SubscriptionDeleter
from snuba.util import with_span
from snuba.utils.metrics.timer import Timer
from snuba.utils.metrics.wrapper import MetricsWrapper
from snuba.web import QueryException
from snuba.web.converters import DatasetConverter
from snuba.web.query import parse_and_run_query
from snuba.writer import BatchWriterEncoderWrapper, WriterTableRow

metrics = MetricsWrapper(environment.metrics, "api")

logger = logging.getLogger("snuba.api")

# Flask wants a Dict, not a Mapping
RespTuple = Tuple[Text, int, Dict[Any, Any]]

try:
    import uwsgi
except ImportError:

    def check_down_file_exists() -> bool:
        return False

else:
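The excerpt cuts off at the else branch of the import guard. A self-contained sketch of how this pattern is usually completed; the down-file path and the check itself are assumptions, not necessarily what Snuba does:

import os

try:
    import uwsgi  # only present when the app runs under uWSGI
except ImportError:

    def check_down_file_exists() -> bool:
        return False

else:

    def check_down_file_exists() -> bool:
        # Assumed behaviour: treat an on-disk marker file as the "down" signal.
        # The real Snuba check may also consult uwsgi state; this is a sketch.
        return os.path.exists("/tmp/snuba.down")  # hypothetical path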
Example #4
from snuba.query.matchers import FunctionCall as FunctionCallMatch
from snuba.query.matchers import String as StringMatch
from snuba.query.parser.conditions import parse_conditions_to_expr
from snuba.query.parser.exceptions import (
    AliasShadowingException,
    CyclicAliasException,
    ParsingException,
)
from snuba.query.parser.expressions import parse_aggregation, parse_expression
from snuba.query.parser.validation import validate_query
from snuba.util import is_function, to_list, tuplify
from snuba.utils.metrics.wrapper import MetricsWrapper

logger = logging.getLogger(__name__)

metrics = MetricsWrapper(environment.metrics, "parser")


def parse_query(body: MutableMapping[str, Any], dataset: Dataset) -> Query:
    """
    Parses the query body generating the AST. This only takes into
    account the initial query body. Extensions are parsed by extension
    processors and are supposed to update the AST.

    Parsing includes two phases. The first transforms the json body into
    a minimal query Object resolving expressions, conditions, etc.
    The second phase performs some query processing to provide a sane
    query to the dataset specific section.
    - It prevents alias shadowing.
    - It transforms columns from the tags[asd] form into
      SubscriptableReference.
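As a rough sketch of the transformation the docstring mentions, a bracketed tag access maps onto a SubscriptableReference node. This assumes the (alias, ...) constructor order of snuba.query.expressions, which may differ between Snuba versions:

from snuba.query.expressions import Column, Literal, SubscriptableReference

# "tags[asd]" in the legacy JSON body becomes, roughly, this AST node:
tags_asd = SubscriptableReference(
    alias="tags[asd]",
    column=Column(None, None, "tags"),  # (alias, table_name, column_name)
    key=Literal(None, "asd"),           # (alias, value)
)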
Example #5
    def __init__(
        self,
        storage_key: StorageKey,
        raw_topic: Optional[str],
        replacements_topic: Optional[str],
        max_batch_size: int,
        max_batch_time_ms: int,
        bootstrap_servers: Sequence[str],
        group_id: str,
        commit_log_topic: Optional[str],
        auto_offset_reset: str,
        queued_max_messages_kbytes: int,
        queued_min_messages: int,
        strategy_factory_type: StrategyFactoryType,
        processes: Optional[int],
        input_block_size: Optional[int],
        output_block_size: Optional[int],
        commit_retry_policy: Optional[RetryPolicy] = None,
        profile_path: Optional[str] = None,
    ) -> None:
        self.storage = get_writable_storage(storage_key)
        self.bootstrap_servers = bootstrap_servers

        stream_loader = self.storage.get_table_writer().get_stream_loader()

        self.raw_topic: Topic
        if raw_topic is not None:
            self.raw_topic = Topic(raw_topic)
        else:
            self.raw_topic = Topic(stream_loader.get_default_topic_spec().topic_name)

        self.replacements_topic: Optional[Topic]
        if replacements_topic is not None:
            self.replacements_topic = Topic(replacements_topic)
        else:
            replacement_topic_spec = stream_loader.get_replacement_topic_spec()
            if replacement_topic_spec is not None:
                self.replacements_topic = Topic(replacement_topic_spec.topic_name)
            else:
                self.replacements_topic = None

        self.commit_log_topic: Optional[Topic]
        if commit_log_topic is not None:
            self.commit_log_topic = Topic(commit_log_topic)
        else:
            commit_log_topic_spec = stream_loader.get_commit_log_topic_spec()
            if commit_log_topic_spec is not None:
                self.commit_log_topic = Topic(commit_log_topic_spec.topic_name)
            else:
                self.commit_log_topic = None

        # XXX: This can result in a producer being built in cases where it's
        # not actually required.
        self.producer = Producer(
            {
                "bootstrap.servers": ",".join(self.bootstrap_servers),
                "partitioner": "consistent",
                "message.max.bytes": 50000000,  # 50MB, default is 1MB
            }
        )

        self.metrics = MetricsWrapper(
            environment.metrics,
            "consumer",
            tags={"group": group_id, "storage": storage_key.value},
        )

        self.max_batch_size = max_batch_size
        self.max_batch_time_ms = max_batch_time_ms
        self.group_id = group_id
        self.auto_offset_reset = auto_offset_reset
        self.queued_max_messages_kbytes = queued_max_messages_kbytes
        self.queued_min_messages = queued_min_messages
        self.strategy_factory_type = strategy_factory_type
        self.processes = processes
        self.input_block_size = input_block_size
        self.output_block_size = output_block_size
        self.__profile_path = profile_path

        if (
            self.processes is not None
            and self.strategy_factory_type is not StrategyFactoryType.STREAMING
        ):
            raise ValueError(
                "process count can only be specified when using streaming strategy"
            )

        if commit_retry_policy is None:
            commit_retry_policy = BasicRetryPolicy(
                3,
                constant_delay(1),
                lambda e: isinstance(e, KafkaException)
                and e.args[0].code()
                in (
                    KafkaError.REQUEST_TIMED_OUT,
                    KafkaError.NOT_COORDINATOR_FOR_GROUP,
                    KafkaError._WAIT_COORD,
                ),
            )

        self.__commit_retry_policy = commit_retry_policy
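The default policy above retries a commit three times, with a one second delay, whenever the Kafka error code is one of the listed transient failures. Callers can pass their own policy through the commit_retry_policy argument; a minimal sketch, assuming BasicRetryPolicy and constant_delay are importable from snuba.utils.retries:

from confluent_kafka import KafkaException

from snuba.utils.retries import BasicRetryPolicy, constant_delay  # import path assumed

# Retry every commit failure caused by a KafkaException, five times, two seconds apart.
retry_all_kafka_errors = BasicRetryPolicy(
    5,
    constant_delay(2),
    lambda e: isinstance(e, KafkaException),
)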
Example #6
    RateLimitAggregator,
    RateLimitExceeded,
    RateLimitStatsContainer,
    get_global_rate_limit_params,
)
from snuba.util import force_bytes, with_span
from snuba.utils.codecs import ExceptionAwareCodec
from snuba.utils.metrics.timer import Timer
from snuba.utils.metrics.wrapper import MetricsWrapper
from snuba.utils.serializable_exception import (
    SerializableException,
    SerializableExceptionDict,
)
from snuba.web import QueryException, QueryResult

metrics = MetricsWrapper(environment.metrics, "db_query")


class ResultCacheCodec(ExceptionAwareCodec[bytes, Result]):
    def encode(self, value: Result) -> bytes:
        return cast(str, rapidjson.dumps(value)).encode("utf-8")

    def decode(self, value: bytes) -> Result:
        ret = rapidjson.loads(value)
        if ret.get("__type__", "DNE") == "SerializableException":
            raise SerializableException.from_dict(cast(SerializableExceptionDict, ret))
        if not isinstance(ret, Mapping) or "meta" not in ret or "data" not in ret:
            raise ValueError("Invalid value type in result cache")
        return cast(Result, ret)

    def encode_exception(self, value: SerializableException) -> bytes:
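A quick round-trip check of the codec defined above. decode() accepts only a mapping containing "meta" and "data" keys, or a serialized exception tagged with "__type__", so the payload below is a minimal, made-up Result value:

from typing import cast

from snuba.reader import Result  # import path assumed

codec = ResultCacheCodec()

fake_result = cast(Result, {"meta": [], "data": []})  # illustrative fields only
assert codec.decode(codec.encode(fake_result)) == fake_result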
Example #7
def multistorage_consumer(
    storage_names: Sequence[str],
    consumer_group: str,
    max_batch_size: int,
    max_batch_time_ms: int,
    auto_offset_reset: str,
    queued_max_messages_kbytes: int,
    queued_min_messages: int,
    processes: int,
    input_block_size: int,
    output_block_size: int,
    log_level: Optional[str] = None,
) -> None:

    setup_logging(log_level)
    setup_sentry()

    storages = {
        key: get_writable_storage(key)
        for key in (getattr(StorageKey, name.upper()) for name in storage_names)
    }

    topics = {
        storage.get_table_writer()
        .get_stream_loader()
        .get_default_topic_spec()
        .topic_name
        for storage in storages.values()
    }

    # XXX: The ``StreamProcessor`` only supports a single topic at this time,
    # but is easily modified. The topic routing in the processing strategy is a
    # bit trickier (but also shouldn't be too bad.)
    topic = Topic(topics.pop())
    if topics:
        raise ValueError("only one topic is supported")

    # XXX: The ``CommitLogConsumer`` also only supports a single topic at this
    # time. (It is less easily modified.) This also assumes the commit log
    # topic is on the same Kafka cluster as the input topic.
    commit_log_topics = {
        spec.topic_name
        for spec in (
            storage.get_table_writer().get_stream_loader().get_commit_log_topic_spec()
            for storage in storages.values()
        )
        if spec is not None
    }

    commit_log_topic: Optional[Topic]
    if commit_log_topics:
        commit_log_topic = Topic(commit_log_topics.pop())
    else:
        commit_log_topic = None

    if commit_log_topics:
        raise ValueError("only one commit log topic is supported")

    # XXX: This requires that all storages are associated with the same Kafka
    # cluster so that they can be consumed by the same consumer instance.
    # Unfortunately, we don't have the concept of independently configurable
    # Kafka clusters in settings, only consumer configurations that are
    # associated with storages and/or global default configurations. To avoid
    # implementing yet another method of configuring Kafka clusters, this just
    # piggybacks on the existing configuration method(s), with the assumption
    # that most deployments are going to be using the default configuration.
    storage_keys = [*storages.keys()]
    consumer_configuration = build_kafka_consumer_configuration(
        storage_keys[0],
        consumer_group,
        auto_offset_reset=auto_offset_reset,
        queued_max_messages_kbytes=queued_max_messages_kbytes,
        queued_min_messages=queued_min_messages,
    )

    for storage_key in storage_keys[1:]:
        if (
            build_kafka_consumer_configuration(storage_key, consumer_group)[
                "bootstrap.servers"
            ]
            != consumer_configuration["bootstrap.servers"]
        ):
            raise ValueError("storages cannot be located on different Kafka clusters")

    if commit_log_topic is None:
        consumer = KafkaConsumer(consumer_configuration)
    else:
        # XXX: This relies on the assumptions that a.) the Kafka cluster where
        # the commit log topic is located is the same as the input topic (there
        # is no way to specify otherwise, at writing) and b.) all storages are
        # located on the same Kafka cluster (validated above.)
        producer = ConfluentKafkaProducer(
            build_kafka_producer_configuration(storage_keys[0])
        )
        consumer = KafkaConsumerWithCommitLog(
            consumer_configuration,
            producer=producer,
            commit_log_topic=commit_log_topic,
        )

    metrics = MetricsWrapper(environment.metrics, "consumer")
    processor = StreamProcessor(
        consumer,
        topic,
        MultistorageConsumerProcessingStrategyFactory(
            [*storages.values()],
            max_batch_size,
            max_batch_time_ms / 1000.0,
            processes=processes,
            input_block_size=input_block_size,
            output_block_size=output_block_size,
            metrics=metrics,
        ),
        metrics=metrics,
    )

    def handler(signum: int, frame: Any) -> None:
        processor.signal_shutdown()

    signal.signal(signal.SIGINT, handler)
    signal.signal(signal.SIGTERM, handler)

    processor.run()
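For reference, a hypothetical direct call with made-up tuning values. In the real project this function is most likely registered as a CLI command, so the keyword invocation below is illustrative only, and the storage names are assumptions:

multistorage_consumer(
    storage_names=["errors", "transactions"],  # assumed storage set
    consumer_group="snuba-consumers",
    max_batch_size=500,
    max_batch_time_ms=1000,
    auto_offset_reset="latest",
    queued_max_messages_kbytes=10000,
    queued_min_messages=1000,
    processes=4,
    input_block_size=16_000_000,
    output_block_size=16_000_000,
)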
Example #8
    cast,
)
from uuid import UUID

from sentry_sdk import set_tag

from snuba import environment, settings
from snuba.query.logical import Query
from snuba.query.query_settings import QuerySettings
from snuba.reader import Row
from snuba.state import get_config
from snuba.utils.metrics.wrapper import MetricsWrapper
from snuba.utils.threaded_function_delegator import Result
from snuba.web import QueryResult

metrics = MetricsWrapper(environment.metrics, "query.similarity")
logger = logging.getLogger("snuba.upgrade_discrepancies")


class Option(Enum):
    ERRORS = 1
    ERRORS_V2 = 2
    TRANSACTIONS = 3
    TRANSACTIONS_V2 = 4


class Choice(NamedTuple):
    primary: Option
    secondary: Optional[Option]
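
The metrics name "query.similarity" and the "snuba.upgrade_discrepancies" logger suggest these types pair a primary query run with an optional shadow run. A small illustration using only what is defined above:

# Run ERRORS as the primary query and shadow it with ERRORS_V2 for comparison.
comparison = Choice(primary=Option.ERRORS, secondary=Option.ERRORS_V2)

# A plain run carries no secondary option.
primary_only = Choice(primary=Option.TRANSACTIONS, secondary=None)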

Example #9
from snuba.clickhouse.query import Query
from snuba.query.matchers import (
    AnyOptionalString,
    Or,
    Param,
    String,
)
from snuba.query.matchers import Column as ColumnMatch
from snuba.query.matchers import FunctionCall as FunctionCallMatch
from snuba.query.matchers import Literal as LiteralMatch
from snuba.clickhouse.processors import QueryProcessor
from snuba.request.request_settings import RequestSettings
from snuba.utils.metrics.wrapper import MetricsWrapper


metrics = MetricsWrapper(environment.metrics, "api.query.uuid_processor")


class UUIDColumnProcessor(QueryProcessor):
    """
    If a condition is being performed on a column that stores UUIDs (as defined in the constructor)
    then change the condition to use a proper UUID instead of a string.
    """

    def formatted_uuid_pattern(self, suffix: str = "") -> FunctionCallMatch:
        return FunctionCallMatch(
            String("replaceAll"),
            (
                FunctionCallMatch(
                    String("toString"),
                    (
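The matcher construction is cut off above; the docstring already states the goal. As a worked illustration of why the rewrite matters, the 32-character hex form and the dashed form denote the same UUID once both sides are parsed, which is what comparing against a proper UUID rather than a raw string achieves. The example value is made up:

from uuid import UUID

# Before: equals(event_id, '560ceeb7b7b9437e898d9ee9bb1dbd4f')          -- string comparison
# After:  equals(event_id, <UUID 560ceeb7-b7b9-437e-898d-9ee9bb1dbd4f>)  -- UUID comparison
assert UUID("560ceeb7b7b9437e898d9ee9bb1dbd4f") == UUID("560ceeb7-b7b9-437e-898d-9ee9bb1dbd4f")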
Example #10
    def __init__(self) -> None:
        super().__init__(default_entity=EntityKey.DISCOVER)

    # XXX: This is temporary code that will eventually need to be ported to Sentry
    # since SnQL will require an entity to always be specified by the user.
    def select_entity(self, query: Query) -> EntityKey:
        selected_entity = match_query_to_entity(
            query, EVENTS_COLUMNS, TRANSACTIONS_COLUMNS
        )

        track_bad_query(query, selected_entity, EVENTS_COLUMNS, TRANSACTIONS_COLUMNS)

        return selected_entity


metrics = MetricsWrapper(environment.metrics, "api.discover")
logger = logging.getLogger(__name__)


EVENT_CONDITION = FunctionCallMatch(
    Param("function", Or([StringMatch(op) for op in BINARY_OPERATORS])),
    (
        Or([ColumnMatch(None, StringMatch("type")), LiteralMatch(None)]),
        Param("event_type", Or([ColumnMatch(), LiteralMatch()])),
    ),
)

TRANSACTION_FUNCTIONS = FunctionCallMatch(
    Or([StringMatch("apdex"), StringMatch("failure_rate")]), None
)
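A short sketch of how these matchers are typically exercised, assuming the matcher classes expose a match() method returning None on no match and that expression constructors follow the (alias, ...) order of snuba.query.expressions:

from snuba.query.expressions import Column, FunctionCall, Literal

# An apdex() call in the query is a strong hint that the transactions entity is meant.
apdex_call = FunctionCall(None, "apdex", (Column(None, None, "duration"), Literal(None, 300)))
assert TRANSACTION_FUNCTIONS.match(apdex_call) is not None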
Example #11
    extract_user,
)
from snuba.processor import (
    InsertBatch,
    MessageProcessor,
    ProcessedMessage,
    _as_dict_safe,
    _ensure_valid_date,
    _ensure_valid_ip,
    _unicodify,
)
from snuba.utils.metrics.wrapper import MetricsWrapper

from sentry_sdk import capture_exception

logger = logging.getLogger(__name__)

metrics = MetricsWrapper(environment.metrics, "replays.processor")

EventDict = Mapping[Any, Any]
RetentionDays = int


class ReplaysProcessor(MessageProcessor):
    PROMOTED_TAGS = {
        "environment",
        "sentry:release",
        "sentry:user",
        "sentry:dist",
    }

    def __extract_timestamp(self, field: int) -> datetime: