Example #1
def replacer(replacements_topic, consumer_group, bootstrap_server,
             clickhouse_server, distributed_table_name, max_batch_size,
             max_batch_time_ms, auto_offset_reset, queued_max_messages_kbytes,
             queued_min_messages, log_level, dogstatsd_host, dogstatsd_port):
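    # NOTE: `settings`, `logging`, and `signal` are presumably module-level
    # imports of the enclosing CLI module; this snippet only shows the command
    # body, so they are not imported here.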

    import sentry_sdk
    from snuba import util
    from snuba.clickhouse import ClickhousePool
    from batching_kafka_consumer import BatchingKafkaConsumer
    from snuba.replacer import ReplacerWorker

    sentry_sdk.init(dsn=settings.SENTRY_DSN)

    logging.basicConfig(level=getattr(logging, log_level.upper()),
                        format='%(asctime)s %(message)s')
    metrics = util.create_metrics(dogstatsd_host,
                                  dogstatsd_port,
                                  'snuba.replacer',
                                  tags=["group:%s" % consumer_group])

    client_settings = {
        # Replacing existing rows requires reconstructing the entire tuple for each
        # event (via a SELECT), which is a Hard Thing (TM) for columnstores to do. With
        # the default settings it's common for ClickHouse to go over the default max_memory_usage
        # of 10GB per query. Lowering the max_block_size reduces memory usage, and increasing the
        # max_memory_usage gives the query more breathing room.
        'max_block_size': settings.REPLACER_MAX_BLOCK_SIZE,
        'max_memory_usage': settings.REPLACER_MAX_MEMORY_USAGE,
        # Don't use up production cache for the count() queries.
        'use_uncompressed_cache': 0,
    }

    clickhouse = ClickhousePool(
        host=clickhouse_server.split(':')[0],
        port=int(clickhouse_server.split(':')[1]),
        client_settings=client_settings,
    )

    replacer = BatchingKafkaConsumer(
        replacements_topic,
        worker=ReplacerWorker(clickhouse,
                              distributed_table_name,
                              metrics=metrics),
        max_batch_size=max_batch_size,
        max_batch_time=max_batch_time_ms,
        metrics=metrics,
        bootstrap_servers=bootstrap_server,
        group_id=consumer_group,
        producer=None,
        commit_log_topic=None,
        auto_offset_reset=auto_offset_reset,
    )

    def handler(signum, frame):
        replacer.signal_shutdown()

    signal.signal(signal.SIGINT, handler)

    replacer.run()
Example #2
def consumer(raw_events_topic, replacements_topic, commit_log_topic, consumer_group,
             bootstrap_server, clickhouse_server, distributed_table_name, max_batch_size, max_batch_time_ms,
             auto_offset_reset, queued_max_messages_kbytes, queued_min_messages, log_level,
             dogstatsd_host, dogstatsd_port):

    import sentry_sdk
    from snuba import util
    from snuba.clickhouse import ClickhousePool
    from batching_kafka_consumer import BatchingKafkaConsumer
    from snuba.consumer import ConsumerWorker

    sentry_sdk.init(dsn=settings.SENTRY_DSN)

    logging.basicConfig(level=getattr(logging, log_level.upper()), format='%(asctime)s %(message)s')
    metrics = util.create_metrics(
        dogstatsd_host, dogstatsd_port, 'snuba.consumer', tags=["group:%s" % consumer_group]
    )

    clickhouse = ClickhousePool(
        host=clickhouse_server.split(':')[0],
        port=int(clickhouse_server.split(':')[1]),
        client_settings={
            'load_balancing': 'in_order',
            'insert_distributed_sync': True,
        },
        metrics=metrics
    )

    producer = Producer({
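        # `Producer` is presumably confluent_kafka.Producer, imported at module
        # level in the original file (not shown in this snippet).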
        'bootstrap.servers': ','.join(bootstrap_server),
        'partitioner': 'consistent',
        'message.max.bytes': 50000000,  # 50MB, default is 1MB
    })

    consumer = BatchingKafkaConsumer(
        raw_events_topic,
        worker=ConsumerWorker(
            clickhouse, distributed_table_name,
            producer=producer, replacements_topic=replacements_topic, metrics=metrics
        ),
        max_batch_size=max_batch_size,
        max_batch_time=max_batch_time_ms,
        metrics=metrics,
        bootstrap_servers=bootstrap_server,
        group_id=consumer_group,
        producer=producer,
        commit_log_topic=commit_log_topic,
        auto_offset_reset=auto_offset_reset,
    )

    def handler(signum, frame):
        consumer.signal_shutdown()

    signal.signal(signal.SIGINT, handler)

    consumer.run()
Example #3
    def __init__(self, dataset_name: str, raw_topic: str,
                 replacements_topic: str, max_batch_size: int,
                 max_batch_time_ms: int, bootstrap_servers: Sequence[str],
                 group_id: str, commit_log_topic: str, auto_offset_reset: str,
                 queued_max_messages_kbytes: int, queued_min_messages: int,
                 dogstatsd_host: str, dogstatsd_port: int) -> None:
        self.dataset = get_dataset(dataset_name)
        self.dataset_name = dataset_name
        if not bootstrap_servers:
            self.bootstrap_servers = settings.DEFAULT_DATASET_BROKERS.get(
                dataset_name,
                settings.DEFAULT_BROKERS,
            )
        else:
            self.bootstrap_servers = bootstrap_servers

        stream_loader = enforce_table_writer(self.dataset).get_stream_loader()
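        # The dataset's table writer exposes a stream loader that knows the
        # default Kafka topics (raw events, replacements, commit log); explicit
        # CLI arguments override those defaults below.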
        self.raw_topic = raw_topic or stream_loader.get_default_topic_spec().topic_name
        default_replacement_topic_name = (
            stream_loader.get_replacement_topic_spec().topic_name
            if stream_loader.get_replacement_topic_spec()
            else None
        )
        self.replacements_topic = replacements_topic or default_replacement_topic_name
        default_commit_log_topic_name = (
            stream_loader.get_commit_log_topic_spec().topic_name
            if stream_loader.get_commit_log_topic_spec()
            else None
        )
        self.commit_log_topic = commit_log_topic or default_commit_log_topic_name

        # XXX: This can result in a producer being built in cases where it's
        # not actually required.
        self.producer = Producer({
            'bootstrap.servers': ','.join(self.bootstrap_servers),
            'partitioner': 'consistent',
            'message.max.bytes': 50000000,  # 50MB, default is 1MB
        })

        self.metrics = util.create_metrics(dogstatsd_host,
                                           dogstatsd_port,
                                           'snuba.consumer',
                                           tags={
                                               "group": group_id,
                                               "dataset": self.dataset_name,
                                           })

        self.max_batch_size = max_batch_size
        self.max_batch_time_ms = max_batch_time_ms
        self.group_id = group_id
        self.auto_offset_reset = auto_offset_reset
        self.queued_max_messages_kbytes = queued_max_messages_kbytes
        self.queued_min_messages = queued_min_messages
Example #4
    def __init__(self, dataset_name: str, raw_topic: str,
                 replacements_topic: str, max_batch_size: int,
                 max_batch_time_ms: int, bootstrap_servers: Sequence[str],
                 group_id: str, commit_log_topic: str, auto_offset_reset: str,
                 queued_max_messages_kbytes: int, queued_min_messages: int,
                 dogstatsd_host: str, dogstatsd_port: int) -> None:
        self.dataset = get_dataset(dataset_name)
        self.dataset_name = dataset_name
        if not bootstrap_servers:
            self.bootstrap_servers = settings.DEFAULT_DATASET_BROKERS.get(
                dataset_name,
                settings.DEFAULT_BROKERS,
            )
        else:
            self.bootstrap_servers = bootstrap_servers

        self.raw_topic = raw_topic or self.dataset.get_default_topic()
        self.replacements_topic = replacements_topic or self.dataset.get_default_replacement_topic()
        self.commit_log_topic = commit_log_topic or self.dataset.get_default_commit_log_topic()

        self.producer = Producer({
            'bootstrap.servers': ','.join(self.bootstrap_servers),
            'partitioner': 'consistent',
            'message.max.bytes': 50000000,  # 50MB, default is 1MB
        })

        self.metrics = util.create_metrics(
            dogstatsd_host,
            dogstatsd_port,
            'snuba.consumer',
            tags=["group:%s" % group_id, "dataset:%s" % self.dataset_name],
        )

        self.max_batch_size = max_batch_size
        self.max_batch_time_ms = max_batch_time_ms
        self.group_id = group_id
        self.auto_offset_reset = auto_offset_reset
        self.queued_max_messages_kbytes = queued_max_messages_kbytes
        self.queued_min_messages = queued_min_messages
Example #5
from snuba.redis import redis_client
from snuba.request import Request
from snuba.state.cache import Cache, RedisCache
from snuba.state.rate_limit import (
    PROJECT_RATE_LIMIT_NAME,
    RateLimitAggregator,
    RateLimitExceeded,
)
from snuba.util import create_metrics, force_bytes
from snuba.utils.codecs import JSONCodec
from snuba.utils.metrics.timer import Timer
from snuba.web.split import split_query


logger = logging.getLogger("snuba.query")
metrics = create_metrics("snuba.api")


ClickhouseQueryResult = MutableMapping[str, MutableMapping[str, Any]]


class RawQueryException(Exception):
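    # Carries the details of a failed ClickHouse query (error type, message,
    # query stats and the SQL that ran) plus any extra metadata from **meta.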
    def __init__(
        self, err_type: str, message: str, stats: Mapping[str, Any], sql: str, **meta
    ):
        self.err_type = err_type
        self.message = message
        self.stats = stats
        self.sql = sql
        self.meta = meta
Example #6
def subscriptions(
    *,
    dataset_name: str,
    topic: Optional[str],
    partitions: Optional[int],
    commit_log_topic: Optional[str],
    commit_log_groups: Sequence[str],
    consumer_group: str,
    auto_offset_reset: str,
    bootstrap_servers: Sequence[str],
    max_batch_size: int,
    max_batch_time_ms: int,
    schedule_ttl: int,
    result_topic: Optional[str],
    log_level: Optional[str],
) -> None:
    """Evaluates subscribed queries for a dataset."""

    assert result_topic is not None

    setup_logging(log_level)
    setup_sentry()

    dataset = get_dataset(dataset_name)

    if not bootstrap_servers:
        bootstrap_servers = settings.DEFAULT_DATASET_BROKERS.get(
            dataset_name, settings.DEFAULT_BROKERS
        )

    loader = enforce_table_writer(dataset).get_stream_loader()

    consumer = TickConsumer(
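        # SynchronizedConsumer holds the main consumer back until the listed
        # commit-log consumer groups have committed past each offset;
        # TickConsumer then turns that stream into ticks that drive the
        # subscription schedulers below.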
        SynchronizedConsumer(
            KafkaConsumer(
                build_kafka_consumer_configuration(
                    bootstrap_servers,
                    consumer_group,
                    auto_offset_reset=auto_offset_reset,
                ),
                PassthroughCodec(),
            ),
            KafkaConsumer(
                build_kafka_consumer_configuration(
                    bootstrap_servers,
                    f"subscriptions-commit-log-{uuid.uuid1().hex}",
                    auto_offset_reset="earliest",
                ),
                CommitCodec(),
            ),
            (
                Topic(commit_log_topic)
                if commit_log_topic is not None
                else Topic(loader.get_commit_log_topic_spec().topic_name)
            ),
            set(commit_log_groups),
        )
    )

    producer = KafkaProducer(
        {
            "bootstrap.servers": ",".join(bootstrap_servers),
            "partitioner": "consistent",
            "message.max.bytes": 50000000,  # 50MB, default is 1MB
        },
        SubscriptionResultCodec(),
    )

    with closing(consumer), closing(producer):
        batching_consumer = BatchingConsumer(
            consumer,
            (
                Topic(topic)
                if topic is not None
                else Topic(loader.get_default_topic_spec().topic_name)
            ),
            SubscriptionWorker(
                SubscriptionExecutor(
                    dataset,
                    ThreadPoolExecutor(
                        max_workers=settings.SUBSCRIPTIONS_MAX_CONCURRENT_QUERIES
                    ),
                ),
                {
                    index: SubscriptionScheduler(
                        RedisSubscriptionDataStore(
                            redis_client, dataset, PartitionId(index)
                        ),
                        PartitionId(index),
                        cache_ttl=timedelta(seconds=schedule_ttl),
                    )
                    for index in range(
                        partitions
                        if partitions is not None
                        else loader.get_default_topic_spec().partitions_number
                    )
                },
                producer,
                Topic(result_topic),
            ),
            max_batch_size,
            max_batch_time_ms,
            create_metrics(
                "snuba.subscriptions",
                tags={"group": consumer_group, "dataset": dataset_name},
            ),
        )

        def handler(signum, frame) -> None:
            batching_consumer.signal_shutdown()

        signal.signal(signal.SIGINT, handler)
        signal.signal(signal.SIGTERM, handler)

        batching_consumer.run()
Example #7
def replacer(
    *,
    replacements_topic: Optional[str],
    consumer_group: str,
    bootstrap_server: Sequence[str],
    dataset_name: str,
    max_batch_size: int,
    max_batch_time_ms: int,
    auto_offset_reset: str,
    queued_max_messages_kbytes: int,
    queued_min_messages: int,
    log_level: Optional[str] = None,
) -> None:

    from snuba import util
    from snuba.clickhouse.native import ClickhousePool
    from snuba.replacer import ReplacerWorker
    from snuba.utils.codecs import PassthroughCodec
    from snuba.utils.streams.batching import BatchingConsumer
    from snuba.utils.streams.kafka import (
        KafkaConsumer,
        KafkaPayload,
        TransportError,
        build_kafka_consumer_configuration,
    )
    from snuba.utils.streams.types import Topic

    setup_logging(log_level)
    setup_sentry()

    dataset = get_dataset(dataset_name)

    stream_loader = enforce_table_writer(dataset).get_stream_loader()
    default_replacement_topic_spec = stream_loader.get_replacement_topic_spec()
    assert (
        default_replacement_topic_spec is not None
    ), f"Dataset {dataset} does not have a replacement topic."
    replacements_topic = replacements_topic or default_replacement_topic_spec.topic_name

    metrics = util.create_metrics("snuba.replacer",
                                  tags={"group": consumer_group})

    client_settings = {
        # Replacing existing rows requires reconstructing the entire tuple for each
        # event (via a SELECT), which is a Hard Thing (TM) for columnstores to do. With
        # the default settings it's common for ClickHouse to go over the default max_memory_usage
        # of 10GB per query. Lowering the max_block_size reduces memory usage, and increasing the
        # max_memory_usage gives the query more breathing room.
        "max_block_size": settings.REPLACER_MAX_BLOCK_SIZE,
        "max_memory_usage": settings.REPLACER_MAX_MEMORY_USAGE,
        # Don't use up production cache for the count() queries.
        "use_uncompressed_cache": 0,
    }

    clickhouse = ClickhousePool(
        settings.CLICKHOUSE_HOST,
        settings.CLICKHOUSE_PORT,
        client_settings=client_settings,
    )

    codec: PassthroughCodec[KafkaPayload] = PassthroughCodec()
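    # PassthroughCodec leaves the raw Kafka payload untouched; ReplacerWorker
    # decodes the replacement messages itself.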
    replacer = BatchingConsumer(
        KafkaConsumer(
            build_kafka_consumer_configuration(
                bootstrap_servers=bootstrap_server,
                group_id=consumer_group,
                auto_offset_reset=auto_offset_reset,
                queued_max_messages_kbytes=queued_max_messages_kbytes,
                queued_min_messages=queued_min_messages,
            ),
            codec=codec,
        ),
        Topic(replacements_topic),
        worker=ReplacerWorker(clickhouse, dataset, metrics=metrics),
        max_batch_size=max_batch_size,
        max_batch_time=max_batch_time_ms,
        metrics=metrics,
        recoverable_errors=[TransportError],
    )

    def handler(signum, frame) -> None:
        replacer.signal_shutdown()

    signal.signal(signal.SIGINT, handler)
    signal.signal(signal.SIGTERM, handler)

    replacer.run()
Example #8
    logging.basicConfig(
        level=getattr(logging, level.upper()),
        format=settings.LOG_FORMAT,
    )


def traces_sampler(sampling_context: Mapping[str, Any]) -> Any:
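    # Follow the parent's sampling decision; when there is no parent, don't sample.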
    return sampling_context["parent_sampled"] or False


def setup_sentry() -> None:
    sentry_sdk.init(
        dsn=settings.SENTRY_DSN,
        integrations=[
            FlaskIntegration(),
            GnuBacktraceIntegration(),
            LoggingIntegration(event_level=logging.WARNING),
            RedisIntegration(),
        ],
        release=os.getenv("SNUBA_RELEASE"),
        traces_sampler=traces_sampler,
    )


metrics = create_metrics(
    "snuba",
    tags=None,
    sample_rates=settings.DOGSTATSD_SAMPLING_RATES,
)
Example #9
    extract_user,
    flatten_nested_field,
)

from snuba.processor import (
    _as_dict_safe,
    MessageProcessor,
    ProcessorAction,
    ProcessedMessage,
    _ensure_valid_date,
    _ensure_valid_ip,
    _unicodify,
)
from snuba.util import create_metrics

metrics = create_metrics("snuba.transactions.processor")

UNKNOWN_SPAN_STATUS = 2
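# UNKNOWN_SPAN_STATUS: assumed to be the tracing protocol's numeric code for an
# "unknown" span status.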


class TransactionsMessageProcessor(MessageProcessor):
    PROMOTED_TAGS = {
        "environment",
        "sentry:release",
        "sentry:user",
        "sentry:dist",
    }

    def __extract_timestamp(self, field):
        timestamp = _ensure_valid_date(datetime.fromtimestamp(field))
        if timestamp is None:
Example #10
def setup_logging(level: Optional[str] = None) -> None:
    if level is None:
        level = settings.LOG_LEVEL

    logging.basicConfig(
        level=getattr(logging, level.upper()),
        format=settings.LOG_FORMAT,
    )


def setup_sentry() -> None:
    sentry_sdk.init(
        dsn=settings.SENTRY_DSN,
        integrations=[FlaskIntegration(),
                      GnuBacktraceIntegration()],
        release=os.getenv("SNUBA_RELEASE"),
    )


clickhouse_rw = ClickhousePool(settings.CLICKHOUSE_HOST,
                               settings.CLICKHOUSE_PORT)
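# Two module-level connection pools: `clickhouse_rw` above for read/write use,
# and `clickhouse_ro` below, restricted via the `readonly` client setting.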
clickhouse_ro = ClickhousePool(
    settings.CLICKHOUSE_HOST,
    settings.CLICKHOUSE_PORT,
    client_settings={"readonly": True},
)

metrics = create_metrics("snuba")

reader: Reader[ClickhouseQuery] = NativeDriverReader(clickhouse_ro)
Example #11
from typing import Any, MutableMapping, NamedTuple

from snuba import settings, state
from snuba.clickhouse.native import ClickhousePool
from snuba.clickhouse.query import ClickhouseQuery
from snuba.query.columns import all_referenced_columns
from snuba.request import Request
from snuba.state.rate_limit import RateLimitAggregator, RateLimitExceeded, PROJECT_RATE_LIMIT_NAME
from snuba.util import (
    create_metrics,
    force_bytes,
    Timer,
)

logger = logging.getLogger('snuba.query')
metrics = create_metrics(settings.DOGSTATSD_HOST, settings.DOGSTATSD_PORT,
                         'snuba.api')


class QueryResult(NamedTuple):
    # TODO: Give a better abstraction to QueryResult
    result: MutableMapping[str, MutableMapping[str, Any]]
    status: int


def raw_query(
    request: Request,
    query: ClickhouseQuery,
    client: ClickhousePool,
    timer: Timer,
    stats=None,
) -> QueryResult:
Example #12
    def __init__(
        self,
        dataset_name: str,
        raw_topic: Optional[str],
        replacements_topic: Optional[str],
        max_batch_size: int,
        max_batch_time_ms: int,
        bootstrap_servers: Sequence[str],
        group_id: str,
        commit_log_topic: Optional[str],
        auto_offset_reset: str,
        queued_max_messages_kbytes: int,
        queued_min_messages: int,
        rapidjson_deserialize: bool,
        rapidjson_serialize: bool,
        commit_retry_policy: Optional[RetryPolicy] = None,
    ) -> None:
        self.dataset = get_dataset(dataset_name)
        self.dataset_name = dataset_name
        if not bootstrap_servers:
            self.bootstrap_servers = settings.DEFAULT_DATASET_BROKERS.get(
                dataset_name, settings.DEFAULT_BROKERS,
            )
        else:
            self.bootstrap_servers = bootstrap_servers

        stream_loader = enforce_table_writer(self.dataset).get_stream_loader()

        self.raw_topic: Topic
        if raw_topic is not None:
            self.raw_topic = Topic(raw_topic)
        else:
            self.raw_topic = Topic(stream_loader.get_default_topic_spec().topic_name)

        self.replacements_topic: Optional[Topic]
        if replacements_topic is not None:
            self.replacements_topic = Topic(replacements_topic)
        else:
            replacement_topic_spec = stream_loader.get_replacement_topic_spec()
            if replacement_topic_spec is not None:
                self.replacements_topic = Topic(replacement_topic_spec.topic_name)
            else:
                self.replacements_topic = None

        self.commit_log_topic: Optional[Topic]
        if commit_log_topic is not None:
            self.commit_log_topic = Topic(commit_log_topic)
        else:
            commit_log_topic_spec = stream_loader.get_commit_log_topic_spec()
            if commit_log_topic_spec is not None:
                self.commit_log_topic = Topic(commit_log_topic_spec.topic_name)
            else:
                self.commit_log_topic = None

        # XXX: This can result in a producer being built in cases where it's
        # not actually required.
        self.producer = Producer(
            {
                "bootstrap.servers": ",".join(self.bootstrap_servers),
                "partitioner": "consistent",
                "message.max.bytes": 50000000,  # 50MB, default is 1MB
            }
        )

        self.metrics = util.create_metrics(
            "snuba.consumer", tags={"group": group_id, "dataset": self.dataset_name},
        )

        self.max_batch_size = max_batch_size
        self.max_batch_time_ms = max_batch_time_ms
        self.group_id = group_id
        self.auto_offset_reset = auto_offset_reset
        self.queued_max_messages_kbytes = queued_max_messages_kbytes
        self.queued_min_messages = queued_min_messages

        if commit_retry_policy is None:
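            # Default policy: up to 3 attempts with a constant 1-second delay,
            # retrying only Kafka errors that are typically transient.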
            commit_retry_policy = BasicRetryPolicy(
                3,
                constant_delay(1),
                lambda e: isinstance(e, KafkaException)
                and e.args[0].code()
                in (
                    KafkaError.REQUEST_TIMED_OUT,
                    KafkaError.NOT_COORDINATOR_FOR_GROUP,
                    KafkaError._WAIT_COORD,
                ),
            )

        self.__commit_retry_policy = commit_retry_policy
        self.__rapidjson_deserialize = rapidjson_deserialize
        self.__rapidjson_serialize = rapidjson_serialize
Example #13
def replacer(*, replacements_topic, consumer_group, bootstrap_server,
             clickhouse_host, clickhouse_port, dataset, max_batch_size,
             max_batch_time_ms, auto_offset_reset, queued_max_messages_kbytes,
             queued_min_messages, log_level, dogstatsd_host, dogstatsd_port):

    import sentry_sdk
    from snuba import util
    from snuba.clickhouse.native import ClickhousePool
    from snuba.replacer import ReplacerWorker
    from snuba.utils.streams.batching import BatchingConsumer
    from snuba.utils.streams.kafka import KafkaConsumer, TransportError, build_kafka_consumer_configuration

    sentry_sdk.init(dsn=settings.SENTRY_DSN)
    dataset = get_dataset(dataset)

    logging.basicConfig(level=getattr(logging, log_level.upper()),
                        format='%(asctime)s %(message)s')

    stream_loader = enforce_table_writer(dataset).get_stream_loader()
    default_replacement_topic_spec = stream_loader.get_replacement_topic_spec()
    assert default_replacement_topic_spec is not None, f"Dataset {dataset} does not have a replacement topic."
    replacements_topic = replacements_topic or default_replacement_topic_spec.topic_name

    metrics = util.create_metrics(dogstatsd_host,
                                  dogstatsd_port,
                                  'snuba.replacer',
                                  tags={"group": consumer_group})

    client_settings = {
        # Replacing existing rows requires reconstructing the entire tuple for each
        # event (via a SELECT), which is a Hard Thing (TM) for columnstores to do. With
        # the default settings it's common for ClickHouse to go over the default max_memory_usage
        # of 10GB per query. Lowering the max_block_size reduces memory usage, and increasing the
        # max_memory_usage gives the query more breathing room.
        'max_block_size': settings.REPLACER_MAX_BLOCK_SIZE,
        'max_memory_usage': settings.REPLACER_MAX_MEMORY_USAGE,
        # Don't use up production cache for the count() queries.
        'use_uncompressed_cache': 0,
    }

    clickhouse = ClickhousePool(
        host=clickhouse_host,
        port=clickhouse_port,
        client_settings=client_settings,
    )

    replacer = BatchingConsumer(
        KafkaConsumer(
            build_kafka_consumer_configuration(
                bootstrap_servers=bootstrap_server,
                group_id=consumer_group,
                auto_offset_reset=auto_offset_reset,
                queued_max_messages_kbytes=queued_max_messages_kbytes,
                queued_min_messages=queued_min_messages,
            ),
        ),
        replacements_topic,
        worker=ReplacerWorker(clickhouse, dataset, metrics=metrics),
        max_batch_size=max_batch_size,
        max_batch_time=max_batch_time_ms,
        metrics=metrics,
        recoverable_errors=[TransportError],
    )

    def handler(signum, frame):
        replacer.signal_shutdown()

    signal.signal(signal.SIGINT, handler)
    signal.signal(signal.SIGTERM, handler)

    replacer.run()
Example #14
import uuid

from snuba import settings
from snuba.processor import (_as_dict_safe, MessageProcessor, ProcessorAction,
                             ProcessedMessage, _ensure_valid_date,
                             _ensure_valid_ip, _unicodify)
from snuba.datasets.events_processor import (
    enforce_retention,
    extract_base,
    extract_extra_contexts,
    extract_extra_tags,
    extract_user,
)
from snuba.util import create_metrics

metrics = create_metrics(settings.DOGSTATSD_HOST, settings.DOGSTATSD_PORT,
                         'snuba.transactions.processor')


class TransactionsMessageProcessor(MessageProcessor):
    PROMOTED_TAGS = {
        "environment",
        "sentry:release",
        "sentry:user",
        "sentry:dist",
    }

    def __extract_timestamp(self, field):
        timestamp = _ensure_valid_date(datetime.fromtimestamp(field))
        if timestamp is None:
            timestamp = datetime.utcnow()
        milliseconds = int(timestamp.microsecond / 1000)