Example #1
from typing import List

import influxdb
from influxdb.exceptions import InfluxDBClientError

# NOTE: `settings` and `Logger` are project-local modules assumed by this
# example.


class MetricWriter:
    """
    Helper class for writing metrics in InfluxDB line format. For format details, see the link below.

    https://v2.docs.influxdata.com/v2.0/reference/syntax/line-protocol/
    """
    def __init__(self):
        self._db_client = influxdb.InfluxDBClient(
            host=settings.INFLUXDB_HOST,
            port=settings.INFLUXDB_PORT,
            username=settings.INFLUXDB_USER,
            password=settings.INFLUXDB_PASSWORD,
            database=settings.INFLUXDB_DB_NAME,
        )
        self._logger = Logger(type(self).__name__)

    def write(self, metric_data: List[str]) -> None:
        try:
            self._logger.debug('metric data: {}'.format(metric_data))
            write_result = self._db_client.write_points(
                points=metric_data,
                time_precision='ms',
                protocol='line',
            )
            if not write_result:
                self._logger.warning('could not write to influx')
        except InfluxDBClientError as ex:
            if ex.code == 400:
                # We are expecting to catch the following scenarios:
                #   - writing points that are older than the retention policy
                self._logger.warning(
                    'write_points client error [code={}, content={}]'.format(
                        ex.code, ex.content))
            else:
                raise
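
A sketch of how MetricWriter might be used (measurement name and values are hypothetical; assumes the `settings` module is configured). Each point follows the line protocol layout "<measurement>,<tag_set> <field_set> <timestamp>", with millisecond timestamps to match the time_precision='ms' passed in write():

writer = MetricWriter()
writer.write([
    # Integer fields carry an 'i' suffix in line protocol.
    'stock_quote,symbol=AAPL price=128.5,volume=1200i 1617293999000',
    'stock_quote,symbol=MSFT price=254.7,volume=900i 1617293999000',
])
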
Example #2
import json
from decimal import Decimal
from typing import List

# NOTE: `StockQuote`, `StockQuoteWriter`, and `Logger` are project-local
# names assumed by this example.


class StockQuotePipeline:
    def __init__(self, writer: StockQuoteWriter):
        self._writer = writer
        self._logger = Logger(type(self).__name__)

    def handler(self, message: str) -> None:
        """Callback that receives a raw stock quote message.

        :param message: Raw stock quote message.
        """

        self._logger.debug(message)
        quotes = self.parse(message)
        self._writer.write(quotes)

    def parse(self, message: str) -> List[StockQuote]:
        """Converts raw stock quote message into their domain representation.

        :param message: Raw stock quote message.
        :return: List of StockQuote objects. Can be empty.
        """

        try:
            # Ensures that we don't lose any precision while loading the JSON.
            data = json.loads(message, parse_float=Decimal)
        except json.decoder.JSONDecodeError:
            self._logger.error('unknown message: {}'.format(message))
            return []

        message_type = data.get('type')
        if not message_type:
            self._logger.error('message missing type: {}'.format(data))
            return []

        if message_type == 'ping':
            return []

        if not data.get('data'):
            self._logger.error('message missing data: {}'.format(data))
            return []

        quotes = data['data']
        quotes = data['data']
        # Ensure that we always maintain correct data types.
        return [
            StockQuote(
                timestamp=int(quote['t']),
                symbol=str(quote['s']),
                price=Decimal(quote['p']),
                volume=int(quote['v']),
            )
            for quote in quotes
        ]
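
For reference, a message shape parse() accepts, inferred from the fields read above ('t' = timestamp, 's' = symbol, 'p' = price, 'v' = volume). The writer here is a hypothetical no-op stand-in:

pipeline = StockQuotePipeline(writer=null_writer)  # null_writer: hypothetical no-op StockQuoteWriter
quotes = pipeline.parse(
    '{"type": "trade", "data": [{"t": 1617293999000, "s": "AAPL", "p": 128.5, "v": 100}]}')
# quotes == [StockQuote(timestamp=1617293999000, symbol='AAPL',
#                       price=Decimal('128.5'), volume=100)]
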
Example #3
import pickle
from typing import Callable, List, Tuple

import kafka
from kafka.errors import NoBrokersAvailable
from kafka.structs import OffsetAndMetadata, TopicPartition

# NOTE: `StockQuoteListener`, `StockQuote`, `Logger`, `utils`, and the
# CONSUMER_POLL_* constants are project-local names assumed by this example.


class KafkaConsumer(StockQuoteListener):

    def __init__(self, brokers: List[str], topic: str):
        self._brokers = brokers
        self._topic = topic
        self._consumer = None
        self._is_done = False
        self._logger = Logger(type(self).__name__)

    def start(self, handler: Callable) -> None:
        """Starts listening for stock quotes if the listener has never been stopped

        :param handler: Callback function invoked for every batch of stock quotes, with the following signature:
            quotes: List[StockQuote]
            return: None
        """
        self._connect()

        partitions = self._consumer.partitions_for_topic(self._topic)
        self._logger.info('partitions: {}'.format(', '.join(map(str, partitions))))

        # Assume that only one partition exists.
        topic_partition = TopicPartition(topic=self._topic, partition=0)
        begin_offsets = self._consumer.beginning_offsets([topic_partition])
        end_offsets = self._consumer.end_offsets([topic_partition])
        last_committed_offset = self._consumer.committed(topic_partition)
        self._logger.info('starting offset: {}'.format(begin_offsets[topic_partition]))
        self._logger.info('last offset: {}'.format(end_offsets[topic_partition]))
        self._logger.info('last committed offset: {}'.format(last_committed_offset))

        while not self._is_done:
            self._process_batch(topic_partition, handler)

        self._logger.info("closing consumer")
        self._consumer.close(autocommit=False)

    def stop(self) -> None:
        self._is_done = True

    def _commit_offsets(self, topic_partition: TopicPartition, offset: int):
        """Commits offsets for the partition of a given topic.

        This effectively advances the index so that future reads from the same Kafka consumer group will not read any
        records up to that offset.

        :param topic_partition: Partition of the topic where offsets are to be committed.
        :param offset: Largest offset read so far.
        """

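        # Kafka stores the committed offset as the *next* offset to consume,
        # hence offset + 1.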
        self._consumer.commit({
            topic_partition: OffsetAndMetadata(offset=offset + 1, metadata=''),
        })

    def _connect(self) -> None:
        self._consumer: kafka.KafkaConsumer = utils.retry(
            lambda: kafka.KafkaConsumer(
                self._topic,
                bootstrap_servers=self._brokers,
                auto_offset_reset='earliest',
                enable_auto_commit=False,
                group_id='my-group',
                value_deserializer=pickle.loads,
            ),
            None,
            num_retries=15,
            exception_type=NoBrokersAvailable,
            error_message='broker unavailable...',
            logger=self._logger,
        )

    def _poll_records(self, topic_partition: TopicPartition) -> Tuple[List[StockQuote], int]:
        """Polls for records from the partition of a given topic.

        :param topic_partition: Partition of the topic to be polled.
        :return: Tuple of:
            quotes: List of StockQuote objects received from this round of polling. Can be empty.
            max_offset: The largest offset for the objects received. If no objects were received, return 0.
        """
        result = self._consumer.poll(CONSUMER_POLL_TIMEOUT_MS, max_records=CONSUMER_POLL_MAX_RECORDS)
        if topic_partition not in result:
            return [], 0

        quotes = []
        max_offset = 0
        for message in result[topic_partition]:
            max_offset = max(max_offset, message.offset)
            quote: StockQuote = message.value
            quotes.append(quote)
        return quotes, max_offset

    def _process_batch(self, topic_partition: TopicPartition, handler: Callable) -> None:
        quotes, max_offset = self._poll_records(topic_partition)
        if not quotes:
            return

        handler(quotes)
        self._logger.debug('max offset: {}'.format(max_offset))
        self._commit_offsets(topic_partition, max_offset)
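
Because offsets are committed only after handler() returns (see _process_batch), a batch that was handled but not yet committed will be re-delivered after a crash: delivery is at-least-once, so handlers should be idempotent. A minimal wiring sketch, with placeholder broker address, topic, and handler:

import threading

consumer = KafkaConsumer(brokers=['localhost:9092'], topic='stock-quotes')

def print_quotes(quotes):
    for quote in quotes:
        print(quote)

# start() blocks until stop() is called, so run it on a worker thread.
worker = threading.Thread(target=consumer.start, args=(print_quotes,))
worker.start()

# ... later, during shutdown:
consumer.stop()
worker.join()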