    def test_create_topic_exception(self, mocker):
        # Patch AdminClient so no real broker is contacted, then force
        # create_topics to raise in order to exercise the failure path.
        mock_kafka_admin = mocker.Mock()
        mocker.patch('confluent_kafka.admin.AdminClient',
                     return_value=mock_kafka_admin)
        from confluent_kafka.cimpl import KafkaException
        mock_kafka_admin.create_topics = mocker.MagicMock(
            side_effect=KafkaException('fo'))

        kafka_manager = KafkaTopicManager('kafka.servicex.org:1272')

        result = kafka_manager.create_topic('my-topic',
                                            max_message_size=1024,
                                            num_partitions=100)
        # create_topic should swallow the exception and report failure.
        assert not result
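
The test above only passes if create_topic traps the KafkaException and reports failure with a falsy return value. A minimal sketch of a manager written that way (the internals here are an assumption for illustration, not the implementation under test):

from confluent_kafka.admin import AdminClient, NewTopic
from confluent_kafka.cimpl import KafkaException


class KafkaTopicManager:
    def __init__(self, broker):
        self.admin = AdminClient({'bootstrap.servers': broker})

    def create_topic(self, topic, max_message_size, num_partitions):
        new_topic = NewTopic(
            topic,
            num_partitions=num_partitions,
            replication_factor=1,
            config={'max.message.bytes': str(max_message_size)})
        try:
            # create_topics() returns {topic_name: future}; result() raises
            # KafkaException if creation failed.
            for future in self.admin.create_topics([new_topic]).values():
                future.result()
            return True
        except KafkaException:
            return False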
import statistics
import sys
from json import loads

from confluent_kafka import KafkaError, KafkaException

running = True


def basic_consume_loop(consumer, topics):
    try:
        # `topics` must be a list of TopicPartition objects for assign().
        consumer.assign(topics)
        durs = []
        i = 0
        message = {}
        while running:
            msg = consumer.poll(timeout=1.0)
            if msg is None:
                continue

            message = {}
            if msg.error():
                if msg.error().code() == KafkaError._PARTITION_EOF:
                    # End of partition event
                    sys.stderr.write('%% %s [%d] reached end at offset %d\n' %
                                     (msg.topic(), msg.partition(), msg.offset()))
                else:
                    raise KafkaException(msg.error())
            else:
                message = loads(msg.value().decode("utf-8"))
                if message['dur_evt_inf'] is not None:
                    i += 1
                    durs.append(message['dur_evt_inf'])
            if i == 1000000:
                break

            if i % 1000 == 0:
                print(message)
                print(i)

    finally:
        # Close down consumer to commit final offsets.
        consumer.close()
        # Guard against an empty sample: statistics.mean() raises on [].
        if durs:
            print('max=' + str(max(durs)))
            print('min=' + str(min(durs)))
            print('avg=' + str(statistics.mean(durs)))
            print('med=' + str(statistics.median(durs)))
            print('total obs =' + str(len(durs)))
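
A sketch of how this loop might be driven; the broker address, group id, and topic/partition are placeholders:

from confluent_kafka import Consumer, TopicPartition

consumer = Consumer({
    'bootstrap.servers': 'localhost:9092',  # placeholder broker
    'group.id': 'duration-stats',
    'auto.offset.reset': 'earliest',
})

# assign() expects TopicPartition objects rather than bare topic names.
basic_consume_loop(consumer, [TopicPartition('results', 0)])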
    def poll(self):
        msg = self._consumer.poll(self._config["poll_timeout"])
        if msg is not None:
            err = msg.error()
            if err:
                if err.code() == KafkaError._PARTITION_EOF:
                    # End of partition is not an error; report "no message".
                    return None
                self._logger.error(
                    "KafkaConsumer Error {} at pid {}: topic={} partition=[{}] offset={}\n"
                    .format(err.code(), os.getpid(), msg.topic(),
                            msg.partition(), msg.offset()))
                raise KafkaException(err)

            if msg.value():
                return msg
        # Fall through: no message, end of partition, or empty value.
        return None
Example No. 4
    def run(self):
        try:
            self.consumer.assign(self.tls)
            i = 0
            while True:
                msg = self.consumer.poll(timeout=1.0)
                if msg is None:
                    continue

                if msg.error():
                    if msg.error().code() == KafkaError._PARTITION_EOF:
                        # End of partition event
                        sys.stderr.write(
                            '%% %s [%d] reached end at offset %d\n' %
                            (msg.topic(), msg.partition(), msg.offset()))
                    else:
                        raise KafkaException(msg.error())
                else:
                    message = loads(msg.value().decode("utf-8"))
                    ingest_ts = message['ingestTs']
                    message_id = message['message_id']
                    truth = message['Class']
                    # Stub: echo the label; replace with model.predict_one(x)
                    # and model.learn_one(x, y) for a real model.
                    y_hat = truth
                    inference_ts = time.time()
                    out = {
                        'ingest_ts': ingest_ts,
                        'message_id': message_id,
                        'truth': truth,
                        'y_hat': y_hat,
                        'inference_ts': inference_ts,
                        'dur_evt_inf': inference_ts - ingest_ts,
                    }
                    i += 1
                    k = str(i)
                    v = json.dumps(out).encode('utf-8')
                    # Producing is disabled in this stub; the payload is only
                    # built and logged.
                    # self.producer.produce(self.result_t, value=v, key=k)

                    if i % 2 == 0:
                        print('sending to ' + self.result_t + ' ' +
                              str(self.result_t_p) + ' ' + str(out))
                        self.producer.flush()

        finally:
            # Close down consumer to commit final offsets.
            self.consumer.close()
Example No. 5
    def process_msgs(self, msgs) -> bool:
        points = []
        for msg in msgs:
            if msg.error():
                if msg.error().code() == KafkaError._PARTITION_EOF:
                    # End of partition event
                    sys.stderr.write(
                        '%% %s [%d] reached end at offset %d\n' %
                        (msg.topic(), msg.partition(), msg.offset()))
                else:
                    raise KafkaException(msg.error())
                return False

            if DEBUG == "true":
                print('Received message: %s' % msg.value().decode('utf-8'),
                      flush=True)
            data_input = json.loads(msg.value().decode('utf-8'))
            if self.filter_msg(data_input):
                body = {
                    "measurement": self.data_measurement,
                    "fields": get_field_values(self.field_config, data_input)
                }
                if self.try_time:
                    try:
                        # Extract the event time from the message via an
                        # objectpath query.
                        body["time"] = Tree(data_input).execute(
                            '$.' + self.data_time_mapping)
                    except SyntaxError as err:
                        print(
                            'Disabling reading time from message, error occurred:',
                            err.msg,
                            flush=True)
                        print(
                            'Influx will set time to time of arrival by default',
                            flush=True)
                        self.try_time = False
                if len(self.tag_config) > 0:
                    body["tags"] = get_field_values(self.tag_config,
                                                    data_input)
                if DEBUG == "true":
                    print('Write message: %s' % body, flush=True)
                points.append(body)
        try:
            self.influx_client.write_points(points,
                                            time_precision=self.time_precision)
        except exceptions.InfluxDBClientError as e:
            print('Received Influx error: %s' % e.content, flush=True)
        return True
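
For reference, write_points from the influxdb-python client takes a list of dicts shaped like the body built above; a minimal hand-written point (host, database, and all measurement/field names are illustrative):

from influxdb import InfluxDBClient

client = InfluxDBClient(host='localhost', port=8086, database='metrics')

point = {
    "measurement": "sensor_readings",
    "tags": {"device": "dev-1"},
    "time": "2021-01-01T00:00:00Z",  # optional; arrival time is used if omitted
    "fields": {"temperature": 21.5},
}
client.write_points([point])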
Example No. 6
def run_consumer(container_manager):
    schema_registry_conf = {'url': config['kafka']['schema_registry']}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    avro_deserializer = AvroDeserializer(schemas.run_record_schema,
                                         schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    conf = {
        'bootstrap.servers': config['kafka']['servers'],
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': "runs-consumers",
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': 'false'
    }

    consumer = DeserializingConsumer(conf)
    print('[+] Listening for incoming runs')

    try:
        consumer_topics = [config['kafka']['runs-topic']]
        consumer.subscribe(consumer_topics)

        while True:
            try:
                msg = consumer.poll(timeout=1.0)
                if msg is None:
                    continue

                if msg.error():
                    raise KafkaException(msg.error())
                else:
                    print('[-] Run initialization')
                    print(msg.value())
                    consumer.commit(asynchronous=False)
                    # handlers.handle_run_execution(container_manager, msg.value())
                    threading.Thread(target=handlers.handle_run_execution,
                                     args=(container_manager,
                                           msg.value())).start()
            except ConsumeError as e:
                print(
                    f'[Exception] error_code: {e.code()} message: {e.message()} exception: {e}'
                )
    finally:
        consumer.close()
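
The consumer above expects string keys and Avro-encoded values; a sketch of the matching producer side, reusing the same config and schemas modules (the payload dict is an assumption and must match run_record_schema):

from confluent_kafka import SerializingProducer
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroSerializer
from confluent_kafka.serialization import StringSerializer

schema_registry_client = SchemaRegistryClient(
    {'url': config['kafka']['schema_registry']})

producer = SerializingProducer({
    'bootstrap.servers': config['kafka']['servers'],
    'key.serializer': StringSerializer('utf_8'),
    # Argument order mirrors the AvroDeserializer call above.
    'value.serializer': AvroSerializer(schemas.run_record_schema,
                                       schema_registry_client),
})

producer.produce(topic=config['kafka']['runs-topic'],
                 key='run-1',
                 value={'field': 'value'})  # placeholder record
producer.flush()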
Example No. 7
    def mensagem_dados_empresa_consumer(self):
        try:
            self.__consumer.subscribe(self.__topics)
            mensagem = self.__consumer.poll(timeout=1.0)
            if mensagem is None:
                return None
            if mensagem.error():
                if mensagem.error().code() == KafkaError._PARTITION_EOF:
                    # End of partition event
                    sys.stderr.write('%% %s [%d] reached end at offset %d\n' %
                                     (mensagem.topic(), mensagem.partition(),
                                      mensagem.offset()))
                else:
                    raise KafkaException(mensagem.error())
            else:
                return mensagem
        except Exception as exc:
            print("Error:", exc)
Example No. 8
        def get_watched_messages(self, interval=0.0):
            logging.debug(
                "Checking messages that appeared on kafka topics: %r",
                self.watching_topics)
            res = []

            while True:
                message = self.consumer.poll(interval)
                if message is None:
                    break  # done reading

                if message.error():
                    raise KafkaException("kafka consumer error: {}".format(
                        message.error()))

                res.append(message)

            # TODO: consumer.close()
            return res
Example No. 9
def basic_consume_loop(consumer, topics):
    try:
        consumer.subscribe(topics)

        while running:
            msg = consumer.poll(timeout=1.0)
            if msg is None: continue

            if msg.error():
                if msg.error().code() == KafkaError._PARTITION_EOF:
                    # End of partition event
                    sys.stderr.write(
                        '%% %s [%d] reached end at offset %d\n' %
                        (msg.topic(), msg.partition(), msg.offset()))
                elif msg.error():
                    raise KafkaException(msg.error())
            else:
                msg_process(msg)
    finally:
        # Close down consumer to commit final offsets.
        consumer.close()
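
This loop depends on a module-level running flag that the snippet never sets; the usual companion from the confluent-kafka docs is a small shutdown hook:

running = True

def shutdown():
    # Flip the flag so basic_consume_loop() exits after its current poll.
    global running
    running = False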
Example No. 10
        def get_watched_messages(self, interval=0.0, predicate=lambda x: True):
            logging.debug(
                "Checking messages that appeared on kafka topics: %r",
                self.watching_topics)
            res = []

            start = time.time()
            while True:
                msg = self.consumer.poll(interval)
                if msg is None or time.time() - start > interval:
                    break  # done reading

                if msg.error():
                    raise KafkaException("kafka consumer error: {}".format(
                        msg.error()))

                logging.debug(
                    "Potential message: %r",
                    (msg.partition(), msg.key(), msg.headers(), msg.value()))
                if predicate(msg):
                    res.append(msg)

            # TODO: consumer.close()
            return res
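Example No. 11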
    def run(self):
        ### Set up model, metric, and starting timestamp for this model instance ###
        model = self.model
        metric = self.metric
        print('MODEL and METRIC before any messages consumed:', model, metric)
        start_consume_ts = time.time()
        print('Start Consume Time ' + str(start_consume_ts))
        ######
        try:
            self.consumer.assign(self.tls)
            i = 0
            while True:
                msg = self.consumer.poll(timeout=1.0)
                if msg is None:
                    continue

                if msg.error():
                    if msg.error().code() == KafkaError._PARTITION_EOF:
                        # End of partition event
                        sys.stderr.write(
                            '%% %s [%d] reached end at offset %d\n' %
                            (msg.topic(), msg.partition(), msg.offset()))
                    else:
                        raise KafkaException(msg.error())
                else:
                    message = loads(msg.value().decode("utf-8"))
                    ingest_ts = message.pop('ingestTs')     # ingestion time from Kafka
                    message_id = message.pop('message_id')  # message ID from Kafka
                    truth = float(message.pop('Class'))     # true label supplied by Kafka
                    # Drop the label and bookkeeping fields before passing the
                    # remaining features to the model.
                    message.pop('ignore')
                    x = {k: float(v) for k, v in message.items()}
                    # Score before learning so the model never sees an example
                    # before predicting it.
                    y_hat = model.score_one(x)  # or model.predict_one(x) for a classifier
                    if i % 1000 == 0:
                        print('x: ', x)
                        print('feature types: ', [type(v) for v in x.values()])
                        print('model: ', model)
                        print('y_hat: ', y_hat)
                    inference_ts = time.time()
                    metric = metric.update(truth, y_hat)  # update the running metric
                    model = model.learn_one(x)  # unsupervised update; use learn_one(x, y) when supervised
                    learn_ts = time.time()
                    out = {
                        'ingest_ts': ingest_ts,
                        'learn_ts': learn_ts,
                        'message_id': message_id,
                        'truth': truth,
                        'y_hat': y_hat,
                        'inference_ts': inference_ts,
                        'dur_evt_inf': inference_ts - ingest_ts,
                        'dur_start_inf': inference_ts - start_consume_ts,
                        'dur_inf_learn': learn_ts - inference_ts,
                        'model_metric': metric.get(),
                        'ignore': False,
                    }
                    i += 1
                    k = str(i)
                    v = json.dumps(out).encode('utf-8')
                    self.producer.produce(self.result_t, value=v, key=k)

                    if i % 1000 == 0:
                        print('sending to ' + self.result_t + ' ' +
                              str(self.result_t_p) + ' ' + str(out))
                        self.producer.flush()
                    if i % 100000 == 0:
                        # Periodically checkpoint the model; retry once if the
                        # existing file cannot be overwritten.
                        try:
                            pickle.dump(model, open("model.pkl", "wb"))
                        except Exception:
                            os.remove("model.pkl")
                            pickle.dump(model, open("model.pkl", "wb"))

        finally:
            # Close down consumer to commit final offsets.
            self.consumer.close()
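
A minimal sketch for restoring the checkpoint on restart; the with form also closes the file handle, which the pickle.dump(..., open(...)) calls above leave to the garbage collector:

import pickle

with open("model.pkl", "rb") as fh:
    model = pickle.load(fh)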
Example No. 12
def consumer(networkName, topic):

    # The Elasticsearch client is only used by the commented-out indexing below.
    es = Elasticsearch('192.168.5.71:9200')
    conf = {
        'bootstrap.servers': "192.168.5.41:9092",
        'group.id': "demo",
        'auto.offset.reset': 'smallest'
    }
    consumer = Consumer(conf)
    topics = [topic]
    running = True

    try:
        consumer.subscribe(topics)

        msg_count = 0
        nd_count = 0
        json_object = ""
        while running:
            msg = consumer.poll(timeout=1.0)
            if msg is None:
                continue

            if msg.error():
                if msg.error().code() == KafkaError._PARTITION_EOF:
                    # End of partition event
                    sys.stderr.write(
                        '%% %s [%d] reached end at offset %d\n' %
                        (msg.topic(), msg.partition(), msg.offset()))
                else:
                    raise KafkaException(msg.error())
            else:
                # Reassemble a JSON document that arrives split across
                # messages: start on "_index", accumulate until a lone ",".
                if "_index" in msg.value().decode("utf-8"):
                    json_object = "{"
                    nd_count += 1
                if nd_count > 0:
                    json_object += (msg.value().decode("utf-8"))
                if msg.value().decode("utf-8").strip() == ",":
                    with open('json_file_pretty', 'a',
                              encoding='utf-8') as outfile:
                        if json_object != "":
                            # Parse the accumulated text, then pretty-print it.
                            x = json.loads(json_object)
                            print(x)
                            print(networkName)
                            json.dump(x, outfile, sort_keys=True, indent=4)
                    json_object = ""
                    nd_count = 0
                # doc = {
                #     'text': json_object,
                #     'timestamp': datetime.now(),
                # }
                # es.index(index="test-index", body=doc, refresh='wait_for')
                msg_count += 1
                # if msg_count % MIN_COMMIT_COUNT == 0:
                #     consumer.commit(asynchronous=False)
    finally:
        # Close down consumer to commit final offsets.
        consumer.close()