Example #1
def test_murmur2_java_compatibility():
    p = Murmur2Partitioner(range(1000))
    # compare with output from Kafka's org.apache.kafka.clients.producer.Partitioner
    assert p.partition(b'') == 681
    assert p.partition(b'a') == 524
    assert p.partition(b'ab') == 434
    assert p.partition(b'abc') == 107
    assert p.partition(b'123456789') == 566
    assert p.partition(b'\x00 ') == 742
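These expected values encode Kafka's default key-hashing rule: take the murmur2 hash of the key bytes, force it positive, and reduce it modulo the partition count. A minimal sketch of that mapping, assuming the murmur2 helper exposed by kafka-python's kafka.partitioner.hashed module (the same module that provides Murmur2Partitioner):

from kafka.partitioner.hashed import murmur2

def partition_for(key_bytes, num_partitions=1000):
    # Kafka's default: positive murmur2 hash of the key, modulo partition count
    return (murmur2(key_bytes) & 0x7FFFFFFF) % num_partitions

assert partition_for(b'abc') == 107  # matches the Java reference value above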
Example #2
    def test_murmur2_java_compatibility(self):
        p = Murmur2Partitioner(range(1000))
        # compare with output from Kafka's org.apache.kafka.clients.producer.Partitioner
        self.assertEqual(681, p.partition(b''))
        self.assertEqual(524, p.partition(b'a'))
        self.assertEqual(434, p.partition(b'ab'))
        self.assertEqual(107, p.partition(b'abc'))
        self.assertEqual(566, p.partition(b'123456789'))
        self.assertEqual(742, p.partition(b'\x00 '))
Example #3
    def run(self):
        """Publish video frames as json objects, timestamped, marked with camera number.

        Source:
            self.video_path: URL for streaming video
            self.kwargs["use_cv2"]: use raw cv2 streaming, set to false to use smart fast streaming --> not every frame is sent.
        Publishes:
            A dict {"frame": string(base64encodedarray), "dtype": obj.dtype.str, "shape": obj.shape,
                    "timestamp": time.time(), "camera": camera, "frame_num": frame_num}
        """

        if self.rr_distribute:
            partitioner = RoundRobinPartitioner(partitions=[
                TopicPartition(topic=self.frame_topic, partition=i)
                for i in range(self.topic_partitions)
            ])
        else:
            partitioner = Murmur2Partitioner(partitions=[
                TopicPartition(topic=self.frame_topic, partition=i)
                for i in range(self.topic_partitions)
            ])

        # Producer object, set desired partitioner
        frame_producer = KafkaProducer(bootstrap_servers=["kafka1-kafka-brokers:9092"],
                                       key_serializer=lambda key: str(key).encode(),
                                       value_serializer=lambda value: json.dumps(value).encode(),
                                       partitioner=partitioner)

        print("[CAM {}] URL: {}, SET PARTITIONS FOR FRAME TOPIC: {}".format(self.camera_num,
                                                                            self.video_path,
                                                                            frame_producer.partitions_for(
                                                                                self.frame_topic)))
        # Use either option
        video = cv2.VideoCapture(self.video_path) if self.use_cv2 else FileVideoStream(self.video_path).start()
        #video.set(cv2.CAP_PROP_FPS,30)

        # Track frame number
        frame_num = 0
        start_time = time.time()
        print("[CAM {}] START TIME {}: ".format(self.camera_num, start_time))

        # Read URL, Transform, Publish
        while True:

            # using raw cv2, frame by frame
            if self.use_cv2:
                success, image = video.read()
                # check if the frame was read
                if not success:
                    if self.verbose:
                        print("[CAM {}] URL: {}, END FRAME: {}".format(self.name,
                                                                       self.video_path,
                                                                       frame_num))
                    break

            # using smart streaming: only unique frames are read (frames are skipped), higher fps
            else:
                image = video.read()
                # check if the frame was read
                if image is None:
                    if self.verbose:
                        print("[CAM {}] URL: {}, END FRAME: {}".format(self.name,
                                                                       self.video_path,
                                                                       frame_num))
                    break

            # Attach metadata to frame, transform into JSON
            message = self.transform(frame=image,
                                     frame_num=frame_num,
                                     object_key=self.object_key,
                                     camera=self.camera_num,
                                     verbose=self.verbose)

            # Partition to be sent to
            part = frame_num % self.topic_partitions
            # Logging
            if self.verbose:
                print("\r[PRODUCER][Cam {}] FRAME: {} TO PARTITION: {}".format(message["camera"],
                                                                               frame_num, part))
            # Publish to specific partition
            frame_producer.send(self.frame_topic, key="{}-{}".format(self.camera_num, frame_num), value=message)

            # if frame_num % 1000 == 0:
            frame_producer.flush()

            frame_num += 1

        # clear the capture
        if self.use_cv2:
            video.release()
        else:
            video.stop()

        if self.verbose:
            print("[CAM {}] FINISHED. STREAM TIME {}: ".format(self.camera_num, time.time() - start_time))

        return frame_num > 0
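The docstring above fixes the wire format: a dict with a base64-encoded frame buffer plus its dtype and shape. A consumer can invert that encoding; the following is a hedged sketch under the assumption that the transform() helper (not shown in these examples) base64-encodes the raw ndarray bytes:

import base64
import json

import numpy as np

def json_to_frame(payload):
    """Rebuild the ndarray from one published message value (a sketch)."""
    obj = json.loads(payload) if isinstance(payload, (str, bytes)) else payload
    buf = base64.b64decode(obj["frame"])  # undo the base64 step
    arr = np.frombuffer(buf, dtype=np.dtype(obj["dtype"]))
    return arr.reshape(obj["shape"])  # restore the original dimensions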
Example #4
def test_hash_encoding():
    p = Murmur2Partitioner(range(1000))
    assert p.partition('test') == p.partition(u'test')
Example #5
def test_hash_bytes():
    p = Murmur2Partitioner(range(1000))
    assert p.partition(bytearray(b'test')) == p.partition(b'test')
Example #6
    def run(self):
        """Publish video frames as json objects, timestamped, marked with camera number.
        Source:
            self.video_path: URL for streaming video
            self.kwargs["use_cv2"]: use raw cv2 streaming, set to false to use smart fast streaming --> not every frame is sent.
        Publishes:
            A dict {"frame": string(base64encodedarray), "dtype": obj.dtype.str, "shape": obj.shape,
                    "timestamp": time.time(), "camera": camera, "frame_num": frame_num}
        """
        if self.rr_distribute:
            partitioner = RoundRobinPartitioner(partitions=[
                TopicPartition(topic=self.frame_topic, partition=i)
                for i in range(self.topic_partitions)
            ])
        else:
            partitioner = Murmur2Partitioner(partitions=[
                TopicPartition(topic=self.frame_topic, partition=i)
                for i in range(self.topic_partitions)
            ])

        # Producer object, set desired partitioner
        frame_producer = KafkaProducer(
            bootstrap_servers=[params.KAFKA_BROKER],
            key_serializer=lambda key: str(key).encode(),
            value_serializer=lambda value: json.dumps(value).encode(),
            partitioner=partitioner,
            max_request_size=134217728)  # 128 MiB, to allow large frame payloads

        print("[CAM {}] URL: {}, SET PARTITIONS FOR FRAME TOPIC: {}".format(
            self.camera_num, self.video_path,
            frame_producer.partitions_for(self.frame_topic)))
        # Use either option
        if self.use_cv2:
            # video = cv2.VideoCapture(self.video_path)
            # Here we use sampler to read all videos from a folder
            self.sampler.add_video(self.video_path)
        else:
            video = VideoStream(self.video_path).start()

        # Track frame number
        frame_num = 0
        start_time = time.time()
        print("[CAM {}] START TIME {}: ".format(self.camera_num, start_time))

        while True:
            if self.use_cv2:
                success, image, self.location = self.sampler.read()
                if not success:
                    if self.verbose:
                        print("[CAM {}] URL: {}, END FRAME: {}".format(
                            self.name, self.video_path, frame_num))
                    break
            else:
                image = video.read()
                if image is None:
                    if self.verbose:
                        print("[CAM {}] URL: {}, END FRAME: {}".format(
                            self.name, self.video_path, frame_num))
                    break
            # Attach metadata to frame, transform into JSON
            message = self.transform(frame=image,
                                     frame_num=frame_num,
                                     location=self.location,
                                     object_key=self.object_key,
                                     camera=self.camera_num,
                                     verbose=self.verbose)
            self.sizecnt += 1
            if time.time() - self.timer > self.report_range:
                acc = self.sizecnt
                #if self.verbose:
                print("[Cam {}]Minute {} send out size {}".format(
                    self.camera_num,
                    int(self.timer - self.zerotime) // self.report_range, acc))
                self.sizecnt = 0
                self.timer = time.time()

            # Callback function
            def on_send_success(record_metadata):
                print(record_metadata.topic)
                print(record_metadata.partition)
                print(record_metadata.offset)

            def on_send_error(excp):
                print(excp)
                # log.error('I am an errback', exc_info=excp)

            #  Partition to be sent to
            part = frame_num % self.topic_partitions
            # Logging
            # Publish to specific partition
            if self.verbose:
                print("\r[PRODUCER][Cam {}] FRAME: {} TO PARTITION: {}".format(
                    message["camera"], frame_num, part))
                frame_producer.send(
                    self.frame_topic,
                    key="{}_{}".format(self.camera_num, frame_num),
                    value=message).add_callback(on_send_success).add_errback(
                        on_send_error)
            else:
                frame_producer.send(self.frame_topic,
                                    key="{}_{}".format(self.camera_num,
                                                       frame_num),
                                    value=message)

            # if frame_num % 1000 == 0:
            frame_producer.flush()

            frame_num += 1

        if self.use_cv2:
            self.sampler.release()
        else:
            video.stop()

        if self.verbose:
            print("[CAM {}] FINISHED. STREAM TIME {}: ".format(
                self.camera_num,
                time.time() - start_time))

        return frame_num > 0
Example #7
    def run(self):
        """Consume raw frames, detects faces, finds their encoding [PRE PROCESS],
           predictions Published to processed_frame_topic fro face matching."""

        # Connect to kafka, Consume frame obj bytes deserialize to json
        partition_assignment_strategy = [
            RoundRobinPartitionAssignor
        ] if self.rr_distribute else [
            RangePartitionAssignor, RoundRobinPartitionAssignor
        ]

        frame_consumer = KafkaConsumer(
            group_id="consume",
            client_id=self.iam,
            bootstrap_servers=["kafka1-kafka-brokers:9092"],
            key_deserializer=lambda key: key.decode(),
            value_deserializer=lambda value: json.loads(value.decode()),
            partition_assignment_strategy=partition_assignment_strategy,
            auto_offset_reset="earliest")

        frame_consumer.subscribe([self.frame_topic])

        # partitioner for processed frame topic
        if self.rr_distribute:
            partitioner = RoundRobinPartitioner(partitions=[
                TopicPartition(topic=self.frame_topic, partition=i)
                for i in range(self.topic_partitions)
            ])
        else:
            partitioner = Murmur2Partitioner(partitions=[
                TopicPartition(topic=self.frame_topic, partition=i)
                for i in range(self.topic_partitions)
            ])
        #  Produces prediction object
        processed_frame_producer = KafkaProducer(
            bootstrap_servers=["kafka1-kafka-brokers:9092"],
            key_serializer=lambda key: str(key).encode(),
            value_serializer=lambda value: json.dumps(value).encode(),
            partitioner=partitioner)

        try:
            while True:

                if self.verbose:
                    print(
                        "[ConsumeFrames {}] WAITING FOR NEXT FRAMES..".format(
                            socket.gethostname()))

                raw_frame_messages = frame_consumer.poll(timeout_ms=10,
                                                         max_records=2000)

                for topic_partition, msgs in raw_frame_messages.items():

                    # Get the predicted Object, JSON with frame and meta info about the frame
                    for msg in msgs:
                        # get pre processing result
                        result = self.get_processed_frame_object(
                            msg.value, self.scale)

                        tp = TopicPartition(msg.topic, msg.partition)
                        offsets = {tp: OffsetAndMetadata(msg.offset, None)}
                        frame_consumer.commit(offsets=offsets)

                        # Partition to be sent to
                        processed_frame_producer.send(
                            self.processed_frame_topic,
                            key="{}-{}".format(result["camera"],
                                               result["frame_num"]),
                            value=result)

                    processed_frame_producer.flush()

        except KeyboardInterrupt as e:
            print(e)

        finally:
            print("Closing Stream")
            frame_consumer.close()
Example #8
    def run(self):
        # Connect to kafka, Consume frame obj bytes deserialize to json
        partition_assignment_strategy = [
            RoundRobinPartitionAssignor
        ] if self.rr_distribute else [
            RangePartitionAssignor, RoundRobinPartitionAssignor
        ]

        porter_consumer = KafkaConsumer(
            group_id=self.group_id,
            client_id=self.iam,
            bootstrap_servers=[params.KAFKA_BROKER],
            key_deserializer=lambda key: key.decode(),
            value_deserializer=lambda value: json.loads(value.decode()),
            partition_assignment_strategy=partition_assignment_strategy,
            auto_offset_reset="earliest")

        porter_consumer.subscribe([self.frame_topic])
        # partitioner for processed frame topic
        if self.rr_distribute:
            partitioner = RoundRobinPartitioner(partitions=[
                TopicPartition(topic=self.frame_topic, partition=i)
                for i in range(self.topic_partitions)
            ])
        else:
            partitioner = Murmur2Partitioner(partitions=[
                TopicPartition(topic=self.frame_topic, partition=i)
                for i in range(self.topic_partitions)
            ])
        #  Produces prediction object
        url_producer = KafkaProducer(
            bootstrap_servers=[params.KAFKA_BROKER],
            key_serializer=lambda key: str(key).encode(),
            value_serializer=lambda value: json.dumps(value).encode(),
            partitioner=partitioner)

        try:
            while True:
                # if self.verbose:
                # print("[ConsumeFrames {}] WAITING FOR NEXT FRAMES..".format(socket.gethostname()))
                raw_frame_messages = porter_consumer.poll(timeout_ms=10,
                                                          max_records=10)
                for topic_partition, msgs in raw_frame_messages.items():

                    for msg in msgs:

                        result = self.store_tmp_frame(msg.value)

                        if self.verbose:
                            print(result["frame_num"])
                            print(result['s3_key'])
                        tp = TopicPartition(msg.topic, msg.partition)
                        offsets = {tp: OffsetAndMetadata(msg.offset, None)}
                        porter_consumer.commit(offsets=offsets)

                        # Partition to be sent to
                        url_producer.send(self.url_topic,
                                          key="{}_{}".format(
                                              result["camera"],
                                              result["frame_num"]),
                                          value=result)

                    url_producer.flush()
        except KeyboardInterrupt as e:
            print(e)

        finally:
            print("Closing Stream")
            porter_consumer.close()
Example #9
    def run(self):
        """Consume raw frames, detects faces, finds their encoding [PRE PROCESS],
           predictions Published to processed_frame_topic fro face matching."""

        # Connect to kafka, Consume frame obj bytes deserialize to json
        partition_assignment_strategy = [
            RoundRobinPartitionAssignor
        ] if self.rr_distribute else [
            RangePartitionAssignor, RoundRobinPartitionAssignor
        ]

        meta_consumer = KafkaConsumer(
            group_id=self.group_id,
            client_id=self.iam,
            bootstrap_servers=[params.KAFKA_BROKER],
            key_deserializer=lambda key: key.decode(),
            value_deserializer=lambda value: json.loads(value.decode()),
            partition_assignment_strategy=partition_assignment_strategy,
            auto_offset_reset="earliest")

        meta_consumer.subscribe([self.meta_topic])

        # partitioner for processed frame topic
        if self.rr_distribute:
            partitioner = RoundRobinPartitioner(partitions=[
                TopicPartition(topic=self.value_topic, partition=i)
                for i in range(self.topic_partitions)
            ])

        else:
            partitioner = Murmur2Partitioner(partitions=[
                TopicPartition(topic=self.value_topic, partition=i)
                for i in range(self.topic_partitions)
            ])

        #  Produces prediction object
        value_producer = KafkaProducer(
            bootstrap_servers=[params.KAFKA_BROKER],
            key_serializer=lambda key: str(key).encode(),
            value_serializer=lambda value: json.dumps(value).encode(),
            partitioner=partitioner)
        history_cnt = None

        try:
            while True:

                meta_messages = meta_consumer.poll(timeout_ms=10,
                                                   max_records=10)

                for topic_partition, msgs in meta_messages.items():
                    # Get the predicted Object, JSON with frame and meta info about the frame
                    for msg in msgs:
                        # get pre processing result
                        if self.verbose:
                            print("TEST0 {}".format(msg.value))
                        result = parse_mapper(msg.value)
                        self.transfer_scene_type(result)

                        # Using the buffer to keep time order
                        heappush(self.buffer,
                                 (result['frame_num'], json.dumps(result)))

                        if len(self.buffer) < self.uppersize:
                            tp = TopicPartition(msg.topic, msg.partition)
                            offsets = {tp: OffsetAndMetadata(msg.offset, None)}
                            meta_consumer.commit(offsets=offsets)
                            continue

                        result = json.loads(heappop(self.buffer)[1])
                        if self.verbose:
                            print("TEST1 {}".format(result))
                        # Extract keyframe
                        new_cnt = Counter(result['counts'])
                        if history_cnt is None:
                            result['is_keyframe'] = True
                        else:
                            result['is_keyframe'] = (new_cnt != history_cnt)
                        history_cnt = new_cnt
                        if self.verbose:
                            print("TEST2 {}".format(result))
                        # Scene statistic
                        scenecnt = Counter(result['scenes'])
                        self.counter += scenecnt
                        self.frame_cnt += 1
                        if result['is_keyframe']:
                            self.keyframe_cnt += 1

                        # Need to be refined later
                        result['valuable'] = self.is_valuable(result)

                        # Update some statistics every 10 seconds
                        if time.time() - self.timer > 10:
                            self.update_acc_table(result)
                            print('Extractor {}'.format(result))

                        if self.verbose:
                            print("[Extractor done]")
                            print(result)

                        tp = TopicPartition(msg.topic, msg.partition)
                        offsets = {tp: OffsetAndMetadata(msg.offset, None)}
                        meta_consumer.commit(offsets=offsets)

                        # Partition to be sent to
                        value_producer.send(self.value_topic, value=result)

                    value_producer.flush()

        except KeyboardInterrupt as e:
            print(e)

        finally:
            while self.buffer:
                result = json.loads(heappop(self.buffer)[1])
                if self.verbose:
                    print("TEST1 {}".format(result))
                # Extract keyframe
                new_cnt = Counter(result['counts'])
                if history_cnt is None:
                    result['is_keyframe'] = True
                else:
                    result['is_keyframe'] = (new_cnt != history_cnt)
                history_cnt = new_cnt
                if self.verbose:
                    print("TEST2 {}".format(result))
                # Scene statistic
                scenecnt = Counter(result['scenes'])
                self.counter += scenecnt
                self.frame_cnt += 1
                if result['is_keyframe']:
                    self.keyframe_cnt += 1

                # Need to be refined later
                result['valuable'] = self.is_valuable(result)

                # Update the statistics table once the buffer is fully drained
                if len(self.buffer) == 0:
                    self.update_acc_table(result)
                    print('Extractor {}'.format(result))

                if self.verbose:
                    print("[Extractor done]")
                    print(result)

                # Partition to be sent to
                value_producer.send(self.value_topic, value=result)
            value_producer.flush()
            print("Closing Stream")
            meta_consumer.close()
Example #10
def test_murmur2_java_compatibility(bytes_payload, partition_number):
    p = Murmur2Partitioner(range(1000))
    # compare with output from Kafka's org.apache.kafka.clients.producer.Partitioner
    assert p.partition(bytes_payload) == partition_number
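This variant moves the Java reference values out of the test body; bytes_payload and partition_number arrive as parameters. One plausible driver, shown here as an assumption rather than the project's actual fixture setup, is pytest parametrization with the values asserted in Examples #1 and #2:

import pytest

# Assumes Murmur2Partitioner is imported as in the examples above.
@pytest.mark.parametrize("bytes_payload,partition_number", [
    (b'', 681), (b'a', 524), (b'ab', 434),
    (b'abc', 107), (b'123456789', 566), (b'\x00 ', 742),
])
def test_murmur2_java_compatibility(bytes_payload, partition_number):
    p = Murmur2Partitioner(range(1000))
    # compare with output from Kafka's org.apache.kafka.clients.producer.Partitioner
    assert p.partition(bytes_payload) == partition_number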
Example #11
    def run(self):
        """Consume raw frames, detects faces, finds their encoding [PRE PROCESS],
           predictions Published to processed_frame_topic fro face matching."""

        # Connect to kafka, Consume frame obj bytes deserialize to json
        partition_assignment_strategy = [
            RoundRobinPartitionAssignor
        ] if self.rr_distribute else [
            RangePartitionAssignor, RoundRobinPartitionAssignor
        ]

        url_consumer = KafkaConsumer(
            group_id=self.group_id,
            client_id=self.iam,
            bootstrap_servers=[params.KAFKA_BROKER],
            key_deserializer=lambda key: key.decode(),
            value_deserializer=lambda value: json.loads(value.decode()),
            partition_assignment_strategy=partition_assignment_strategy,
            auto_offset_reset="earliest")

        url_consumer.subscribe([self.url_topic])

        # partitioner for processed frame topic
        if self.rr_distribute:
            partitioner = RoundRobinPartitioner(partitions=[
                TopicPartition(topic=self.obj_topic, partition=i)
                for i in range(self.topic_partitions)
            ])

        else:
            partitioner = Murmur2Partitioner(partitions=[
                TopicPartition(topic=self.obj_topic, partition=i)
                for i in range(self.topic_partitions)
            ])

        #  Produces prediction object
        obj_producer = KafkaProducer(
            bootstrap_servers=[params.KAFKA_BROKER],
            key_serializer=lambda key: str(key).encode(),
            value_serializer=lambda value: json.dumps(value).encode(),
            partitioner=partitioner)
        try:
            while True:

                raw_frame_messages = url_consumer.poll(timeout_ms=10,
                                                       max_records=10)

                for topic_partition, msgs in raw_frame_messages.items():

                    # Get the predicted Object, JSON with frame and meta info about the frame
                    for msg in msgs:
                        # get pre processing result
                        if self.cnt < 0:
                            self.timer = time.time()
                            self.cnt = 0
                        result = self.get_processed_frame_object(msg.value)

                        # Calculate latency:
                        self.cnt += 1

                        if self.cnt == self.latency_period:
                            latency = (time.time() - self.timer) / float(
                                self.latency_period)
                            self.timer = time.time()
                            self.cnt = 0
                            print("[Detection] Latency {}".format(latency))

                        if self.verbose:
                            print(result)

                        tp = TopicPartition(msg.topic, msg.partition)
                        offsets = {tp: OffsetAndMetadata(msg.offset, None)}
                        url_consumer.commit(offsets=offsets)

                        # Partition to be sent to
                        send_topic = self.obj_topic + '_' + str(
                            result['camera'])
                        obj_producer.send(send_topic, value=result)

                    obj_producer.flush()

        except KeyboardInterrupt as e:
            print(e)

        finally:
            print("Closing Stream")
            url_consumer.close()
Example #12
    def test_hash_bytes(self):
        p = Murmur2Partitioner(range(1000))
        self.assertEqual(p.partition(bytearray(b'test')), p.partition(b'test'))
Example #13
    def test_hash_encoding(self):
        p = Murmur2Partitioner(range(1000))
        self.assertEqual(p.partition('test'), p.partition(u'test'))
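Examples #4, #5, #12 and #13 all assert the same invariant: keys are hashed by byte content, so str, bytes and bytearray keys with identical bytes land on the same partition. A sketch of the normalization that invariant implies; this is an assumption about the partitioner's behavior, not a quote of kafka-python's source:

def normalize_key(key):
    # Text keys are encoded, bytearray keys copied to bytes, so that
    # murmur2 always sees the same byte sequence for equivalent keys.
    if isinstance(key, str):
        return key.encode('utf-8')
    return bytes(key)

assert normalize_key('test') == normalize_key(bytearray(b'test')) == b'test'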