示例#1
0
    def test_fasterrcnn_preprocessor(self):
        """Send every file of the test archive through the preprocessor service.

        The service is configured from ``self.fasterrcnn_yaml``. For each
        returned document the test requires at least one chunk, and every
        chunk blob must decode to a 3-d array of shape ``(224, 224, 3)``.
        """
        args = set_preprocessor_parser().parse_args(
            ['--yaml_path', self.fasterrcnn_yaml])
        # The client ports mirror the service ports: the client's input is the
        # service's output and vice versa.
        c_args = _set_client_parser().parse_args(
            ['--port_in',
             str(args.port_out), '--port_out',
             str(args.port_in)])
        # Read the archive inside a context manager so the file handle is
        # released even if reading a member fails.
        with zipfile.ZipFile(self.data_path) as all_zips:
            all_bytes = [all_zips.read(v) for v in all_zips.namelist()]

        with ServiceManager(PreprocessorService,
                            args), ZmqClient(c_args) as client:
            for req in RequestGenerator.index(all_bytes):
                msg = gnes_pb2.Message()
                msg.request.index.CopyFrom(req.index)
                client.send_message(msg)
                r = client.recv_message()
                for d in r.request.index.docs:
                    self.assertGreater(len(d.chunks), 0)
                    for chunk in d.chunks:
                        # Decode once per chunk instead of once per assertion.
                        arr = blob2array(chunk.blob)
                        self.assertEqual(len(arr.shape), 3)
                        self.assertEqual(arr.shape[-1], 3)
                        self.assertEqual(arr.shape[0], 224)
                        self.assertEqual(arr.shape[1], 224)
                        # Report the dtype of the chunk being checked (the
                        # previous code always printed the first chunk's).
                        print(arr.dtype)
示例#2
0
    def test_singleton_preprocessor_service_realdata(self):
        """Run the archived test images through the singleton preprocessor service.

        The service is configured from ``self.singleton_img_pre_yaml``. Each
        reply must be routed through
        ``PreprocessorService:BaseSingletonPreprocessor`` and each returned
        document must carry exactly one chunk whose blob decodes to a
        ``(224, 224, 3)`` array.
        """
        args = set_preprocessor_service_parser().parse_args(
            ['--yaml_path', self.singleton_img_pre_yaml])
        # The client ports mirror the service ports.
        c_args = _set_client_parser().parse_args(
            ['--port_in',
             str(args.port_out), '--port_out',
             str(args.port_in)])
        # Close the archive deterministically instead of leaking the handle.
        with zipfile.ZipFile(os.path.join(self.dirname,
                                          'imgs/test.zip')) as all_zips:
            all_bytes = [all_zips.read(v) for v in all_zips.namelist()]

        with PreprocessorService(args), ZmqClient(c_args) as client:
            for req in RequestGenerator.index(all_bytes):
                msg = gnes_pb2.Message()
                msg.request.index.CopyFrom(req.index)
                client.send_message(msg)
                r = client.recv_message()
                self.assertEqual(
                    r.envelope.routes[0].service,
                    'PreprocessorService:BaseSingletonPreprocessor')
                for d in r.request.index.docs:
                    self.assertEqual(len(d.chunks), 1)
                    # Decode the single chunk once and reuse the array.
                    arr = blob2array(d.chunks[0].blob)
                    self.assertEqual(len(arr.shape), 3)
                    self.assertEqual(arr.shape[-1], 3)
                    self.assertEqual(arr.shape[0], 224)
                    self.assertEqual(arr.shape[1], 224)
                    print(arr.dtype)
示例#3
0
 def test_get_frames(self):
     """Selecting with ``sframes=3`` leaves 2 frames in the first chunk and 3 in the rest."""
     doc = copy.deepcopy(self.doc)
     FrameSelectPreprocessor(sframes=3).apply(doc)
     for position, chunk in enumerate(doc.chunks):
         # Only the first chunk is expected to hold fewer frames.
         expected_frames = 2 if position == 0 else 3
         frame_count = blob2array(chunk.blob).shape[0]
         self.assertEqual(frame_count, expected_frames)
示例#4
0
    def test_benchmark5(self):
        """Time a send/recv round trip with ``squeeze_pb=True`` and verify the payload.

        Every message built by ``self.build_msgs2()`` is sent through ``c1``
        and received on ``c2``; the embedding and blob of each received chunk
        must match the corresponding chunk of a deep copy taken before sending.
        """
        all_msgs = self.build_msgs2()
        # NOTE(review): the comparison uses a pristine copy, presumably because
        # sending with squeeze_pb=True alters the message in place - confirm.
        all_msgs_bak = copy.deepcopy(all_msgs)

        with ZmqClient(self.c1_args) as c1, ZmqClient(self.c2_args) as c2:
            with TimeContext('send->recv, squeeze_pb=True'):
                for m, m1 in zip(all_msgs, all_msgs_bak):
                    c1.send_message(m, squeeze_pb=True)
                    r_m = c2.recv_message()

                    for d, r_d in zip(m1.request.index.docs, r_m.request.index.docs):
                        for c, r_c in zip(d.chunks, r_d.chunks):
                            # The results of np.allclose used to be discarded,
                            # so a corrupted round trip could never fail the test.
                            self.assertTrue(
                                np.allclose(blob2array(c.embedding), blob2array(r_c.embedding)))
                            self.assertTrue(
                                np.allclose(blob2array(c.blob), blob2array(r_c.blob)))
示例#5
0
文件: file-sys.py 项目: pawanrana/hub
    def apply(self, docs: 'gnes_pb2.Document') -> None:
        """
        Write the meta info and every chunk of one document to disk.

        Folder structure: ``<data_path>/<doc_id>/.meta`` holds the meta info;
        a chunk whose content is ``raw`` is written to
        ``<data_path>/<doc_id>/<i>.<file_suffix>``, and a chunk whose content
        is ``blob`` is stored with ``np.save`` under
        ``<data_path>/<doc_id>/<i>`` (``np.save`` appends ``.npy``).

        :param docs: the document to dump (a single document, despite the name)
        :raises NotImplementedError: if a chunk's content is neither ``raw``
            nor ``blob``
        """
        dirs = os.path.join(self.data_path, str(docs.doc_id))
        # exist_ok avoids the check-then-create race when several workers
        # dump into the same folder.
        os.makedirs(dirs, exist_ok=True)
        # keep doc meta in .meta file
        with open(os.path.join(dirs, '.meta'), 'wb') as f:
            f.write(docs.meta_info or b'')
        self.logger.info("successfully write meta info for: %s" %
                         str(docs.doc_id))

        self.logger.info("%s has total %d chunks." %
                         (str(docs.doc_id), len(docs.chunks)))
        for i, chunk in enumerate(docs.chunks):
            data_type = chunk.WhichOneof('content')
            if data_type == 'raw':
                with open(os.path.join(dirs, '%d.%s' % (i, self.file_suffix)),
                          'wb') as f:
                    f.write(chunk.raw)
            elif data_type == 'blob':
                np.save(os.path.join(dirs, '%d' % i), blob2array(chunk.blob))
                self.logger.info("successfully write blob %d for: %s" %
                                 (i, str(docs.doc_id)))
            else:
                self.logger.info("data_type is : %s" % str(data_type))
                raise NotImplementedError(
                    'unsupported chunk content type: %s' % str(data_type))
示例#6
0
    def apply(self, doc: 'gnes_pb2.Document') -> None:
        """
        Select frames from every chunk of ``doc`` and resize them.

        For ``sframes == 1`` the middle frame is kept. For a chunk holding
        more than ``sframes`` frames, the frames are either strided (at least
        twice as many frames as requested) or drawn at random without
        replacement and kept in order. Selected frames are resized to
        ``(target_width, target_height)`` and written back into the chunk
        blob. Chunks that match neither case are left untouched.

        :param doc: the document whose chunk blobs are rewritten in place
        """
        super().apply(doc)

        if len(doc.chunks) == 0:
            self.logger.error(
                'bad document: "doc.chunks" is empty!')
            return

        def _resized(img):
            # Round-trip through PIL to rescale a single frame.
            pil_img = Image.fromarray(img.astype('uint8'))
            return np.array(pil_img.resize((self.target_width, self.target_height)))

        for chunk in doc.chunks:
            images = blob2array(chunk.blob)
            total = len(images)

            if total == 0:
                self.logger.warning("this chunk has no frame!")
                continue

            if self.sframes == 1:
                # Keep only the middle frame, with a leading axis of length 1.
                middle = _resized(images[int(total / 2)])
                chunk.blob.CopyFrom(array2blob(np.expand_dims(middle, axis=0)))
                continue

            if not (self.sframes > 0 and total > self.sframes):
                continue

            if total >= 2 * self.sframes:
                picked = images[::math.ceil(total / self.sframes)]
            else:
                keep = np.sort(np.random.choice(total, self.sframes, replace=False))
                picked = images[keep]

            resized = np.array([_resized(img) for img in picked])
            chunk.blob.CopyFrom(array2blob(resized))
示例#7
0
 def apply(self, doc: 'gnes_pb2.Document') -> None:
     """
     Encode decoded frames back into bytes via ``self._encode``.

     When the document has chunks, each chunk blob is encoded into
     ``chunk.raw``. Otherwise, if the populated ``raw_data`` field is an
     ``NdArray``, ``doc.raw_video`` is encoded into ``doc.raw_bytes``.
     Anything else is logged as a bad document.

     :param doc: the document to encode in place
     """
     super().apply(doc)

     if len(doc.chunks) > 0:
         for chunk in doc.chunks:
             chunk.raw = self._encode(blob2array(chunk.blob))
         return

     populated = doc.WhichOneof('raw_data')
     if not populated:
         self.logger.error(
             'bad document: "doc.chunks" and "doc.raw_video" is empty!')
         return

     if type(getattr(doc, populated)) == gnes_pb2.NdArray:
         doc.raw_bytes = self._encode(blob2array(doc.raw_video))
     else:
         self.logger.error('bad document: "doc.raw_video" is empty!')
示例#8
0
def img_process_for_test(dirname):
    """Preprocess the archived test images with two pipelines.

    Every member of ``<dirname>/imgs/test.zip`` becomes one document. The
    documents are deep-copied and run through a unary+resize pipeline and a
    sliding-window+resize pipeline.

    :param dirname: directory that contains ``imgs/test.zip``
    :return: a list with one entry per pipeline; each entry is the list of
        decoded chunk arrays over all documents
    """
    # Close the archive deterministically instead of leaking the handle.
    with zipfile.ZipFile(os.path.join(dirname, 'imgs/test.zip')) as archive:
        all_bytes = [archive.read(name) for name in archive.namelist()]

    test_img = []
    for raw_bytes in all_bytes:
        d = gnes_pb2.Document()
        d.raw_bytes = raw_bytes
        test_img.append(d)

    test_img_all_preprocessor = []
    pipeline_prep1 = PipelinePreprocessor()
    pipeline_prep1.components = lambda: [
        UnaryPreprocessor(doc_type=gnes_pb2.Document.IMAGE),
        ResizeChunkPreprocessor()
    ]
    pipeline_prep2 = PipelinePreprocessor()
    pipeline_prep2.components = lambda: [
        VanillaSlidingPreprocessor(),
        ResizeChunkPreprocessor()
    ]

    for preprocessor in [pipeline_prep1, pipeline_prep2]:
        # Each pipeline works on its own copy so they do not see each
        # other's chunks.
        test_img_copy = copy.deepcopy(test_img)
        for img in test_img_copy:
            preprocessor.apply(img)
        test_img_all_preprocessor.append([
            blob2array(chunk.blob) for img in test_img_copy
            for chunk in img.chunks
        ])
    return test_img_all_preprocessor
示例#9
0
    def test_video_cut_by_frame(self):
        """All chunks but the last must be ``(30, 168, 192, 3)``; the last holds at most 30 frames."""
        args = set_preprocessor_parser().parse_args(
            ['--yaml_path', self.yml_path_2])
        # The client ports mirror the service ports.
        c_args = _set_client_parser().parse_args(
            ['--port_in', str(args.port_out),
             '--port_out', str(args.port_in)])

        with PreprocessorService(args), ZmqClient(c_args) as client:
            for req in RequestGenerator.index(self.video_bytes):
                msg = gnes_pb2.Message()
                msg.request.index.CopyFrom(req.index)
                client.send_message(msg)
                reply = client.recv_message()
                for d in reply.request.index.docs:
                    chunk_count = len(d.chunks)
                    self.assertGreater(chunk_count, 0)
                    last_pos = chunk_count - 1
                    for pos, chunk in enumerate(d.chunks):
                        shape = blob2array(chunk.blob).shape
                        if pos < last_pos:
                            self.assertEqual(shape, (30, 168, 192, 3))
                        else:
                            # The trailing chunk may be shorter than a full cut.
                            self.assertLessEqual(shape[0], 30)
示例#10
0
    def apply(self, doc: 'gnes_pb2.Document') -> None:
        """
        Split the raw video of ``doc`` into shots and store one chunk per shot.

        Frames come from ``video.capture_frames`` when ``doc.raw_bytes`` is
        set, or from ``doc.raw_video`` when the populated raw field is an
        ``NdArray`` (truncated to ``vframes`` frames if ``vframes > 0``).
        Each shot returned by ``self.detect_shots`` becomes a chunk weighted
        by its share of the frames. A shot with more than ``sframes`` frames
        is sub-sampled by striding or by an ordered random draw.

        :param doc: the document to populate with chunks in place
        """
        super().apply(doc)

        video_frames = []
        populated = doc.WhichOneof('raw_data')

        if not populated:
            self.logger.error('bad document: "raw_data" is empty!')
        else:
            raw_type = type(getattr(doc, populated))
            if doc.raw_bytes:
                video_frames = video.capture_frames(
                    input_data=doc.raw_bytes,
                    scale=self.frame_size,
                    fps=self.frame_rate,
                    vframes=self.vframes)
            elif raw_type == gnes_pb2.NdArray:
                video_frames = blob2array(doc.raw_video)
                if self.vframes > 0:
                    video_frames = video_frames[0:self.vframes, :].copy()

            num_frames = len(video_frames)
            if num_frames > 0:
                for ci, frames in enumerate(self.detect_shots(video_frames)):
                    c = doc.chunks.add()
                    c.doc_id = doc.doc_id
                    c.offset = ci
                    shot_len = len(frames)
                    c.weight = shot_len / num_frames

                    wants_subsample = self.sframes > 0 and shot_len > self.sframes
                    if wants_subsample and shot_len >= 2 * self.sframes:
                        frames = frames[::math.ceil(shot_len / self.sframes)]
                    elif wants_subsample:
                        # Random draw without replacement, kept in order.
                        keep = np.sort(
                            np.random.choice(shot_len,
                                             self.sframes,
                                             replace=False))
                        frames = [frames[k] for k in keep]

                    c.blob.CopyFrom(array2blob(np.array(frames)))
            else:
                self.logger.error(
                    'bad document: "raw_bytes" or "raw_video" is empty!')

        if self.drop_raw_data:
            self.logger.info("document raw data will be cleaned!")
            doc.ClearField('raw_data')
示例#11
0
def img_process_for_test(dirname):
    """Preprocess the archived test images with two preprocessors.

    Every member of ``<dirname>/imgs/test.zip`` becomes one document. The
    documents are deep-copied and run through ``BaseSingletonPreprocessor``
    (image doc type) and ``VanillaSlidingPreprocessor``.

    :param dirname: directory that contains ``imgs/test.zip``
    :return: a list with one entry per preprocessor; each entry is the list
        of decoded chunk arrays over all documents
    """
    # Close the archive deterministically instead of leaking the handle.
    with zipfile.ZipFile(os.path.join(dirname, 'imgs/test.zip'), "r") as archive:
        all_bytes = [archive.read(name) for name in archive.namelist()]

    test_img = []
    for raw_bytes in all_bytes:
        d = gnes_pb2.Document()
        d.raw_bytes = raw_bytes
        test_img.append(d)

    test_img_all_preprocessor = []
    for preprocessor in [BaseSingletonPreprocessor(doc_type=gnes_pb2.Document.IMAGE),
                         VanillaSlidingPreprocessor()]:
        # Each preprocessor works on its own copy of the documents.
        test_img_copy = copy.deepcopy(test_img)
        for img in test_img_copy:
            preprocessor.apply(img)
        test_img_all_preprocessor.append([blob2array(chunk.blob)
                                          for img in test_img_copy for chunk in img.chunks])
    return test_img_all_preprocessor
示例#12
0
    def test_video_decode_preprocessor(self):
        """Decode every file under ``self.video_path`` through the preprocessor service.

        Each returned document must have at least one chunk, and every chunk
        blob must decode to an array whose trailing dimensions are
        ``(299, 299, 3)``.
        """
        args = set_preprocessor_parser().parse_args(['--yaml_path', self.yml_path])
        # The client ports mirror the service ports.
        c_args = _set_client_parser().parse_args([
            '--port_in', str(args.port_out),
            '--port_out', str(args.port_in)])
        # Read each file inside a context manager so no handle is leaked.
        video_bytes = []
        for name in os.listdir(self.video_path):
            with open(os.path.join(self.video_path, name), 'rb') as f:
                video_bytes.append(f.read())

        with ServiceManager(PreprocessorService, args), ZmqClient(c_args) as client:
            for req in RequestGenerator.index(video_bytes):
                msg = gnes_pb2.Message()
                msg.request.index.CopyFrom(req.index)
                client.send_message(msg)
                r = client.recv_message()
                for d in r.request.index.docs:
                    self.assertGreater(len(d.chunks), 0)
                    for chunk in d.chunks:
                        shape = blob2array(chunk.blob).shape
                        self.assertEqual(shape[1:], (299, 299, 3))
示例#13
0
 def apply(self, doc: 'gnes_pb2.Document') -> None:
     """
     Reduce the frames stored in every chunk of ``doc``.

     For ``sframes == 1`` only the middle frame is kept. For a chunk holding
     more than ``sframes`` frames, the frames are strided when there are at
     least twice as many as requested, otherwise drawn at random without
     replacement and kept in order. Other chunks are left untouched.

     :param doc: the document whose chunk blobs are rewritten in place
     """
     super().apply(doc)

     if len(doc.chunks) == 0:
         self.logger.error('bad document: "doc.chunks" is empty!')
         return

     for chunk in doc.chunks:
         images = blob2array(chunk.blob)
         total = len(images)

         if total == 0:
             self.logger.warning("this chunk has no frame!")
             continue

         if self.sframes == 1:
             # A one-element index list keeps the leading frame axis.
             selected = images[[int(total / 2)]]
         elif self.sframes > 0 and total > self.sframes:
             if total >= 2 * self.sframes:
                 selected = images[::math.ceil(total / self.sframes)]
             else:
                 keep = np.sort(
                     np.random.choice(total, self.sframes, replace=False))
                 selected = images[keep]
         else:
             # Nothing to drop: the chunk already fits the budget.
             continue

         chunk.blob.CopyFrom(array2blob(selected))
示例#14
0
    def apply(self, doc: 'gnes_pb2.Document') -> None:
        """
        Split the raw video of ``doc`` into shots and store one chunk per shot.

        Frames come from ``video.capture_frames`` when ``doc.raw_bytes`` is
        set, or from ``doc.raw_video`` when the populated raw field is an
        ``NdArray``. In the latter case, if ``frame_num > 0``, the frames are
        strided with step ``len(frames) // frame_num`` (at least 1). Each shot
        returned by ``self.detect_shots`` becomes a chunk carrying the shot
        frames, its index as ``offset`` and its share of the frames as
        ``weight``.

        :param doc: the document to populate with chunks in place
        """
        super().apply(doc)

        video_frames = []

        if doc.WhichOneof('raw_data'):
            raw_type = type(getattr(doc, doc.WhichOneof('raw_data')))
            if doc.raw_bytes:
                video_frames = video.capture_frames(input_data=doc.raw_bytes,
                                                    scale=self.frame_size,
                                                    fps=self.frame_rate,
                                                    vframes=self.frame_num)
            elif raw_type == gnes_pb2.NdArray:
                video_frames = blob2array(doc.raw_video)
                if self.frame_num > 0:
                    # A slice step must be an int: true division produced a
                    # float and raised TypeError. Clamp to 1 so a video shorter
                    # than frame_num does not yield a zero step.
                    stepwise = max(1, len(video_frames) // self.frame_num)
                    video_frames = video_frames[0::stepwise, :]

            num_frames = len(video_frames)
            if num_frames > 0:
                shots = self.detect_shots(video_frames)
                for ci, frames in enumerate(shots):
                    c = doc.chunks.add()
                    c.doc_id = doc.doc_id
                    chunk_data = np.array(frames)
                    c.blob.CopyFrom(array2blob(chunk_data))
                    c.offset = ci
                    c.weight = len(frames) / num_frames
            else:
                self.logger.error(
                    'bad document: "raw_bytes" or "raw_video" is empty!')
        else:
            self.logger.error('bad document: "raw_data" is empty!')

        if self.drop_raw_data:
            self.logger.info("document raw data will be cleaned!")
            doc.ClearField('raw_data')
示例#15
0
    def add(self, keys: List[int], docs: List['gnes_pb2.Document'], *args,
            **kwargs):
        """
        Write the meta info and chunks of each document under ``data_path``.

        For every ``(key, doc)`` pair a folder ``<data_path>/<key>`` is
        created and the meta info is stored in ``.meta``. With an image format
        (``png``, ``webp``, ``jpeg``) each chunk blob is decoded and every
        frame is saved as ``<key>/<i>/<j>.<file_format>``. With ``mp4`` or
        ``gif`` the chunk's raw bytes are written to ``<key>/<i>.<file_format>``.

        :param keys: folder names, paired with ``docs`` by position
        :param docs: the documents to store
        :raises NotImplementedError: if ``file_format`` is not supported
            (raised when the first chunk is reached)
        """
        for k, d in zip(keys, docs):
            dirs = os.path.join(self.data_path, str(k))
            # exist_ok avoids the check-then-create race between writers.
            os.makedirs(dirs, exist_ok=True)
            # keep doc meta in .meta file
            with open(os.path.join(dirs, '.meta'), 'wb') as f:
                f.write(d.meta_info or b'')

            for i, chunk in enumerate(d.chunks):
                if self.file_format in ['png', 'webp', 'jpeg']:
                    chunk_dir = os.path.join(dirs, '%d' % i)
                    os.makedirs(chunk_dir, exist_ok=True)
                    frames = blob2array(chunk.blob)
                    for j, x in enumerate(frames):
                        frame_path = os.path.join(
                            chunk_dir, '%d.%s' % (j, self.file_format))
                        img = Image.fromarray(x, 'RGB')
                        img.save(frame_path,
                                 self.file_format,
                                 quality=self.quality)
                elif self.file_format in ['mp4', 'gif']:
                    with open(
                            os.path.join(dirs,
                                         '%d.%s' % (i, self.file_format)),
                            'wb') as f:
                        f.write(chunk.raw)
                else:
                    self.logger.error(
                        "the file format %s has not been supported!" %
                        self.file_format)
                    raise NotImplementedError(
                        'unsupported file format: %s' % self.file_format)
示例#16
0
 def test_get_one_frame(self):
     """Selecting with ``sframes=1`` leaves exactly one frame in every chunk."""
     doc = copy.deepcopy(self.doc)
     FrameSelectPreprocessor(sframes=1).apply(doc)
     for chunk in doc.chunks:
         frame_count = blob2array(chunk.blob).shape[0]
         self.assertEqual(frame_count, 1)
示例#17
0
 def test_array_proto(self):
     """A random 5x4 array must survive an array2blob/blob2array round trip."""
     original = np.random.random([5, 4])
     restored = blob2array(array2blob(original))
     assert_array_equal(original, restored)