Example #1
    def __init__(self, args):
        logger.debug('Create backend {}'.format(args['name']))

        self.name = args['name']

        self.input_tensor_queue = queue.Queue(maxsize=MAX_SESSION_SLOT_NUM)
        self.batched_tensor_queue = queue.Queue(maxsize=MAX_BATCHED_TENSOR_NUM)
        self.output_tensor_queue = queue.Queue(maxsize=MAX_SESSION_SLOT_NUM)

        self.model_path = args['path']
        self.model_type = args.get('model_type')
        self.timeout = args.get('timeout')

        # Resolve optional settings to their defaults up front, so that
        # duplicate_num is valid when the shared-memory pool below is sized.
        self.dynamic_batch = args.get('dynamic_batch')
        self.dynamic_batch = True if self.dynamic_batch is None else self.dynamic_batch
        self.duplicate_num = args.get('duplicate_num')
        self.duplicate_num = 1 if self.duplicate_num is None else self.duplicate_num
        self.max_batch_size = args.get('max_batch_size')
        self.max_batch_size = 32 if self.max_batch_size is None else self.max_batch_size
        self.use_mps = args.get('use_mps')
        self.use_mps = False if self.use_mps is None else self.use_mps

        self.metric_q = args.get('metric_queue')
        self.tags = {'model': self.name}

        self.threads = {}  # all worker threads
        self.io_queues = []  # IO queues of the request handlers
        self.io_queue_lock = threading.Lock()  # guards request handler creation

        # input shared memory
        self.input_shm_name_set = []  # shared memory names for concat and inference
        self.input_shm_set = []  # shared memory handles for concat and inference
        self.input_shm_queue = mp.Queue(maxsize=3 * self.duplicate_num)

        self.input_info = args.get('input_info')
        # create the pool of input shared memory slots (3 per duplicated model process)
        for idx in range(3 * self.duplicate_num):
            input_shm_name = []
            input_shm = []
            for info in self.input_info:
                shm_name = gen_name(info['name'], suffix=idx)
                sh = ShmHandler(shm_name, info['max_shape'], info['dtype'])
                sh.create_shm()
                input_shm_name.append(shm_name)
                input_shm.append(sh)

            self.input_shm_name_set.append(input_shm_name)
            self.input_shm_set.append(input_shm)
            self.input_shm_queue.put(idx)

        # output shared memory info
        self.output_info = args.get('output_info')

        self.adapt = False
        if self.timeout is None:
            # no explicit timeout: start small and adapt it at runtime
            self.timeout = 0.01
            self.adapt = True
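
All three examples lean on a ShmHandler wrapper and a gen_name helper whose definitions are not shown here. The following is only a minimal sketch of what they plausibly look like, inferred from the call sites and assuming Python 3.8+'s multiprocessing.shared_memory; the real implementation may differ:

from multiprocessing import shared_memory

import numpy as np


def gen_name(base, suffix=None):
    # Hypothetical: derive a shared memory segment name from a tensor name.
    return base if suffix is None else '{}_{}'.format(base, suffix)


class ShmHandler:
    """One shared memory segment, sized for a tensor's maximum shape."""

    def __init__(self, name, max_shape, dtype):
        self.name = name
        self.dtype = np.dtype(dtype)
        self.size = int(np.prod(max_shape)) * self.dtype.itemsize
        self.shm = None

    def create_shm(self):
        # Owner side: allocate the segment.
        self.shm = shared_memory.SharedMemory(
            name=self.name, create=True, size=self.size)

    def load_shm(self):
        # Peer side: attach to a segment created by another process.
        self.shm = shared_memory.SharedMemory(name=self.name)

    def ndarray(self, shape):
        # View a prefix of the segment as an array of the actual shape.
        return np.ndarray(shape, dtype=self.dtype, buffer=self.shm.buf)

    def close(self):
        self.shm.close()

Sizing every segment at max_shape and reinterpreting it with the actual batch shape per request is what lets the pool be allocated once up front instead of once per inference.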
Example #2
def _shm_info(tensor_infos):
    """Attach a shared-memory name and byte size to each tensor info."""
    for info in tensor_infos:
        assert info.get('name')
        assert info.get('max_shape')
        assert info.get('dtype')
        info['shm_name'] = gen_name(info['name'])
        # total bytes = product of the max shape dims * bytes per element
        info['shm_size'] = (
            functools.reduce(operator.mul, info['max_shape']) *
            np.dtype(info['dtype']).itemsize)
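
As a quick illustration, a hypothetical descriptor for one int64 tensor of at most 32 x 128 elements is annotated in place:

infos = [{'name': 'input_ids', 'max_shape': [32, 128], 'dtype': 'int64'}]
_shm_info(infos)
# 32 * 128 elements * 8 bytes per int64 element = 32768 bytes
assert infos[0]['shm_size'] == 32768
assert infos[0]['shm_name']  # derived from 'input_ids' by gen_name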
Example #3
def model_process(model_name, model_type, model_path, shm_queue, conn,
                  input_info, output_info, pid, metric_q):
    try:
        # 1. init model
        if model_type == 'mock':
            from SimpleDBI.mock_model import MockModel
            model = MockModel(model_name, model_path)
        elif model_type == 'torch':
            from SimpleDBI.torch_model import TorchModel
            model = TorchModel(model_name, model_path)
        elif model_type == 'tf':
            from SimpleDBI.tf_model import TFModel
            model = TFModel(model_name, model_path, input_info, output_info)
        elif model_type == 'tensorrt':
            from SimpleDBI.tensorrt_model import TensorRTModel
            model = TensorRTModel(model_name, model_path)
        elif model_type == 'onnx2trt':
            from SimpleDBI.onnx2trt_model import TensorRTModel
            model = TensorRTModel(model_name, model_path)
        else:
            logger.error('ERROR MODEL TYPE : {}'.format(model_type))
            raise RuntimeError('ERROR MODEL TYPE : {}'.format(model_type))

        # 2. create shared memory
        # 2.1 create output shared memory
        output_shm_name = []
        output_shm = []
        for info in output_info:
            shm_name = gen_name(info['name'])
            sh = ShmHandler(shm_name, info['max_shape'], info['dtype'])
            sh.create_shm()
            output_shm_name.append(shm_name)
            output_shm.append(sh)

        # 2.2 load input shared memory
        input_shm_name_list = conn.recv()
        input_shm_list = []
        for input_shm_name in input_shm_name_list:
            input_shm = []
            for shm_name, info in zip(input_shm_name, input_info):
                sh = ShmHandler(shm_name, info['max_shape'], info['dtype'])
                sh.load_shm()
                input_shm.append(sh)
            input_shm_list.append(input_shm)

        conn.send(output_shm_name)
    except Exception:
        logger.error('model_process initialize error')
        logger.error(traceback.format_exc())
        return
    logger.info('model_process <{}> initialize done'.format(model_name))

    tags = {'model': '{}_{}'.format(model_name, pid)}
    # 3. inference
    while True:
        value = conn.recv()
        if value == EXIT_SIG:
            break

        shm_idx, input_shapes = value
        inputs = []
        output_shapes = []
        try:
            ts = time()
            # 3.1 load input
            input_shm = input_shm_list[shm_idx]
            for shape, sh in zip(input_shapes, input_shm):
                shm_arr = sh.ndarray(shape)
                inputs.append(shm_arr)

            # 3.2 forward
            outputs = model.forward(*inputs)

            # 3.3 write output
            for output, sh in zip(outputs, output_shm):
                shape = output.shape
                shm_arr = sh.ndarray(shape)
                shm_arr[:] = output[:]
                output_shapes.append(shape)

            if metric_q is not None:
                metric_q.put({
                    "tags": tags,
                    "fields": {
                        'model_proc_cost': time() - ts
                    },
                })

        except Exception:
            logger.error('model_process runtime error')
            logger.error(traceback.format_exc())

        finally:
            conn.send(output_shapes)
            shm_queue.put(shm_idx)  # return the shared memory slot to the available queue

    # 4. clean
    try:
        for input_shm in input_shm_list:
            for sh in input_shm:
                sh.close()

        conn.send(True)
        stat = conn.recv()
        assert stat
        for sh in output_shm:
            sh.close()

        conn.close()
    except Exception:
        logger.error('model_process destructor error')
        logger.error(traceback.format_exc())

    logger.info('Model process exit.')
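
model_process is designed to run in a child process and be driven over a duplex Pipe. Below is a sketch of the parent-side handshake under hypothetical setup: input_info, output_info, input_shm_name_list, input_shapes and EXIT_SIG are assumed to exist as in Examples 1 and 2, and the model name and path are placeholders.

import multiprocessing as mp

parent_conn, child_conn = mp.Pipe()
shm_queue = mp.Queue()
proc = mp.Process(target=model_process,
                  args=('demo', 'mock', '/path/to/model', shm_queue,
                        child_conn, input_info, output_info, 0, None))
proc.start()

parent_conn.send(input_shm_name_list)  # 2.2: names of the input segments
output_shm_name = parent_conn.recv()   # 2.1: names of the output segments

parent_conn.send((0, input_shapes))    # 3: run inference on shm slot 0
output_shapes = parent_conn.recv()     # shapes written to the output shm
free_slot = shm_queue.get()            # slot 0 is available again

parent_conn.send(EXIT_SIG)             # 4: request shutdown
assert parent_conn.recv()              # child closed its input segments
parent_conn.send(True)                 # let the child free its output shm
proc.join()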