def texts2tensor(self, texts):
    """
    Pack the word ids of several texts into a single PaddleTensor.

    Args:
        texts: iterable whose elements are indexed with the key 'processed';
            the value under that key is the sequence of word ids of one text.

    Returns:
        tensor(PaddleTensor): int64 tensor named "words". Its ``lod`` holds the
            cumulative sequence lengths and its shape is [total_ids, 1].
    """
    offsets = [0]
    flat_ids = []
    for sample in texts:
        word_ids = sample['processed']
        flat_ids += word_ids
        # Each offset is the previous boundary plus this text's length.
        offsets.append(offsets[-1] + len(word_ids))

    packed = PaddleTensor(np.array(flat_ids).astype('int64'))
    packed.name = "words"
    packed.lod = [offsets]
    packed.shape = [offsets[-1], 1]
    return packed
def object_detection(self,
                     paths=None,
                     images=None,
                     use_gpu=False,
                     batch_size=1,
                     output_dir='detection_result',
                     score_thresh=0.5,
                     visualization=True):
    """API of Object Detection.

    :param paths: the path of images.
    :type paths: list, each element is correspond to the path of an image.
    :param images: data of images, [N, H, W, C]
    :type images: numpy.ndarray
    :param use_gpu: whether to use gpu or not.
    :type use_gpu: bool
    :param batch_size: batch size.
    :type batch_size: int
    :param output_dir: the directory to store the detection result.
    :type output_dir: str
    :param score_thresh: the threshold of detection confidence.
    :type score_thresh: float
    :param visualization: whether to draw bounding box and save images.
    :type visualization: bool
    :return: the concatenation of what ``self.ssd.postprocess`` returns for
        every batch.
    """
    resize_image = self.ssd.ResizeImage(
        target_size=300, interp=1, max_size=0, use_cv2=False)
    data_reader = partial(
        self.ssd.reader, paths, images, resize_image=resize_image)
    batch_reader = fluid.io.batch(data_reader, batch_size=batch_size)
    paths = paths if paths else []

    res = []
    for iter_id, feed_data in enumerate(batch_reader()):
        np_data = np.array(feed_data).astype('float32')
        # The previous code guarded this with `np_data.shape == 1`, which
        # compares a tuple with an int and is therefore never true; the
        # squeeze below is the path that always ran.
        np_data = np.squeeze(np_data, axis=1)
        data_tensor = PaddleTensor(np_data.copy())
        predictor = self.gpu_predictor if use_gpu else self.cpu_predictor
        data_out = predictor.run([data_tensor])
        output = self.ssd.postprocess(
            paths=paths,
            images=images,
            data_out=data_out,
            score_thresh=score_thresh,
            label_names=self.label_names,
            output_dir=output_dir,
            # Index of the first image of this batch within the whole input.
            handle_id=iter_id * batch_size,
            visualization=visualization)
        res += output
    return res
def reconstruct(self,
                images=None,
                paths=None,
                use_gpu=False,
                visualization=False,
                output_dir="dcscn_output"):
    """
    API for super resolution.

    Args:
        images (list(numpy.ndarray)): images data, shape of each is [H, W, C], the color space is BGR.
        paths (list[str]): The paths of images.
        use_gpu (bool): Whether to use gpu.
        visualization (bool): Whether to save image or not.
        output_dir (str): The path to store output images.

    Returns:
        res (list[dict]): each element in the list is a dict, the keys and values are:
            save_path (str, optional): the path to save images. (Exists only if visualization is True)
            data (numpy.ndarray): data of post processed image.

    Raises:
        RuntimeError: if use_gpu is True and CUDA_VISIBLE_DEVICES is missing,
            empty, or does not start with a digit.
    """
    if use_gpu:
        try:
            _places = os.environ["CUDA_VISIBLE_DEVICES"]
            int(_places[0])
        except (KeyError, IndexError, ValueError) as err:
            raise RuntimeError(
                "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
            ) from err

    all_data = list(reader(images, paths))

    res = []
    for item in all_data:
        # Each input is wrapped in a leading batch dimension of size one.
        image_x = PaddleTensor(np.array([item['img_x']]).copy())
        image_x2 = PaddleTensor(np.array([item['img_x2']]).copy())
        predictor = self.gpu_predictor if use_gpu else self.cpu_predictor
        output = predictor.run([image_x, image_x2])
        output = np.expand_dims(output[0].as_ndarray(), axis=1)
        out = postprocess(
            data_out=output,
            org_im=item['org_im'],
            org_im_shape=item['org_im_shape'],
            org_im_path=item['org_im_path'],
            output_dir=output_dir,
            visualization=visualization)
        res.append(out)
    return res
def texts2tensor(self, texts):
    """
    Transform the texts into a PaddleTensor.

    Args:
        texts(list): each element is a dict that must have a 'processed' key
            whose value is word_ids, such as
            texts = [{'processed': [23, 89, 43, 906]}]

    Returns:
        tensor(PaddleTensor): int64 tensor named "words" holding all word ids
            back to back; ``lod`` records where each text starts and ends.
    """
    boundaries = [0]
    all_ids = []
    for idx, entry in enumerate(texts):
        all_ids += entry['processed']
        boundaries.append(boundaries[idx] + len(entry['processed']))

    ids_array = np.array(all_ids).astype('int64')
    words = PaddleTensor(ids_array)
    words.name = "words"
    words.lod = [boundaries]
    # One column; the row count is the total number of word ids.
    words.shape = [boundaries[-1], 1]
    return words
def video_stream_segment(self,
                         frame_org,
                         frame_id,
                         prev_gray,
                         prev_cfd,
                         use_gpu=False):
    """
    API for human video segmentation.

    Args:
        frame_org (numpy.ndarray): frame data, shape of each is [H, W, C], the color space is BGR.
        frame_id (int): index of the frame to be decoded.
        prev_gray (numpy.ndarray): gray scale image of last frame, shape of each is [H, W]
        prev_cfd (numpy.ndarray): fusion image from optical flow image and segment result, shape of each is [H, W]
        use_gpu (bool): Whether to use gpu.

    Returns:
        img_matting (numpy.ndarray): data of segmentation mask.
        cur_gray (numpy.ndarray): gray scale image of current frame, shape of each is [H, W]
        optflow_map (numpy.ndarray): optical flow image of current frame, shape of each is [H, W]
    """
    resize_h = 192
    resize_w = 192
    # NOTE(review): is_init is never set to False in this function - confirm
    # that postprocess_v is meant to be called in "init" mode for every frame.
    is_init = True
    # ndarray shapes are (rows, cols, ...): axis 0 is the height, axis 1 the width.
    org_height = int(frame_org.shape[0])
    org_width = int(frame_org.shape[1])
    disflow = cv2.DISOpticalFlow_create(cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)

    frame = preprocess_v(frame_org, resize_w, resize_h)
    image = PaddleTensor(np.array([frame.copy()]))
    predictor = self.gpu_predictor if use_gpu else self.cpu_predictor
    output = predictor.run([image])
    score_map = output[1].as_ndarray()

    # Move the channel axis last for the OpenCV calls below.
    frame = np.transpose(frame, axes=[1, 2, 0])
    score_map = np.transpose(np.squeeze(score_map, 0), axes=[1, 2, 0])
    cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    cur_gray = cv2.resize(cur_gray, (resize_w, resize_h))
    score_map = 255 * score_map[:, :, 1]

    if frame_id == 1:
        # First frame: there is no history yet, start from blank buffers.
        prev_gray = np.zeros((resize_h, resize_w), np.uint8)
        prev_cfd = np.zeros((resize_h, resize_w), np.float32)
    optflow_map = postprocess_v(cur_gray, score_map, prev_gray, prev_cfd,
                                disflow, is_init)

    optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
    optflow_map = threshold_mask(optflow_map, thresh_bg=0.2, thresh_fg=0.8)
    # cv2.resize takes dsize as (width, height); the flag must be passed by
    # keyword because the third positional parameter is `dst`.
    img_matting = cv2.resize(
        optflow_map, (org_width, org_height), interpolation=cv2.INTER_LINEAR)
    return [img_matting, cur_gray, optflow_map]
def object_detection(self,
                     paths=None,
                     images=None,
                     batch_size=1,
                     use_gpu=False,
                     output_dir='detection_result',
                     score_thresh=0.5,
                     visualization=True):
    """API of Object Detection.

    Args:
        paths (list[str]): The paths of images.
        images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
        batch_size (int): batch size.
        use_gpu (bool): Whether to use gpu.
        output_dir (str): The path to store output images.
        visualization (bool): Whether to save image or not.
        score_thresh (float): threshold for object detection.

    Returns:
        res (list[dict]): The result of coco2017 detection. keys include 'data', 'save_path', the corresponding value is:
            data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
                left (float): The X coordinate of the upper left corner of the bounding box;
                top (float): The Y coordinate of the upper left corner of the bounding box;
                right (float): The X coordinate of the lower right corner of the bounding box;
                bottom (float): The Y coordinate of the lower right corner of the bounding box;
                label (str): The label of detection result;
                confidence (float): The confidence of detection result.
            save_path (str, optional): The path to save output images.
    """
    paths = paths or list()
    sample_reader = partial(reader, paths, images)
    batched = fluid.io.batch(sample_reader, batch_size=batch_size)

    res = []
    for batch_idx, batch in enumerate(batched()):
        batch = np.array(batch)
        # Column 0 of every sample is the image fed to the predictor.
        stacked = np.array(list(batch[:, 0]))
        image_tensor = PaddleTensor(stacked.copy())
        if use_gpu:
            data_out = self.gpu_predictor.run([image_tensor])
        else:
            data_out = self.cpu_predictor.run([image_tensor])
        first_index = batch_idx * batch_size
        detections = postprocess(
            paths=paths,
            images=images,
            data_out=data_out,
            score_thresh=score_thresh,
            label_names=self.label_names,
            output_dir=output_dir,
            handle_id=first_index,
            visualization=visualization)
        res.extend(detections)
    return res
def fake_input(img):
    """
    Wrap ``img`` into a single-element list of PaddleTensor.

    The tensor is named "image", takes its shape from ``img.shape``, is tagged
    FLOAT32, and its buffer is built from the flattened values of ``img``.
    """
    tensor = PaddleTensor()
    tensor.name = "image"
    tensor.shape = img.shape
    tensor.dtype = PaddleDType.FLOAT32
    flat_values = img.flatten().tolist()
    tensor.data = PaddleBuf(flat_values)
    return [tensor]
def copyToTensor(self, batch_size):
    """
    Build a PaddleTensor from this object's name, dtype and data.

    Args:
        batch_size: first dimension of the tensor shape; the second dimension
            is ``self.shape_size``.

    Returns:
        PaddleTensor: tensor whose dtype is looked up as
            ``self.list[self.dtype]`` and whose buffer wraps ``self.data``.
    """
    result = PaddleTensor()
    result.name = self.name
    dims = [batch_size, self.shape_size]
    result.shape = dims
    paddle_dtype = self.list[self.dtype]
    result.dtype = paddle_dtype
    result.data = PaddleBuf(self.data)
    return result
def texts2tensor(self, texts):
    """
    Convert a list of texts into one PaddleTensor of word ids.

    Every text is mapped to ids with ``word_to_ids`` using this object's
    ``word2id_dict``, ``word_replace_dict`` and ``oov_id``.

    Args:
        texts(list): texts

    Returns:
        tensor(PaddleTensor): int64 tensor named "words" with all ids
            concatenated; ``lod`` stores the cumulative id counts per text.
    """
    cumulative = [0]
    collected = []
    for raw_text in texts:
        ids = word_to_ids(
            raw_text,
            self.word2id_dict,
            self.word_replace_dict,
            oov_id=self.oov_id)
        collected += ids
        cumulative.append(cumulative[-1] + len(ids))

    result = PaddleTensor(np.array(collected).astype('int64'))
    result.name = "words"
    result.lod = [cumulative]
    result.shape = [cumulative[-1], 1]
    return result
def array2tensor(ndarray):
    """ convert numpy array to PaddleTensor

    The tensor is named "data" and takes its shape from ``ndarray.shape``.
    Any dtype whose name contains "float" is tagged FLOAT32, any dtype whose
    name contains "int" is tagged INT64.

    Raises:
        AssertionError: if ``ndarray`` is not a numpy.ndarray.
        ValueError: if the dtype name contains neither "float" nor "int".
    """
    assert isinstance(ndarray, np.ndarray), "input type must be np.ndarray"
    tensor = PaddleTensor()
    tensor.name = "data"
    tensor.shape = ndarray.shape
    dtype_name = str(ndarray.dtype)
    if "float" in dtype_name:
        tensor.dtype = PaddleDType.FLOAT32
    elif "int" in dtype_name:
        tensor.dtype = PaddleDType.INT64
    else:
        # Report the dtype of the rejected input; tensor.dtype has not been
        # assigned on this path and says nothing about what the caller passed.
        raise ValueError(
            "{} type ndarray is unsupported".format(ndarray.dtype))
    tensor.data = PaddleBuf(ndarray.flatten().tolist())
    return tensor
def preprocess(img):
    """
    Resize and normalize ``img`` and wrap it into a one-element tensor list.

    The image is resized to (input_size, input_size), transposed with axes
    (2, 0, 1), and normalized: for modelname "mobilenet-ssd" as
    (img - 127.5) * 0.007843, otherwise by subtracting a per-channel mean.
    ``input_size`` and ``modelname`` are read from the enclosing scope.
    """
    resized = cv2.resize(img, (input_size, input_size))
    chw = resized.transpose((2, 0, 1))
    if modelname == "mobilenet-ssd":
        normalized = (chw - 127.5) * 0.007843
    else:
        channel_mean = np.array([103.94, 116.669, 123.68], np.float32)
        # Shape [3, 1, 1] so the mean broadcasts over the two trailing axes.
        normalized = chw - channel_mean.reshape([3, 1, 1])

    tensor = PaddleTensor()
    tensor.name = "data"
    tensor.shape = [1, 3, input_size, input_size]
    tensor.dtype = PaddleDType.FLOAT32
    flat_values = normalized.flatten().astype("float32").tolist()
    tensor.data = PaddleBuf(flat_values)
    return [tensor]
def warp_input(image_data, input_size):
    """
    Wrap image data into a PaddleTensor.

    :param image_data: the input image; it is flattened and cast to float32
    :param input_size: size of the input image, assigned as the tensor shape
    :return: a FLOAT32 PaddleTensor named 'image'
    """
    tensor = PaddleTensor()
    tensor.name = 'image'
    tensor.shape = input_size
    tensor.dtype = PaddleDType.FLOAT32
    flat_values = image_data.flatten().astype(np.float32).tolist()
    tensor.data = PaddleBuf(flat_values)
    return tensor
def predict_proba(self, text_list, batch_size=32, max_seq_len=300):
    """Run prediction over ``text_list`` and return the model outputs.

    Batches come from ``self.batch``, which yields (batch_ids, tokenize_time)
    pairs. With ``self.zero_copy`` set, each batch goes through the zero-copy
    input/output tensors; otherwise it is wrapped in a PaddleTensor and run
    through ``self.predictor.run``. Accumulated predict and tokenize times are
    logged once at the end.

    Returns:
        numpy.ndarray: the per-batch outputs concatenated along axis 0.
    """
    total_predict = 0
    total_tokenize = 0
    batch_outputs = []
    batches = self.batch(text_list, batch_size, max_seq_len, max_ensure=False)
    for batch_ids, batch_tokenize_time in batches:
        total_tokenize += batch_tokenize_time
        started = time.time()
        if not self.zero_copy:
            feed = [PaddleTensor(np.array(batch_ids))]
            logits = self.predictor.run(feed)[0].as_ndarray()
        else:
            self.input_tensor.copy_from_cpu(np.array(batch_ids))
            self.predictor.zero_copy_run()
            logits = self.output_tensor.copy_to_cpu()
        total_predict += time.time() - started
        batch_outputs.append(logits)

    logging.info("predict time: %.4fs, tokenize_time: %.4fs"\
                % (total_predict, total_tokenize))
    return np.concatenate(batch_outputs, axis=0)
def segmentation(self,
                 images=None,
                 paths=None,
                 data=None,
                 batch_size=1,
                 use_gpu=False,
                 output_dir='ace2p_output',
                 visualization=False):
    """
    API for human parsing.

    Args:
        images (list[numpy.ndarray]): images data, shape of each is [H, W, C], color space is BGR.
        paths (list[str]): The paths of images.
        data (dict): legacy input; when it has an 'image' key, its value is
            appended to ``paths``.
        batch_size (int): batch size.
        use_gpu (bool): Whether to use gpu.
        output_dir (str): The path to store output images.
        visualization (bool): Whether to save output images or not.

    Returns:
        res (list[dict]): The result of human parsing and original path of images.

    Raises:
        RuntimeError: if use_gpu is True and CUDA_VISIBLE_DEVICES is missing,
            empty, or does not start with a digit.
    """
    if use_gpu:
        try:
            _places = os.environ["CUDA_VISIBLE_DEVICES"]
            int(_places[0])
        except (KeyError, IndexError, ValueError) as err:
            raise RuntimeError(
                "Attempt to use GPU for prediction, but environment variable CUDA_VISIBLE_DEVICES was not set correctly."
            ) from err

    # compatibility with older versions
    if data and 'image' in data:
        # Build a new list so the caller's `paths` is not modified in place.
        paths = (list(paths) if paths is not None else []) + list(data['image'])

    # get all data
    scale = (473, 473)  # size of preprocessed image.
    rotation = 0  # rotation angle, used for obtaining affine matrix in preprocess.
    all_data = list(reader(images, paths, scale, rotation))

    total_num = len(all_data)
    loop_num = int(np.ceil(total_num / batch_size))

    res = []
    for iter_id in range(loop_num):
        handle_id = iter_id * batch_size
        # Slicing clips at the end of the list, so the last batch may be short.
        batch_data = all_data[handle_id:handle_id + batch_size]
        # feed batch image
        batch_image = np.array([item['image'] for item in batch_data])
        batch_image = PaddleTensor(batch_image.astype('float32'))
        predictor = self.gpu_predictor if use_gpu else self.cpu_predictor
        data_out = predictor.run([batch_image])
        batch_out = data_out[0].as_ndarray()
        # postprocess one by one
        for i, item in enumerate(batch_data):
            out = postprocess(
                data_out=batch_out[i],
                org_im=item['org_im'],
                org_im_path=item['org_im_path'],
                image_info=item['image_info'],
                output_dir=output_dir,
                visualization=visualization,
                palette=self.palette)
            res.append(out)
    return res
def array2tensor(ndarray):
    """Wrap a numpy array into a PaddleTensor.

    Raises:
        AssertionError: if ``ndarray`` is not a numpy.ndarray.
    """
    assert isinstance(ndarray, np.ndarray), "input type must be np.ndarray"
    return PaddleTensor(data=ndarray)
def test_inference_api(self):
    """Check the PaddleTensor round trip for int32, int64 and float32 arrays.

    For each dtype the test verifies the typed buffer accessor, the reported
    PaddleDType, ``data.tolist``, ``as_ndarray`` and ``data.reset``.
    """
    shape = [20, 2]
    # (dtype name for tolist, expected PaddleDType, buffer accessor, array)
    cases = [
        ('int32', PaddleDType.INT32, 'int32_data',
         np.random.randint(10, 20, size=shape).astype('int32')),
        ('int64', PaddleDType.INT64, 'int64_data',
         np.random.randint(10, 20, size=shape).astype('int64')),
        ('float32', PaddleDType.FLOAT32, 'float_data',
         np.random.randn(*shape).astype('float32')),
    ]
    for dtype_name, paddle_dtype, accessor, expected in cases:
        tensor = PaddleTensor(expected)
        raw = np.array(getattr(tensor.data, accessor)()).reshape(*shape)
        # Compare contents element by element. The earlier
        # `a.all() == b.all()` form only compared two booleans and passed
        # for any pair of all-nonzero arrays.
        self.assertTrue(np.array_equal(raw, expected))
        self.assertEqual(tensor.dtype, paddle_dtype)
        self.assertEqual(
            type(tensor.data.tolist(dtype_name)), type(expected.tolist()))
        self.assertEqual(
            tensor.data.tolist(dtype_name), expected.ravel().tolist())
        self.assertEqual(type(tensor.as_ndarray()), type(expected))
        tensor.data.reset(expected)
        self.assertTrue(np.array_equal(tensor.as_ndarray(), expected))
def segment(self,
            images=None,
            paths=None,
            batch_size=1,
            use_gpu=False,
            visualization=False,
            output_dir='humanseg_server_output'):
    """
    API for human segmentation.

    Args:
        images (list(numpy.ndarray)): images data, shape of each is [H, W, C], the color space is BGR.
        paths (list[str]): The paths of images.
        batch_size (int): batch size.
        use_gpu (bool): Whether to use gpu.
        visualization (bool): Whether to save image or not.
        output_dir (str): The path to store output images.

    Returns:
        res (list[dict]): each element in the list is a dict, the keys and values are:
            save_path (str, optional): the path to save images. (Exists only if visualization is True)
            data (numpy.ndarray): data of post processed image.

    Raises:
        RuntimeError: if use_gpu is True and CUDA_VISIBLE_DEVICES is missing,
            empty, or does not start with a digit.
    """
    if use_gpu:
        try:
            _places = os.environ["CUDA_VISIBLE_DEVICES"]
            int(_places[0])
        except (KeyError, IndexError, ValueError) as err:
            raise RuntimeError(
                "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
            ) from err

    all_data = list(reader(images, paths))
    total_num = len(all_data)
    loop_num = int(np.ceil(total_num / batch_size))

    res = []
    for iter_id in range(loop_num):
        handle_id = iter_id * batch_size
        # Slicing clips at the end of the list, so the last batch may be short.
        batch_data = all_data[handle_id:handle_id + batch_size]
        # feed batch image
        batch_image = np.array([item['image'] for item in batch_data])
        batch_image = PaddleTensor(batch_image.copy())
        predictor = self.gpu_predictor if use_gpu else self.cpu_predictor
        output = predictor.run([batch_image])
        output = output[1].as_ndarray()
        # Keep only index 1 of axis 1, then restore that axis with size one.
        output = np.expand_dims(output[:, 1, :, :], axis=1)
        # postprocess one by one
        for i, item in enumerate(batch_data):
            out = postprocess(
                data_out=output[i],
                org_im=item['org_im'],
                org_im_shape=item['org_im_shape'],
                org_im_path=item['org_im_path'],
                output_dir=output_dir,
                visualization=visualization)
            res.append(out)
    return res
def video_segment(self,
                  video_path=None,
                  use_gpu=False,
                  save_dir='humanseg_server_video'):
    """
    Segment every frame of a video or camera stream and composite the result
    onto a white background.

    Args:
        video_path (str): path of the video to process. When it is falsy,
            camera 0 is opened instead. When it is not None, the composited
            frames are written to ``<save_dir>/result.avi``; otherwise they
            are shown in a window until the stream ends or 'q' is pressed.
        use_gpu (bool): Whether to use gpu.
        save_dir (str): directory for the output video; created if missing.

    Raises:
        IOError: if the video or camera cannot be opened.
    """
    resize_h = 512
    resize_w = 512
    if not video_path:
        cap_video = cv2.VideoCapture(0)
    else:
        cap_video = cv2.VideoCapture(video_path)
    if not cap_video.isOpened():
        raise IOError("Error opening video stream or file, "
                      "--video_path whether existing: {}"
                      " or camera whether working".format(video_path))
    width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    disflow = cv2.DISOpticalFlow_create(cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)
    prev_gray = np.zeros((resize_h, resize_w), np.uint8)
    prev_cfd = np.zeros((resize_h, resize_w), np.float32)
    # NOTE(review): is_init is never set to False - confirm postprocess_v is
    # meant to be called in "init" mode for every frame.
    is_init = True
    fps = cap_video.get(cv2.CAP_PROP_FPS)

    def _composite(frame_org, last_gray, last_cfd):
        """Segment one frame; return (composited frame, new gray, new cfd)."""
        frame = preprocess_v(frame_org, resize_w, resize_h)
        image = PaddleTensor(np.array([frame.copy()]))
        predictor = self.gpu_predictor if use_gpu else self.cpu_predictor
        output = predictor.run([image])
        score_map = output[1].as_ndarray()
        frame = np.transpose(frame, axes=[1, 2, 0])
        score_map = np.transpose(np.squeeze(score_map, 0), axes=[1, 2, 0])
        cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        cur_gray = cv2.resize(cur_gray, (resize_w, resize_h))
        score_map = 255 * score_map[:, :, 1]
        optflow_map = postprocess_v(cur_gray, score_map, last_gray, last_cfd,
                                    disflow, is_init)
        # The history for the next frame is taken before blur/threshold.
        next_gray = cur_gray.copy()
        next_cfd = optflow_map.copy()
        optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
        optflow_map = threshold_mask(
            optflow_map, thresh_bg=0.2, thresh_fg=0.8)
        # The flag must be passed by keyword: the third positional parameter
        # of cv2.resize is `dst`, not `interpolation`.
        img_matting = cv2.resize(
            optflow_map, (width, height), interpolation=cv2.INTER_LINEAR)
        img_matting = np.repeat(img_matting[:, :, np.newaxis], 3, axis=2)
        bg_im = np.ones_like(img_matting) * 255
        comb = (img_matting * frame_org +
                (1 - img_matting) * bg_im).astype(np.uint8)
        return comb, next_gray, next_cfd

    cap_out = None
    try:
        if video_path is not None:
            print('Please wait. It is computing......')
            if not osp.exists(save_dir):
                os.makedirs(save_dir)
            save_path = osp.join(save_dir, 'result' + '.avi')
            cap_out = cv2.VideoWriter(
                save_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps,
                (width, height))
        while cap_video.isOpened():
            ret, frame_org = cap_video.read()
            if not ret:
                break
            comb, prev_gray, prev_cfd = _composite(frame_org, prev_gray,
                                                   prev_cfd)
            if cap_out is not None:
                cap_out.write(comb)
            else:
                cv2.imshow('HumanSegmentation', comb)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        # Release the capture (and writer) even if a frame fails mid-stream.
        cap_video.release()
        if cap_out is not None:
            cap_out.release()
def face_detection(self,
                   images=None,
                   paths=None,
                   data=None,
                   batch_size=1,
                   use_gpu=False,
                   output_dir='face_detector_320_predict_output',
                   visualization=False,
                   confs_threshold=0.5,
                   iou_threshold=0.5):
    """
    API for face detection.

    Args:
        images (list(numpy.ndarray)): images data, shape of each is [H, W, C], color space is BGR.
        paths (list[str]): The paths of images.
        data (dict): legacy input; when it has an 'image' key, its value is
            appended to ``paths``.
        batch_size (int): batch size.
        use_gpu (bool): Whether to use gpu.
        output_dir (str): The path to store output images.
        visualization (bool): Whether to save image or not.
        confs_threshold (float): threshold for confidence coefficient.
        iou_threshold (float): threshold for iou.

    Returns:
        res (list[dict()]): The result of face detection and save path of images.

    Raises:
        RuntimeError: if use_gpu is True and CUDA_VISIBLE_DEVICES is missing,
            empty, or does not start with a digit.
    """
    if use_gpu:
        try:
            _places = os.environ["CUDA_VISIBLE_DEVICES"]
            int(_places[0])
        except (KeyError, IndexError, ValueError) as err:
            raise RuntimeError(
                "Attempt to use GPU for prediction, but environment variable CUDA_VISIBLE_DEVICES was not set correctly."
            ) from err

    # compatibility with older versions
    if data and 'image' in data:
        # Build a new list so the caller's `paths` is not modified in place.
        paths = (list(paths) if paths is not None else []) + list(data['image'])

    # get all data
    all_data = list(reader(images, paths))
    total_num = len(all_data)
    loop_num = int(np.ceil(total_num / batch_size))

    res = []
    for iter_id in range(loop_num):
        handle_id = iter_id * batch_size
        # Slicing clips at the end of the list, so the last batch may be short.
        batch_data = all_data[handle_id:handle_id + batch_size]
        # feed batch image
        batch_image = np.array([item['image'] for item in batch_data])
        batch_image = PaddleTensor(batch_image.astype('float32'))
        predictor = self.gpu_predictor if use_gpu else self.cpu_predictor
        data_out = predictor.run([batch_image])
        confidences = data_out[0].as_ndarray()
        boxes = data_out[1].as_ndarray()
        # postprocess one by one
        for i, item in enumerate(batch_data):
            out = postprocess(
                confidences=confidences[i],
                boxes=boxes[i],
                orig_im=item['orig_im'],
                orig_im_shape=item['orig_im_shape'],
                orig_im_path=item['orig_im_path'],
                output_dir=output_dir,
                visualization=visualization,
                confs_threshold=confs_threshold,
                iou_threshold=iou_threshold)
            res.append(out)
    return res
def face_detection(self,
                   images=None,
                   paths=None,
                   data=None,
                   use_gpu=False,
                   output_dir='detection_result',
                   visualization=False,
                   shrink=0.5,
                   confs_threshold=0.6):
    """
    API for face detection.

    Args:
        images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
        paths (list[str]): The paths of images.
        data (dict): legacy input; an 'image' key is appended to ``paths``,
            otherwise a 'data' key is appended to ``images``.
        use_gpu (bool): Whether to use gpu.
        output_dir (str): The path to store output images.
        visualization (bool): Whether to save image or not.
        shrink (float): parameter to control the resize scale in preprocess.
        confs_threshold (float): confidence threshold.

    Returns:
        res (list[dict]): The result of face detection and save path of images.

    Raises:
        RuntimeError: if use_gpu is True and CUDA_VISIBLE_DEVICES is missing,
            empty, or does not start with a digit.
    """
    if use_gpu:
        try:
            _places = os.environ["CUDA_VISIBLE_DEVICES"]
            int(_places[0])
        except (KeyError, IndexError, ValueError) as err:
            raise RuntimeError(
                "Attempt to use GPU for prediction, but environment variable CUDA_VISIBLE_DEVICES was not set correctly."
            ) from err

    # compatibility with older versions
    if data:
        # New lists are built so the caller's arguments are not modified.
        if 'image' in data:
            paths = (list(paths) if paths is not None else []) + list(
                data['image'])
        elif 'data' in data:
            images = (list(images) if images is not None else []) + list(
                data['data'])

    res = []
    # process one by one
    for element in reader(images, paths, shrink):
        image = np.expand_dims(element['image'], axis=0).astype('float32')
        image_tensor = PaddleTensor(image.copy())
        predictor = self.gpu_predictor if use_gpu else self.cpu_predictor
        data_out = predictor.run([image_tensor])
        out = postprocess(
            data_out=data_out[0].as_ndarray(),
            org_im=element['org_im'],
            org_im_path=element['org_im_path'],
            image_width=element['image_width'],
            image_height=element['image_height'],
            output_dir=output_dir,
            visualization=visualization,
            shrink=shrink,
            confs_threshold=confs_threshold)
        res.append(out)
    return res
def bald(self,
         images=None,
         paths=None,
         data=None,
         use_gpu=False,
         org_labels=[[0., 0., 1., 0., 0., 1., 1., 1., 0., 0., 0., 0., 1.]],
         target_labels=None,
         visualization=True,
         output_dir="bald_output"):
    """
    Run the predictor on every input image with three target-label variants
    and post-process the collected outputs.

    Args:
        images (list(numpy.ndarray)): images data, shape of each is [H, W, C], the color space is BGR.
        paths (list[str]): The paths of images.
        data (dict): key is 'image', the corresponding value is the path to image.
        use_gpu (bool): Whether to use gpu.
        org_labels (list): forwarded to ``reader`` together with target_labels.
        target_labels: forwarded to ``reader``.
        visualization (bool): Whether to save image or not.
        output_dir (str): The path to store output images.

    Returns:
        res (list[dict]): each element in the list is a dict, the keys and values are:
            save_path (str, optional): the path to save images. (Exists only if visualization is True)
            data (numpy.ndarray): data of post processed image.

    Raises:
        RuntimeError: if use_gpu is True and CUDA_VISIBLE_DEVICES is missing,
            empty, or does not start with a digit.
    """
    if use_gpu:
        try:
            _places = os.environ["CUDA_VISIBLE_DEVICES"]
            int(_places[0])
        except (KeyError, IndexError, ValueError) as err:
            raise RuntimeError(
                "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
            ) from err

    if data and 'image' in data:
        # Build a new list so the caller's `paths` is not modified in place.
        paths = (list(paths) if paths is not None else []) + list(data['image'])

    all_data = list(reader(images, paths, org_labels, target_labels))

    res = []
    # NOTE(review): `outputs` is shared by all images, so the postprocess call
    # for image i also receives the outputs of images 0..i-1 - confirm this
    # is intended.
    outputs = []
    for item in all_data:
        image_np = item['img']
        org_label_np = [item['org_label']]
        target_label_np = [item['target_label']]
        # Only the even steps 0, 2 and 4 produce an output.
        for j in range(0, 5, 2):
            label_trg_tmp = copy.deepcopy(target_label_np)
            new_i = 0
            # Flip the attribute at index 0 of the target label.
            label_trg_tmp[0][new_i] = 1.0 - label_trg_tmp[0][new_i]
            label_trg_tmp = check_attribute_conflict(label_trg_tmp)
            change_num = j * 0.02 + 0.3
            label_org_tmp = [((x * 2) - 1) * change_num for x in org_label_np]
            label_trg_tmp = [((x * 2) - 1) * change_num for x in label_trg_tmp]
            image = PaddleTensor(image_np.copy())
            org_label = PaddleTensor(np.array(label_org_tmp).astype('float32'))
            target_label = PaddleTensor(
                np.array(label_trg_tmp).astype('float32'))
            # NOTE(review): the GPU and CPU predictors are fed the two label
            # tensors in opposite order; kept as-is, verify against the model.
            if use_gpu:
                output = self.gpu_predictor.run(
                    [image, target_label, org_label])
            else:
                output = self.cpu_predictor.run(
                    [image, org_label, target_label])
            outputs.append(output)
        out = postprocess(
            data_out=outputs,
            org_im=item['org_im'],
            org_im_path=item['org_im_path'],
            output_dir=output_dir,
            visualization=visualization)
        res.append(out)
    return res
def predict_det(self, inputs):
    """Run ``self.predictor`` on a copy of ``inputs`` and return its first
    output as a numpy array."""
    feed = PaddleTensor(inputs.copy())
    outputs = self.predictor.run([feed])
    return outputs[0].as_ndarray()
def face_detection(self,
                   images=None,
                   paths=None,
                   data=None,
                   use_gpu=False,
                   output_dir='detection_result',
                   visualization=False,
                   score_thresh=0.15):
    """
    API for face detection.

    Args:
        images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
        paths (list[str]): The paths of images.
        data (dict): legacy input; when it has an 'image' key, its value is
            appended to ``paths``.
        use_gpu (bool): Whether to use gpu.
        output_dir (str): The path to store output images.
        visualization (bool): Whether to save image or not.
        score_thresh (float): score threshold to limit the detection result.

    Returns:
        res (list[dict]): The result of face detection, keys are 'data' and 'path', the corresponding values are:
            data (list[dict]): 5 keys, where
                'left', 'top', 'right', 'bottom' are the coordinate of detection bounding box,
                'confidence' is the confidence this bbox.
            path (str): The path of original image.

    Raises:
        RuntimeError: if use_gpu is True and CUDA_VISIBLE_DEVICES is missing,
            empty, or does not start with a digit.
    """
    if use_gpu:
        try:
            _places = os.environ["CUDA_VISIBLE_DEVICES"]
            int(_places[0])
        except (KeyError, IndexError, ValueError) as err:
            raise RuntimeError(
                "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
            ) from err

    # compatibility with older versions
    if data and 'image' in data:
        # Build a new list so the caller's `paths` is not modified in place.
        paths = (list(paths) if paths is not None else []) + list(data['image'])

    res = []
    # process one by one
    for element in reader(images, paths):
        image = np.expand_dims(element['image'], axis=0).astype('float32')
        image_tensor = PaddleTensor(image.copy())
        predictor = self.gpu_predictor if use_gpu else self.cpu_predictor
        data_out = predictor.run([image_tensor])
        out = postprocess(
            data_out=data_out[0].as_ndarray(),
            org_im=element['org_im'],
            org_im_path=element['org_im_path'],
            org_im_width=element['org_im_width'],
            org_im_height=element['org_im_height'],
            output_dir=output_dir,
            visualization=visualization,
            score_thresh=score_thresh)
        res.append(out)
    return res
def object_detection(self,
                     paths=None,
                     images=None,
                     batch_size=1,
                     use_gpu=False,
                     output_dir='yolov3_pedestrian_detect_output',
                     score_thresh=0.2,
                     visualization=True):
    """API of Object Detection.

    Args:
        paths (list[str]): The paths of images.
        images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
        batch_size (int): batch size.
        use_gpu (bool): Whether to use gpu.
        output_dir (str): The path to store output images.
        visualization (bool): Whether to save image or not.
        score_thresh (float): threshold for object detection.

    Returns:
        res (list[dict]): The result of pedestrian detection. keys include 'data', 'save_path', the corresponding value is:
            data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
                left (float): The X coordinate of the upper left corner of the bounding box;
                top (float): The Y coordinate of the upper left corner of the bounding box;
                right (float): The X coordinate of the lower right corner of the bounding box;
                bottom (float): The Y coordinate of the lower right corner of the bounding box;
                label (str): The label of detection result;
                confidence (float): The confidence of detection result.
            save_path (str, optional): The path to save output images.

    Raises:
        RuntimeError: if use_gpu is True and CUDA_VISIBLE_DEVICES is missing,
            empty, or does not start with a digit.
    """
    if use_gpu:
        try:
            _places = os.environ["CUDA_VISIBLE_DEVICES"]
            int(_places[0])
        except (KeyError, IndexError, ValueError) as err:
            raise RuntimeError(
                "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
            ) from err

    paths = paths if paths else list()
    data_reader = partial(reader, paths, images)
    batch_reader = fluid.io.batch(data_reader, batch_size=batch_size)

    res = []
    for iter_id, feed_data in enumerate(batch_reader()):
        feed_data = np.array(feed_data)
        # Column 0 of each sample is fed as the image, column 1 as its size.
        image_tensor = PaddleTensor(np.array(list(feed_data[:, 0])))
        im_size_tensor = PaddleTensor(np.array(list(feed_data[:, 1])))
        predictor = self.gpu_predictor if use_gpu else self.cpu_predictor
        data_out = predictor.run([image_tensor, im_size_tensor])
        output = postprocess(
            paths=paths,
            images=images,
            data_out=data_out,
            score_thresh=score_thresh,
            label_names=self.label_names,
            output_dir=output_dir,
            # Index of the first image of this batch within the whole input.
            handle_id=iter_id * batch_size,
            visualization=visualization)
        res.extend(output)
    return res
def array2tensor(self, arr_data):
    """
    Wrap ``arr_data`` into a PaddleTensor and return it.
    """
    return PaddleTensor(arr_data)
def face_detection(self,
                   images=None,
                   paths=None,
                   data=None,
                   batch_size=1,
                   use_gpu=False,
                   visualization=False,
                   output_dir='detection_result',
                   use_multi_scale=False,
                   shrink=0.5,
                   confs_threshold=0.6):
    """
    API for face detection.

    Args:
        images (list(numpy.ndarray)): images data, shape of each is [H, W, C], color space must be BGR.
        paths (list[str]): The paths of images.
        data (dict): legacy input kept for compatibility with older versions;
            the value under key 'image' is appended to paths, otherwise the
            value under key 'data' is appended to images.
        batch_size (int): batch size of image tensor to be fed into the later classification network.
        use_gpu (bool): Whether to use gpu.
        visualization (bool): Whether to save image or not.
        output_dir (str): The path to store output images.
        use_multi_scale (bool): whether to enable multi-scale face detection. Enabling multi-scale face detection
            can increase the accuracy to detect faces, however,
            it reduces the prediction speed because of the extra model calculation.
        shrink (float): parameter to control the resize scale in preprocess.
        confs_threshold (float): confidence threshold.

    Returns:
        res (list[dict]): The result of face detection and save path of images.

    Raises:
        RuntimeError: if use_gpu is True and CUDA_VISIBLE_DEVICES is unset,
            empty, or does not start with a digit.
    """
    if use_gpu:
        try:
            _places = os.environ["CUDA_VISIBLE_DEVICES"]
            int(_places[0])
        # Only the failures this check can actually produce are translated;
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        except (KeyError, IndexError, ValueError):
            raise RuntimeError(
                "Attempt to use GPU for prediction, but environment variable CUDA_VISIBLE_DEVICES was not set correctly."
            )

    # compatibility with older versions
    # New lists are built so the caller's `paths` / `images` are not mutated.
    if data:
        if 'image' in data:
            paths = ([] if paths is None else list(paths)) + list(
                data['image'])
        elif 'data' in data:
            images = ([] if images is None else list(images)) + list(
                data['data'])

    # get all data
    all_element = list(
        reader(self.face_detector, shrink, confs_threshold, images, paths,
               use_gpu, use_multi_scale))

    # Flatten the preprocessed crops of every input element into one list,
    # remembering how many crops each element contributed.
    image_list = list()
    element_image_num = list()
    for element in all_element:
        element_image = [
            handled['image'] for handled in element['preprocessed']
        ]
        element_image_num.append(len(element_image))
        image_list.extend(element_image)

    total_num = len(image_list)
    loop_num = int(np.ceil(total_num / batch_size))

    # The empty float64 seed keeps the result a float64 array with two columns
    # even when there is nothing to predict.
    predict_chunks = [np.zeros((0, 2))]
    for iter_id in range(loop_num):
        handle_id = iter_id * batch_size
        # Slicing clamps at the end of the list, so the last batch may be short.
        batch_data = image_list[handle_id:handle_id + batch_size]

        image_arr = np.squeeze(np.array(batch_data), axis=1)
        image_tensor = PaddleTensor(image_arr.copy())
        predictor = self.gpu_predictor if use_gpu else self.cpu_predictor
        data_out = predictor.run([image_tensor])
        # len(data_out) == 1
        # data_out[0].as_ndarray().shape == (-1, 2)
        predict_chunks.append(data_out[0].as_ndarray())
    predict_out = np.concatenate(predict_chunks)

    # postprocess one by one
    res = list()
    interval_left = 0
    for element, image_num in zip(all_element, element_image_num):
        detect_faces_list = [
            handled['face'] for handled in element['preprocessed']
        ]
        # rows [interval_left, interval_right) of predict_out belong to
        # this element
        interval_right = interval_left + image_num
        out = postprocess(
            confidence_out=predict_out[interval_left:interval_right],
            org_im=element['org_im'],
            org_im_path=element['org_im_path'],
            detected_faces=detect_faces_list,
            output_dir=output_dir,
            visualization=visualization)
        res.append(out)
        interval_left = interval_right
    return res
def object_detection(self,
                     paths=None,
                     images=None,
                     data=None,
                     use_gpu=False,
                     batch_size=1,
                     output_dir='detection_result',
                     score_thresh=0.5,
                     visualization=True):
    """API of Object Detection.

    :param paths: the path of images.
    :type paths: list, each element is correspond to the path of an image.
    :param images: data of images, [N, H, W, C]
    :type images: numpy.ndarray
    :param data: legacy input; when it contains the key 'image', that value
        is used as (or appended to) paths.
    :type data: dict
    :param use_gpu: whether to use gpu or not.
    :type use_gpu: bool
    :param batch_size: batch size.
    :type batch_size: int
    :param output_dir: the directory to store the detection result.
    :type output_dir: str
    :param score_thresh: the threshold of detection confidence.
    :type score_thresh: float
    :param visualization: whether to draw box and save images.
    :type visualization: bool
    :return: the concatenated outputs of postprocess for every batch.
    :rtype: list
    """
    if data and 'image' in data:
        paths = data['image'] if not paths else paths + data['image']

    paths = paths if paths else []
    all_images = list(self.faster_rcnn.test_reader(paths, images))

    images_num = len(all_images)
    loop_num = ceil(images_num / batch_size)
    res = []
    for iter_id in range(loop_num):
        handle_id = iter_id * batch_size
        # Slicing clamps at the end of the list, so the last batch may be
        # short; no exception needs to be swallowed to get that behaviour.
        batch_data = all_images[handle_id:handle_id + batch_size]

        padding_image, padding_info, padding_shape = self.faster_rcnn.padding_minibatch(
            batch_data)
        feed_list = [
            PaddleTensor(padding_image.copy()),
            PaddleTensor(padding_info.copy()),
            PaddleTensor(padding_shape.copy())
        ]
        predictor = self.gpu_predictor if use_gpu else self.cpu_predictor
        data_out = predictor.run(feed_list)

        output = self.faster_rcnn.postprocess(
            paths=paths,
            images=images,
            data_out=data_out,
            score_thresh=score_thresh,
            label_names=self.label_names,
            output_dir=output_dir,
            # index of the first image of this batch in the overall input
            handle_id=handle_id,
            visualization=visualization)
        res += output
    return res
def object_detection(self,
                     paths=None,
                     images=None,
                     use_gpu=False,
                     batch_size=1,
                     output_dir='detection_result',
                     score_thresh=0.5,
                     visualization=True):
    """API of Object Detection.

    Args:
        paths (list[str]): The paths of images.
        images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
        use_gpu (bool): Whether to use gpu.
        batch_size (int): batch size.
        output_dir (str): The path to store output images.
        score_thresh (float): threshold for object detection.
        visualization (bool): whether to save result as images.

    Returns:
        res (list[dict]): The result of coco2017 detection. keys include 'data', 'save_path', the corresponding value is:
            data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
                left (float): The X coordinate of the upper left corner of the bounding box;
                top (float): The Y coordinate of the upper left corner of the bounding box;
                right (float): The X coordinate of the lower right corner of the bounding box;
                bottom (float): The Y coordinate of the lower right corner of the bounding box;
                label (str): The label of detection result;
                confidence (float): The confidence of detection result.
            save_path (str, optional): The path to save output images.

    Raises:
        RuntimeError: if use_gpu is True and CUDA_VISIBLE_DEVICES is unset,
            empty, or does not start with a digit.
    """
    if use_gpu:
        try:
            _places = os.environ["CUDA_VISIBLE_DEVICES"]
            int(_places[0])
        # Only the failures this check can actually produce are translated;
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        except (KeyError, IndexError, ValueError):
            raise RuntimeError(
                "Attempt to use GPU for prediction, but environment variable CUDA_VISIBLE_DEVICES was not set correctly."
            )

    paths = paths if paths else list()
    all_images = list(test_reader(paths, images))

    images_num = len(all_images)
    loop_num = int(np.ceil(images_num / batch_size))
    res = list()
    for iter_id in range(loop_num):
        handle_id = iter_id * batch_size
        # Slicing clamps at the end of the list, so the last batch may be
        # short; no exception needs to be swallowed to get that behaviour.
        batch_data = all_images[handle_id:handle_id + batch_size]

        padding_image, padding_info = padding_minibatch(
            batch_data, coarsest_stride=32, use_padded_im_info=True)
        feed_list = [
            PaddleTensor(padding_image.copy()),
            PaddleTensor(padding_info.copy())
        ]
        predictor = self.gpu_predictor if use_gpu else self.cpu_predictor
        data_out = predictor.run(feed_list)

        output = postprocess(
            paths=paths,
            images=images,
            data_out=data_out,
            score_thresh=score_thresh,
            label_names=self.label_names,
            output_dir=output_dir,
            # index of the first image of this batch in the overall input
            handle_id=handle_id,
            visualization=visualization)
        res += output
    return res
def style_transfer(self,
                   images=None,
                   paths=None,
                   alpha=1,
                   use_gpu=False,
                   output_dir='transfer_result',
                   visualization=False):
    """
    API for image style transfer.

    Args:
        images (list): list of dict objects, each dict contains key:
            content(str): value is a numpy.ndarray with shape [H, W, C], content data.
            styles(str): value is a list of numpy.ndarray with shape [H, W, C], styles data.
            weights(str, optional): value is the interpolation weights correspond to styles.
        paths (list): list of dict objects, each dict contains key:
            content(str): value is the path to content.
            styles(str): value is the paths to styles.
            weights(str, optional): value is the interpolation weights correspond to styles.
        alpha (float): The weight that controls the degree of stylization. Should be between 0 and 1.
        use_gpu (bool): whether to use gpu.
        output_dir (str): the path to store output images.
        visualization (bool): whether to save image or not.

    Returns:
        im_output (list[dict()]): list of output images and save path of images.

    Raises:
        RuntimeError: if use_gpu is True and CUDA_VISIBLE_DEVICES is unset,
            empty, or does not start with a digit.
    """
    if use_gpu:
        try:
            _places = os.environ["CUDA_VISIBLE_DEVICES"]
            int(_places[0])
        # Only the failures this check can actually produce are translated;
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        except (KeyError, IndexError, ValueError):
            raise RuntimeError(
                "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
            )

    im_output = []
    for component, w, h in reader(images, paths):
        # The encoder/decoder pair does not change within one call.
        encoder = self.gpu_predictor_enc if use_gpu else self.cpu_predictor_enc
        decoder = self.gpu_predictor_dec if use_gpu else self.cpu_predictor_dec

        content = PaddleTensor(component['content_arr'].copy())
        content_feats = encoder.run([content])

        # NOTE(review): the accumulator shape is hard-coded; this assumes the
        # decoder always produces a 3 x 512 x 512 image -- confirm against
        # the preprocessing in `reader`.
        accumulate = np.zeros((3, 512, 512))
        for idx, style_arr in enumerate(component['styles_arr_list']):
            style = PaddleTensor(style_arr.copy())
            # encode
            style_feats = encoder.run([style])
            fr_feats = fr(content_feats[0].as_ndarray(),
                          style_feats[0].as_ndarray(), alpha)
            fr_feats = PaddleTensor(fr_feats.copy())
            # decode
            predict_outputs = decoder.run([fr_feats])
            # interpolation: weighted sum of the per-style outputs
            accumulate += predict_outputs[0].as_ndarray(
            )[0] * component['style_interpolation_weights'][idx]

        # postprocess
        save_im_name = 'ndarray_{}.jpg'.format(time.time())
        result = postprocess(
            accumulate,
            output_dir,
            save_im_name,
            visualization,
            size=(w, h))
        im_output.append(result)
    return im_output