def process(inputs, ctx, **kwargs):
    frame, is_streaming = helpers.load_image(inputs, 'input', rgb=False)
    LOG.info("frame shape: {}".format(frame.shape))
    bboxes, probabilities = detect_bboxes(
        ctx.drivers[0],
        frame,
        PARAMS.get("threshold", .5),
    )
    for i, bbox in enumerate(bboxes):
        draw_bbox(
            frame,
            bbox.astype(int),
            label="Detected {}\nprobability: {:.2f}".format(
                PARAMS["object_name"], probabilities[i]),
        )
    if is_streaming:
        output = frame[:, :, ::-1]
    else:
        _, buf = cv2.imencode('.jpg', frame)
        output = buf.tostring()
    return {
        'output': output,
        'bboxes': bboxes.tolist(),
        'probabilities': probabilities.tolist(),
    }
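# detect_bboxes() above is a shared helper not shown in this file. A minimal
# sketch of what such a helper typically does for an SSD-style detector --
# the input size, output layout and driver contract here are assumptions,
# modeled on the OpenVINO [-1, 7] post-processing used in a hook below.
# Uses the module-level cv2/np imports. Not the project's actual helper.
def detect_bboxes_sketch(driver, frame, threshold=0.5):
    input_name = list(driver.inputs.keys())[0]
    # Assumed 300x300 NCHW input; adjust to the real model.
    data = cv2.resize(frame, (300, 300)).transpose([2, 0, 1])[None]
    outputs = driver.predict({input_name: data})
    # Assumed SSD row layout: [image_id, label, confidence, xmin, ymin, xmax, ymax]
    raw = list(outputs.values())[0].reshape(-1, 7)
    raw = raw[raw[:, 2] > threshold]
    # Scale normalized coordinates back to pixels.
    boxes = raw[:, 3:7] * [frame.shape[1], frame.shape[0],
                           frame.shape[1], frame.shape[0]]
    return boxes, raw[:, 2]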
def process(self, inputs, ctx, **kwargs):
    if self.model is None:
        img, is_video = helpers.load_image(inputs, 'image', rgb=False)
        img = img[:, :, ::-1]
        if not is_video:
            img = cv2.imencode('.jpg', img)[1].tostring()
        return {'output': img}
    return self.model.process(inputs, ctx, **kwargs)
def preprocess_boxes(inputs, ctx):
    image, _ = helpers.load_image(inputs, 'image')
    # image = image[:, :, ::-1]
    resized_im, ratio = resize_image(image)
    resized_im = resized_im.astype(np.float32)
    ctx.image = image
    ctx.ratio = ratio
    return {
        'images': np.stack([resized_im], axis=0),
    }
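# resize_image() is defined elsewhere; it returns the resized frame plus the
# scale ratio stored in ctx.ratio so boxes predicted on the resized image can
# be mapped back to the original. A minimal sketch, assuming the detector
# wants sides that are multiples of 32 and a bounded longest side -- the
# limits and rounding scheme are assumptions, not the project's code.
def resize_image_sketch(image, max_side=1024):
    h, w = image.shape[:2]
    ratio = min(1.0, max_side / float(max(h, w)))
    new_h = max(32, int(h * ratio) // 32 * 32)
    new_w = max(32, int(w * ratio) // 32 * 32)
    resized = cv2.resize(image, (new_w, new_h))
    # Per-axis ratios let the caller rescale predicted boxes.
    return resized, (new_w / float(w), new_h / float(h))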
def process(inputs, ctx, **kwargs):
    image, is_video = load_image(inputs, 'inputs')
    if image is None:
        raise RuntimeError('Missing "inputs" key in inputs. Provide an image in "inputs" key')

    def _return(result):
        encoding = ''
        if not is_video:
            if result.shape[2] == 3:
                result = result[:, :, ::-1]
                result = cv2.imencode('.jpg', result)[1].tostring()
                encoding = 'jpeg'
            else:
                result = cv2.imencode('.png', result)[1].tostring()
                encoding = 'png'
        return {'output': result, 'encoding': encoding}

    # Downscale so the longest side is at most 1024 pixels.
    ratio = 1.0
    w = float(image.shape[1])
    h = float(image.shape[0])
    if w > h:
        if w > 1024:
            ratio = w / 1024.0
    else:
        if h > 1024:
            ratio = h / 1024.0
    if ratio > 1:
        image = cv2.resize(image, (int(w / ratio), int(h / ratio)))

    model_input = cv2.resize(image, (160, 160))
    model_input = np.asarray(model_input, np.float32) / 255.0
    outputs = ctx.drivers[0].predict({'image': np.expand_dims(model_input, axis=0)})

    # Upscale the predicted mask back to frame size and feather its edges.
    mask = outputs['output'][0]
    mask = cv2.resize(mask, (image.shape[1], image.shape[0]))
    mask = cv2.GaussianBlur(mask, (21, 21), 11)
    mask = np.expand_dims(mask, 2)

    back_name = get_param(inputs, 'background', None)
    if back_name is not None:
        background = backgrounds.get(back_name)
    else:
        if glob_background is not None:
            background = glob_background
        else:
            background = backgrounds.get('None')

    # Composite: foreground * mask + background * (1 - mask).
    image = image.astype(np.float32) * mask
    background = cv2.resize(background, (image.shape[1], image.shape[0]))
    background = background.astype(np.float32)
    background = background * (1 - mask)
    image = background + image
    image = image.astype(np.uint8)
    return _return(image)
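# get_param() is a shared helper used by several hooks here (there is also a
# helpers.get_param variant). A plausible minimal sketch under the assumption
# that request parameters arrive keyed by name, possibly wrapped in byte
# arrays by the serving layer -- not the actual implementation.
def get_param_sketch(inputs, key, default=None):
    value = inputs.get(key)
    if value is None:
        return default
    # Serving frameworks often wrap scalar parameters in arrays.
    if isinstance(value, (list, tuple)) or getattr(value, 'ndim', 0) > 0:
        value = value[0]
    if isinstance(value, bytes):
        value = value.decode()
    return value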
def process(inputs, ctx):
    if len(ctx.drivers) < 2:
        raise RuntimeError('Required 2 models: face and cyclegan')

    enable_color_transfer = get_param(inputs, 'color_transfer')
    alpha = get_param(inputs, 'alpha')

    face_driver = ctx.drivers[0]
    cyclegan_driver = ctx.drivers[1]
    input_name = list(cyclegan_driver.inputs.keys())[0]

    original, is_video = helpers.load_image(inputs, 'input')
    image = original.copy()

    boxes = get_boxes(face_driver, image)
    for box in boxes:
        box = box.astype(int)
        img = crop_by_box(image, box)
        prepared = np.expand_dims(prepare_image(img), axis=0)
        outputs = cyclegan_driver.predict({input_name: prepared})
        output = list(outputs.values())[0].squeeze()
        output = inverse_transform(output)
        output = scale(output)
        # output = (output * 255).astype(np.uint8)
        output = cv2.resize(np.array(output), (box[2] - box[0], box[3] - box[1]),
                            interpolation=cv2.INTER_AREA)
        if enable_color_transfer:
            output = color_transfer.color_transfer(img, output, clip=True, preserve_paper=False)

        center = (box[0] + output.shape[1] // 2, box[1] + output.shape[0] // 2)
        alpha = np.clip(alpha, 1, 255)
        image = cv2.seamlessClone(output, image, np.ones_like(output) * alpha,
                                  center, cv2.NORMAL_CLONE)
        # image[box[1]:box[3], box[0]:box[2]] = (output / 2 + img / 2).astype(np.uint8)

    # merge
    image = np.vstack((original, image))

    if not is_video:
        image = image[:, :, ::-1]
        image_bytes = cv2.imencode('.jpg', image)[1].tostring()
    else:
        image_bytes = image
    return {'output': image_bytes}
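# inverse_transform()/scale() above undo the generator's input normalization.
# A common convention (assumed here, not confirmed by this file) is a tanh
# output in [-1, 1] mapped back to uint8 [0, 255]; a minimal sketch:
def inverse_transform_sketch(output):
    # [-1, 1] -> [0, 1]
    return (output + 1.0) / 2.0

def scale_sketch(output):
    # [0, 1] -> uint8 [0, 255]
    return (output * 255.0).clip(0, 255).astype(np.uint8)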
def process(inputs, ct_x, **kwargs):
    original_image, is_video = load_image(inputs, 'inputs')
    if original_image is None:
        raise RuntimeError('Missing "inputs" key in inputs. Provide an image in "inputs" key')
    if original_image.shape[2] > 3:
        original_image = original_image[:, :, 0:3]

    def _return(result):
        encoding = ''
        if not is_video:
            if result.shape[2] == 3:
                result = result[:, :, ::-1]
                result = cv2.imencode('.jpg', result)[1].tostring()
                encoding = 'jpeg'
            else:
                result = cv2.imencode('.png', result)[1].tostring()
                encoding = 'png'
        return {'output': result, 'encoding': encoding}

    ratio = 1.0
    w = float(original_image.shape[1])
    h = float(original_image.shape[0])
    if w > h:
        if w > 1024:
            ratio = w / 1024.0
    else:
        if h > 1024:
            ratio = h / 1024.0
    if ratio > 1:
        image = cv2.resize(original_image, (int(w / ratio), int(h / ratio)))
    else:
        image = original_image

    serv_image = cv2.resize(image, (160, 160)).astype(np.float32) / 255.0
    result = ct_x.drivers[0].predict({'image': np.expand_dims(serv_image, axis=0)})
    mask = result['output'][0]
    mask[mask < 0.5] = 0
    if mask.shape != image.shape:
        mask = cv2.resize(mask, (image.shape[1], image.shape[0]))
    mask = cv2.GaussianBlur(mask, (21, 21), 11)
    if len(mask.shape) == 2:
        mask = np.expand_dims(mask, axis=2)

    if not is_video:
        # Return RGBA: the mask becomes the alpha channel of a PNG.
        mask = (mask * 255).astype(np.uint8)
        image = image[:, :, ::-1].astype(np.uint8)
        image = np.concatenate([image, mask], axis=2)
    else:
        image = image.astype(np.float32) * mask
        image = image.astype(np.uint8)
    return _return(image)
def process(inputs, ctx, **kwargs):
    face_driver = ctx.driver
    image, is_video = helpers.load_image(inputs, 'input', rgb=False)
    boxes = beautify.get_boxes(face_driver, image, PARAMS['threshold'])
    for box in boxes:
        image = beautify.beauty(landmarks_driver, image, box)

    if is_video:
        return {'output': image}
    else:
        image_bytes = cv2.imencode('.jpg', image)[1].tostring()
        return {'output': image_bytes}
def process(inputs, ctx, **kwargs):
    frame, is_video = helpers.load_image(inputs, 'image')
    # if frame is not None:
    #     return {'output': frame}
    key = kwargs.get('metadata', {}).get('stream_id', None)
    if key is None:
        return {'output': frame}

    track = trackers.get(key, None)
    if track is None:
        logging.info('NEW TRACK: {}'.format(key))
        track = Worker(source_image)
        trackers[key] = track

    frame = track.process(frame)
    return {'output': frame[:, :, :]}
def process(inputs, ctx, **kwargs):
    global kd_tree
    original, is_video = helpers.load_image(inputs, 'input')
    image = original.copy()

    if kwargs.get('detect') == 'false' or len(ctx.drivers) == 1:
        detect_driver = None
        reid_driver = ctx.drivers[0]
    else:
        detect_driver = ctx.drivers[0]
        reid_driver = ctx.drivers[1]

    # reid_input_shape = list(reid_driver.inputs.values())[0]
    input_name = list(reid_driver.inputs.keys())[0]

    if detect_driver is not None:
        boxes = get_boxes(detect_driver, image, threshold=0.3)
    else:
        boxes = np.array([[0, 0, image.shape[1], image.shape[0]]])
    print(f'boxes={len(boxes)}')

    for box in boxes:
        box = box.astype(int)
        img = crop_by_box(image, box)
        img = cv2.resize(img, tuple(PARAMS['input_shape'][::-1]), interpolation=cv2.INTER_AREA)
        prepared = norm(img, need_transpose=PARAMS['driver_type'] == 'pytorch')
        prepared = np.expand_dims(prepared, axis=0)
        outputs = reid_driver.predict({input_name: prepared})

        embedding = list(outputs.values())[0]
        embedding = (embedding + 1.) / 2.
        if not kd_tree:
            kd_tree = neighbors.KDTree(embedding, metric='euclidean')
        else:
            dist, idx = kd_tree.query(embedding, k=1)
            print(f'distance={dist}')

        cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]),
                      color=(0, 250, 0), thickness=2, lineType=cv2.LINE_AA)

    if is_video:
        output = image
    else:
        _, buf = cv2.imencode('.jpg', image[:, :, ::-1])
        output = buf.tostring()
    return {'output': output}
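# norm() prepares the crop for the re-identification model. A minimal sketch,
# assuming ImageNet-style mean/std normalization; the actual statistics
# depend on the trained model. need_transpose switches HWC -> CHW for
# PyTorch-style drivers, as the call site above implies.
def norm_sketch(img, need_transpose=False):
    img = img.astype(np.float32) / 255.0
    img = (img - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]
    if need_transpose:
        img = np.transpose(img, [2, 0, 1])
    return img.astype(np.float32)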
def process(inputs, ctx, **kwargs):
    original_image, is_video = load_image(inputs, 'inputs')
    if is_video:
        return {
            'outputs': original_image,
            'status': cv2.resize(
                original_image,
                (original_image.shape[1] // 2, original_image.shape[0] // 2))
        }
    else:
        _, buf = cv2.imencode('.png', original_image[:, :, ::-1])
        image = np.array(buf).tostring()
        return {'outputs': image}
def process(inputs, ctx, **kwargs):
    frame, _ = helpers.load_image(inputs, 'input')
    data, iw, ih = resize(frame, 512)
    h = frame.shape[0]
    w = frame.shape[1]
    driver = ctx.drivers[0]

    data = data.astype(np.float32) / 255.0
    data = np.expand_dims(data, 0)
    result = driver.predict({'image': data})
    cls = result['Reshape_1'][0, 0:ih, 0:iw, 1]
    links = result['Reshape_4'][0, 0:ih, 0:iw, :, 1]

    out_mask = cv2.resize(cls, (w, h), interpolation=cv2.INTER_NEAREST)
    frame = frame.astype(np.float32) * np.expand_dims(out_mask, 2)
    frame = frame.astype(np.uint8)
    return {'output': frame}
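# resize() above letterboxes the frame into a fixed 512x512 input and reports
# the valid region (iw, ih) so the class/link maps can be cropped back, as the
# 0:ih, 0:iw slicing implies. A minimal sketch of that contract; the padding
# scheme is an assumption, not the project's helper.
def resize_sketch(frame, size=512):
    h, w = frame.shape[:2]
    ratio = float(size) / max(h, w)
    iw, ih = int(w * ratio), int(h * ratio)
    resized = cv2.resize(frame, (iw, ih))
    # Pad to a square canvas; only the top-left (ih, iw) region is valid.
    canvas = np.zeros((size, size, 3), dtype=frame.dtype)
    canvas[:ih, :iw, :] = resized
    return canvas, iw, ih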
def process(inputs, ctx, **kwargs):
    frame, is_video = helpers.load_image(inputs, 'input')
    # convert to BGR
    data = frame.copy()
    # data = data[:, :, ::-1]
    data = cv2.resize(data, PARAMS['target_size'], interpolation=cv2.INTER_AREA)
    # convert to input shape (N, C, H, W)
    data = np.expand_dims(np.transpose(data, [2, 0, 1]), axis=0)

    input_name = list(kwargs['model_inputs'])[0]
    outputs = ctx.driver.predict({input_name: data})
    # 7 values: class_id, label, confidence, x_min, y_min, x_max, y_max
    outputs = list(outputs.values())[0].reshape([-1, 7])

    # Select boxes where confidence > factor
    bboxes_raw = outputs[outputs[:, 2] > PARAMS['threshold']]
    bounding_boxes = bboxes_raw[:, 3:7]
    bounding_boxes[:, 0] = bounding_boxes[:, 0] * frame.shape[1]
    bounding_boxes[:, 2] = bounding_boxes[:, 2] * frame.shape[1]
    bounding_boxes[:, 1] = bounding_boxes[:, 1] * frame.shape[0]
    bounding_boxes[:, 3] = bounding_boxes[:, 3] * frame.shape[0]
    bounding_boxes = bounding_boxes.astype(int)

    result = {
        'person_boxes': bounding_boxes,
        'person_scores': bboxes_raw[:, 2]
    }
    if len(bounding_boxes) > 0:
        add_overlays(frame, bounding_boxes, labels=None)
        table = result_table_string(result, frame)
    else:
        table = []

    if not is_video:
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        image_bytes = cv2.imencode(".jpg", frame,
                                   params=[cv2.IMWRITE_JPEG_QUALITY, 95])[1].tostring()
    else:
        image_bytes = frame
    return {'output': image_bytes, 'table_output': table}
def process(inputs, ctx, **kwargs):
    style_name = helpers.get_param(inputs, 'style', None)
    default_model = ctx.global_ctx['default_model']
    if style_name == 'young' or default_model == 'young':
        return ctx.global_ctx['young'].process(inputs, ctx, **kwargs)

    img, is_video = helpers.load_image(inputs, 'image', rgb=False)
    if style_name is not None:
        # A style may be given as "<model>_<style>"; split off the model part.
        p = style_name.split('_')
        model = default_model
        if len(p) > 1:
            model = p[0]
            style_name = '_'.join(p[1:])
        img = ctx.global_ctx[model].process(img, style_name, inputs)

    if not is_video:
        img = img[:, :, ::-1]
        img = cv2.imencode('.jpg', img)[1].tostring()
    return {'output': img}
def process(inputs, ctx, **kwargs):
    frame, is_streaming = helpers.load_image(inputs, 'input', rgb=False)
    LOG.info("frame shape: {}".format(frame.shape))

    detect_driver = ctx.drivers[0]
    bboxes, probabilities = detect_bboxes(
        detect_driver,
        frame,
        PARAMS.get("threshold", .5),
    )

    head_poses = []
    if len(bboxes) > 0:
        head_pose_driver = ctx.drivers[1]
        head_poses = head.head_pose(head_pose_driver, frame, bboxes, rgb=False)
        for i, bbox in enumerate(bboxes):
            draw_bbox(
                frame,
                bbox.astype(int),
                label="Detected face\n"
                      "probability: {:.2f}\n"
                      "yaw: {:.2f}\n"
                      "pitch: {:.2f}\n"
                      "roll: {:.2f}".format(probabilities[i],
                                            head_poses[i][0],
                                            head_poses[i][1],
                                            head_poses[i][2]),
            )
            head.draw_axis(frame, bbox, head_poses[i])
        # head_poses is an ndarray only when faces were found;
        # calling .tolist() unconditionally would crash on the empty list.
        head_poses = head_poses.tolist()

    if is_streaming:
        output = frame[:, :, ::-1]
    else:
        _, buf = cv2.imencode('.jpg', frame)
        output = buf.tostring()
    return {
        'output': output,
        'bboxes': bboxes.tolist(),
        'probabilities': probabilities.tolist(),
        'poses': head_poses,
    }
def process(inputs, ctx, **kwargs):
    LOG.info(inputs)
    ret = {
        'params_text': PARAMS['params_text']
    }
    if 'test_image' in inputs:
        test_image, _ = helpers.load_image(inputs, 'test_image', rgb=False)
        ret['test_image'] = (f'image {test_image.shape[1]}x{test_image.shape[0]} '
                             f'with {test_image.shape[2]} colors')
    test_text = inputs.get('test_text')
    if test_text is not None:
        ret['test_text'] = test_text
    test_int = inputs.get('test_int')
    if test_int is not None:
        ret['test_int'] = test_int
    test_float = inputs.get('test_float')
    if test_float is not None:
        ret['test_float'] = test_float
    return ret
def process(inputs, ctx, **kwargs):
    frame, _ = helpers.load_image(inputs, 'input')
    left = frame.copy()
    h = frame.shape[0]
    w = frame.shape[1]
    face_driver = ctx.drivers[0]
    boxes = _detect_faces(face_driver, frame, treshhold)

    def add_box(b):
        # Skip candidates that overlap an already accepted box.
        for b0 in boxes:
            if box_intersection(b0, b) > 0.1:
                return
        boxes.append(b)

    # Re-run detection on overlapping crops to catch small faces.
    for split_count in split_counts:
        size_multiplier = 2. / (split_count + 1)
        xstep = int(frame.shape[1] / (split_count + 1))
        ystep = int(frame.shape[0] / (split_count + 1))
        xlimit = int(np.ceil(frame.shape[1] * (1 - size_multiplier)))
        ylimit = int(np.ceil(frame.shape[0] * (1 - size_multiplier)))
        for x in range(0, xlimit, xstep):
            for y in range(0, ylimit, ystep):
                y_border = min(frame.shape[0],
                               int(np.ceil(y + frame.shape[0] * size_multiplier)))
                x_border = min(frame.shape[1],
                               int(np.ceil(x + frame.shape[1] * size_multiplier)))
                crop = frame[y:y_border, x:x_border, :]
                box_candidates = _detect_faces(face_driver, crop, treshhold, (x, y))
                for b in box_candidates:
                    add_box(b)

    for b in boxes:
        left = cv2.rectangle(left, (b[0], b[1]), (b[2], b[3]), (0, 255, 0), thickness=2)
    frame = np.concatenate([frame, left], axis=1)
    frame = cv2.resize(frame, (int(w), int(h / 2)))
    return {'output': frame}
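# box_intersection() is used above to de-duplicate detections found in
# overlapping crops. A standard IoU sketch; the project helper may compute a
# plain intersection ratio instead, so treat the semantics as an assumption.
def box_intersection_sketch(a, b):
    x1, y1 = max(a[0], b[0]), max(a[1], b[1])
    x2, y2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0, x2 - x1) * max(0, y2 - y1)
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    union = area_a + area_b - inter
    return inter / union if union > 0 else 0.0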
def process(inputs, ctx):
    global net_loaded
    # check-lock-check
    if not net_loaded:
        with load_lock:
            if not net_loaded:
                _load(ctx)
                net_loaded = True

    img, _ = helpers.load_image(inputs, 'input', rgb=False)
    bboxes_, imgs_, _, skips = app_detector.process_faces_info(img)
    for i, ii in enumerate(imgs_):
        cv2.imwrite('/test/imgs_{}.png'.format(i), ii)

    bboxes, imgs = [], []
    for idx, img_ in enumerate(imgs_):
        if idx not in skips:
            img_ = img_[:, :, ::-1]
            imgs.append(img_)
            bboxes.append(bboxes_[idx])

    if len(bboxes) == 0:
        raise RuntimeError('no faces detected')
    if len(bboxes) > 1:
        raise RuntimeError('more than one face detected')

    bbox = bboxes[0][:4].astype(int)
    aug_imgs = app_classifier.apply_augmentation(imgs)
    embeddings = app_classifier.embeddings(aug_imgs)

    _, tmp_img = tempfile.mkstemp(suffix='.png')
    previews = images.get_images(
        img,
        np.array(bboxes),
        face_crop_size=app_detector.facenet_image_size,
    )
    cv2.imwrite(tmp_img, previews[0])
    with open(tmp_img, 'rb') as f:
        face = f.read()
    return dict(bbox=bbox, face=face, embeddings=embeddings)
def preprocess_detection(inputs, ctx, **kwargs):
    t = time.time()
    np_image, is_video = helpers.load_image(inputs, 'input')
    set_detection_params(inputs, ctx)

    output_type = inputs.get('output_type')
    if output_type is not None:
        if len(output_type.shape) < 1:
            ctx.output_type = output_type.tolist().decode()
        else:
            ctx.output_type = output_type[0].decode()
    else:
        ctx.output_type = PARAMS['output_type']

    image = Image.fromarray(np_image)
    ctx.raw_image = cv2.imencode('.jpg', np_image)[1].tostring()
    # Rotate if exif tags specified
    # image = rotate_by_exif(image, ctx)
    # image = image.convert('RGB')
    ctx.image = image
    ctx.is_video = is_video

    if serving_hook.caption_type == serving_hook.IMAGE_CAPTIONING:
        preprocessed = serving_hook.load_image(image)
        ctx.caption_image = np.array(preprocessed, np.float32)

    ctx.np_image = np_image
    data = image.resize((300, 300), Image.ANTIALIAS)
    ctx.pose_image = np.array(data)
    data = np.array(data).transpose([2, 0, 1]).reshape([1, 3, 300, 300])
    # convert to BGR
    data = data[:, ::-1, :, :]
    ctx.face_image = data

    input_key = list(kwargs.get('model_inputs').keys())[0]
    LOG.info('preprocess detection: %.3fms' % ((time.time() - t) * 1000))
    ctx.t = time.time()
    return {input_key: [ctx.np_image]}
def process(inputs, ctx, **kwargs):
    global loaded
    if not loaded:
        with load_lock:
            if not loaded:
                global o
                o = optic_flow.OpticalFlow(ctx.drivers[0])
                loaded = True

    image, is_video = helpers.load_image(inputs, 'input')
    boxes = None
    if ctx.drivers[0].driver_name != 'null':
        boxes, vectors = o.calc_human_speed(image, one_person=PARAMS['one_person'])

    if PARAMS['poses']:
        humans = e.inference(
            image,
            resize_to_default=True,
            upsample_size=PARAMS['resize_out_ratio'],
            crop_persons=PARAMS['crop_persons'],
            person_boxes=boxes,
            one_person=PARAMS['one_person'],
        )
        image = e.draw_humans(image, humans, imgcopy=True,
                              vectors=PARAMS['skeleton_vectors'])

    if ctx.drivers[0].driver_name != 'null':
        if PARAMS['draw_vectors']:
            o.draw_vectors(image, vectors)
        if PARAMS['draw_boxes']:
            o.draw_boxes(image)

    if is_video:
        image_output = image
    else:
        image_output = cv2.imencode(".jpg", image[:, :, ::-1])[1].tostring()
    return {'output': image_output}
def process(inputs, ctx, **kwargs):
    frame, is_streaming = helpers.load_image(inputs, 'input', rgb=False)
    LOG.info("frame shape: {}".format(frame.shape))
    bboxes, probabilities = detect_bboxes(ctx.drivers[0], frame,
                                          PARAMS.get("detect_threshold", .5))
    for bbox in bboxes.astype(int):
        cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 1)

    if is_streaming:
        output = frame
    else:
        _, buf = cv2.imencode('.jpg', frame)
        output = buf.tostring()
    return {
        'output': output,
        'bboxes': bboxes.tolist(),
        'probabilities': probabilities.tolist(),
    }
def process(inputs, ctx, **kwargs):
    frame, is_streaming = helpers.load_image(inputs, 'input', rgb=False)
    LOG.info("frame shape: {}".format(frame.shape))

    detect_driver = ctx.drivers[0]
    bboxes, probabilities = detect_bboxes(
        detect_driver,
        frame,
        PARAMS.get("threshold", .5),
    )

    age, gender = [], []
    if len(bboxes) > 0:
        age_gender_driver = ctx.drivers[1]
        age, gender = age_gender(age_gender_driver, frame, bboxes, rgb=False)
        for i, bbox in enumerate(bboxes):
            draw_bbox(
                frame,
                bbox.astype(int),
                label="Detected face\nprobability: {:.2f}\nage: {}, gender: {}"
                      .format(probabilities[i], age[i],
                              "male" if gender[i] == 1 else "female"),
            )

    if is_streaming:
        output = frame[:, :, ::-1]
    else:
        _, buf = cv2.imencode('.jpg', frame)
        output = buf.tostring()
    return {
        'output': output,
        'bboxes': bboxes.tolist(),
        'probabilities': probabilities.tolist(),
        'age': age,
        'gender': gender,
    }
def process(inputs, ctx, **kwargs):
    original, is_video = helpers.load_image(inputs, 'input')
    image = original.copy()

    face_driver = ctx.drivers[0]
    age_driver = ctx.drivers[1]
    boxes = get_boxes(face_driver, image)

    font = cv2.FONT_HERSHEY_SIMPLEX
    for box in boxes:
        box = box.astype(int)
        img = crop_by_box(image, box, margin=0.4)
        img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_LINEAR)
        prepared = img.transpose([2, 0, 1])
        prepared = np.expand_dims(prepared, axis=0)
        # np.float was removed in recent NumPy; plain float is the same dtype.
        outputs = age_driver.predict({'input.1': prepared.astype(float)})
        output = special.softmax(list(outputs.values())[0])
        # Expected age = sum of bin centers weighted by bin probabilities.
        predicted_ages = (output * idx_tensor).sum(axis=-1)

        cv2.rectangle(
            image, (box[0], box[1]), (box[2], box[3]),
            color=(250, 0, 0), thickness=2, lineType=cv2.LINE_AA,
        )
        cv2.putText(image, str(int(round(predicted_ages[0]))), (box[0], box[1]),
                    font, 1.5, color=(0, 0, 250), thickness=2, lineType=cv2.LINE_AA)

    if is_video:
        output = image
    else:
        _, buf = cv2.imencode('.jpg', image[:, :, ::-1])
        output = buf.tostring()
    return {'output': output}
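# idx_tensor above is the vector of age-bin centers: softmax over the
# classifier logits dotted with the bins yields the expected age
# (DEX-style regression-by-classification). A minimal sketch, assuming
# 101 one-year bins -- the real bin count belongs to the trained model.
idx_tensor_sketch = np.arange(101, dtype=np.float32)  # bins 0..100 years

def expected_age_sketch(logits):
    probs = special.softmax(logits, axis=-1)
    return (probs * idx_tensor_sketch).sum(axis=-1)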
def process(inputs, ctx, **kwargs): LOG.info("process incoming") frame, is_streaming = helpers.load_image(inputs, 'input') if frame is None: raise RuntimeError("Unable to read frame") LOG.info("input frame size: {}".format(frame.shape)) network = ctx.global_ctx[0] styles = ctx.global_ctx[1] scale = 1.0 frame = detect_align(frame) LOG.info("aligned frame size: {}".format(frame.shape)) if frame is None: output = np.zeros((256, 256, 1), dtype="uint8") LOG.info("no aligned image") else: frame = (frame - 127.5) / 128.0 LOG.info("aligned image exists") images = np.tile(frame[None], [1, 1, 1, 1]) scales = scale * np.ones(1) output = network.generate_BA(images, scales, 16, styles=styles) output = 0.5*output + 0.5 output = (output[0] * 256).astype('uint8') LOG.info("output frame size: {}".format(output.shape)) if not is_streaming: _, buf = cv2.imencode('.jpg', output[:, :, ::-1]) output = buf.tostring() return {'output': output}
def process(inputs, ctx, **kwargs):
    original, is_video = helpers.load_image(inputs, 'input')
    image = original.copy()

    face_driver = ctx.drivers[0]
    facecheck_driver = ctx.drivers[1]
    input_name = list(facecheck_driver.inputs.keys())[0]

    boxes = get_boxes(face_driver, image)
    for box in boxes:
        box = box.astype(int)
        img = crop_by_box(image, box)
        prepared = prepare(img)
        prepared = np.expand_dims(prepared, axis=0)
        outputs = facecheck_driver.predict({input_name: prepared})
        cv2.rectangle(
            image, (box[0], box[1]), (box[2], box[3]),
            (0, 250, 0), thickness=2, lineType=cv2.LINE_AA,
        )
        cv2.putText(
            image, str(outputs['dense'][0][0]), (box[0], box[1] - 2),
            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 250, 0), 1, cv2.LINE_AA
        )

    if is_video:
        output = image
    else:
        _, buf = cv2.imencode('.jpg', image[:, :, ::-1])
        output = buf.tostring()
    return {'output': output}
def process(inputs, ctx, **kwargs):
    image, is_video = helpers.load_image(inputs, 'input')
    fa, torch_model = ctx.global_ctx
    face_driver = ctx.drivers[0]

    boxes = common.get_boxes(face_driver, image)
    if len(boxes) == 1:
        for box in boxes:
            crop_box = common.get_crop_box(image, box, margin=PARAMS['margin'])
            cropped = common.crop_by_box(image, box, margin=PARAMS['margin'])
            resized = cv2.resize(cropped, (PARAMS['image_size'], PARAMS['image_size']),
                                 interpolation=cv2.INTER_AREA)

            landmarks = fa.get_landmarks_from_image(image, [crop_box])[0]
            landmarks -= [crop_box[0], crop_box[1]]
            x_factor, y_factor = (
                (crop_box[2] - crop_box[0]) / PARAMS['image_size'],
                (crop_box[3] - crop_box[1]) / PARAMS['image_size'])
            landmarks /= [x_factor, y_factor]
            landmark_img = video_extraction_conversion.draw_landmark(
                landmarks, size=(PARAMS['image_size'], PARAMS['image_size'], 3))

            norm_image = torch.from_numpy(np.expand_dims(
                resized, axis=0)).type(dtype=torch.float)  # K,256,256,3
            norm_mark = torch.from_numpy(np.expand_dims(
                landmark_img, axis=0)).type(dtype=torch.float)  # K,256,256,3
            norm_image = (norm_image.permute([0, 3, 1, 2]) - 127.5) / 127.5
            norm_mark = (norm_mark.permute([0, 3, 1, 2]) - 127.5) / 127.5  # K,3,256,256

            t = time.time()
            with torch.no_grad():
                outputs = torch_model(PARAMS['face'], norm_mark)
            LOG.info(f'model time: {time.time() - t}')
            t = time.time()

            output = get_picture(outputs)
            # cv2.imwrite(
            #     'VV.jpg', np.hstack([
            #         get_picture(outputs)[:, :, ::-1],
            #         get_picture(norm_mark)[:, :, ::-1],
            #         get_picture(norm_image)[:, :, ::-1],
            #         get_picture(PARAMS['face'])[:, :, ::-1]
            #     ])
            # )
            # import sys; sys.exit(1)
            output = cv2.resize(
                output, (PARAMS['face_shape'][1], PARAMS['face_shape'][0]),
                interpolation=cv2.INTER_AREA)
            # LOG.info(f'get and resize: {time.time() - t}')
            # t = time.time()

            face_box = PARAMS['face_box']
            image = PARAMS['full'].copy()
            image[face_box[1]:face_box[3], face_box[0]:face_box[2]] = output
            # mask = np.ones_like(output) * 255
            # center_box = ((face_box[2] + face_box[0]) // 2, (face_box[3] + face_box[1]) // 2)
            # image = cv2.seamlessClone(output, PARAMS['full'], mask, center_box, cv2.NORMAL_CLONE)
            # LOG.info(f'seamless clone: {time.time() - t}')
    else:
        image = PARAMS['full']

    if is_video:
        output = image
    else:
        _, buf = cv2.imencode('.jpg', image[:, :, ::-1])
        output = buf.tostring()
    return {'output': output}
def process(self, inputs, ctx, **kwargs):
    alpha = int(helpers.get_param(inputs, 'alpha', self._alpha))
    original, is_video = helpers.load_image(inputs, 'image')
    if self._portret:
        original = np.transpose(original, (1, 0, 2))

    output_view = helpers.get_param(inputs, 'output_view', self._output_view)
    if output_view == 'horizontal' or output_view == 'h':
        x0 = int(original.shape[1] / 4)
        x1 = int(original.shape[1] / 2) + x0
        original = original[:, x0:x1, :]
    if output_view == 'vertical' or output_view == 'v':
        y0 = int(original.shape[0] / 4)
        y1 = int(original.shape[0] / 2) + y0
        original = original[y0:y1, :, :]

    boxes = self.face_detector.bboxes(original)
    # Keep only the largest detected face.
    boxes.sort(key=lambda box: abs((box[3] - box[1]) * (box[2] - box[0])), reverse=True)
    box = None
    if len(boxes) > 0:
        box = boxes[0].astype(int)
        if box[3] - box[1] < 1 or box[2] - box[0] < 1:
            box = None

    image = original.copy()
    if box is not None and self.style_model is not None:
        inference_img, output, box = self.style_model.process(ctx, image, box)
        alpha = np.clip(alpha, 1, 255)
        if srt_2_bool(helpers.get_param(inputs, 'color_correction', self._color_correction)):
            output = color_tranfer(output, inference_img)

        if helpers.get_param(inputs, 'transfer_mode', self._transfer_mode) == 'direct':
            output = (inference_img * self._mask_orig +
                      output * self._mask_face).astype(np.uint8)
            output = cv2.resize(output, (box[2] - box[0], box[3] - box[1]),
                                interpolation=cv2.INTER_LINEAR)
            image[box[1]:box[3], box[0]:box[2], :] = output
        else:
            output = cv2.resize(np.array(output), (box[2] - box[0], box[3] - box[1]),
                                interpolation=cv2.INTER_AREA)
            if helpers.get_param(inputs, 'transfer_mode', self._transfer_mode) == 'box_margin':
                xmin = max(0, box[0] - 50)
                wleft = box[0] - xmin
                ymin = max(0, box[1] - 50)
                wup = box[1] - ymin
                xmax = min(image.shape[1], box[2])
                ymax = min(image.shape[0], box[3])
                out = image[ymin:ymax, xmin:xmax, :]
                center = (wleft + output.shape[1] // 2, wup + output.shape[0] // 2)
                samples = int(helpers.get_param(inputs, 'samples', 0))
                if samples > 1:
                    results = {
                        's_0': cv2.imencode('.jpg', image[:, :, ::-1])[1].tostring()
                    }
                    step_alpha = 255.0 / (samples - 1)
                    for si in range(samples - 1):
                        alpha = int(step_alpha * (si + 1))
                        s_image = image.copy()
                        # out_copy = out.copy()
                        out = cv2.seamlessClone(
                            output, out, np.ones_like(output) * alpha,
                            center, cv2.NORMAL_CLONE)
                        s_image[ymin:ymax, xmin:xmax, :] = out
                        results[f's_{si + 1}'] = cv2.imencode(
                            '.jpg', s_image[:, :, ::-1])[1].tostring()
                    # Use the last (fully blended) sample; the original code
                    # hard-coded 's_4', which assumed samples == 5.
                    results['output'] = results[f's_{samples - 1}']
                    return results
                else:
                    out = cv2.seamlessClone(output, out, np.ones_like(output) * alpha,
                                            center, cv2.NORMAL_CLONE)
                    image[ymin:ymax, xmin:xmax, :] = out
            else:
                center = (box[0] + output.shape[1] // 2, box[1] + output.shape[0] // 2)
                if not (center[0] >= output.shape[1] or
                        box[1] + output.shape[0] // 2 >= output.shape[0]):
                    image = cv2.seamlessClone(output, image, np.ones_like(output) * alpha,
                                              center, cv2.NORMAL_CLONE)
        if len(box) > 0:
            if srt_2_bool(helpers.get_param(inputs, "draw_box", self._draw_box)):
                image = cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]),
                                      (0, 255, 0), 2, 8)

    result = {}
    image = self.maybe_mirror(image)
    if output_view == 'horizontal' or output_view == 'h' or output_view == 'fh':
        image = np.hstack((self.maybe_mirror(original), image))
    elif output_view == 'vertical' or output_view == 'v':
        image = np.vstack((self.maybe_mirror(original), image))
    image = self.add_overlay(image)

    image = image[:, :, ::-1]
    if not is_video:
        image_bytes = cv2.imencode('.jpg', image)[1].tostring()
    else:
        image_bytes = image[:, :, ::-1]
    h = 480
    w = int(480 * image.shape[1] / image.shape[0])
    result['status'] = cv2.resize(image, (w, h))
    result['output'] = image_bytes
    return result
def process(inputs, ct_x, **kwargs):
    original_image, is_video = load_image(inputs, 'inputs')
    if original_image is None:
        raise RuntimeError(
            'Missing "inputs" key in inputs. Provide an image in "inputs" key')

    def _return(result):
        encoding = ''
        if not is_video:
            if result.shape[2] == 3:
                result = result[:, :, ::-1]
                result = cv2.imencode('.jpg', result)[1].tostring()
                encoding = 'jpeg'
            else:
                result = cv2.imencode('.png', result)[1].tostring()
                encoding = 'png'
        return {'output': result, 'encoding': encoding}

    ratio = 1.0
    w = float(original_image.shape[1])
    h = float(original_image.shape[0])
    if w > h:
        if w > 1024:
            ratio = w / 1024.0
    else:
        if h > 1024:
            ratio = h / 1024.0
    if ratio > 1:
        image = cv2.resize(original_image, (int(w / ratio), int(h / ratio)))
    else:
        image = original_image
    if not boolean_string(get_param(inputs, 'return_origin_size', False)):
        original_image = image

    try:
        area_threshold = int(get_param(inputs, 'area_threshold', 0))
    except Exception:
        area_threshold = 0
    area_threshold = limit(area_threshold, 0, 100, 0)
    try:
        max_objects = int(get_param(inputs, 'max_objects', 1))
    except Exception:
        max_objects = 1
    max_objects = limit(max_objects, 1, 10, 1)
    try:
        pixel_threshold = int(float(get_param(inputs, 'pixel_threshold', 0.5)) * 255)
    except Exception:
        pixel_threshold = int(0.5 * 255)
    pixel_threshold = limit(pixel_threshold, 1, 254, int(0.5 * 255))
    object_classes = [
        obj_classes.get(get_param(inputs, 'object_class', 'Person'), 1)
    ]
    # One of: Remove background, Mask, Blur
    effect = get_param(inputs, 'effect', 'Remove background')
    try:
        blur_radius = int(get_param(inputs, 'blur_radius', 2))
    except Exception:
        blur_radius = 2
    blur_radius = limit(blur_radius, 1, 10, 2)

    outputs = ct_x.drivers[0].predict({'inputs': np.expand_dims(image, axis=0)})
    num_detection = int(outputs['num_detections'][0])
    if num_detection < 1:
        return _return(original_image)

    process_width = image.shape[1]
    process_height = image.shape[0]
    image_area = process_width * process_height

    detection_boxes = outputs["detection_boxes"][0][:num_detection]
    detection_boxes = detection_boxes * [
        process_height, process_width, process_height, process_width
    ]
    detection_boxes = detection_boxes.astype(np.int32)
    detection_classes = outputs["detection_classes"][0][:num_detection]
    detection_masks = outputs["detection_masks"][0][:num_detection]

    masks = []
    for i in range(num_detection):
        if int(detection_classes[i]) not in object_classes:
            continue
        box = detection_boxes[i]
        mask_image = cv2.resize(detection_masks[i],
                                (box[3] - box[1], box[2] - box[0]),
                                interpolation=cv2.INTER_LINEAR)
        left = max(0, box[1] - 50)
        right = min(process_width, box[3] + 50)
        upper = max(0, box[0] - 50)
        lower = min(process_height, box[2] + 50)
        box_mask = np.pad(mask_image, ((box[0] - upper, lower - box[2]),
                                       (box[1] - left, right - box[3])), 'constant')
        area = int(np.sum(np.greater_equal(box_mask, 0.5).astype(np.int32)))
        if area * 100 / image_area < area_threshold:
            continue
        masks.append((area, box_mask, [upper, left, lower, right]))

    if len(masks) < 1:
        return _return(original_image)

    # Merge up to max_objects largest masks and track their joint bounding box.
    masks = sorted(masks, key=lambda row: -row[0])
    total_mask = np.zeros((process_height, process_width), np.float32)
    min_left = process_width
    min_upper = process_height
    max_right = 0
    max_lower = 0
    for i in range(min(len(masks), max_objects)):
        pre_mask = masks[i][1]
        box = masks[i][2]
        left = max(0, box[1])
        right = min(process_width, box[3])
        upper = max(0, box[0])
        lower = min(process_height, box[2])
        box_mask = np.pad(pre_mask, ((upper, process_height - lower),
                                     (left, process_width - right)), 'constant')
        total_mask = np.maximum(total_mask, box_mask)
        if left < min_left:
            min_left = left
        if right > max_right:
            max_right = right
        if upper < min_upper:
            min_upper = upper
        if lower > max_lower:
            max_lower = lower

    mask = np.uint8(total_mask[min_upper:max_lower, min_left:max_right] * 255)
    box = (min_upper, min_left, max_lower, max_right)
    if len(mask.shape) > 2:
        logging.warning('Mask shape is {}'.format(mask.shape))
        mask = mask[:, :, 0]

    # Refine the coarse detection mask with the matting model (driver 1).
    image = cv2.resize(image[box[0]:box[2], box[1]:box[3], :], (320, 320))
    mask = cv2.resize(mask, (320, 320))
    mask[np.less_equal(mask, pixel_threshold)] = 0
    mask[np.greater(mask, pixel_threshold)] = 255
    input_trimap = generate_trimap(mask)
    input_trimap = np.expand_dims(input_trimap.astype(np.float32), 2)
    image = image.astype(np.float32)
    input_image = image - g_mean
    outputs = ct_x.drivers[1].predict({
        'input': np.expand_dims(input_image, axis=0),
        'trimap': np.expand_dims(input_trimap, axis=0)
    })
    mask = outputs.get('mask', None)
    if mask is None:
        mask = outputs['output'][0] * 255
        mask = np.reshape(mask, (320, 320))
        mask = np.clip(mask, 0, 255)
        mask = mask.astype(np.uint8)
    mask = cv2.resize(mask, (box[3] - box[1], box[2] - box[0]))
    mask = mask.astype(np.float32) / 255
    mask = np.pad(mask, ((box[0], process_height - box[2]),
                         (box[1], process_width - box[3])), 'constant')
    if mask.shape != original_image.shape:
        mask = cv2.resize(mask, (original_image.shape[1], original_image.shape[0]))
    mask = cv2.GaussianBlur(mask, (21, 21), 11)

    if effect == 'Remove background':
        background = None
        if 'background_img' in inputs:
            background, _ = load_image(inputs, 'background_img')
        if background is None:
            back_name = get_param(inputs, 'background', None)
            if back_name is not None:
                background = backgrounds.get(back_name)
            else:
                if glob_background is not None:
                    background = glob_background
                else:
                    background = backgrounds.get('None')
        add_style = get_param(inputs, 'style', '')
        if len(add_style) > 0:
            image = apply_style(original_image, add_style).astype(np.float32)
        else:
            image = original_image.astype(np.float32)
        mask = np.expand_dims(mask, 2)
        if background is not None:
            image = image * mask
            background = cv2.resize(background, (image.shape[1], image.shape[0]))
            background = background.astype(np.float32)
            background = background * (1 - mask)
            image = background + image
            image = image.astype(np.uint8)
        else:
            if not is_video:
                mask = (mask * 255).astype(np.uint8)
                image = image[:, :, ::-1].astype(np.uint8)
                image = np.concatenate([image, mask], axis=2)
            else:
                image = image * mask
                image = image.astype(np.uint8)
    elif effect == "Mask":
        mask = mask * 255
        image = mask.astype(np.uint8)
    else:
        image = original_image.astype(np.float32)
        mask = np.expand_dims(mask, 2)
        foreground = mask * image
        radius = min(max(blur_radius, 2), 10)
        if radius % 2 == 0:
            # GaussianBlur requires an odd kernel size.
            radius += 1
        if effect == 'Grey':
            background = rgb2gray(original_image)
        else:
            background = cv2.GaussianBlur(original_image, (radius, radius), 10)
        background = (1.0 - mask) * background.astype(np.float32)
        image = foreground + background
        image = image.astype(np.uint8)
    return _return(image)
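# generate_trimap() above turns the hard 0/255 mask into the trimap the
# matting model expects: definite foreground, definite background, and an
# "unknown" band around the mask edge. A minimal morphological sketch; the
# kernel size and the 128 "unknown" value are assumptions.
def generate_trimap_sketch(mask, kernel_size=10):
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    dilated = cv2.dilate(mask, kernel)
    eroded = cv2.erode(mask, kernel)
    trimap = np.full_like(mask, 128)   # unknown band by default
    trimap[eroded >= 255] = 255        # certain foreground
    trimap[dilated <= 0] = 0           # certain background
    return trimap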
def process(inputs, ctx, **kwargs):
    img, is_video = helpers.load_image(inputs, 'image', rgb=False)
    img = ctx.global_ctx.process(img)
    if not is_video:
        img = cv2.imencode('.jpg', img)[1].tostring()
    return {'output': img}
def process(self, inputs, ctx, **kwargs):
    if self._configure_frame is not None:
        frame = self._configure_frame
        if frame is not None:
            h = 480
            w = int(480 * frame.shape[1] / frame.shape[0])
            return {'output': frame, 'status': cv2.resize(frame, (w, h))}

    if self._zoom > 0:
        cam = kwargs.get('metadata', {}).get('camera_vc', None)
        if cam is not None:
            LOG.info(
                f'Set camera CAP_PROP_ZOOM: '
                f'{self._zoom} {cam.set(cv2.CAP_PROP_ZOOM, self._zoom)}')
            self._zoom = -1

    alpha = int(self.get_param(inputs, 'alpha', self._alpha))
    original, is_video = helpers.load_image(inputs, 'input')
    if self._portret:
        original = np.transpose(original, (1, 0, 2))
    original_w = original.shape[1]
    original_h = original.shape[0]

    output_view = self.get_param(inputs, 'output_view', self._output_view)
    if output_view == 'horizontal' or output_view == 'h':
        x0 = int(original.shape[1] / 4)
        x1 = int(original.shape[1] / 2) + x0
        original = original[:, x0:x1, :]
    if output_view == 'vertical' or output_view == 'v':
        y0 = int(original.shape[0] / 4)
        y1 = int(original.shape[0] / 2) + y0
        original = original[y0:y1, :, :]

    boxes = self.face_detector.bboxes(original)
    # Keep only the largest detected face.
    boxes.sort(key=lambda box: abs((box[3] - box[1]) * (box[2] - box[0])), reverse=True)
    box = None
    if len(boxes) > 0:
        box = boxes[0].astype(int)
        if box[3] - box[1] < 1 or box[2] - box[0] < 1:
            box = None

    image = original.copy()
    if self._qr_code and box is None:
        qr_data = qr_decode(original)
        if len(qr_data) > 0 and len(qr_data[0].data):
            qr_data = qr_data[0]
            data = qr_data.data.decode()
            if len(data) > 0:
                # e.g. WIFI:S:tets;P:123456;T:WPA;;
                logging.info('Found BarCode: {}'.format(data))
                if 'WIFI:' in data and len(data) > 6:
                    self._configure_frame = self.message_frame(
                        original_h, original_w, "Start WIFI Configuration")
                    config_thread = threading.Thread(
                        target=self.wifi_configure,
                        args=[data, original_h, original_w],
                        daemon=True)
                    config_thread.start()

    if box is not None and self.style_model is not None:
        inference_img, output, box = self.style_model.process(ctx, image, box)
        alpha = np.clip(alpha, 1, 255)
        if srt_2_bool(self.get_param(inputs, 'color_correction', self._color_correction)):
            output = color_tranfer(output, inference_img)

        if self.get_param(inputs, 'transfer_mode', self._transfer_mode) == 'direct':
            output = (inference_img * self._mask_orig +
                      output * self._mask_face).astype(np.uint8)
            output = cv2.resize(output, (box[2] - box[0], box[3] - box[1]),
                                interpolation=cv2.INTER_LINEAR)
            image[box[1]:box[3], box[0]:box[2], :] = output
        else:
            output = cv2.resize(np.array(output), (box[2] - box[0], box[3] - box[1]),
                                interpolation=cv2.INTER_AREA)
            if self.get_param(inputs, 'transfer_mode', self._transfer_mode) == 'box_margin':
                xmin = max(0, box[0] - 50)
                wleft = box[0] - xmin
                ymin = max(0, box[1] - 50)
                wup = box[1] - ymin
                xmax = min(image.shape[1], box[2] + 50)
                ymax = min(image.shape[0], box[3] + 50)
                out = image[ymin:ymax, xmin:xmax, :]
                center = (wleft + output.shape[1] // 2, wup + output.shape[0] // 2)
                out = cv2.seamlessClone(output, out, np.ones_like(output) * alpha,
                                        center, cv2.NORMAL_CLONE)
                image[ymin:ymax, xmin:xmax, :] = out
            else:
                center = (box[0] + output.shape[1] // 2, box[1] + output.shape[0] // 2)
                if not (center[0] >= output.shape[1] or
                        box[1] + output.shape[0] // 2 >= output.shape[0]):
                    image = cv2.seamlessClone(output, image, np.ones_like(output) * alpha,
                                              center, cv2.NORMAL_CLONE)
        if len(box) > 0:
            if srt_2_bool(self.get_param(inputs, "draw_box", self._draw_box)):
                image = cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]),
                                      (0, 255, 0), 2, 8)

    # merge
    if self._background is not None:
        image = self._background.process(image)

    output_view = self.get_param(inputs, 'output_view', self._output_view)
    result = {}
    image = self.maybe_mirror(image)
    if output_view == 'horizontal' or output_view == 'h' or output_view == 'fh':
        image = np.hstack((self.maybe_mirror(original), image))
    elif output_view == 'vertical' or output_view == 'v':
        image = np.vstack((self.maybe_mirror(original), image))
    image = self.add_overlay(image)

    if not is_video:
        image = image[:, :, ::-1]
        image_bytes = cv2.imencode('.jpg', image)[1].tostring()
    else:
        image_bytes = image
    h = 480
    w = int(480 * image.shape[1] / image.shape[0])
    result['status'] = cv2.resize(image, (w, h))
    result['output'] = image_bytes
    return result