def recognize_from_image():
    """Classify the image at ``args.input`` and print the reported classes.

    The image is read with OpenCV, passed through ``preprocess_image`` and
    given to an ``ailia.Classifier``. Inference runs five times, printing the
    wall-clock time of each run in milliseconds; afterwards every class the
    classifier reports is printed with its category, label and probability.
    """
    def now_ms():
        # Wall-clock time rounded to whole milliseconds.
        return int(round(time.time() * 1000))

    # prepare input data
    input_img = preprocess_image(cv2.imread(args.input, cv2.IMREAD_UNCHANGED))

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    classifier = ailia.Classifier(
        MODEL_PATH,
        WEIGHT_PATH,
        env_id=env_id,
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        range=IMAGE_RANGE,
    )

    # compute execution time
    for _ in range(5):
        began = now_ms()
        classifier.compute(input_img, MAX_CLASS_COUNT)
        finished = now_ms()
        print(f'ailia processing time {finished - began} ms')

    # get result
    n_classes = classifier.get_class_count()
    print(f'class_count: {n_classes}')
    for rank in range(n_classes):
        print(f"+ idx={rank}")
        entry = classifier.get_class(rank)
        label = resnet50_labels.imagenet_category[entry.category]
        print(f" category={entry.category} " + f"[ {label} ]")
        print(f" prob={entry.prob}")
    print('Script finished successfully.')
def recognize_from_image():
    """Run the network on ``args.input`` and plot every detection.

    The image is loaded twice through ``load_image``: once with default
    options (handed to ``plot_detections``) and once normalized with
    ``normalize_type='127.5'`` as network input. With ``args.benchmark`` set,
    inference is repeated five times and each run is timed.
    """
    target_size = (IMAGE_HEIGHT, IMAGE_WIDTH)

    # prepare input data
    org_img = load_image(args.input, target_size)
    input_data = load_image(
        args.input,
        target_size,
        normalize_type='127.5',
        gen_input_ailia=True,
    )

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    # inference
    print('Start inference...')
    if not args.benchmark:
        preds_ailia = net.predict([input_data])
    else:
        print('BENCHMARK mode')
        for _ in range(5):
            t0 = int(round(time.time() * 1000))
            preds_ailia = net.predict([input_data])
            t1 = int(round(time.time() * 1000))
            print(f'\tailia processing time {t1 - t0} ms')

    # postprocessing, then draw each detection (saved to args.savepath)
    for found in postprocess(preds_ailia):
        plot_detections(org_img, found, save_image_path=args.savepath)
    print('Script finished successfully.')
def extract_feature_vec_from_image():
    """Print the ``encode1`` blob of the feature-extraction net for one image.

    The image at ``args.input`` is loaded without normalization, converted by
    ``prepare_input_data`` and fed to the network under the input name
    ``'data'``. The prediction output itself is discarded; the blob named
    ``'encode1'`` is read back afterwards and printed flattened to 2-D.
    """
    # prepare input data
    raw_img = load_image(
        args.input,
        (IMAGE_HEIGHT, IMAGE_WIDTH),
        normalize_type='None',
    )
    input_data = prepare_input_data(raw_img)

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    fe_net = ailia.Net(FE_MODEL_PATH, FE_WEIGHT_PATH, env_id=env_id)
    fe_net.set_input_shape(input_data.shape)
    feed = {'data': input_data}

    # inference
    print('Start inference...')
    if not args.benchmark:
        fe_net.predict(feed)[0]
    else:
        print('BENCHMARK mode')
        for _ in range(5):
            t0 = int(round(time.time() * 1000))
            fe_net.predict(feed)[0]
            t1 = int(round(time.time() * 1000))
            print(f'\tailia processing time {t1 - t0} ms')

    # Extracting the output of a specific layer
    blob_idx = fe_net.find_blob_index_by_name('encode1')
    preds_ailia = fe_net.get_blob_data(blob_idx)
    print(preds_ailia.reshape(preds_ailia.shape[0], -1))
    print('Script finished successfully.')
def recognize_from_image():
    """Run the network on ``args.input`` and write the result image.

    An ImageNet-normalized copy of the image is the network input; a second,
    un-normalized copy is passed with the predictions to ``postprocess``,
    whose result is written to ``args.savepath``. With ``args.benchmark`` set,
    inference is repeated five times and each run is timed.
    """
    size = (IMAGE_HEIGHT, IMAGE_WIDTH)

    # prepare input data
    input_data = load_image(
        args.input,
        size,
        normalize_type='ImageNet',
        gen_input_ailia=True,
    )
    src_img = load_image(args.input, size, normalize_type='None')

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    # inference
    print('Start inference...')
    if not args.benchmark:
        preds_ailia = net.predict(input_data)
    else:
        print('BENCHMARK mode')
        for _ in range(5):
            t0 = int(round(time.time() * 1000))
            preds_ailia = net.predict(input_data)
            t1 = int(round(time.time() * 1000))
            print(f'\tailia processing time {t1 - t0} ms')

    # postprocessing
    cv2.imwrite(args.savepath, postprocess(src_img, preds_ailia))
    print('Script finished successfully.')
def recognize_from_image():
    """Predict a map for ``args.input`` and save the ``transfer`` result.

    The network input is the ``load_image`` output with a leading batch axis
    added. Inference runs five times with per-run timing; the last prediction
    is reshaped to ``(IMAGE_HEIGHT, IMAGE_WIDTH)``, combined with the original
    OpenCV image by ``transfer`` and written to ``args.savepath``.
    """
    # prepare input data
    src_img = cv2.imread(args.input)
    loaded = load_image(args.input, (IMAGE_HEIGHT, IMAGE_WIDTH))
    input_data = loaded[np.newaxis, :, :, :]  # add batch axis

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)
    net.set_input_shape(input_data.shape)

    # compute execution time
    for _ in range(5):
        t0 = int(round(time.time() * 1000))
        preds_ailia = net.predict(input_data)
        t1 = int(round(time.time() * 1000))
        print(f'ailia processing time {t1 - t0} ms')

    # postprocessing
    pred_map = preds_ailia.reshape((IMAGE_HEIGHT, IMAGE_WIDTH))
    cv2.imwrite(args.savepath, transfer(src_img, pred_map))
def compare_images():
    """Compare the first two images in ``args.inputs`` by ``conv5_3`` features.

    Each input is preprocessed and run through the network (timed once); the
    ``conv5_3`` blob is collected as its feature. The ``distance`` between the
    first two features is printed and compared against ``THRESHOLD``.
    """
    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    collected = []

    # prepare input data
    for path in args.inputs:
        input_data = load_and_preprocess(path)

        # compute execution time
        t0 = int(round(time.time() * 1000))
        net.predict(input_data)
        t1 = int(round(time.time() * 1000))
        print(f'ailia processing time {t1 - t0} ms')

        # the feature is an intermediate blob, not the network output
        layer_idx = net.find_blob_index_by_name('conv5_3')
        collected.append(net.get_blob_data(layer_idx))

    # get result
    name_a = os.path.basename(args.inputs[0])
    name_b = os.path.basename(args.inputs[1])
    dist = distance(collected[0], collected[1])
    print(f'{name_a} vs {name_b} = {dist}')

    verdict = 'Same person' if dist < THRESHOLD else 'Not same person'
    print(verdict)
    print('Script finished successfully.')
def recognize_from_image():
    """Detect on ``args.input`` and save the annotated image.

    The image is channel-reversed, resized/padded by ``but.resize_pad``,
    scaled to [0, 1] and rearranged to a batched channel-first array. The
    predictions are post-processed and denormalized with the scale and pad
    returned by ``but.resize_pad``, drawn on the source image by
    ``display_result``, and the image is written to ``args.savepath``. In
    benchmark mode the timed span covers prediction and both post-processing
    steps.
    """
    # prepare input data
    src_img = cv2.imread(args.input)
    img256, _, scale, pad = but.resize_pad(src_img[:, :, ::-1])
    scaled = img256.astype('float32') / 255.
    input_data = np.expand_dims(np.moveaxis(scaled, -1, 0), 0)

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    def detect():
        # Full pipeline for one run: predict, decode, map back to the source.
        preds = net.predict([input_data])
        normalized = but.postprocess(preds)[0]
        return but.denormalize_detections(normalized, scale, pad)

    # inference
    print('Start inference...')
    if not args.benchmark:
        detections = detect()
    else:
        print('BENCHMARK mode')
        for _ in range(5):
            t0 = int(round(time.time() * 1000))
            detections = detect()
            t1 = int(round(time.time() * 1000))
            print(f'\tailia processing time {t1 - t0} ms')

    # postprocessing
    display_result(src_img, detections)
    cv2.imwrite(args.savepath, src_img)
    print('Script finished successfully.')
def recognize_from_image():
    """Run the YOLOv1 ``ailia.Detector`` on ``args.input`` and save the plot.

    The detector is configured for ``len(VOC_CATEGORY)`` categories. With
    ``args.benchmark`` set, detection runs five times with per-run timing;
    otherwise once. ``plot_results`` renders the result, which is written to
    ``args.savepath``.
    """
    # prepare input data
    img = load_image(args.input)
    print(f'input image shape: {img.shape}')

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    detector_options = dict(
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_S_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_YOLOV1,
        env_id=env_id,
    )
    detector = ailia.Detector(
        MODEL_PATH, WEIGHT_PATH, len(VOC_CATEGORY), **detector_options
    )

    # inference
    print('Start inference...')
    if not args.benchmark:
        detector.compute(img, THRESHOLD, IOU)
    else:
        print('BENCHMARK mode')
        for _ in range(5):
            t0 = int(round(time.time() * 1000))
            detector.compute(img, THRESHOLD, IOU)
            t1 = int(round(time.time() * 1000))
            print(f'\tailia processing time {t1 - t0} ms')

    # plot result
    cv2.imwrite(args.savepath, plot_results(detector, img, VOC_CATEGORY))
    print('Script finished successfully.')
def recognize_from_image():
    """Predict on ``args.input`` and save the figure built from the result.

    The image is loaded with ``rgb=False`` as network input. The first
    element of the prediction is passed, together with the input, to
    ``gen_img_from_predsailia``; the returned figure is saved to
    ``args.savepath``.
    """
    # prepare input data
    net_input = load_image(
        args.input,
        (IMAGE_HEIGHT, IMAGE_WIDTH),
        rgb=False,
        gen_input_ailia=True,
    )

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    # inference
    print('Start inference...')
    if not args.benchmark:
        preds_ailia = net.predict(net_input)[0]
    else:
        print('BENCHMARK mode')
        for _ in range(5):
            t0 = int(round(time.time() * 1000))
            preds_ailia = net.predict(net_input)[0]
            t1 = int(round(time.time() * 1000))
            print(f'\tailia processing time {t1 - t0} ms')

    # postprocess
    figure = gen_img_from_predsailia(net_input, preds_ailia)
    figure.savefig(args.savepath)
    print('Script finished successfully.')
def recognize_from_video():
    """Predict the rotation angle of each video frame and visualize it.

    Frames come from the webcam when ``args.video == '0'``, otherwise from
    the file ``args.video``. Each frame is resized, converted to RGB and,
    when ``args.apply_rotate`` is set, rotated by a random angle in
    [0, 360). The predicted angle is the argmax of the network output.

    The loop stops when 'q' is pressed or when no more frames can be read,
    so a video file terminates at its end instead of spinning forever on
    failed reads.
    """
    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)
    net.set_input_shape((1, IMAGE_HEIGHT, IMAGE_WIDTH, 3))

    if args.video == '0':
        print('[INFO] Webcam mode is activated')
        capture = cv2.VideoCapture(0)
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    elif check_file_existance(args.video):
        # NOTE(review): `capture` stays unbound if this check returns a falsy
        # value; presumably the helper exits on a missing file - confirm.
        capture = cv2.VideoCapture(args.video)

    fig = create_figure()
    tight_layout = True

    while True:
        ret, frame = capture.read()
        # Stop on 'q' or at end of stream (read() reports failure).
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        input_image, resized_img = adjust_frame_size(
            frame, IMAGE_HEIGHT, IMAGE_WIDTH
        )
        resized_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB)

        if args.apply_rotate:
            rotation_angle = np.random.randint(360)
            rotated_img = generate_rotated_image(
                resized_img,
                rotation_angle,
                size=(IMAGE_HEIGHT, IMAGE_WIDTH),
                crop_center=True,
                crop_largest_rect=True
            )
        else:
            rotation_angle = 0
            rotated_img = resized_img
        input_data = rotated_img.reshape((1, IMAGE_HEIGHT, IMAGE_WIDTH, 3))

        # inference
        preds_ailia = net.predict(input_data)

        # visualize
        predicted_angle = np.argmax(preds_ailia, axis=1)[0]
        plot_handle = visualize(
            fig, rotated_img, rotation_angle, predicted_angle, tight_layout
        )
        plot_handle.pause(.01)
        # only the first visualize() call receives tight_layout=True
        tight_layout = False

    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')
def recognize_from_image():
    """Run the network patch by patch over ``args.input`` and save the result.

    The image is preprocessed, split into a grid of patches by
    ``padCropImg`` and scaled to [0, 1]. Each patch is normalized to
    [-1, 1], predicted, clipped to [0, 1] and stored as 0-255 values. The
    patch grid is recomposed with ``composePatch``, post-processed and saved
    to ``args.savepath``.

    With ``args.benchmark`` set the whole patch sweep is repeated five times
    and each sweep is timed. The sweep lives in one local helper instead of
    being duplicated per branch, and the unused timing of the non-benchmark
    branch is dropped.
    """
    # prepare input data
    img = io.imread(args.input)
    img = preProcess(img)
    input_data = padCropImg(img)
    input_data = input_data.astype(np.float32) / 255.0
    ynum = input_data.shape[0]
    xnum = input_data.shape[1]
    preds_ailia = np.zeros((ynum, xnum, 128, 128, 3), dtype=np.float32)

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    def predict_all_patches():
        # Fill preds_ailia with the network output of every grid patch.
        for j in range(ynum):
            for i in range(xnum):
                patch = (input_data[j, i] - 0.5) / 0.5
                # HWC -> CHW, then add the batch axis
                patch = patch.transpose((2, 0, 1))[np.newaxis, :, :, :]
                out = net.predict(patch)
                # back to HWC and drop the batch axis
                out = out.transpose((0, 2, 3, 1))[0]
                preds_ailia[j, i] = (np.clip(out, 0, 1) * 255).astype(np.uint8)

    # inference
    print('Start inference...')
    if args.benchmark:
        print('BENCHMARK mode')
        for _ in range(5):
            start = int(round(time.time() * 1000))
            predict_all_patches()
            end = int(round(time.time() * 1000))
            print(f'\tailia processing time {end - start} ms')
    else:
        predict_all_patches()

    # postprocessing
    resImg = composePatch(preds_ailia)
    resImg = postProcess(resImg)
    resImg.save(args.savepath)
    print('Script finished successfully.')
def recognize_from_image_tiling():
    """Process ``args.input`` tile by tile and save the stitched output.

    The image is zero-padded up to a multiple of the network input size,
    split into ``IMAGE_HEIGHT`` x ``IMAGE_WIDTH`` tiles, and each tile's
    prediction is written into the matching ``OUTPUT_HEIGHT`` x
    ``OUTPUT_WIDTH`` region of the output canvas. The canvas is cropped to
    the scaled original size and written to ``args.savepath``.
    """
    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    # padding input image
    img = cv2.imread(args.input)
    h, w = img.shape[0], img.shape[1]

    # round each side up to a whole number of tiles
    padding_w = int((w + IMAGE_WIDTH - 1) / IMAGE_WIDTH) * IMAGE_WIDTH
    padding_h = int((h + IMAGE_HEIGHT - 1) / IMAGE_HEIGHT) * IMAGE_HEIGHT
    scale = int(OUTPUT_HEIGHT / IMAGE_HEIGHT)
    output_w = w * scale
    output_h = h * scale
    print(f'input image : {h}x{w}')
    print(f'output image : {output_w}x{output_h}')

    # BGR uint8 HWC -> RGB [0, 1] NCHW
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0
    batched = rgb.transpose(2, 0, 1)[np.newaxis, :, :, :]

    pad_img = np.zeros((1, 3, padding_h, padding_w))
    pad_img[:, :, 0:h, 0:w] = batched
    output_pad_img = np.zeros((1, 3, padding_h * scale, padding_w * scale))

    tile_x = int(padding_w / IMAGE_WIDTH)
    tile_y = int(padding_h / IMAGE_HEIGHT)

    # Inference
    start = int(round(time.time() * 1000))
    for y in range(tile_y):
        in_rows = slice(y * IMAGE_HEIGHT, (y + 1) * IMAGE_HEIGHT)
        out_rows = slice(y * OUTPUT_HEIGHT, (y + 1) * OUTPUT_HEIGHT)
        for x in range(tile_x):
            in_cols = slice(x * IMAGE_WIDTH, (x + 1) * IMAGE_WIDTH)
            out_cols = slice(x * OUTPUT_WIDTH, (x + 1) * OUTPUT_WIDTH)
            tile_out = net.predict(pad_img[:, :, in_rows, in_cols])
            output_pad_img[:, :, out_rows, out_cols] = tile_out
    end = int(round(time.time() * 1000))
    print(f'ailia processing time {end - start} ms')

    # Postprocessing
    output_img = output_pad_img[0, :, :output_h, :output_w]
    output_img = output_img.transpose(1, 2, 0).astype(np.float32)
    output_img = cv2.cvtColor(output_img, cv2.COLOR_RGB2BGR)
    cv2.imwrite(args.savepath, output_img * 255)
    print('Script finished successfully.')
def recognize_from_video():
    """Run clip-level inference over a sliding window of video frames.

    The input blob holds ``args.duration`` frames. The first
    ``args.duration - 1`` slots are pre-filled; then each new frame is
    written to the last slot, the blob is predicted (through ailia when
    ``args.ailia`` is set, otherwise through ``session``), the result is
    printed, the oldest queued frame is previewed and the window is shifted
    left by one frame.

    The loop stops when 'q' is pressed or a frame cannot be read. Previously
    a failed read was retried with ``continue``, which never advances the
    frame counter and therefore hung at the end of a video file. The blob is
    zero-initialized so slots that a short stream never fills are
    deterministic rather than uninitialized memory.
    """
    if args.video == '0':
        print('[INFO] Webcam mode is activated')
        capture = cv2.VideoCapture(0)
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    elif check_file_existance(args.video):
        # NOTE(review): `capture` stays unbound if this check returns a falsy
        # value; presumably the helper exits on a missing file - confirm.
        capture = cv2.VideoCapture(args.video)

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)
    net.set_input_shape((1, 3, args.duration, IMAGE_HEIGHT, IMAGE_WIDTH))

    # prepare input data
    original_queue = deque([])
    input_blob = np.zeros((1, 3, args.duration, IMAGE_HEIGHT, IMAGE_WIDTH))
    for i in range(args.duration - 1):
        ret, frame = capture.read()
        if not ret:
            continue
        original_queue.append(frame)
        input_blob[0, :, i, :, :] = convert_input_frame(frame)

    next_input_index = args.duration - 1
    # The loop condition below treats a frame count of 0 as "unbounded".
    input_frame_size = capture.get(cv2.CAP_PROP_FRAME_COUNT)

    while (next_input_index <= input_frame_size or input_frame_size == 0):
        ret, frame = capture.read()
        # Stop on 'q' or at end of stream (read() reports failure).
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        original_queue.append(frame)
        input_blob[0, :, args.duration - 1, :, :] = convert_input_frame(frame)

        if args.ailia:
            result = net.predict(input_blob)
        else:
            # NOTE(review): session / input_name / output_name are not
            # defined in this function; presumably module-level - confirm.
            result = session.run(
                [output_name],
                {input_name: input_blob.astype(np.float32)})[0]
        print_mars_result(result)

        preview_img = original_queue.popleft()
        cv2.imshow('preview', preview_img)

        # shift the window left by one frame
        for i in range(args.duration - 1):
            input_blob[0, :, i, :, :] = input_blob[0, :, i + 1, :, :]
        next_input_index += 1

    capture.release()
    print('Script finished successfully.')
def estimate_from_video():
    """Estimate a count per video frame and show a density heatmap.

    For every frame the network output is summed to obtain the estimated
    count, scaled so its maximum becomes 255, resized to the input image
    size and colorized; the heatmap is shown next to the input image.

    The loop stops when 'q' is pressed or when no more frames can be read,
    so a video file terminates at its end instead of spinning forever on
    failed reads.
    """
    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(env_id)
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    if args.video == '0':
        print('[INFO] Webcam mode is activated')
        capture = cv2.VideoCapture(0)
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    elif check_file_existance(args.video):
        # NOTE(review): `capture` stays unbound if this check returns a falsy
        # value; presumably the helper exits on a missing file - confirm.
        capture = cv2.VideoCapture(args.video)

    while True:
        ret, frame = capture.read()
        # Stop on 'q' or at end of stream (read() reports failure).
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        input_image, input_data = preprocess_frame(
            frame, HEIGHT, WIDTH, data_rgb=False, normalize_type='None'
        )

        # inference
        preds_ailia = net.predict(input_data)

        # estimated crowd count
        et_count = int(np.sum(preds_ailia))

        # density map
        density_map = (255 * preds_ailia / np.max(preds_ailia))[0][0]
        density_map = cv2.resize(
            density_map, (input_image.shape[1], input_image.shape[0])
        )
        heatmap = cv2.applyColorMap(
            density_map.astype(np.uint8), cv2.COLORMAP_JET
        )
        cv2.putText(
            heatmap,
            f'Est Count: {et_count}',
            (40, 440),  # position
            cv2.FONT_HERSHEY_SIMPLEX,  # font
            0.8,  # fontscale
            (255, 255, 255),  # color
            2  # thickness
        )
        res_img = np.hstack((input_image, heatmap))
        cv2.imshow('frame', res_img)

    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')
def recognize_from_video():
    """Classify every video frame and print the matching ETL labels.

    Label lines are read from ``ETL_PATH`` (UTF-8) and indexed by the
    category reported by the classifier. The file is read inside a ``with``
    block so its handle is closed instead of leaked.

    The loop stops when 'q' is pressed or when no more frames can be read,
    so a video file terminates at its end instead of spinning forever on
    failed reads.
    """
    with codecs.open(ETL_PATH, 'r', 'utf-8') as label_file:
        etl_word = label_file.readlines()

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    classifier = ailia.Classifier(
        MODEL_PATH,
        WEIGHT_PATH,
        env_id=env_id,
        format=ailia.NETWORK_IMAGE_FORMAT_GRAY,
        range=ailia.NETWORK_IMAGE_RANGE_U_FP32
    )

    if args.video == '0':
        print('[INFO] Webcam mode is activated')
        capture = cv2.VideoCapture(0)
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    elif check_file_existance(args.video):
        # NOTE(review): `capture` stays unbound if this check returns a falsy
        # value; presumably the helper exits on a missing file - confirm.
        capture = cv2.VideoCapture(args.video)

    while True:
        ret, frame = capture.read()
        # Stop on 'q' or at end of stream (read() reports failure).
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        in_frame, frame = adjust_frame_size(frame, IMAGE_HEIGHT, IMAGE_WIDTH)
        frame = preprocess_image(frame)

        # inference
        classifier.compute(frame, MAX_CLASS_COUNT)

        # get result
        count = classifier.get_class_count()

        print('==============================================================')
        print(f'class_count: {count}')
        for idx in range(count):
            print(f"+ idx={idx}")
            info = classifier.get_class(idx)
            print(f" category={info.category} [ {etl_word[info.category]} ]")
            print(f" prob={info.prob}")
        cv2.imshow('frame', in_frame)
        time.sleep(SLEEP_TIME)

    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')
def _initialize_net(args, face_parser_path):
    """Create the face-parser network with ailia or ONNX Runtime.

    Args:
        args: Parsed options; ``args.onnx`` selects ONNX Runtime.
        face_parser_path: Indexable pair. Both elements are passed to
            ``ailia.Net``; only element 1 is passed to ONNX Runtime.

    Returns:
        An ``onnxruntime.InferenceSession`` when ``args.onnx`` is truthy,
        otherwise an ``ailia.Net``.
    """
    env_id = ailia.get_gpu_environment_id()
    logger.info(f"env_id (face parser): {env_id}")

    if args.onnx:
        # Imported only on this path, so onnxruntime is needed only here.
        import onnxruntime
        return onnxruntime.InferenceSession(face_parser_path[1])

    return ailia.Net(face_parser_path[0], face_parser_path[1], env_id=env_id)
def recognize_from_video():
    """Classify every video frame and print the ImageNet labels.

    Each frame is resized, converted to a 4-channel uint8 image and given to
    an ``ailia.Classifier``; every reported class is printed with category,
    label and probability, and the original frame is shown.

    The loop stops when 'q' is pressed or when no more frames can be read,
    so a video file terminates at its end instead of spinning forever on
    failed reads.
    """
    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    classifier = ailia.Classifier(
        MODEL_PATH,
        WEIGHT_PATH,
        env_id=env_id,
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        range=ailia.NETWORK_IMAGE_RANGE_U_FP32
    )

    if args.video == '0':
        print('[INFO] Webcam mode is activated')
        capture = cv2.VideoCapture(0)
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    elif check_file_existance(args.video):
        # NOTE(review): `capture` stays unbound if this check returns a falsy
        # value; presumably the helper exits on a missing file - confirm.
        capture = cv2.VideoCapture(args.video)

    while True:
        ret, frame = capture.read()
        # Stop on 'q' or at end of stream (read() reports failure).
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        _, resized_frame = adjust_frame_size(frame, IMAGE_HEIGHT, IMAGE_WIDTH)
        # NOTE(review): frames read by OpenCV are normally BGR, yet the
        # conversion code is RGB2BGRA - confirm the channel order is intended.
        input_data = cv2.cvtColor(
            resized_frame.astype(np.float32),
            cv2.COLOR_RGB2BGRA
        ).astype(np.uint8)

        classifier.compute(input_data, MAX_CLASS_COUNT)
        count = classifier.get_class_count()

        # show results
        print('==============================================================')
        print(f'class_count: {count}')
        for idx in range(count):
            print(f'+ idx={idx}')
            info = classifier.get_class(idx)
            label = googlenet_labels.imagenet_category[info.category]
            print(f' category={info.category} [ {label} ]')
            print(f' prob={info.prob}')
        cv2.imshow('frame', frame)
        time.sleep(SLEEP_TIME)

    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')
def recognize_from_image():
    """Run clip-level inference over a directory of frame images.

    The files in ``args.input`` are ordered by the integer formed from the
    digits in their names. The first ``args.duration`` frames fill the input
    blob. In benchmark mode that blob is predicted five times with timing;
    otherwise the window slides one frame at a time over the remaining
    files, printing the result and previewing the oldest frame of each
    window, until the files run out or 'q' is pressed.
    """
    def frame_number(name):
        # Sort key: all digits of the file name joined into one integer.
        return int(re.sub("\\D", "", name))

    def load_frame(name):
        return load_image(
            f"{args.input}/{name}",
            (IMAGE_HEIGHT, IMAGE_WIDTH),
            normalize_type='None',
            gen_input_ailia=True,
        )

    # prepare input data
    frame_names = sorted(os.listdir(args.input), key=frame_number)
    window = args.duration
    input_blob = np.empty((1, 3, window, IMAGE_HEIGHT, IMAGE_WIDTH))
    for slot, name in enumerate(frame_names[0:window]):
        input_blob[0, :, slot, :, :] = load_frame(name)

    next_input_index = window
    total_frames = len(frame_names)

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)
    net.set_input_shape((1, 3, window, IMAGE_HEIGHT, IMAGE_WIDTH))

    # inference
    print('Start inference...')
    if args.benchmark:
        print('BENCHMARK mode')
        for _ in range(5):
            t0 = int(round(time.time() * 1000))
            result = net.predict(input_blob)
            t1 = int(round(time.time() * 1000))
            print(f'\tailia processing time {t1 - t0} ms')
    else:
        while next_input_index < total_frames:
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            result = net.predict(input_blob)
            print_mars_result(result)

            # preview the oldest frame of the current window
            oldest = frame_names[next_input_index - window]
            cv2.imshow('preview', cv2.imread(f"{args.input}/{oldest}"))

            # shift the window left and append the next frame
            for slot in range(window - 1):
                input_blob[0, :, slot, :, :] = input_blob[0, :, slot + 1, :, :]
            newest = load_frame(frame_names[next_input_index])
            input_blob[0, :, window - 1, :, :] = newest
            next_input_index += 1

    print('Script finished successfully.')
def recognize_from_video():
    """Run the patch-wise network on every video frame and display it.

    Each frame is preprocessed, split into a grid of patches by
    ``padCropImg`` and scaled to [0, 1]; every patch is normalized to
    [-1, 1], predicted, clipped to [0, 1] and stored as 0-255 values. The
    grid is recomposed, post-processed and shown.

    The loop stops when 'q' is pressed or when no more frames can be read,
    so a video file terminates at its end instead of spinning forever on
    failed reads.
    """
    # [WARNING] This is test impl
    print('[WARNING] This is test implementation')

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    if args.video == '0':
        print('[INFO] Webcam mode is activated')
        capture = cv2.VideoCapture(0)
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    elif check_file_existance(args.video):
        # NOTE(review): `capture` stays unbound if this check returns a falsy
        # value; presumably the helper exits on a missing file - confirm.
        capture = cv2.VideoCapture(args.video)

    while True:
        ret, frame = capture.read()
        # Stop on 'q' or at end of stream (read() reports failure).
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        img = preProcess(frame)
        input_data = padCropImg(img)
        input_data = input_data.astype(np.float32) / 255.0

        ynum = input_data.shape[0]
        xnum = input_data.shape[1]
        preds_ailia = np.zeros((ynum, xnum, 128, 128, 3), dtype=np.float32)

        for j in range(ynum):
            for i in range(xnum):
                patchImg = input_data[j, i]
                patchImg = (patchImg - 0.5) / 0.5
                # HWC -> CHW, then add the batch axis
                patchImg = patchImg.transpose((2, 0, 1))
                patchImg = patchImg[np.newaxis, :, :, :]
                out = net.predict(patchImg)
                # back to HWC and drop the batch axis
                out = out.transpose((0, 2, 3, 1))[0]
                out = (np.clip(out, 0, 1) * 255).astype(np.uint8)
                preds_ailia[j, i] = out

        resImg = composePatch(preds_ailia)
        resImg = postProcess(resImg)
        cv2.imshow('frame', img_as_ubyte(resImg))

    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')
def recognize_from_video():
    """Run the encoder/decoder networks on every video frame and plot it.

    One frame is read up front only to learn the original frame size. For
    each following frame the encoder outputs are fed, in order, to the
    decoder inputs; the last decoder output is passed to ``result_plot`` and
    shown with matplotlib.

    Exits with status 1 if the first frame cannot be read (previously this
    crashed on ``None.shape``). The loop stops when 'q' is pressed or when
    no more frames can be read, so a video file terminates at its end
    instead of spinning forever on failed reads.
    """
    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    enc_net = ailia.Net(ENC_MODEL_PATH, ENC_WEIGHT_PATH, env_id=env_id)
    dec_net = ailia.Net(DEC_MODEL_PATH, DEC_WEIGHT_PATH, env_id=env_id)

    if args.video == '0':
        print('[INFO] Webcam mode is activated')
        capture = cv2.VideoCapture(0)
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    elif check_file_existance(args.video):
        # NOTE(review): `capture` stays unbound if this check returns a falsy
        # value; presumably the helper exits on a missing file - confirm.
        capture = cv2.VideoCapture(args.video)

    # The first frame is consumed only to obtain the original resolution.
    ret, frame = capture.read()
    if not ret:
        print("[ERROR] failed to read the first frame")
        sys.exit(1)
    org_height, org_width, _ = frame.shape

    while True:
        ret, frame = capture.read()
        # Stop on 'q' or at end of stream (read() reports failure).
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        _, input_data = preprocess_frame(frame, IMAGE_HEIGHT, IMAGE_WIDTH)

        # encoder
        enc_input_blobs = enc_net.get_input_blob_list()
        enc_net.set_input_blob_data(input_data, enc_input_blobs[0])
        enc_net.update()
        features = enc_net.get_results()

        # decoder: the i-th encoder output goes to the i-th decoder input
        dec_inputs_blobs = dec_net.get_input_blob_list()
        for f_idx, feature in enumerate(features):
            dec_net.set_input_blob_data(feature, dec_inputs_blobs[f_idx])
        dec_net.update()
        preds_ailia = dec_net.get_results()

        # postprocessing
        disp = preds_ailia[-1]
        disp_resized, vmax = result_plot(disp, org_width, org_height)
        plt.imshow(disp_resized, cmap='magma', vmax=vmax)
        plt.pause(.01)

    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')
def compare_videoframe_image():
    """Compare a reference image against every frame of a video.

    ``args.video`` is a pair: element 0 is the video source (``'0'`` for the
    webcam, otherwise a file path) and element 1 is the reference image. The
    ``conv5_3`` blob is the feature for both the image and each frame; their
    ``distance`` is printed and compared against ``THRESHOLD``.

    The file branch now uses ``args.video[0]``; it previously passed the
    whole ``args.video`` pair to the existence check and to
    ``cv2.VideoCapture``. The loop stops when 'q' is pressed or when no more
    frames can be read, so a video file terminates at its end instead of
    spinning forever on failed reads.
    """
    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    # img part
    video_source = args.video[0]
    fname = args.video[1]
    input_data = load_and_preprocess(fname)
    _ = net.predict(input_data)
    i_feature = net.get_blob_data(net.find_blob_index_by_name('conv5_3'))

    # video part
    if video_source == '0':
        print('[INFO] Webcam mode is activated')
        capture = cv2.VideoCapture(0)
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    elif check_file_existance(video_source):
        # NOTE(review): `capture` stays unbound if this check returns a falsy
        # value; presumably the helper exits on a missing file - confirm.
        capture = cv2.VideoCapture(video_source)

    while True:
        ret, frame = capture.read()
        # Stop on 'q' or at end of stream (read() reports failure).
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        _, resized_frame = adjust_frame_size(frame, IMAGE_HEIGHT, IMAGE_WIDTH)
        input_data = preprocess(resized_frame, input_is_bgr=True)

        # inference
        _ = net.predict(input_data)
        v_feature = net.get_blob_data(net.find_blob_index_by_name('conv5_3'))

        # show result
        dist = distance(i_feature, v_feature)
        print('=============================================================')
        print(f'{os.path.basename(fname)} vs video frame = {dist}')

        if dist < THRESHOLD:
            print('Same person')
        else:
            print('Not same person')
        cv2.imshow('frame', resized_frame)
        time.sleep(SLEEP_TIME)

    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')
def recognize_from_video():
    """Detect on every video frame, show it and optionally record it.

    A writer is created when ``args.savepath`` differs from
    ``SAVE_IMAGE_PATH``; its frame size comes from
    ``webcamera_utils.calc_adjust_fsize``. Each frame is preprocessed,
    predicted, post-processed into detections and drawn by ``show_result``.
    The loop ends on 'q' or when no frame can be read.

    The writer is now released explicitly after the loop so the recorded
    file is finalized deterministically rather than whenever the object
    happens to be garbage-collected.
    """
    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    if args.video == '0':
        print('[INFO] Webcam mode is activated')
        capture = cv2.VideoCapture(0)
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    elif check_file_existance(args.video):
        # NOTE(review): `capture` stays unbound if this check returns a falsy
        # value; presumably the helper exits on a missing file - confirm.
        capture = cv2.VideoCapture(args.video)

    # create video writer if savepath is specified as video format
    writer = None
    if args.savepath != SAVE_IMAGE_PATH:
        f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        save_h, save_w = webcamera_utils.calc_adjust_fsize(
            f_h, f_w, IMAGE_HEIGHT, IMAGE_WIDTH)
        writer = webcamera_utils.get_writer(args.savepath, save_h, save_w)

    while True:
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        input_image, input_data = webcamera_utils.preprocess_frame(
            frame, IMAGE_HEIGHT, IMAGE_WIDTH, normalize_type='127.5')

        # inference
        input_blobs = net.get_input_blob_list()
        net.set_input_blob_data(input_data, input_blobs[0])
        net.update()
        preds_ailia = net.get_results()

        # postprocessing
        detections = postprocess(preds_ailia)
        show_result(input_image, detections)
        cv2.imshow('frame', input_image)

        # save results
        if writer is not None:
            writer.write(input_image)

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        # assumes get_writer returns a cv2.VideoWriter-like object - confirm
        writer.release()
    print('Script finished successfully.')
def main():
    """Predict the masked word of ``SENTENCE`` and print the top candidates.

    The tokenizer is chosen by ``LANG`` (``'en'`` or ``'jp'``). The token and
    segment arrays are bound to the first network inputs; any further input
    receives a dummy array of ones. Inference runs five times with timing,
    then the ``NUM_PREDICT`` highest-scoring token ids at the masked
    position are converted back to tokens and printed.

    Raises:
        ValueError: If ``LANG`` is neither ``'en'`` nor ``'jp'`` (previously
            this surfaced later as an unbound-variable error).
    """
    # model files check and download
    check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)

    # bert tokenizer
    if LANG == 'en':
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    elif LANG == 'jp':
        tokenizer = BertTokenizer(
            'vocab.txt',
            do_lower_case=False,
            do_basic_tokenize=False
        )
    else:
        raise ValueError(f"unsupported LANG: {LANG!r} (expected 'en' or 'jp')")

    # prepare data
    dummy_input = np.ones((1, MAX_SEQ_LEN), dtype=np.int64)
    tokens_ts, segments_ts, masked_index = text2token(
        SENTENCE, tokenizer, lang=LANG
    )
    input_data = np.array([tokens_ts, segments_ts])

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    # compute execution time
    for _ in range(5):
        start = int(round(time.time() * 1000))
        input_blobs = net.get_input_blob_list()
        # Distinct name for the inner index: it used to shadow the outer
        # loop variable.
        for blob_pos, blob_idx in enumerate(input_blobs):
            if blob_pos < len(input_data):
                net.set_input_blob_data(input_data[blob_pos], blob_idx)
            else:
                net.set_input_blob_data(dummy_input, blob_idx)
        net.update()
        preds_ailia = net.get_results()
        end = int(round(time.time() * 1000))
        print(f"ailia processing time {end - start} ms")

    # masked word prediction: ascending argsort, keep the tail, reverse it
    predicted_indices = np.argsort(
        preds_ailia[0][0][masked_index]
    )[-NUM_PREDICT:][::-1]
    predicted_tokens = tokenizer.convert_ids_to_tokens(predicted_indices)

    print('Input sentence: ' + SENTENCE)
    print(f'predicted top {NUM_PREDICT} words: {predicted_tokens}')
    print('Script finished successfully.')
def segment_from_video():
    """Segment every video frame and display the colorized label map.

    The preprocessing size is taken from indices 2 and 3 of the network
    input shape. For each frame the per-pixel label is the argmax over the
    first axis of the selected network result; the label map is colorized,
    converted to BGR, resized to the input image size and shown.

    The loop stops when 'q' is pressed or when no more frames can be read,
    so a video file terminates at its end instead of spinning forever on
    failed reads.
    """
    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    input_shape = net.get_input_shape()
    ailia_input_w = input_shape[3]
    ailia_input_h = input_shape[2]

    if args.video == '0':
        print('[INFO] Webcam mode is activated')
        capture = cv2.VideoCapture(0)
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    elif check_file_existance(args.video):
        # NOTE(review): `capture` stays unbound if this check returns a falsy
        # value; presumably the helper exits on a missing file - confirm.
        capture = cv2.VideoCapture(args.video)

    while True:
        ret, frame = capture.read()
        # Stop on 'q' or at end of stream (read() reports failure).
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        input_image, input_data = preprocess_frame(
            frame, ailia_input_h, ailia_input_w, normalize_type='127.5'
        )

        # inference
        input_blobs = net.get_input_blob_list()
        net.set_input_blob_data(input_data, input_blobs[0])
        net.update()
        preds_ailia = np.array(net.get_results())[0, 0]  # TODO why?

        # postprocessing
        seg_map = np.argmax(preds_ailia.transpose(1, 2, 0), axis=2)
        seg_image = label_to_color_image(seg_map).astype(np.uint8)

        # showing the segmented image (simple)
        seg_image = cv2.cvtColor(seg_image, cv2.COLOR_RGB2BGR)
        seg_image = cv2.resize(
            seg_image,
            (input_image.shape[1], input_image.shape[0])
        )
        cv2.imshow('frame', seg_image)

    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')
def recognize_from_image():
    """Matte the image at ``args.input`` and write the result.

    The image is cropped by ``safe_crop`` to a square whose side is its
    longer edge. When ``args.trimap`` is empty, a segmentation network
    generates the trimap; otherwise the trimap is read from that path and
    cropped the same way. The matting network always runs with env_id 0,
    while the segmentation network uses the GPU environment id.
    """
    # prepare input data
    src_img = cv2.imread(args.input)
    side = max(src_img.shape[0], src_img.shape[1])
    crop_size = (side, side)
    src_img = safe_crop(src_img, crop_size)

    # net initialize
    env_id = 0  # use cpu because overflow fp16 range
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)
    net.set_input_shape((1, IMAGE_HEIGHT, IMAGE_WIDTH, 4))

    if args.trimap != "":
        trimap_data = safe_crop(cv2.imread(args.trimap), crop_size)
        seg_data = trimap_data.copy()
    else:
        # No trimap supplied: derive one with the segmentation network.
        env_id = ailia.get_gpu_environment_id()
        seg_net = ailia.Net(
            SEGMENTATION_MODEL_PATH,
            SEGMENTATION_WEIGHT_PATH,
            env_id=env_id,
        )
        trimap_data, seg_data = generate_trimap(seg_net, src_img)

    input_data, src_img, trimap_data = matting_preprocess(
        src_img, trimap_data, seg_data)

    # inference
    print('Start inference...')
    if not args.benchmark:
        preds_ailia = net.predict(input_data)
    else:
        print('BENCHMARK mode')
        for _ in range(5):
            t0 = int(round(time.time() * 1000))
            preds_ailia = net.predict(input_data)
            t1 = int(round(time.time() * 1000))
            print(f'\tailia processing time {t1 - t0} ms')

    # postprocessing
    res_img = postprocess(src_img, trimap_data, preds_ailia)
    cv2.imwrite(args.savepath, res_img)
    print('Script finished successfully.')
def main():
    """Walk ``args.input`` and run ``recognize_from_image`` on every image.

    Only files ending in ``.jpg``, ``.png`` or ``.bmp`` are processed. For
    each one, an output folder named after the last component of its source
    directory is created under ``args.output``, and the destination file is
    numbered with a counter shared across all folders. The detector is an
    ``ailia.Net`` for the ``blazeface`` architecture and a YOLOv3
    ``ailia.Detector`` otherwise.
    """
    # model files check and download
    check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)

    # check folder existing
    if not os.path.exists(args.input):
        print("error : directory not found "+args.input)
        sys.exit(1)
    if not os.path.exists(args.output):
        os.mkdir(args.output)

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    if args.arch != 'blazeface':
        detector = ailia.Detector(
            MODEL_PATH,
            WEIGHT_PATH,
            len(FACE_CATEGORY),
            format=ailia.NETWORK_IMAGE_FORMAT_RGB,
            channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
            range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
            algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
            env_id=env_id
        )
    else:
        detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    # process images
    image_exts = (".jpg", ".png", ".bmp")
    counter = 0
    for src_dir, _dirs, names in os.walk(args.input):
        for file_ in sorted(names):
            # ".DS_Store" (no extension) and "Thumbs.db" are rejected by
            # this same extension filter.
            _, ext = os.path.splitext(file_)
            if ext not in image_exts:
                continue

            print(f"{src_dir}/{file_}")
            folder = src_dir.split("/")[-1]
            dst_dir = f"{args.output}/{folder}"
            if not os.path.exists(dst_dir):
                os.mkdir(dst_dir)
            dst_path = f"{dst_dir}/{counter}.jpg"
            recognize_from_image(detector, dst_path, src_dir, file_)
            counter += 1
def recognize_from_video():
    """Run the pose estimator on every video frame and display the result.

    Each frame is resized, converted to BGRA and given to the
    ``ailia.PoseEstimator``; ``display_result`` then receives the input
    image, the estimator and the baseline network (input shape ``(1, 32)``).

    The loop stops when 'q' is pressed or when no more frames can be read,
    so a video file terminates at its end instead of spinning forever on
    failed reads.
    """
    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    pose = ailia.PoseEstimator(
        MODEL_PATH, WEIGHT_PATH, env_id=env_id, algorithm=ALGORITHM
    )
    baseline = ailia.Net(
        BASELINE_MODEL_PATH, BASELINE_WEIGHT_PATH, env_id=env_id
    )
    baseline.set_input_shape((1, 32))

    if args.video == '0':
        print('[INFO] Webcam mode is activated')
        capture = cv2.VideoCapture(0)
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    elif check_file_existance(args.video):
        # NOTE(review): `capture` stays unbound if this check returns a falsy
        # value; presumably the helper exits on a missing file - confirm.
        capture = cv2.VideoCapture(args.video)

    while True:
        ret, frame = capture.read()
        # Stop on 'q' or at end of stream (read() reports failure).
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        input_image, input_data = adjust_frame_size(
            frame, IMAGE_HEIGHT, IMAGE_WIDTH,
        )
        input_data = cv2.cvtColor(input_data, cv2.COLOR_BGR2BGRA)

        # inference
        _ = pose.compute(input_data)

        # postprocessing
        display_result(input_image, pose, baseline)
        cv2.imshow('frame', input_image)

        # display 3d pose
        plt.pause(0.01)

    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')
def compare_image_and_video():
    """Compare a base image against every frame of a video.

    ``args.video`` is a pair: element 0 is the video source (``'0'`` for the
    webcam, otherwise a file path) and element 1 is the base image. For each
    frame the base inputs and the frame inputs are concatenated into one
    batch; outputs 0-1 and 2-3 are joined into two feature vectors whose
    ``cosin_metric`` similarity is drawn on the frame.

    The loop stops when 'q' is pressed or when no more frames can be read,
    so a video file terminates at its end instead of spinning forever on
    failed reads.
    """
    # prepare base image
    base_imgs = prepare_input_data(args.video[1])

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    # web camera
    if args.video[0] == '0':
        print('[INFO] Webcam mode is activated')
        capture = cv2.VideoCapture(0)
        if not capture.isOpened():
            print("[Error] webcamera not found")
            sys.exit(1)
    elif check_file_existance(args.video[0]):
        # NOTE(review): `capture` stays unbound if this check returns a falsy
        # value; presumably the helper exits on a missing file - confirm.
        capture = cv2.VideoCapture(args.video[0])

    # inference loop
    while True:
        ret, frame = capture.read()
        # Stop on 'q' or at end of stream (read() reports failure).
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        frame, resized_frame = adjust_frame_size(
            frame, IMAGE_HEIGHT, IMAGE_WIDTH
        )
        input_frame = preprocess_image(resized_frame, input_is_bgr=True)
        input_data = np.concatenate([base_imgs, input_frame], axis=0)

        # inference
        preds_ailia = net.predict(input_data)

        # postprocessing
        fe_1 = np.concatenate([preds_ailia[0], preds_ailia[1]], axis=0)
        fe_2 = np.concatenate([preds_ailia[2], preds_ailia[3]], axis=0)
        sim = cosin_metric(fe_1, fe_2)
        # same face unless the similarity falls below the threshold
        bool_sim = not (THRESHOLD > sim)

        frame = draw_result_on_img(
            frame,
            texts=[f"Similarity: {sim:06.3f}", f"SAME FACE: {bool_sim}"]
        )
        cv2.imshow('frame', frame)

    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')
def recognize_from_video():
    """Detect on every video frame, merge categories by NMS and display.

    Each frame is resized, converted to BGRA and run through the
    ``ailia.Detector``. All detected objects are collected, filtered by
    ``nms_between_categories`` over categories 0 and 1, and drawn on the
    resized image.

    The loop stops when 'q' is pressed or when no more frames can be read,
    so a video file terminates at its end instead of spinning forever on
    failed reads.
    """
    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        len(FACE_CATEGORY),
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=RANGE,
        algorithm=ALGORITHM,
        env_id=env_id
    )

    if args.video == '0':
        print('[INFO] Webcam mode is activated')
        capture = cv2.VideoCapture(0)
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    elif check_file_existance(args.video):
        # NOTE(review): `capture` stays unbound if this check returns a falsy
        # value; presumably the helper exits on a missing file - confirm.
        capture = cv2.VideoCapture(args.video)

    while True:
        ret, frame = capture.read()
        # Stop on 'q' or at end of stream (read() reports failure).
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        _, resized_img = adjust_frame_size(frame, IMAGE_HEIGHT, IMAGE_WIDTH)

        img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2BGRA)
        detector.compute(img, THRESHOLD, IOU)

        detections = [
            detector.get_object(idx)
            for idx in range(detector.get_object_count())
        ]
        # NOTE(review): NMS receives the size of the original frame while
        # detection and plotting use resized_img - confirm this is intended.
        detections = nms_between_categories(
            detections,
            frame.shape[1],
            frame.shape[0],
            categories=[0, 1],
            iou_threshold=IOU
        )

        res_img = plot_results(detections, resized_img, FACE_CATEGORY, False)
        cv2.imshow('frame', res_img)

    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')
def __init__(self):
    """Download the YOLO model files if needed and create the detector.

    The detector is a YOLOv3 ``ailia.Detector`` configured for
    ``len(FACE_CATEGORY)`` categories; it is stored on ``self.detector``.
    """
    check_and_download_models(
        WEIGHT_PATH_YOLO, MODEL_PATH_YOLO, REMOTE_PATH_YOLO
    )

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    detector_options = dict(
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
        env_id=env_id,
    )
    # The local name is kept alongside the attribute, as in the original.
    detector = ailia.Detector(
        MODEL_PATH_YOLO,
        WEIGHT_PATH_YOLO,
        len(FACE_CATEGORY),
        **detector_options
    )
    self.detector = detector