def recognize_from_video(video):
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    if video == '0':
        print('[INFO] Webcam mode is activated')
        capture = cv2.VideoCapture(0)
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    else:
        if pathlib.Path(video).exists():
            capture = cv2.VideoCapture(video)

    while True:
        ret, img = capture.read()
        # check the capture result before using the frame;
        # press q to end video capture
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break
        objs = detect_objects(img, detector)
        for obj in objs:
            dbface_utils.drawbbox(img, obj)
        cv2.imshow('frame', img)

    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')

def recognize_from_image():
    # prepare input data
    img = load_image(args.input)
    print(f'input image shape: {img.shape}')

    # net initialize
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        len(COCO_CATEGORY),
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
        env_id=args.env_id,
    )
    pose = ailia.Net(POSE_MODEL_PATH, POSE_WEIGHT_PATH, env_id=args.env_id)

    # inference
    print('Start inference...')
    if args.benchmark:
        print('BENCHMARK mode')
        for i in range(5):
            start = int(round(time.time() * 1000))
            detector.compute(img, THRESHOLD, IOU)
            end = int(round(time.time() * 1000))
            print(f'\tailia processing time {end - start} ms')
    else:
        detector.compute(img, THRESHOLD, IOU)

    # plot result
    res_img = plot_results(detector, pose, img, COCO_CATEGORY)
    cv2.imwrite(args.savepath, res_img)
    print('Script finished successfully.')

def recognize_from_image():
    # prepare input data
    org_img = load_image(
        args.input,
        (IMAGE_HEIGHT, IMAGE_WIDTH),
    )
    input_data = load_image(
        args.input,
        (IMAGE_HEIGHT, IMAGE_WIDTH),
        normalize_type='127.5',
        gen_input_ailia=True,
    )

    # net initialize
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)

    # inference
    print('Start inference...')
    if args.benchmark:
        print('BENCHMARK mode')
        for i in range(5):
            start = int(round(time.time() * 1000))
            preds_ailia = net.predict([input_data])
            end = int(round(time.time() * 1000))
            print(f'\tailia processing time {end - start} ms')
    else:
        preds_ailia = net.predict([input_data])

    # postprocessing
    detections = but.postprocess(preds_ailia)

    # plot and save detections
    for detection in detections:
        but.plot_detections(org_img, detection, save_image_path=args.savepath)
    print('Script finished successfully.')

def main():
    info = {
        ("paris-streetview", "rect"):
            (WEIGHT_PARIS_STREETVIEW_PATH, MODEL_PARIS_STREETVIEW_PATH, (256, 256)),
        ("celebahq", "rect"):
            (WEIGHT_CELEBAHQ_256_PATH, MODEL_CELEBAHQ_256_PATH, (256, 256)),
        ("celebahq-512", "rect"):
            (WEIGHT_CELEBAHQ_512_PATH, MODEL_CELEBAHQ_512_PATH, (512, 512)),
        ("celebahq-512", "stroke"):
            (WEIGHT_CELEBAHQ_FREEFORM_PATH, MODEL_CELEBAHQ_FREEFORM_PATH, (512, 512)),
        ("places2", "stroke"):
            (WEIGHT_PLACE2_PATH, MODEL_PLACE2_PATH, (512, 680)),
    }
    key = (args.model, args.mask_type)
    if key not in info:
        logger.error("(MODEL = %s, MASK_TYPE = %s) is not a supported combination." % key)
        logger.info("appropriate settings:\n"
                    "\t(MODEL = paris-streetview, MASK_TYPE = rect)\n"
                    "\t(MODEL = celebahq, MASK_TYPE = rect)\n"
                    "\t(MODEL = celebahq-512, MASK_TYPE = rect)\n"
                    "\t(MODEL = celebahq-512, MASK_TYPE = stroke)\n"
                    "\t(MODEL = places2, MASK_TYPE = stroke)")
        sys.exit(-1)

    # model files check and download
    weight_path, model_path, img_shape = info[key]
    check_and_download_models(weight_path, model_path, REMOTE_PATH)

    # net initialize
    net = ailia.Net(model_path, weight_path, env_id=args.env_id)

    recognize_from_image(net, img_shape)

def main():
    # model files check and download
    check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)

    # load audio
    for input_data_path in args.input:
        logger.info('=' * 80)
        logger.info(f'input: {input_data_path}')
        data = sf.read(input_data_path)

        # create instance
        session = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)

        # inference
        logger.info('Start inference...')
        if args.benchmark:
            logger.info('BENCHMARK mode')
            for c in range(5):
                start = int(round(time.time() * 1000))
                label, conf = crnn(data, session)
                end = int(round(time.time() * 1000))
                logger.info("\tailia processing time {} ms".format(end - start))
        else:
            label, conf = crnn(data, session)

        logger.info(label)
        logger.info(conf)

    logger.info('Script finished successfully.')

def enhance_image():
    for image_path in args.input:
        # prepare input data
        img = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
        # note: cv2.resize takes dsize as (width, height)
        img = cv2.resize(img, dsize=(H, W))

        # net initialize
        model = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)
        upsampler = RealESRGAN(model)

        # inference
        logger.info('Start inference...')
        if args.benchmark:
            logger.info('BENCHMARK mode')
            for i in range(5):
                start = int(round(time.time() * 1000))
                output = upsampler.enhance(img)
                end = int(round(time.time() * 1000))
                logger.info(f'\tailia processing time {end - start} ms')
        else:
            output = upsampler.enhance(img)

        savepath = get_savepath(args.savepath, image_path)
        logger.info(f'saved at : {savepath}')
        cv2.imwrite(savepath, output)

    logger.info('Script finished successfully.')

def recognize_from_image():
    # prepare input data
    input_data = load_image(
        args.input,
        (IMAGE_HEIGHT, IMAGE_WIDTH),
        normalize_type='ImageNet',
        gen_input_ailia=True,
    )

    # net initialize
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)

    # inference
    print('Start inference...')
    if args.benchmark:
        print('BENCHMARK mode')
        for i in range(5):
            start = int(round(time.time() * 1000))
            preds_ailia = net.predict(input_data)
            end = int(round(time.time() * 1000))
            print(f'\tailia processing time {end - start} ms')
    else:
        preds_ailia = net.predict(input_data)

    # postprocessing
    print_results(preds_ailia, vgg16_labels.imagenet_category)
    print('Script finished successfully.')

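# The five-iteration timing loop above recurs in most of these samples. A
# hedged sketch of a shared helper that factors it out -- the name
# `benchmark_ms` and the refactoring itself are assumptions on my part, not
# part of the original scripts:
import time

def benchmark_ms(fn, count=5):
    """Run fn() `count` times, printing each wall-clock duration in ms."""
    result = None
    for _ in range(count):
        start = int(round(time.time() * 1000))
        result = fn()
        end = int(round(time.time() * 1000))
        print(f'\tailia processing time {end - start} ms')
    return result

# usage sketch: preds_ailia = benchmark_ms(lambda: net.predict(input_data))
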
def process_video():
    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    if args.face_recognition:
        locator = FaceLocator()
    else:
        locator = None

    if args.video == '0':
        print('[INFO] Webcam mode is activated')
        capture = cv2.VideoCapture(0)
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    else:
        if check_file_existance(args.video):
            capture = cv2.VideoCapture(args.video)

    while True:
        ret, frame = capture.read()
        # press q to quit; stop when no more frames can be read
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break
        img = process_frame(net, locator, frame)
        cv2.imshow('frame', img[..., ::-1])

    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')

def recognize_from_image():
    # prepare input data
    input_img = cv2.imread(args.input)
    data = load_image(
        args.input,
        (IMAGE_HEIGHT, IMAGE_WIDTH),
        normalize_type='255',
        gen_input_ailia=True,
    )

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    # compute execution time
    for i in range(5):
        start = int(round(time.time() * 1000))
        preds_ailia = net.predict(data)[0]
        end = int(round(time.time() * 1000))
        print(f'ailia processing time {end - start} ms')

    visualize_plots(input_img, preds_ailia)
    cv2.imwrite(args.savepath, input_img)

    # confidence map: tile the channels into a grid of `cols` columns
    channels = preds_ailia.shape[0]
    cols = 8
    plot_images(
        'confidence',
        preds_ailia,
        tile_shape=((channels + cols - 1) // cols, cols),
    )
    print('Script finished successfully.')

def wavfile_input_recognition():
    if args.beamdecode:
        try:
            from ctcdecode import CTCBeamDecoder
        except ImportError:
            raise ImportError("BeamCTCDecoder requires paddledecoder package.")
        decoder = CTCBeamDecoder(
            LABELS,
            LM_PATH,
            ALPHA,
            BETA,
            CUTOFF_TOP_N,
            CUTOFF_PROB,
            BEAM_WIDTH,
            NUM_PROCESS,
            BRANK_LABEL_INDEX,
        )

    # net initialize
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)

    for soundf_path in args.input:
        logger.info(soundf_path)

        if args.ailia_audio:
            wav, sr = sf.read(soundf_path)
            wav = ailia.audio.resample(wav, sr, SAMPLING_RATE)
        else:
            wav = librosa.load(soundf_path, sr=SAMPLING_RATE)[0]

        spectrogram = create_spectrogram(wav)
        net.set_input_shape(spectrogram[0].shape)

        # inference
        logger.info('Start inference...')
        if args.benchmark:
            logger.info('BENCHMARK mode')
            for c in range(5):
                start = int(round(time.time() * 1000))
                preds_ailia, output_length = net.predict(spectrogram)
                end = int(round(time.time() * 1000))
                logger.info("\tailia processing time {} ms".format(end - start))
        else:
            # Deep Speech output: output_probability, output_length
            preds_ailia, output_length = net.predict(spectrogram)

        if args.beamdecode:
            text = beam_ctc_decode(
                torch.from_numpy(preds_ailia),
                torch.from_numpy(output_length),
                decoder,
            )
        else:
            text = decode(preds_ailia[0], output_length)

        savepath = get_savepath(args.savepath, soundf_path, ext='.txt')
        logger.info(f'Results saved at : {savepath}')
        with open(savepath, 'w', encoding='utf-8') as f:
            f.write(text)
        logger.info(f'predict sentence:\n{text}')

    logger.info('Script finished successfully.')

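# decode() is defined elsewhere in the sample. As a rough illustration, a
# greedy CTC decoder collapses repeated indices and drops blanks. The helper
# below is a hedged sketch, assuming LABELS maps indices to characters and
# BRANK_LABEL_INDEX is the CTC blank index -- it is not the repository's
# decoder.
import numpy as np

def greedy_ctc_decode(probs, length):
    # pick the most likely label per time step within the valid length
    best = np.argmax(probs[:int(length)], axis=-1)
    chars, prev = [], None
    for idx in best:
        if idx != prev and idx != BRANK_LABEL_INDEX:  # collapse repeats, drop blanks
            chars.append(LABELS[idx])
        prev = idx
    return ''.join(chars)
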
def transform_image():
    """Full transformation of a single image loaded from the filepath in arguments."""
    image = cv2.imread(args.input)

    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    if args.face_recognition:
        locator = FaceLocator()
    else:
        locator = None

    if args.benchmark:
        print('BENCHMARK mode')
        for i in range(5):
            start = int(round(time.time() * 1000))
            out_image = process_frame(net, locator, image)
            end = int(round(time.time() * 1000))
            print(f'\tailia processing time {end - start} ms')
    else:
        out_image = process_frame(net, locator, image)

    # reverse the channel order back to BGR for OpenCV before saving
    cv2.imwrite(args.savepath, out_image[..., ::-1])
    return True

def recognize_from_image():
    # net initialize
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)

    # input image loop
    for image_path in args.input:
        # prepare input data
        logger.info(image_path)
        input_img = load_image(
            image_path,
            (IMAGE_HEIGHT, IMAGE_WIDTH),
            normalize_type='None',
        )
        input_data = get_processed_image(input_img)

        # inference
        logger.info('Start inference...')
        if args.benchmark:
            logger.info('BENCHMARK mode')
            for i in range(args.benchmark_count):
                start = int(round(time.time() * 1000))
                preds = net.predict(input_data)
                end = int(round(time.time() * 1000))
                logger.info(f'\tailia processing time {end - start} ms')
        else:
            preds = net.predict(input_data)

        # show results
        print_results(preds, inceptionv4_labels.imagenet_category)

    logger.info('Script finished successfully.')

def main():
    # model files check and download
    check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)

    if args.arch == 'bert-base-cased' or args.arch == 'bert-base-uncased':
        tokenizer = BertTokenizer.from_pretrained(args.arch)
    else:
        tokenizer = BertJapaneseTokenizer.from_pretrained("cl-tohoku/" + args.arch)

    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)
    net.set_input_blob_shape(
        (1, PADDING_LEN), net.find_blob_index_by_name("token_type_ids"))
    net.set_input_blob_shape(
        (1, PADDING_LEN), net.find_blob_index_by_name("input_ids"))
    net.set_input_blob_shape(
        (1, PADDING_LEN), net.find_blob_index_by_name("attention_mask"))

    with codecs.open(args.input[0], 'r', 'utf-8', 'ignore') as f:
        s = f.readlines()

    for text in s:
        tokenized_text = tokenizer.tokenize(text)
        original_text_len = len(tokenized_text)
        # pad the token sequence up to the fixed input length
        for j in range(len(tokenized_text), PADDING_LEN):
            tokenized_text.append('[PAD]')

        score = numpy.zeros((len(tokenized_text)))
        suggest = {}

        # mask each non-padding token in turn and score the original token
        for i in range(0, len(tokenized_text)):
            masked_index = i
            if tokenized_text[masked_index] == '[PAD]':
                continue

            tokenized_text_saved = tokenized_text[masked_index]
            tokenized_text[masked_index] = '[MASK]'

            outputs = inference(
                net, tokenizer, tokenized_text, masked_index, original_text_len)

            target_ids = tokenizer.convert_tokens_to_ids([tokenized_text_saved])
            index = target_ids[0]
            score[masked_index] = outputs[0][0, masked_index][index]

            predictions = torch.from_numpy(outputs[0][0, masked_index]).topk(1)
            index = predictions.indices[0]
            top_token = tokenizer.convert_ids_to_tokens([index])[0]
            suggest[masked_index] = top_token

            tokenized_text[masked_index] = tokenized_text_saved

        fine_text = colorize(tokenized_text, score, suggest)
        print(fine_text)

    print('Script finished successfully.')

def recognize_from_image():
    # net initialize
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)

    # input image loop
    for image_path in args.input:
        # prepare input data
        logger.info(image_path)
        src_img = cv2.imread(image_path)
        input_data = load_image(
            image_path,
            (IMAGE_HEIGHT, IMAGE_WIDTH),
        )
        input_data = input_data[np.newaxis, :, :, :]
        net.set_input_shape(input_data.shape)

        # inference
        logger.info('Start inference...')
        if args.benchmark:
            logger.info('BENCHMARK mode')
            for i in range(5):
                start = int(round(time.time() * 1000))
                preds_ailia = net.predict(input_data)
                end = int(round(time.time() * 1000))
                logger.info(f'\tailia processing time {end - start} ms')
        else:
            preds_ailia = net.predict(input_data)

        # postprocessing
        pred = preds_ailia.reshape((IMAGE_HEIGHT, IMAGE_WIDTH))
        dst = transfer(src_img, pred)

        savepath = get_savepath(args.savepath, image_path)
        logger.info(f'saved at : {savepath}')
        cv2.imwrite(savepath, dst)

    logger.info('Script finished successfully.')

def recognize_from_video():
    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    if args.video == '0':
        print('[INFO] Webcam mode is activated')
        capture = cv2.VideoCapture(0)
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    else:
        if check_file_existance(args.video):
            capture = cv2.VideoCapture(args.video)

    while True:
        ret, frame = capture.read()
        # press q to quit; stop when no more frames can be read
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        input_data = preprocess(frame)
        net.set_input_shape(input_data.shape)

        boxes, labels, scores, masks = net.predict([input_data])
        display_objdetect_image(frame, boxes, labels, scores, masks)
        plt.pause(.01)

    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')

def recognize_from_image():
    # prepare input data
    image = Image.open(args.input)
    input_data = preprocess(image)

    # net initialize
    # This model requires a huge amount of GPU memory,
    # so fall back to CPU mode on low-power environments
    env_id = args.env_id
    if env_id != -1 and ailia.get_environment(env_id).props == "LOWPOWER":
        env_id = -1
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)
    net.set_input_shape(input_data.shape)

    # inference
    print('Start inference...')
    if args.benchmark:
        print('BENCHMARK mode')
        for i in range(5):
            start = int(round(time.time() * 1000))
            boxes, labels, scores, masks = net.predict([input_data])
            end = int(round(time.time() * 1000))
            print(f'\tailia processing time {end - start} ms')
    else:
        boxes, labels, scores, masks = net.predict([input_data])

    # postprocessing
    fig, ax = create_figure()
    display_objdetect_image(
        fig, ax, image, boxes, labels, scores, masks, savepath=args.savepath
    )
    print('Script finished successfully.')

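# A hedged sketch of how the available environments could be inspected before
# applying a fallback like the one above. It assumes the ailia Python API's
# get_environment_count()/get_environment(); the attribute names may differ by
# SDK version.
import ailia

def list_environments():
    for idx in range(ailia.get_environment_count()):
        env = ailia.get_environment(idx)
        # props carries flags such as "LOWPOWER" or "FP16"
        print(f'{env.id}: {env.name} (props={env.props})')
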
def main():
    # model files check and download
    info = {
        'lip': (WEIGHT_LIP_PATH, MODEL_LIP_PATH,
                (IMAGE_LIP_SIZE, IMAGE_LIP_SIZE), CATEGORY_LIP),
        'atr': (WEIGHT_ATR_PATH, MODEL_ATR_PATH,
                (IMAGE_ATR_SIZE, IMAGE_ATR_SIZE), CATEGORY_ATR),
        'pascal': (WEIGHT_PASCAL_PATH, MODEL_PASCAL_PATH,
                   (IMAGE_PASCAL_SIZE, IMAGE_PASCAL_SIZE), CATEGORY_PASCAL),
    }
    weight_path, model_path, img_size, category = info[args.arch]
    check_and_download_models(weight_path, model_path, REMOTE_PATH)

    # Workaround for accuracy issue on
    # ailia SDK 1.2.4 + opset11 + gpu (metal/vulkan)
    detector = ailia.Net(model_path, weight_path, env_id=args.env_id)

    params = {'img_size': img_size, 'category': category}
    if args.video is not None:
        # video mode
        recognize_from_video(args.video, detector, params)
    else:
        # image mode
        # input image loop
        for image_path in args.input:
            # prepare input data
            logger.info(image_path)
            recognize_from_image(image_path, detector, params)
    logger.info('Script finished successfully.')

def recognize_from_image():
    # prepare input data
    input_data = load_image(
        args.input,
        (IMAGE_HEIGHT, IMAGE_WIDTH),
        gen_input_ailia=True,
    )

    # net initialize
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)

    # inference
    print('Start inference...')
    if args.benchmark:
        print('BENCHMARK mode')
        for i in range(5):
            start = int(round(time.time() * 1000))
            preds_ailia = net.predict(input_data)
            end = int(round(time.time() * 1000))
            print(f'\tailia processing time {end - start} ms')
    else:
        preds_ailia = net.predict(input_data)

    # postprocessing
    if args.smooth:
        preds_ailia = smooth_output(preds_ailia)
    save_pred(preds_ailia, args.savepath, IMAGE_HEIGHT, IMAGE_WIDTH)
    print('Script finished successfully.')

def enhance_video():
    # net initialize
    model = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)
    upsampler = RealESRGAN(model)

    capture = get_capture(args.video)

    # create video writer if savepath is specified as video format
    if args.savepath != SAVE_IMAGE_PATH:
        logger.warning(
            'currently, video results cannot be output correctly...')
        f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        save_h, save_w = calc_adjust_fsize(f_h, f_w, IMAGE_HEIGHT, IMAGE_WIDTH)
        writer = get_writer(args.savepath, save_h, save_w * 2)
    else:
        writer = None

    while True:
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        # note: cv2.resize takes dsize as (width, height)
        img = cv2.resize(frame, dsize=(H, W))

        # inference
        output = upsampler.enhance(img)

        # plot result
        cv2.imshow('frame', output)

        # save result
        if writer is not None:
            writer.write(output)

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    logger.info('Script finished successfully.')

def microphone_input_recognition():
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')

    if args.beamdecode:
        try:
            from ctcdecode import CTCBeamDecoder
        except ImportError:
            raise ImportError("BeamCTCDecoder requires paddledecoder package.")
        decoder = CTCBeamDecoder(
            LABELS, LM_PATH, ALPHA, BETA, CUTOFF_TOP_N,
            CUTOFF_PROB, BEAM_WIDTH, NUM_PROCESS, BRANK_LABEL_INDEX,
        )

    while True:
        wav = record_microphone_input()
        spectrogram = create_spectrogram(wav)

        # net initialize
        net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)
        net.set_input_shape(spectrogram[0].shape)

        # inference
        print('Translating...')
        # Deep Speech output: output_probability, output_length
        preds_ailia, output_length = net.predict(spectrogram)

        if args.beamdecode:
            text = beam_ctc_decode(
                torch.from_numpy(preds_ailia),
                torch.from_numpy(output_length),
                decoder,
            )
        else:
            text = decode(preds_ailia[0], output_length)

        print(f'predict sentence:\n{text}\n')
        time.sleep(1)

def extract_feature_vec_from_image():
    # prepare input data
    input_img = load_image(
        args.input,
        (IMAGE_HEIGHT, IMAGE_WIDTH),
        normalize_type='None',
    )
    input_data = prepare_input_data(input_img)

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    fe_net = ailia.Net(FE_MODEL_PATH, FE_WEIGHT_PATH, env_id=env_id)

    fe_net.set_input_shape(input_data.shape)
    input_dict = {'data': input_data}

    # inference
    print('Start inference...')
    if args.benchmark:
        print('BENCHMARK mode')
        for i in range(5):
            start = int(round(time.time() * 1000))
            _ = fe_net.predict(input_dict)[0]
            end = int(round(time.time() * 1000))
            print(f'\tailia processing time {end - start} ms')
    else:
        _ = fe_net.predict(input_dict)[0]

    # extracting the output of a specific layer
    idx = fe_net.find_blob_index_by_name('encode1')
    preds_ailia = fe_net.get_blob_data(idx)

    print(preds_ailia.reshape(preds_ailia.shape[0], -1))
    print('Script finished successfully.')

def recognize_from_image():
    # prepare input data
    input_data = load_image(
        args.input,
        (IMAGE_HEIGHT, IMAGE_WIDTH),
        normalize_type='ImageNet',
        gen_input_ailia=True,
    )
    src_img = load_image(
        args.input,
        (IMAGE_HEIGHT, IMAGE_WIDTH),
        normalize_type='None',
    )

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    # inference
    print('Start inference...')
    if args.benchmark:
        print('BENCHMARK mode')
        for i in range(5):
            start = int(round(time.time() * 1000))
            preds_ailia = net.predict(input_data)
            end = int(round(time.time() * 1000))
            print(f'\tailia processing time {end - start} ms')
    else:
        preds_ailia = net.predict(input_data)

    # postprocessing
    res_img = postprocess(src_img, preds_ailia)
    cv2.imwrite(args.savepath, res_img)
    print('Script finished successfully.')

def recognize_from_image():
    # prepare input data
    input_data = load_image(
        args.input,
        (IMAGE_HEIGHT, IMAGE_WIDTH),
        normalize_type='255',
        gen_input_ailia=True,
    )

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    # inference
    print('Start inference...')
    if args.benchmark:
        print('BENCHMARK mode')
        for i in range(5):
            start = int(round(time.time() * 1000))
            preds_ailia = net.predict(input_data)
            end = int(round(time.time() * 1000))
            print(f'\tailia processing time {end - start} ms')
    else:
        preds_ailia = net.predict(input_data)

    # postprocessing: CHW -> HWC, RGB -> BGR, then rescale to [0, 255]
    output_img = preds_ailia[0].transpose((1, 2, 0))
    output_img = cv2.cvtColor(output_img, cv2.COLOR_RGB2BGR)
    cv2.imwrite(args.savepath, output_img * 255)
    print('Script finished successfully.')

def main():
    # model files check and download
    check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)

    ailia_model = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)
    tokenizer = DistilBertTokenizer.from_pretrained(
        'distilbert-base-uncased-finetuned-sst-2-english')

    model_inputs = tokenizer.encode_plus(args.input, return_tensors="pt")
    inputs_onnx = {
        k: v.cpu().detach().numpy() for k, v in model_inputs.items()
    }

    logger.info("Input : " + str(args.input))

    # inference
    if args.benchmark:
        logger.info('BENCHMARK mode')
        for i in range(5):
            start = int(round(time.time() * 1000))
            score = ailia_model.predict(inputs_onnx)
            end = int(round(time.time() * 1000))
            logger.info("\tailia processing time {} ms".format(end - start))
    else:
        score = ailia_model.predict(inputs_onnx)

    score = numpy.exp(score) / numpy.exp(score).sum(-1, keepdims=True)
    label_name = ["negative", "positive"]
    label_id = numpy.argmax(numpy.array(score))
    logger.info("Label : " + str(label_name[label_id]))
    logger.info("Score : " + str(score[0][0][label_id]))
    logger.info('Script finished successfully.')

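# The softmax above exponentiates the raw logits directly, which can overflow
# for large values. A numerically stable variant -- an assumption on my part,
# not a change the sample itself makes -- subtracts the per-row maximum first:
import numpy

def stable_softmax(logits):
    # subtracting the max leaves the result unchanged but bounds exp() inputs
    shifted = logits - numpy.max(logits, axis=-1, keepdims=True)
    e = numpy.exp(shifted)
    return e / e.sum(axis=-1, keepdims=True)
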
def main():
    dic_model = {
        'blouse': (WEIGHT_BLOUSE_PATH, MODEL_BLOUSE_PATH, IMAGE_BLOUSE_PATH),
        'dress': (WEIGHT_DRESS_PATH, MODEL_DRESS_PATH, IMAGE_DRESS_PATH),
        'outwear': (WEIGHT_OUTWEAR_PATH, MODEL_OUTWEAR_PATH, IMAGE_OUTWEAR_PATH),
        'skirt': (WEIGHT_SKIRT_PATH, MODEL_SKIRT_PATH, IMAGE_SKIRT_PATH),
        'trousers': (WEIGHT_TROUSERS_PATH, MODEL_TROUSERS_PATH, IMAGE_TROUSERS_PATH),
    }
    weight_path, model_path, img_path = dic_model[args.clothing_type]

    # model files check and download
    check_and_download_models(weight_path, model_path, REMOTE_PATH)

    # initialize
    net = ailia.Net(model_path, weight_path, env_id=args.env_id)

    if args.video is not None:
        # video mode
        recognize_from_video(args.video, net)
    else:
        # image mode
        # input image loop
        for image_path in args.input:
            logger.info(image_path)
            recognize_from_image(image_path, net)
    logger.info('Script finished successfully.')

def main():
    info = {
        ("celeba", 256): (WEIGHT_CELEBA256_PATH, MODEL_CELEBA256_PATH, (256, 256)),
        ("places", 256): (WEIGHT_PLACES256_PATH, MODEL_PLACES256_PATH, (256, 256)),
        ("places", 512): (WEIGHT_PLACES512_PATH, MODEL_PLACES512_PATH, (512, 512)),
        ("places", 1024): (WEIGHT_PLACES1024_PATH, MODEL_PLACES1024_PATH, (1024, 1024)),
    }
    key = (args.model, args.img_res)
    if key not in info:
        logger.error("(MODEL = %s, IMG_RESOLUTION = %s) is not a supported combination." % key)
        logger.info("appropriate settings:\n"
                    "\t(MODEL = celeba, IMG_RESOLUTION = 256)\n"
                    "\t(MODEL = places, IMG_RESOLUTION = 256 or 512 or 1024)")
        sys.exit(-1)

    if "FP16" in ailia.get_environment(args.env_id).props \
            or platform.system() == 'Darwin':
        logger.warning('This model does not work with FP16, so CPU mode is used.')
        args.env_id = 0

    # model files check and download
    weight_path, model_path, img_shape = info[key]
    check_and_download_models(weight_path, model_path, REMOTE_PATH)

    # net initialize
    net = ailia.Net(model_path, weight_path, env_id=args.env_id)

    recognize_from_image(net, img_shape)

def compare_images():
    # prepare input data
    imgs_1 = prepare_input_data(args.inputs[0])
    imgs_2 = prepare_input_data(args.inputs[1])
    imgs = np.concatenate([imgs_1, imgs_2], axis=0)

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    # compute execution time
    print('Start inference...')
    for i in range(5):
        start = int(round(time.time() * 1000))
        preds_ailia = net.predict(imgs)
        end = int(round(time.time() * 1000))
        print(f'ailia processing time {end - start} ms')

    # postprocessing
    fe_1 = np.concatenate([preds_ailia[0], preds_ailia[1]], axis=0)
    fe_2 = np.concatenate([preds_ailia[2], preds_ailia[3]], axis=0)
    sim = cosin_metric(fe_1, fe_2)

    print(f'Similarity of ({args.inputs[0]}, {args.inputs[1]}) : {sim:.3f}')
    if THRESHOLD > sim:
        print('They are not the same face!')
    else:
        print('They are the same face!')

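# cosin_metric() is defined elsewhere in the sample; judging by its use above,
# it computes the cosine similarity of two feature vectors. A minimal sketch
# under that assumption:
import numpy as np

def cosin_metric(x1, x2):
    # cos(theta) = <x1, x2> / (|x1| * |x2|); 1.0 means identical direction
    return np.dot(x1, x2) / (np.linalg.norm(x1) * np.linalg.norm(x2))
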
def recognize_from_image():
    # prepare input data
    image = Image.open(args.input)
    input_data = preprocess(image)

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)
    net.set_input_shape(input_data.shape)

    # inference
    print('Start inference...')
    if args.benchmark:
        print('BENCHMARK mode')
        for i in range(5):
            start = int(round(time.time() * 1000))
            boxes, labels, scores, masks = net.predict([input_data])
            end = int(round(time.time() * 1000))
            print(f'\tailia processing time {end - start} ms')
    else:
        boxes, labels, scores, masks = net.predict([input_data])

    # postprocessing
    display_objdetect_image(
        image, boxes, labels, scores, masks, savepath=args.savepath)
    print('Script finished successfully.')

def main():
    # model files check and download
    print("=== ST-GCN model ===")
    check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)
    print("=== OpenPose model ===")
    check_and_download_models(WEIGHT_POSE_PATH, MODEL_POSE_PATH, REMOTE_POSE_PATH)

    # net initialize
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)

    if args.arch == "pyopenpose":
        pose = op.WrapperPython()
        params = dict(model_folder='.', model_pose='COCO')
        pose.configure(params)
        pose.start()
    else:
        pose = ailia.PoseEstimator(
            MODEL_POSE_PATH, WEIGHT_POSE_PATH,
            env_id=args.env_id, algorithm=POSE_ALGORITHM,
        )
        if args.arch == "openpose":
            pose.set_threshold(0.1)

    if args.video is not None:
        # realtime mode
        recognize_realtime(args.video, pose, net)
    else:
        # offline mode
        recognize_from_file(args.input, pose, net)

def recognize_from_image(filename):
    # load input image
    img = load_image(filename)
    print(f'input image shape: {img.shape}')
    img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)

    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    print('Start inference...')
    if args.benchmark:
        print('BENCHMARK mode')
        for i in range(5):
            start = int(round(time.time() * 1000))
            objs = detect_objects(img, detector)
            end = int(round(time.time() * 1000))
            print(f'\tailia processing time {end - start} ms')
    else:
        objs = detect_objects(img, detector)

    # show image
    for obj in objs:
        dbface_utils.drawbbox(img, obj)
    cv2.imwrite(args.savepath, img)
    print('Script finished successfully.')