示例#1
0
def recognize_from_video(video):
    """Run face detection on a video stream (webcam or file) and display it.

    Args:
        video: '0' for webcam, otherwise a path to a video file.

    Press 'q' in the preview window to stop.
    """
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    if video == '0':
        print('[INFO] Webcam mode is activated')
        capture = cv2.VideoCapture(0)
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    else:
        # Bug fix: exit when the file is missing; the original left
        # `capture` unbound and the loop below raised NameError.
        if not pathlib.Path(video).exists():
            print("[ERROR] video file not found")
            sys.exit(1)
        capture = cv2.VideoCapture(video)

    while True:
        ret, img = capture.read()

        # press q to end video capture
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        # Bug fix: check the read result BEFORE processing; the original
        # called detect_objects / imshow with img=None on a failed read.
        if not ret:
            continue

        objs = detect_objects(img, detector)
        for obj in objs:
            dbface_utils.drawbbox(img, obj)
        cv2.imshow('frame', img)

    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')
示例#2
0
def recognize_from_image():
    """Detect objects in the input image, estimate poses, and save the plot."""
    # prepare input data
    img = load_image(args.input)
    print(f'input image shape: {img.shape}')

    # net initialize: YOLOv3 detector plus a pose-estimation network
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        len(COCO_CATEGORY),
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
        env_id=args.env_id,
    )
    pose = ailia.Net(POSE_MODEL_PATH, POSE_WEIGHT_PATH, env_id=args.env_id)

    # inference
    print('Start inference...')
    if not args.benchmark:
        detector.compute(img, THRESHOLD, IOU)
    else:
        print('BENCHMARK mode')
        for _ in range(5):
            t0 = int(round(time.time() * 1000))
            detector.compute(img, THRESHOLD, IOU)
            t1 = int(round(time.time() * 1000))
            print(f'\tailia processing time {t1 - t0} ms')

    # plot result
    res_img = plot_results(detector, pose, img, COCO_CATEGORY)
    cv2.imwrite(args.savepath, res_img)
    print('Script finished successfully.')
示例#3
0
def recognize_from_image():
    """Run detection on the input image and plot every detection found."""
    # prepare input data: original image plus a normalized network input
    org_img = load_image(args.input, (IMAGE_HEIGHT, IMAGE_WIDTH))
    input_data = load_image(
        args.input,
        (IMAGE_HEIGHT, IMAGE_WIDTH),
        normalize_type='127.5',
        gen_input_ailia=True,
    )

    # net initialize
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)

    # inference
    print('Start inference...')
    if not args.benchmark:
        preds_ailia = net.predict([input_data])
    else:
        print('BENCHMARK mode')
        for _ in range(5):
            t0 = int(round(time.time() * 1000))
            preds_ailia = net.predict([input_data])
            t1 = int(round(time.time() * 1000))
            print(f'\tailia processing time {t1 - t0} ms')

    # postprocess and draw each detection onto the original image
    detections = but.postprocess(preds_ailia)
    for detection in detections:
        but.plot_detections(org_img, detection, save_image_path=args.savepath)
    print('Script finished successfully.')
示例#4
0
def main():
    """Select weights for (model, mask_type), download them, and run inference."""
    info = {
        ("paris-streetview", "rect"): (
            WEIGHT_PARIS_STREETVIEW_PATH, MODEL_PARIS_STREETVIEW_PATH, (256, 256)),
        ("celebahq", "rect"): (
            WEIGHT_CELEBAHQ_256_PATH, MODEL_CELEBAHQ_256_PATH, (256, 256)),
        ("celebahq-512", "rect"): (
            WEIGHT_CELEBAHQ_512_PATH, MODEL_CELEBAHQ_512_PATH, (512, 512)),
        ("celebahq-512", "stroke"): (
            WEIGHT_CELEBAHQ_FREEFORM_PATH, MODEL_CELEBAHQ_FREEFORM_PATH, (512, 512)),
        ("places2", "stroke"): (
            WEIGHT_PLACE2_PATH, MODEL_PLACE2_PATH, (512, 680)),
    }
    key = (args.model, args.mask_type)
    if key not in info:
        # unsupported combination: report the valid ones and bail out
        logger.error("(MODEL = %s, MASK_TYPE = %s) is unmatch." % key)
        logger.info("appropriate settings:\n"
                    "\t(MODEL = paris-streetview, MASK_TYPE = rect)\n"
                    "\t(MODEL = celebahq, MASK_TYPE = rect)\n"
                    "\t(MODEL = celebahq-512, MASK_TYPE = rect)\n"
                    "\t(MODEL = celebahq-512, MASK_TYPE = stroke)\n"
                    "\t(MODEL = places2, MASK_TYPE = stroke)")
        sys.exit(-1)

    # model files check and download
    weight_path, model_path, img_shape = info[key]
    check_and_download_models(weight_path, model_path, REMOTE_PATH)

    # net initialize
    net = ailia.Net(model_path, weight_path, env_id=args.env_id)

    recognize_from_image(net, img_shape)
def main():
    """Download the model, then run CRNN inference on each input audio file."""
    # model files check and download
    check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)

    # create instance once: the original re-created the network inside the
    # per-file loop, which repeats the expensive model load without
    # changing results
    session = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)

    # load audio
    for input_data_path in args.input:
        logger.info('=' * 80)
        logger.info(f'input: {input_data_path}')
        data = sf.read(input_data_path)

        # inference
        logger.info('Start inference...')
        if args.benchmark:
            logger.info('BENCHMARK mode')
            for c in range(5):
                start = int(round(time.time() * 1000))
                label, conf = crnn(data, session)
                end = int(round(time.time() * 1000))
                logger.info("\tailia processing time {} ms".format(end -
                                                                   start))
        else:
            label, conf = crnn(data, session)

        logger.info(label)
        logger.info(conf)

        logger.info('Script finished successfully.')
示例#6
0
def enhance_image():
    """Super-resolve each input image with Real-ESRGAN and save the results."""
    # net initialize once: the original rebuilt the model and upsampler
    # inside the per-image loop, repeating the expensive model load
    # without changing results
    model = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)
    upsampler = RealESRGAN(model)

    for image_path in args.input:
        # prepare input data
        img = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
        img = cv2.resize(img, dsize=(H, W))

        # inference
        logger.info('Start inference...')
        if args.benchmark:
            logger.info('BENCHMARK mode')
            for i in range(5):
                start = int(round(time.time() * 1000))
                output = upsampler.enhance(img)
                end = int(round(time.time() * 1000))
                logger.info(f'\tailia processing time {end - start} ms')
        else:
            output = upsampler.enhance(img)

        savepath = get_savepath(args.savepath, image_path)
        logger.info(f'saved at : {savepath}')
        cv2.imwrite(savepath, output)

        logger.info('Script finished successfully.')
示例#7
0
def recognize_from_image():
    """Classify the input image with VGG16 and print the top categories."""
    # prepare input data
    input_data = load_image(
        args.input,
        (IMAGE_HEIGHT, IMAGE_WIDTH),
        normalize_type='ImageNet',
        gen_input_ailia=True,
    )

    # net initialize
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)

    # inference
    print('Start inference...')
    if not args.benchmark:
        preds_ailia = net.predict(input_data)
    else:
        print('BENCHMARK mode')
        for _ in range(5):
            t0 = int(round(time.time() * 1000))
            preds_ailia = net.predict(input_data)
            t1 = int(round(time.time() * 1000))
            print(f'\tailia processing time {t1 - t0} ms')

    # postprocessing
    print_results(preds_ailia, vgg16_labels.imagenet_category)
    print('Script finished successfully.')
示例#8
0
def process_video():
    """Process a webcam or video-file stream frame by frame; 'q' quits."""
    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    # optional face localization
    if args.face_recognition:
        locator = FaceLocator()
    else:
        locator = None

    if args.video == '0':
        print('[INFO] Webcam mode is activated')
        capture = cv2.VideoCapture(0)
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    else:
        # Bug fix: exit when the file is missing; the original left
        # `capture` unbound and the loop below raised NameError.
        if not check_file_existance(args.video):
            sys.exit(1)
        capture = cv2.VideoCapture(args.video)

    while True:
        ret, frame = capture.read()
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        if not ret:
            continue

        img = process_frame(net, locator, frame)

        # NOTE(review): output appears to be RGB (flipped to BGR for
        # imshow here) — confirm against process_frame.
        cv2.imshow('frame', img[..., ::-1])

    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')
示例#9
0
def recognize_from_image():
    """Predict confidence maps for the input image and visualize them."""
    # prepare input data
    input_img = cv2.imread(args.input)
    data = load_image(
        args.input,
        (IMAGE_HEIGHT, IMAGE_WIDTH),
        normalize_type='255',
        gen_input_ailia=True,
    )

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    # compute execution time
    for _ in range(5):
        t0 = int(round(time.time() * 1000))
        preds_ailia = net.predict(data)[0]
        t1 = int(round(time.time() * 1000))
        print(f'ailia processing time {t1 - t0} ms')

    visualize_plots(input_img, preds_ailia)
    cv2.imwrite(args.savepath, input_img)

    # Confidence Map: tile all channels into a grid of `cols` columns
    n_ch = preds_ailia.shape[0]
    cols = 8
    rows = (n_ch + cols - 1) // cols
    plot_images('confidence', preds_ailia, tile_shape=(rows, cols))
    print('Script finished successfully.')
示例#10
0
def wavfile_input_recognition():
    """Transcribe each input audio file with Deep Speech and save the text.

    Uses beam-search CTC decoding when --beamdecode is set (requires the
    ctcdecode package); otherwise falls back to greedy decoding.
    """
    if args.beamdecode:
        try:
            from ctcdecode import CTCBeamDecoder
        except ImportError:
            raise ImportError("BeamCTCDecoder requires paddledecoder package.")

        # Beam-search decoder backed by an external language model.
        decoder = CTCBeamDecoder(
            LABELS,
            LM_PATH,
            ALPHA,
            BETA,
            CUTOFF_TOP_N,
            CUTOFF_PROB,
            BEAM_WIDTH,
            NUM_PROCESS,
            BRANK_LABEL_INDEX,
        )

    # net initialize
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)

    for soundf_path in args.input:
        logger.info(soundf_path)
        # Load audio, resampled to the model's expected sampling rate
        # (either via ailia.audio or librosa, per --ailia_audio).
        if args.ailia_audio:
            wav,sr = sf.read(soundf_path)
            wav = ailia.audio.resample(wav,sr,SAMPLING_RATE)
        else:
            wav = librosa.load(soundf_path, sr=SAMPLING_RATE)[0]
        spectrogram = create_spectrogram(wav)
        # Input length varies per file, so the net is reshaped each time.
        net.set_input_shape(spectrogram[0].shape)

        # inference
        logger.info('Start inference...')
        if args.benchmark:
            logger.info('BENCHMARK mode')
            for c in range(5):
                start = int(round(time.time() * 1000))
                preds_ailia, output_length = net.predict(spectrogram)
                end = int(round(time.time() * 1000))
                logger.info("\tailia processing time {} ms".format(end-start))
        else:
            # Deep Speech output: output_probability, output_length
            preds_ailia, output_length = net.predict(spectrogram)

        # Beam search needs torch tensors; greedy decode takes raw arrays.
        if args.beamdecode:
            text = beam_ctc_decode(
                torch.from_numpy(preds_ailia),
                torch.from_numpy(output_length),
                decoder,
            )
        else:
            text = decode(preds_ailia[0], output_length)

        savepath = get_savepath(args.savepath, soundf_path, ext='.txt')
        logger.info(f'Results saved at : {savepath}')
        with open(savepath, 'w', encoding='utf-8') as f:
            f.write(text)
        logger.info(f'predict sentence:\n{text}')
    logger.info('Script finished successfully.')
示例#11
0
def transform_image():
    """Apply the full transformation to the single image given in arguments."""
    image = cv2.imread(args.input)
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')

    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    # optional face localization
    locator = FaceLocator() if args.face_recognition else None

    if args.benchmark:
        print('BENCHMARK mode')
        for _ in range(5):
            t0 = int(round(time.time() * 1000))
            out_image = process_frame(net, locator, image)
            t1 = int(round(time.time() * 1000))
            print(f'\tailia processing time {t1 - t0} ms')
    else:
        out_image = process_frame(net, locator, image)

    # channel order flipped before writing
    cv2.imwrite(args.savepath, out_image[..., ::-1])
    return True
示例#12
0
def recognize_from_image():
    """Classify every input image with InceptionV4 and print the results."""
    # net initialize
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)

    # input image loop
    for image_path in args.input:
        logger.info(image_path)
        # prepare input data
        raw_img = load_image(
            image_path, (IMAGE_HEIGHT, IMAGE_WIDTH), normalize_type='None')
        input_data = get_processed_image(raw_img)

        # inference
        logger.info('Start inference...')
        if not args.benchmark:
            preds = net.predict(input_data)
        else:
            logger.info('BENCHMARK mode')
            for _ in range(args.benchmark_count):
                t0 = int(round(time.time() * 1000))
                preds = net.predict(input_data)
                t1 = int(round(time.time() * 1000))
                logger.info(f'\tailia processing time {t1 - t0} ms')

        # show results
        print_results(preds, inceptionv4_labels.imagenet_category)

    logger.info('Script finished successfully.')
def main():
    """Masked-language-model proofreading of each line of the input file.

    Every token is masked in turn; the model's output value for the
    original token becomes its score, and the model's top prediction is
    kept as a replacement suggestion. Results are rendered via colorize().
    """
    # model files check and download
    check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)

    # English BERT variants load directly; other archs are loaded from
    # the cl-tohoku namespace (Japanese models).
    if args.arch == 'bert-base-cased' or args.arch == 'bert-base-uncased':
        tokenizer = BertTokenizer.from_pretrained(args.arch)
    else:
        tokenizer = BertJapaneseTokenizer.from_pretrained("cl-tohoku/" +
                                                          args.arch)

    # Fix all three input blobs to a (1, PADDING_LEN) shape.
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)
    net.set_input_blob_shape((1, PADDING_LEN),
                             net.find_blob_index_by_name("token_type_ids"))
    net.set_input_blob_shape((1, PADDING_LEN),
                             net.find_blob_index_by_name("input_ids"))
    net.set_input_blob_shape((1, PADDING_LEN),
                             net.find_blob_index_by_name("attention_mask"))

    with codecs.open(args.input[0], 'r', 'utf-8', 'ignore') as f:
        s = f.readlines()

    for text in s:
        tokenized_text = tokenizer.tokenize(text)
        original_text_len = len(tokenized_text)

        # Pad to the fixed input length expected by the network.
        for j in range(len(tokenized_text), PADDING_LEN):
            tokenized_text.append('[PAD]')

        score = numpy.zeros((len(tokenized_text)))
        suggest = {}

        for i in range(0, len(tokenized_text)):
            masked_index = i

            # Skip padding positions.
            if tokenized_text[masked_index] == '[PAD]':
                continue

            # Temporarily replace the current token with [MASK].
            tokenized_text_saved = tokenized_text[masked_index]

            tokenized_text[masked_index] = '[MASK]'

            outputs = inference(net, tokenizer, tokenized_text, masked_index,
                                original_text_len)

            # Model output value for the ORIGINAL token at this position.
            target_ids = tokenizer.convert_tokens_to_ids(
                [tokenized_text_saved])
            index = target_ids[0]
            score[masked_index] = outputs[0][0, masked_index][index]

            # Model's single best prediction for this position.
            predictions = torch.from_numpy(outputs[0][0, masked_index]).topk(1)
            index = predictions.indices[0]
            top_token = tokenizer.convert_ids_to_tokens([index])[0]
            suggest[masked_index] = top_token

            # Restore the original token before scoring the next one.
            tokenized_text[masked_index] = tokenized_text_saved

        fine_text = colorize(tokenized_text, score, suggest)
        print(fine_text)

    print('Script finished successfully.')
def recognize_from_image():
    """Predict a map for each input image, transfer it, and save the result."""
    # net initialize
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)

    # input image loop
    for image_path in args.input:
        logger.info(image_path)
        # prepare input data (add a batch axis, then reshape the net)
        src_img = cv2.imread(image_path)
        input_data = load_image(image_path, (IMAGE_HEIGHT, IMAGE_WIDTH))
        input_data = input_data[np.newaxis, :, :, :]
        net.set_input_shape(input_data.shape)

        # inference
        logger.info('Start inference...')
        if not args.benchmark:
            preds_ailia = net.predict(input_data)
        else:
            logger.info('BENCHMARK mode')
            for _ in range(5):
                t0 = int(round(time.time() * 1000))
                preds_ailia = net.predict(input_data)
                t1 = int(round(time.time() * 1000))
                logger.info(f'\tailia processing time {t1 - t0} ms')

        # postprocessing: map the prediction back onto the source image
        pred = preds_ailia.reshape((IMAGE_HEIGHT, IMAGE_WIDTH))
        dst = transfer(src_img, pred)
        savepath = get_savepath(args.savepath, image_path)
        logger.info(f'saved at : {savepath}')
        cv2.imwrite(savepath, dst)
    logger.info('Script finished successfully.')
示例#15
0
def recognize_from_video():
    """Run Mask R-CNN on a webcam or video-file stream; 'q' quits."""
    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    if args.video == '0':
        print('[INFO] Webcam mode is activated')
        capture = cv2.VideoCapture(0)
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    else:
        # Bug fix: exit when the file is missing; the original left
        # `capture` unbound and the loop below raised NameError.
        if not check_file_existance(args.video):
            sys.exit(1)
        capture = cv2.VideoCapture(args.video)

    while True:
        ret, frame = capture.read()
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        if not ret:
            continue

        # BGR frame -> RGB PIL image, then preprocess and reshape the net
        # to this frame's input shape
        frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        input_data = preprocess(frame)
        net.set_input_shape(input_data.shape)

        boxes, labels, scores, masks = net.predict([input_data])

        display_objdetect_image(frame, boxes, labels, scores, masks)
        plt.pause(.01)

    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')
示例#16
0
def recognize_from_image():
    """Instance-segment the input image and save the annotated figure."""
    # prepare input data
    image = Image.open(args.input)
    input_data = preprocess(image)

    # net initialize
    # This model needs a large amount of GPU memory, so fall back to CPU
    # on low-power environments.
    env_id = args.env_id
    if env_id != -1 and ailia.get_environment(env_id).props == "LOWPOWER":
        env_id = -1
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)
    net.set_input_shape(input_data.shape)

    # inference
    print('Start inference...')
    if not args.benchmark:
        boxes, labels, scores, masks = net.predict([input_data])
    else:
        print('BENCHMARK mode')
        for _ in range(5):
            t0 = int(round(time.time() * 1000))
            boxes, labels, scores, masks = net.predict([input_data])
            t1 = int(round(time.time() * 1000))
            print(f'\tailia processing time {t1 - t0} ms')

    # postprocessing
    fig, ax = create_figure()
    display_objdetect_image(
        fig, ax, image, boxes, labels, scores, masks, savepath=args.savepath)
    print('Script finished successfully.')
def main():
    """Select the model for args.arch, download it, and dispatch to image/video mode."""
    # model files check and download
    info = {
        'lip': (WEIGHT_LIP_PATH, MODEL_LIP_PATH,
                (IMAGE_LIP_SIZE, IMAGE_LIP_SIZE), CATEGORY_LIP),
        'atr': (WEIGHT_ATR_PATH, MODEL_ATR_PATH,
                (IMAGE_ATR_SIZE, IMAGE_ATR_SIZE), CATEGORY_ATR),
        # Bug fix: the pascal entry used IMAGE_ATR_SIZE as its second
        # dimension (copy-paste from the atr entry); every other arch uses
        # its own square size, so pascal should be IMAGE_PASCAL_SIZE twice.
        'pascal': (WEIGHT_PASCAL_PATH, MODEL_PASCAL_PATH,
                   (IMAGE_PASCAL_SIZE, IMAGE_PASCAL_SIZE), CATEGORY_PASCAL),
    }
    weight_path, model_path, img_size, category = info[args.arch]
    check_and_download_models(weight_path, model_path, REMOTE_PATH)

    # Workaround for accuracy issue on
    # ailia SDK 1.2.4 + opset11 + gpu (metal/vulkan)
    detector = ailia.Net(model_path, weight_path, env_id=args.env_id)

    params = {'img_size': img_size, 'category': category}
    if args.video is not None:
        # video mode
        recognize_from_video(args.video, detector, params)
    else:
        # image mode
        # input image loop
        for image_path in args.input:
            # prepare input data
            logger.info(image_path)
            recognize_from_image(image_path, detector, params)

    logger.info('Script finished successfully.')
示例#18
0
def recognize_from_image():
    """Segment the input image and save the (optionally smoothed) output."""
    # prepare input data
    input_data = load_image(
        args.input, (IMAGE_HEIGHT, IMAGE_WIDTH), gen_input_ailia=True)

    # net initialize
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)

    # inference
    print('Start inference...')
    if not args.benchmark:
        preds_ailia = net.predict(input_data)
    else:
        print('BENCHMARK mode')
        for _ in range(5):
            t0 = int(round(time.time() * 1000))
            preds_ailia = net.predict(input_data)
            t1 = int(round(time.time() * 1000))
            print(f'\tailia processing time {t1 - t0} ms')

    # postprocessing: optional smoothing of the raw prediction
    if args.smooth:
        preds_ailia = smooth_output(preds_ailia)

    save_pred(preds_ailia, args.savepath, IMAGE_HEIGHT, IMAGE_WIDTH)
    print('Script finished successfully.')
示例#19
0
def enhance_video():
    """Upscale each frame of the input video with Real-ESRGAN and display it.

    Writes enhanced frames to a video file when a save path is given;
    press 'q' in the preview window to stop.
    """
    # net initialize
    model = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)
    upsampler = RealESRGAN(model)

    capture = get_capture(args.video)
    # create video writer if savepath is specified as video format
    if args.savepath != SAVE_IMAGE_PATH:
        logger.warning(
            'currently, video results cannot be output correctly...')
        f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        save_h, save_w = calc_adjust_fsize(f_h, f_w, IMAGE_HEIGHT, IMAGE_WIDTH)
        writer = get_writer(args.savepath, save_h, save_w * 2)
    else:
        writer = None

    while True:
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        img = cv2.resize(frame, dsize=(H, W))

        # inference
        output = upsampler.enhance(img)

        # plot result
        cv2.imshow('frame', output)

        # Bug fix: write each enhanced frame. The original released the
        # writer INSIDE the loop on the first iteration (so nothing was
        # ever written) and logged completion once per frame.
        if writer is not None:
            writer.write(output)

    # Cleanup moved out of the loop: release resources once, then log.
    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    logger.info('Script finished successfully.')
示例#20
0
def microphone_input_recognition():
    """Continuously record from the microphone and print Deep Speech transcripts."""
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')

    if args.beamdecode:
        try:
            from ctcdecode import CTCBeamDecoder
        except ImportError:
            raise ImportError("BeamCTCDecoder requires paddledecoder package.")

        # Beam-search decoder backed by an external language model.
        decoder = CTCBeamDecoder(LABELS, LM_PATH, ALPHA, BETA, CUTOFF_TOP_N,
                                 CUTOFF_PROB, BEAM_WIDTH, NUM_PROCESS,
                                 BRANK_LABEL_INDEX)

    # net initialize once: the original rebuilt the network on every
    # recording, repeating the expensive model load without changing
    # results. set_input_shape below still adapts per recording.
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    while True:
        wav = record_microphone_input()
        spectrogram = create_spectrogram(wav)
        # input length varies per recording, so reshape each time
        net.set_input_shape(spectrogram[0].shape)

        # inference
        print('Translating...')
        # Deep Speech output: output_probability, output_length
        preds_ailia, output_length = net.predict(spectrogram)

        if args.beamdecode:
            text = beam_ctc_decode(torch.from_numpy(preds_ailia),
                                   torch.from_numpy(output_length), decoder)
        else:
            text = decode(preds_ailia[0], output_length)

        print(f'predict sentence:\n{text}\n')
        time.sleep(1)
示例#21
0
def extract_feature_vec_from_image():
    """Extract the 'encode1' feature vector for the input image and print it."""
    # prepare input data
    raw_img = load_image(
        args.input,
        (IMAGE_HEIGHT, IMAGE_WIDTH),
        normalize_type='None',
    )
    input_data = prepare_input_data(raw_img)

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    fe_net = ailia.Net(FE_MODEL_PATH, FE_WEIGHT_PATH, env_id=env_id)
    fe_net.set_input_shape(input_data.shape)

    input_dict = {'data': input_data}

    # inference (the final prediction itself is discarded; only the
    # intermediate blob below is of interest)
    print('Start inference...')
    if not args.benchmark:
        _ = fe_net.predict(input_dict)[0]
    else:
        print('BENCHMARK mode')
        for _ in range(5):
            t0 = int(round(time.time() * 1000))
            fe_net.predict(input_dict)[0]
            t1 = int(round(time.time() * 1000))
            print(f'\tailia processing time {t1 - t0} ms')

    # read the intermediate 'encode1' blob as the feature vector
    blob_idx = fe_net.find_blob_index_by_name('encode1')
    feats = fe_net.get_blob_data(blob_idx)
    print(feats.reshape(feats.shape[0], -1))
    print('Script finished successfully.')
示例#22
0
def recognize_from_image():
    """Run the network on the input image and save the post-processed result."""
    # prepare input data: normalized network input plus the raw source image
    input_data = load_image(
        args.input, (IMAGE_HEIGHT, IMAGE_WIDTH),
        normalize_type='ImageNet', gen_input_ailia=True)
    src_img = load_image(
        args.input, (IMAGE_HEIGHT, IMAGE_WIDTH), normalize_type='None')

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    # inference
    print('Start inference...')
    if not args.benchmark:
        preds_ailia = net.predict(input_data)
    else:
        print('BENCHMARK mode')
        for _ in range(5):
            t0 = int(round(time.time() * 1000))
            preds_ailia = net.predict(input_data)
            t1 = int(round(time.time() * 1000))
            print(f'\tailia processing time {t1 - t0} ms')

    # postprocessing
    res_img = postprocess(src_img, preds_ailia)
    cv2.imwrite(args.savepath, res_img)
    print('Script finished successfully.')
示例#23
0
def recognize_from_image():
    """Transform the input image and write the result scaled back to 0-255."""
    # prepare input data
    input_data = load_image(
        args.input, (IMAGE_HEIGHT, IMAGE_WIDTH),
        normalize_type='255', gen_input_ailia=True)

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    # inference
    print('Start inference...')
    if not args.benchmark:
        preds_ailia = net.predict(input_data)
    else:
        print('BENCHMARK mode')
        for _ in range(5):
            t0 = int(round(time.time() * 1000))
            preds_ailia = net.predict(input_data)
            t1 = int(round(time.time() * 1000))
            print(f'\tailia processing time {t1 - t0} ms')

    # postprocessing: CHW -> HWC, RGB -> BGR, rescale to 8-bit range
    output_img = preds_ailia[0].transpose((1, 2, 0))
    output_img = cv2.cvtColor(output_img, cv2.COLOR_RGB2BGR)
    cv2.imwrite(args.savepath, output_img * 255)
    print('Script finished successfully.')
示例#24
0
def main():
    """Classify the sentiment of the input text with DistilBERT."""
    # model files check and download
    check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)

    ailia_model = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)
    tokenizer = DistilBertTokenizer.from_pretrained(
        'distilbert-base-uncased-finetuned-sst-2-english')
    model_inputs = tokenizer.encode_plus(args.input, return_tensors="pt")
    # detach the torch tensors into plain numpy arrays for the runtime
    inputs_onnx = {}
    for k, v in model_inputs.items():
        inputs_onnx[k] = v.cpu().detach().numpy()

    logger.info("Input : " + str(args.input))

    # inference
    if args.benchmark:
        logger.info('BENCHMARK mode')
        for _ in range(5):
            t0 = int(round(time.time() * 1000))
            score = ailia_model.predict(inputs_onnx)
            t1 = int(round(time.time() * 1000))
            logger.info("\tailia processing time {} ms".format(t1 - t0))
    else:
        score = ailia_model.predict(inputs_onnx)

    # softmax over the raw outputs
    score = numpy.exp(score) / numpy.exp(score).sum(-1, keepdims=True)

    label_name = ["negative", "positive"]
    label_id = numpy.argmax(numpy.array(score))
    logger.info("Label : " + str(label_name[label_id]))
    logger.info("Score : " + str(score[0][0][label_id]))

    logger.info('Script finished successfully.')
def main():
    """Pick the model for the chosen clothing type and run image/video mode."""
    dic_model = {
        'blouse': (WEIGHT_BLOUSE_PATH, MODEL_BLOUSE_PATH, IMAGE_BLOUSE_PATH),
        'dress': (WEIGHT_DRESS_PATH, MODEL_DRESS_PATH, IMAGE_DRESS_PATH),
        'outwear': (WEIGHT_OUTWEAR_PATH, MODEL_OUTWEAR_PATH, IMAGE_OUTWEAR_PATH),
        'skirt': (WEIGHT_SKIRT_PATH, MODEL_SKIRT_PATH, IMAGE_SKIRT_PATH),
        'trousers': (WEIGHT_TROUSERS_PATH, MODEL_TROUSERS_PATH, IMAGE_TROUSERS_PATH),
    }
    weight_path, model_path, img_path = dic_model[args.clothing_type]

    # model files check and download
    check_and_download_models(weight_path, model_path, REMOTE_PATH)

    # initialize
    net = ailia.Net(model_path, weight_path, env_id=args.env_id)

    if args.video is not None:
        # video mode
        recognize_from_video(args.video, net)
    else:
        # image mode: process every input image in turn
        for image_path in args.input:
            logger.info(image_path)
            recognize_from_image(image_path, net)
    logger.info('Script finished successfully.')
示例#26
0
def main():
    """Resolve (model, resolution), download the weights, and run inference."""
    info = {
        ("celeba", 256): (WEIGHT_CELEBA256_PATH, MODEL_CELEBA256_PATH, (256, 256)),
        ("places", 256): (WEIGHT_PLACES256_PATH, MODEL_PLACES256_PATH, (256, 256)),
        ("places", 512): (WEIGHT_PLACES512_PATH, MODEL_PLACES512_PATH, (512, 512)),
        ("places", 1024): (WEIGHT_PLACES1024_PATH, MODEL_PLACES1024_PATH, (1024, 1024)),
    }
    key = (args.model, args.img_res)
    if key not in info:
        # unsupported combination: report the valid ones and bail out
        logger.error("(MODEL = %s, IMG_RESOLUTION = %s) is unmatch." % key)
        logger.info("appropriate settings:\n"
                    "\t(MODEL = celeba, IMG_RESOLUTION = 256)\n"
                    "\t(MODEL = places, IMG_RESOLUTION = 256 or 512 or 1024)")
        sys.exit(-1)

    # force CPU on FP16 environments and on macOS
    env_props = ailia.get_environment(args.env_id).props
    if "FP16" in env_props or platform.system() == 'Darwin':
        logger.warning('This model do not work on FP16. So use CPU mode.')
        args.env_id = 0

    # model files check and download
    weight_path, model_path, img_shape = info[key]
    check_and_download_models(weight_path, model_path, REMOTE_PATH)

    # net initialize
    net = ailia.Net(model_path, weight_path, env_id=args.env_id)

    recognize_from_image(net, img_shape)
示例#27
0
def compare_images():
    """Compare two face images and report whether they show the same face."""
    # prepare input data: both images batched into one array
    imgs_1 = prepare_input_data(args.inputs[0])
    imgs_2 = prepare_input_data(args.inputs[1])
    imgs = np.concatenate([imgs_1, imgs_2], axis=0)

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    # compute execution time
    print('Start inference...')
    for _ in range(5):
        t0 = int(round(time.time() * 1000))
        preds_ailia = net.predict(imgs)
        t1 = int(round(time.time() * 1000))
        print(f'ailia processing time {t1 - t0} ms')

    # postprocessing: each input contributes two feature halves
    fe_1 = np.concatenate([preds_ailia[0], preds_ailia[1]], axis=0)
    fe_2 = np.concatenate([preds_ailia[2], preds_ailia[3]], axis=0)
    sim = cosin_metric(fe_1, fe_2)

    print(f'Similarity of ({args.inputs[0]}, {args.inputs[1]}) : {sim:.3f}')
    if sim < THRESHOLD:
        print('They are not the same face!')
    else:
        print('They are the same face!')
示例#28
0
def recognize_from_image():
    """Detect object instances in the input image and save the annotated plot."""
    # prepare input data
    image = Image.open(args.input)
    input_data = preprocess(image)

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)
    net.set_input_shape(input_data.shape)

    # inference
    print('Start inference...')
    if not args.benchmark:
        boxes, labels, scores, masks = net.predict([input_data])
    else:
        print('BENCHMARK mode')
        for _ in range(5):
            t0 = int(round(time.time() * 1000))
            boxes, labels, scores, masks = net.predict([input_data])
            t1 = int(round(time.time() * 1000))
            print(f'\tailia processing time {t1 - t0} ms')

    # postprocessing
    display_objdetect_image(
        image, boxes, labels, scores, masks, savepath=args.savepath)
    print('Script finished successfully.')
示例#29
0
def main():
    """Download the ST-GCN and pose models, build both nets, and dispatch."""
    # model files check and download
    print("=== ST-GCN model ===")
    check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)
    print("=== OpenPose model ===")
    check_and_download_models(WEIGHT_POSE_PATH, MODEL_POSE_PATH,
                              REMOTE_POSE_PATH)

    # net initialize
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)

    # pose backend: native pyopenpose, or the ailia pose estimator
    if args.arch == "pyopenpose":
        pose = op.WrapperPython()
        pose.configure(dict(model_folder='.', model_pose='COCO'))
        pose.start()
    else:
        pose = ailia.PoseEstimator(
            MODEL_POSE_PATH,
            WEIGHT_POSE_PATH,
            env_id=args.env_id,
            algorithm=POSE_ALGORITHM,
        )
        if args.arch == "openpose":
            pose.set_threshold(0.1)

    if args.video is not None:
        # realtime mode
        recognize_realtime(args.video, pose, net)
    else:
        # offline mode
        recognize_from_file(args.input, pose, net)
示例#30
0
def recognize_from_image(filename):
    """Detect faces in `filename`, draw the boxes, and save to args.savepath."""
    # load input image (drop any alpha channel)
    img = load_image(filename)
    print(f'input image shape: {img.shape}')
    img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)

    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    print('Start inference...')
    if not args.benchmark:
        objs = detect_objects(img, detector)
    else:
        print('BENCHMARK mode')
        for _ in range(5):
            t0 = int(round(time.time() * 1000))
            objs = detect_objects(img, detector)
            t1 = int(round(time.time() * 1000))
            print(f'\tailia processing time {t1 - t0} ms')

    # draw each detection and save the annotated image
    for obj in objs:
        dbface_utils.drawbbox(img, obj)
    cv2.imwrite(args.savepath, img)

    print('Script finished successfully.')