Example #1
def feature_flow():
    bbox_util = BBoxUtility(NUM_CLASSES)
    raw_inputs, images = load_inputs(image_files)
    inputs = preprocess_input(np.array(raw_inputs))

    dump_activation_layer = 'conv4_2'
    compare_layer_name = 'conv6_2'
    print('dump_activation_layer', dump_activation_layer)
    print('target_layer_name', compare_layer_name)

    # normal SSD network
    model1 = SSD300v2(input_shape, num_classes=NUM_CLASSES)
    model1.load_weights('weights_SSD300.hdf5', by_name=True)
    predictions = run_network(model1, inputs)
    results = bbox_util.detection_out(predictions)
    plot_detections(images, results)

    # get dump layer's output (as input for flow network)
    input_img2 = inputs[1:2, :, :, :]
    layer_dump = get_layer_output(model=model1, inputs=input_img2, output_layer_name=dump_activation_layer)
    print('layer_dump.shape = ', layer_dump.shape)

    # flow (raw rgb)
    flow_rgb = compute_flow(image_files[1], image_files[0])

    print('flow.shape', flow_rgb.shape)
    imshow_fig(cv2.cvtColor(draw_hsv(flow_rgb), cv2.COLOR_BGR2RGB), title='flow_rgb')

    # flow (re-sized for feature map)
    flow_feature = get_flow_for_filter(flow_rgb)
    # imshow_fig(flow_feature[:, :, 0], title='flow_feature_y', cmap='gray')
    # imshow_fig(flow_feature[:, :, 1], title='flow_feature_x', cmap='gray')

    # warp image by flow_rgb
    iimg1 = cv2.imread(image_files[0])
    img_warp = warp_flow(iimg1, flow_rgb)
    imshow_fig(cv2.cvtColor(img_warp, cv2.COLOR_BGR2RGB), title='frame_2_warp')

    # shift feature
    shifted_feature = shift_filter(layer_dump, flow_feature)

    # flow net
    model2 = SSD300_conv4_3((128, 128, 512), num_classes=NUM_CLASSES)
    model2.load_weights('weights_SSD300.hdf5', by_name=True)
    predictions = run_network(model2, shifted_feature)
    results = bbox_util.detection_out(predictions)
    plot_detections(images[1:2], results)

    # get specific layer's output and compare them (for debugging)
    compare_model_layer(model1, input_img2, compare_layer_name,
                        model2, shifted_feature, compare_layer_name,
                        True)

    sess.close()
    plt.show()
Example #2
def main(img_paths):
    """
    Detect objects in images.

    Parameters
    ----------
    img_paths : list of strings
    """
    # Load the model
    voc_classes = ['Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle',
                   'Bus', 'Car', 'Cat', 'Chair', 'Cow', 'Diningtable',
                   'Dog', 'Horse', 'Motorbike', 'Person', 'Pottedplant',
                   'Sheep', 'Sofa', 'Train', 'Tvmonitor']
    NUM_CLASSES = len(voc_classes) + 1
    input_shape = (300, 300, 3)
    model = SSD300(input_shape, num_classes=NUM_CLASSES)
    model.load_weights('weights_SSD300.hdf5', by_name=True)
    bbox_util = BBoxUtility(NUM_CLASSES)

    # Load the inputs
    inputs = []
    images = []
    for img_path in img_paths:
        img = image.load_img(img_path, target_size=(300, 300))
        img = image.img_to_array(img)
        images.append(imread(img_path))
        inputs.append(img.copy())
    inputs = preprocess_input(np.array(inputs))

    # Predict
    preds = model.predict(inputs, batch_size=1, verbose=1)
    results = bbox_util.detection_out(preds)

    # Visualize
    for i, img in enumerate(images):
        create_overlay(img, results[i], voc_classes,
                       "{}-det.png".format(img_paths[i]))
Example #3
class VideoTest(object):
    """ Class for testing a trained SSD model on a video file and show the
        result in a window. Class is designed so that one VideoTest object
        can be created for a model, and the same object can then be used on
        multiple videos and webcams.

        Arguments:
            class_names: A list of strings, each containing the name of a class.
                         The first name should be that of the background class
                         which is not used.

            model:       An SSD model. It should already be trained for
                         images similar to the video to test on.

            input_shape: The shape that the model expects for its input,
                         as a tuple, for example (300, 300, 3)

            bbox_util:   An instance of the BBoxUtility class in ssd_utils.py
                         The BBoxUtility needs to be instantiated with
                         the same number of classes as the length of
                         class_names.

    """

    def __init__(self, class_names, model, input_shape):
        self.class_names = class_names
        self.num_classes = len(class_names)
        self.model = model
        self.input_shape = input_shape
        self.bbox_util = BBoxUtility(self.num_classes)

        # Create unique and somewhat visually distinguishable bright
        # colors for the different classes.
        self.class_colors = []
        for i in range(0, self.num_classes):
            # This can probably be written in a more elegant manner
            hue = 255*i/self.num_classes
            col = np.zeros((1,1,3)).astype("uint8")
            col[0][0][0] = hue
            col[0][0][1] = 128 # Saturation
            col[0][0][2] = 255 # Value
            cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
            col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]), int(cvcol[0][0][2]))
            self.class_colors.append(col)

    def run(self, video_path = 0, start_frame = 0, conf_thresh = 0.6):
        """ Runs the test on a video (or webcam)

        # Arguments
        video_path: A file path to the video to be tested on. Can also be a number,
                    in which case the webcam with that number (e.g. 0) is
                    used instead.

        start_frame: The number of the first frame of the video to be processed
                     by the network.

        conf_thresh: Threshold of confidence. Any boxes with lower confidence
                     are not visualized.

        """

        vid = cv2.VideoCapture(video_path)
        if not vid.isOpened():
            raise IOError(("Couldn't open video file or webcam. If you're "
            "trying to open a webcam, make sure you video_path is an integer!"))

        # Compute aspect ratio of video
        vidw = vid.get(cv2.CAP_PROP_FRAME_WIDTH)
        vidh = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)
        vidar = vidw/vidh

        # Skip frames until reaching start_frame
        if start_frame > 0:
            vid.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

        accum_time = 0
        curr_fps = 0
        fps = "FPS: ??"
        prev_time = timer()

        num_frame=0

        while True:
            retval, orig_image = vid.read()
            if not retval:
                print("Done!")
                return

            im_size = (self.input_shape[0], self.input_shape[1])
            resized = cv2.resize(orig_image, im_size)
            rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

            # Resize back to the original aspect ratio for later visualization.
            # The resized version is used so the visualization shows the
            # resolution the network actually has to work with.
            to_draw = cv2.resize(resized, (int(self.input_shape[0]*vidar), self.input_shape[1]))

            # Use model to predict
            inputs = [image.img_to_array(rgb)]
            tmp_inp = np.array(inputs)
            x = preprocess_input(tmp_inp)

            y = self.model.predict(x)


            # This line creates a new TensorFlow device every time. Is there a
            # way to avoid that?
            results = self.bbox_util.detection_out(y)

            if len(results) > 0 and len(results[0]) > 0:
                # Interpret output, only one frame is used
                det_label = results[0][:, 0]
                det_conf = results[0][:, 1]
                det_xmin = results[0][:, 2]
                det_ymin = results[0][:, 3]
                det_xmax = results[0][:, 4]
                det_ymax = results[0][:, 5]

                top_indices = [i for i, conf in enumerate(det_conf) if conf >= conf_thresh]

                top_conf = det_conf[top_indices]
                top_label_indices = det_label[top_indices].tolist()
                top_xmin = det_xmin[top_indices]
                top_ymin = det_ymin[top_indices]
                top_xmax = det_xmax[top_indices]
                top_ymax = det_ymax[top_indices]

                for i in range(top_conf.shape[0]):
                    xmin = int(round(top_xmin[i] * to_draw.shape[1]))
                    ymin = int(round(top_ymin[i] * to_draw.shape[0]))
                    xmax = int(round(top_xmax[i] * to_draw.shape[1]))
                    ymax = int(round(top_ymax[i] * to_draw.shape[0]))

                    # Draw the box on top of the to_draw image
                    class_num = int(top_label_indices[i])
                    cv2.rectangle(to_draw, (xmin, ymin), (xmax, ymax),
                                  self.class_colors[class_num], 2)
                    text = self.class_names[class_num] + " " + ('%.2f' % top_conf[i])

                    text_top = (xmin, ymin-10)
                    text_bot = (xmin + 80, ymin + 5)
                    text_pos = (xmin + 5, ymin)
                    cv2.rectangle(to_draw, text_top, text_bot, self.class_colors[class_num], -1)
                    cv2.putText(to_draw, text, text_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0,0,0), 1)
            # print(text)
            # Calculate FPS
            # This computes FPS for everything, not just the model's execution
            # which may or may not be what you want
            curr_time = timer()
            exec_time = curr_time - prev_time
            prev_time = curr_time
            accum_time = accum_time + exec_time
            curr_fps = curr_fps + 1
            if accum_time > 1:
                accum_time = accum_time - 1
                fps = "FPS: " + str(curr_fps)
                curr_fps = 0

            # Draw FPS in top left corner
            cv2.rectangle(to_draw, (0,0), (50, 17), (255,255,255), -1)
            cv2.putText(to_draw, fps, (3,10), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0,0,0), 1)

            cv2.imshow("SSD result", to_draw)
            cv2.waitKey(10)

            # print(text)
            ##########################################
            cv2.imwrite(".\video\\pic\\frame_" + str('{0:04d}'.format(num_frame)) +".png", to_draw)
            ##########################################

            num_frame+=1
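
The docstring above describes the intended usage; the sketch below is an assumption that reuses the SSD300 constructor, class list, and weights file from the other examples here, not code from the original.

# Hypothetical usage sketch for VideoTest.
class_names = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle",
               "bus", "car", "cat", "chair", "cow", "diningtable", "dog",
               "horse", "motorbike", "person", "pottedplant", "sheep", "sofa",
               "train", "tvmonitor"]
input_shape = (300, 300, 3)
model = SSD300(input_shape, num_classes=len(class_names))
model.load_weights('weights_SSD300.hdf5', by_name=True)
vid_test = VideoTest(class_names, model, input_shape)
vid_test.run(0)  # webcam 0; or e.g. vid_test.run('video.mp4', start_frame=100, conf_thresh=0.5)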
Example #4
def dtc_predict_py_edit(
        predict_dir,
        predicted_dir,
        dict,
        model_path,
        conf_threshold=0.6,
        is_conf_threshold_down=False,
        class_model=None,
        dict_class={
            0.0: "Car",
            1.0: "Bicycle",
            2.0: "Pedestrian",
            3.0: "Signal",
            4.0: "Signs",
            5.0: "Truck"
        },
        img_height=331,
        img_width=331,
        is_overwrite=False,
        max_box=100,  # or None for no limit
        min_top_indices=0,
        fontsize=4,
        linewidth=0.5):
    """
    dtc_predict.py を一部変更した関数
    指定ディレクトリの画像1件ずつpredict実行し、バウンティングボックス付きの画像出力
    predictの位置や予測ラベルを書いたデータフレームも作成する
    Args:
        predict_dir : 予測したい画像がはいってるディレクトリ
        predicted_dir : 予測した画像出力先ディレクトリ
        dict : 予測クラスのidとクラス名の辞書型データ 例:dict = {0.0:"other", 1.0:"Bicycle", 2.0:"Pedestrian", 3.0:"Signal", 4.0:"Signs", 5.0:"Truck", 6.0:"Car"}
        model_path : ロードするモデルファイルのパス
        conf_threshold : 予測結果の確信度の閾値
        is_conf_threshold_down : 検出が出るまで予測結果の確信度の閾値を下げるかのフラグ
        class_model : 検出した領域をSSD以外のモデルで再予測する分類モデルオブジェクト
        dict_class : 再予測する分類モデルのクラスのidとクラス名の辞書型データ
        img_height, img_width : 再予測する分類モデルの入力画像サイズ(modelのデフォルトのサイズである必要あり)
        is_overwrite : 出力先に同名ファイルあればpredictしないかどうか
        max_box : 1画像で検出する領域の最大数。Noneなら制限なし。100なら100個まで検出
        min_top_indices : 最小でもmin_top_indices+1個は検出する。デフォルトの0なら最低1個は検出。is_conf_threshold_down=Trueでないと機能しない
        fontsize: 画像に表示する予測ラベルの文字の大きさ
        linewidth: 画像に表示する予測boxの線の太さ
    Return:
        なし(予測した画像出力、予測結果のデータフレーム出力(pred.csv))
    """
    num_classes = len(dict)  #6+1

    # Confidence threshold for counting something as detected
    #conf_threshold = 0.6#0.5#0.7

    # Folder containing the images to predict on
    #predict_dir = r'C:\Users\shingo\jupyter_notebook\tfgpu_py36_work\AI_Edge_Contest\object_detection\SSD_classes_py\all_SSD_module\SSD\ssd_train'
    # List of paths of the images to predict on
    img_path_list = glob.glob(os.path.join(predict_dir, "*.*"))

    # Folder where the prediction results are saved
    ##predicted_dir = r'D:\work\AI_Edge_Contest\object_detect\object_detection\SSD_classes\predicted_images'
    if not os.path.isdir(predicted_dir):
        os.mkdir(predicted_dir)

    file_names = []  # list of file names
    inputs = []  # images resized to the network input size
    images_h = []  # original image heights
    images_w = []  # original image widths
    images = []  # original-size images for inspecting the results
    correctpred_filecount = 0

    # Build the class-name dictionary
    #dict = {0.0:"other", 1.0:"Bicycle", 2.0:"Pedestrian", 3.0:"Signal", 4.0:"Signs", 5.0:"Truck", 6.0:"Car"}

    # Load the model
    model = create_model(num_classes)
    #model.load_weights(r'D:\work\AI_Edge_Contest\object_detect\object_detection\SSD_classes\weight_ssd_best.hdf5')
    model.load_weights(model_path)
    print(model)

    import pandas as pd
    # Create an empty dataframe
    pred_df = pd.DataFrame(
        index=[],
        columns=['file_names', 'conf', 'label_name', 'x', 'y', 'x+w', 'y+h'])

    # ---- for json output ----
    prediction = {}
    # ----------------

    # Process the images one at a time
    for path in tqdm(img_path_list):

        # Skip prediction if a file with the same name already exists in the output directory
        if is_overwrite == False and os.path.isfile(
                os.path.join(predicted_dir, os.path.basename(path))):
            continue

        file_names = []
        file_names.append(os.path.basename(path))
        #print(file_names)
        # ---- for json output ----
        img_name = os.path.basename(path)
        prediction[img_name] = {}
        # ----------------

        img, height, width = load_img(path, target_size=input_shape)
        img = image.img_to_array(img)

        inputs = []
        inputs.append(img.copy())

        images_h = []
        images_h.append(height)

        images_w = []
        images_w.append(width)

        images = []
        temp_image = imread(path)
        images.append(temp_image.copy())

        # Preprocess the input image
        inputs = preprocess_input(np.array(inputs))
        #print(inputs.shape)

        # Run prediction
        pred_results = model.predict(inputs, batch_size=1, verbose=0)
        #print(pred_results)
        bbox_util = BBoxUtility(num_classes)
        #print(bbox_util)
        bbox_results = bbox_util.detection_out(pred_results)
        #print(bbox_results)

        for file_no in range(len(file_names)):
            #for file_no in range(100):
            #print('-----------', file_names[file_no], '-----------')

            # Draw the original image
            plt.imshow(images[file_no] / 255.)

            # Get the predicted box information
            bbox_label = bbox_results[file_no][:, 0]
            bbox_conf = bbox_results[file_no][:, 1]
            bbox_xmin = bbox_results[file_no][:, 2]
            bbox_ymin = bbox_results[file_no][:, 3]
            bbox_xmax = bbox_results[file_no][:, 4]
            bbox_ymax = bbox_results[file_no][:, 5]

            # Keep only boxes whose confidence exceeds the threshold
            top_indices = [
                i for i, conf in enumerate(bbox_conf) if conf > conf_threshold
            ]

            # --------- Optionally lower conf_threshold until len(top_indices) > min_top_indices --------------------
            if is_conf_threshold_down == True:
                conf_threshold_change = 0.0
                if len(top_indices) == 0:
                    # If nothing is detected at the base conf_threshold, lower the threshold until something is detected
                    for conf_threshold_i in range(int(conf_threshold // 0.01)):
                        conf_threshold_change = conf_threshold - (
                            (conf_threshold_i + 1) * 0.01)
                        top_indices = [
                            i for i, conf in enumerate(bbox_conf)
                            if conf > conf_threshold_change
                        ]
                        if len(top_indices) > min_top_indices:
                            #print('conf_threshold_i :', conf_threshold_i)
                            break
                            #continue
                #print('len(top_indices) :', len(top_indices))
                #print('conf_threshold_change :', conf_threshold_change)
            # -----------------------------------------------------------------------------------

            img_h = images_h[file_no]
            img_w = images_w[file_no]
            currentAxis = plt.gca()

            for box_no, top_index in enumerate(top_indices):
                # Skip once the maximum number of detections is exceeded
                # (the AI_Edge_Contest limits each image to 100 detections)
                if (max_box is not None) and (box_no >= max_box):
                    continue

                # Build the predicted box
                label = bbox_label[top_index]
                #print('label:', label)
                x = int(bbox_xmin[top_index] * img_w)
                y = int(bbox_ymin[top_index] * img_h)
                w = int((bbox_xmax[top_index] - bbox_xmin[top_index]) * img_w)
                h = int((bbox_ymax[top_index] - bbox_ymin[top_index]) * img_h)
                box = (x, y), w, h

                # Draw the predicted box
                conf = float(bbox_conf[top_index])
                label_name = dict[label]
                # -------------------- Re-predict with the classification model --------------------
                # Flag for whether to include this detection
                is_inclode = True
                if class_model is not None:
                    if conf < conf_threshold:
                        # Crop the detected region (from the ndarray image data)
                        # ndarray slicing must be in [y:y_max, x:x_max] order or the crop comes out wrong
                        # https://qiita.com/tadOne/items/8967f046ca395669329d
                        tmp_img = images[file_no]
                        dst = tmp_img[y:y + h, x:x + w]
                        # Showing the image here would prevent the bbox image from being saved; for checking only
                        #plt.imshow(dst / 255.)
                        #plt.show()

                        #print('file_names :', file_names[file_no])
                        #print('label_name :', label_name)
                        #print('conf :', conf)
                        # Predict on the cropped image with the classification model
                        class_conf, class_label_id = predict_class_model(
                            dst, class_model, img_height, img_width)
                        #print('class_label_name :', dict_class[class_label_id])
                        #print('class_conf :', class_conf)

                        # If the classification model scores higher, overwrite the label and score
                        if conf <= class_conf:
                            label_name = dict_class[class_label_id]
                            conf = float(class_conf)
                        #elif top_index > 1:
                        #    # If there is already at least one detection and the score is low, do not include it
                        #    is_inclode = False
                # ---------------------------------------------------------

                # Do not include detections with low scores
                if is_inclode == True:
                    # Draw the bbox on the image
                    display_txt = '{:0.2f}, {}'.format(conf, label_name)
                    currentAxis.add_patch(
                        plt.Rectangle(*box,
                                      fill=False,
                                      edgecolor=get_class_color(label),
                                      linewidth=linewidth))
                    currentAxis.text(x,
                                     y,
                                     display_txt,
                                     bbox={
                                         'facecolor': get_class_color(label),
                                         'alpha': 0.2
                                     },
                                     fontsize=fontsize)
                    # Keep the results in a dataframe
                    series = pd.Series([
                        file_names[file_no], conf, label_name, x, y, x + w,
                        y + h
                    ],
                                       index=pred_df.columns)
                    #print(series)
                    pred_df = pred_df.append(series, ignore_index=True)
                    #print(pred_df)
                    # -------------------------- for json output --------------------------
                    if label_name not in prediction[img_name]:
                        prediction[img_name][label_name] = []
                    prediction[img_name][label_name].append(
                        [x, y, x + w, y + h])
                    #print(prediction)
                    # ------------------------------------------------------------

            # Save the image file with the prediction results
            plt.savefig(os.path.join(predicted_dir, file_names[file_no]),
                        dpi=300)
            plt.clf()

    output_dir = os.path.dirname(predicted_dir)
    pred_df.to_csv(os.path.join(output_dir, 'pred.csv'), sep='\t', index=False)

    # -------------------------- for json output --------------------------
    with open(os.path.join(output_dir, 'pred.json'), 'w') as f:
        json.dump(prediction, f, indent=4)  # write the json file with indentation
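
A hedged example of calling the function above; the directory paths and weights file name are placeholders, and create_model() is assumed to be available as in the original module.

# Hypothetical call; paths and the weights file are placeholders.
label_dict = {0.0: "other", 1.0: "Bicycle", 2.0: "Pedestrian",
              3.0: "Signal", 4.0: "Signs", 5.0: "Truck", 6.0: "Car"}
dtc_predict_py_edit(predict_dir='./test_images',
                    predicted_dir='./predicted_images',
                    dict=label_dict,
                    model_path='weight_ssd_best.hdf5',
                    conf_threshold=0.6)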
Example #5
    elif result[6] is None:
        print('no image data string.')
    else:
        values = map(ord, list(result[6]))
        # The image data from Pepper comes as a flat array, so map it onto (x, y) and RGB
        i = 0
        for y in range(0, height):
            for x in range(0, width):
                image.itemset((y, x, 0), values[i + 0])
                image.itemset((y, x, 1), values[i + 1])
                image.itemset((y, x, 2), values[i + 2])
                i += 3

        image = cv2.resize(image, (300, 300))
        #cv2.imwrite("input.jpg",frame)

        # Preprocess the image before feeding it to Keras
        input_image = [imgprocess.img_to_array(image)]
        input_image = preprocess_input(np.array(input_image))

        prediction = model.predict(input_image)  # run inference
        results = bbox_util.detection_out(prediction)  # process the output into bounding boxes
        result_image = draw_bbox_from_results(image, results)  # draw the bounding boxes on the image

        cv2.imshow("pepper-camera-ssd", image)  # 画像表示

        k = cv2.waitKey(5)
        if k == ord('q'): break

videoDevice.unsubscribe(nameID)
def run_camera(input_shape, model, root_path, action_class, frame_number):
    num_classes = 21
    conf_thresh = 0.6
    bbox_util = BBoxUtility(num_classes)

    class_colors = []
    for i in range(0, num_classes):
        hue = 255 * i / num_classes
        col = np.zeros((1, 1, 3)).astype("uint8")
        col[0][0][0] = hue
        col[0][0][1] = 128  # Saturation
        col[0][0][2] = 255  # Value
        cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
        col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]), int(cvcol[0][0][2]))
        class_colors.append(col)

    vid = cv2.VideoCapture(0)
    sleep(2)
    # Compute aspect ratio of video
    vidw = vid.get(cv2.CAP_PROP_FRAME_WIDTH)
    vidh = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)
    # vidar = vidw / vidh
    crop_path = root_path + 'crop/' + action_class
    origin_path = root_path + 'origin/' + action_class
    mask_path = root_path + 'mask/' + action_class
    samples = os.listdir(origin_path)
    sample_count = len(samples)
    while True:
        retval, orig_image = vid.read()
        if not retval:
            print("Done!")
            return None

        im_size = (input_shape[0], input_shape[1])
        resized = cv2.resize(orig_image, im_size)
        rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

        inputs = [image.img_to_array(rgb)]
        tmp_inp = np.array(inputs)
        x = preprocess_input(tmp_inp)

        y = model.predict(x)

        results = bbox_util.detection_out(y)
        if len(results) > 0 and len(results[0]) > 0:
            det_label = results[0][:, 0]
            det_conf = results[0][:, 1]
            det_xmin = results[0][:, 2]
            det_ymin = results[0][:, 3]
            det_xmax = results[0][:, 4]
            det_ymax = results[0][:, 5]

            top_indices = [
                i for i, conf in enumerate(det_conf) if conf >= conf_thresh
            ]

            top_conf = det_conf[top_indices]
            top_label_indices = det_label[top_indices].tolist()
            top_xmin = det_xmin[top_indices]
            top_ymin = det_ymin[top_indices]
            top_xmax = det_xmax[top_indices]
            top_ymax = det_ymax[top_indices]

            if 15 not in top_label_indices:
                detected = False
            else:
                detected = True
                for i in range(top_conf.shape[0]):
                    xmin = int(round((top_xmin[i] * vidw) * 0.9))
                    ymin = int(round((top_ymin[i] * vidh) * 0.9))
                    xmax = int(round(
                        (top_xmax[i] * vidw) *
                        1.1)) if int(round(
                            (top_xmax[i] * vidw) * 1.1)) <= vidw else int(
                                round(top_xmax[i] * vidw))
                    ymax = int(round(
                        (top_ymax[i] * vidh) *
                        1.1)) if int(round(
                            (top_ymax[i] * vidh) * 1.1)) <= vidh else int(
                                round(top_ymax[i] * vidh))

                    # save frames
                    class_num = int(top_label_indices[i])
                    if class_num == 15:
                        frame = copy.deepcopy(orig_image)
                        cv2.rectangle(orig_image, (xmin, ymin), (xmax, ymax),
                                      class_colors[class_num], 2)
                        curl = np.zeros_like(frame, dtype='uint8')
                        curl[ymin:ymax, xmin:xmax, :] = frame[ymin:ymax,
                                                              xmin:xmax, :]
                        crop = cv2.resize(frame[ymin:ymax, xmin:xmax, :],
                                          (64, 96))
                        curl = cv2.resize(curl, (160, 120))
                        frame = cv2.resize(frame, (160, 120))
        else:
            detected = False

        cv2.imshow("SSD result", orig_image)
        if cv2.waitKey(5) & 0xFF == ord('s') and detected:
            sample_count += 1
            cv2.imwrite(crop_path + '/' + str(sample_count + 10000) + '.jpg',
                        crop)
            print('saving ' + crop_path + '/' + str(sample_count + 10000) +
                  '.jpg')
            cv2.imwrite(origin_path + '/' + str(sample_count + 10000) + '.jpg',
                        frame)
            print('saving ' + origin_path + '/' + str(sample_count + 10000) +
                  '.jpg')
            cv2.imwrite(mask_path + '/' + str(sample_count + 10000) + '.jpg',
                        curl)
            print('saving ' + mask_path + '/' + str(sample_count + 10000) +
                  '.jpg')
class ssdKeras():
    def __init__(self):
        #self.node_name = "ssd_keras"
        #rospy.init_node(self.node_name)
        self.class_names = [
            "background", "aeroplane", "bicycle", "bird", "boat", "bottle",
            "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse",
            "motorbike", "person", "pottedplant", "sheep", "sofa", "train",
            "tvmonitor"
        ]
        self.num_classes = len(self.class_names)
        self.input_shape = (300, 300, 3)
        self.model = SSD(self.input_shape, num_classes=self.num_classes)
        self.model.load_weights(
            '/home/abdulrahman/catkin_ws/src/victim_localization/resources/ssd_keras/weights_SSD300.hdf5'
        )

        self.bbox_util = BBoxUtility(self.num_classes)
        self.conf_thresh = 0.4

        self.model._make_predict_function()
        self.graph = tf.get_default_graph()

        self.detection_index = DL_msgs_boxes()

        # Create unique and somewhat visually distinguishable bright
        # colors for the different classes.
        self.class_colors = []
        for i in range(0, self.num_classes):
            # This can probably be written in a more elegant manner
            hue = 255 * i / self.num_classes
            col = np.zeros((1, 1, 3)).astype("uint8")
            col[0][0][0] = hue
            col[0][0][1] = 128  # Saturation
            col[0][0][2] = 255  # Value
            cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
            col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]),
                   int(cvcol[0][0][2]))
            self.class_colors.append(col)

        self.bridge = CvBridge()  # Create the cv_bridge object

        self.image_sub = rospy.Subscriber(
            "front_cam/rgb/image_raw", Image, self.detect_image,
            queue_size=1)  # the appropriate callbacks

        self.box_coordinate_pub = rospy.Publisher(
            "/ssd_detction/box", DL_msgs_boxes,
            queue_size=5)  # the appropriate callbacks

    def detect_image(self, ros_image):
        """ Runs the test on a video (or webcam)

        # Arguments

        conf_thresh: Threshold of confidence. Any boxes with lower confidence
                     are not visualized.

        """

        #### Use cv_bridge() to convert the ROS image to OpenCV format  ####
        try:
            image_orig = self.bridge.imgmsg_to_cv2(ros_image, "bgr8")
        except CvBridgeError as e:
            print(e)
        ##########

        vidw = 640.0  # change from cv2.cv.CV_CAP_PROP_FRAME_WIDTH
        vidh = 480.0  # change from cv2.cv.CV_CAP_PROP_FRAME_HEIGHT
        vidar = vidw / vidh

        #print(type(image_orig))
        im_size = (self.input_shape[0], self.input_shape[1])
        resized = cv2.resize(image_orig, im_size)
        rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

        # Resize back to the original aspect ratio for later visualization.
        # The resized version is used so the visualization shows the
        # resolution the network actually has to work with.
        to_draw = cv2.resize(resized, (640, 480))

        # Use model to predict
        inputs = [image.img_to_array(rgb)]
        tmp_inp = np.array(inputs)
        x = preprocess_input(tmp_inp)

        start_time = time.time()  # debugging

        with self.graph.as_default():
            y = self.model.predict(x)
        #print("--- %s seconds_for_one_image ---" % (time.time() - start_time))

        # This line creates a new TensorFlow device every time. Is there a
        # way to avoid that?
        results = self.bbox_util.detection_out(y)

        if len(results) > 0 and len(results[0]) > 0:
            # Interpret output, only one frame is used
            det_label = results[0][:, 0]
            det_conf = results[0][:, 1]
            det_xmin = results[0][:, 2]
            det_ymin = results[0][:, 3]
            det_xmax = results[0][:, 4]
            det_ymax = results[0][:, 5]

            top_indices = [
                i for i, conf in enumerate(det_conf)
                if conf >= self.conf_thresh
            ]

            top_conf = det_conf[top_indices]

            top_label_indices = det_label[top_indices].tolist()
            top_xmin = det_xmin[top_indices]
            top_ymin = det_ymin[top_indices]
            top_xmax = det_xmax[top_indices]
            top_ymax = det_ymax[top_indices]

            # initialize the detection msgs
            box_msg = DL_msgs_box()
            box_msg.xmin = 0
            box_msg.ymin = 0
            box_msg.xmax = 0
            box_msg.ymax = 0
            box_msg.Class = "Non"  # 100 reflect a non-class value
            self.detection_index.boxes.append(box_msg)

            print(top_xmin)
            for i in range(top_conf.shape[0]):
                self.detection_index.boxes[:] = []
                xmin = int(round(top_xmin[i] * to_draw.shape[1]))
                ymin = int(round(top_ymin[i] * to_draw.shape[0]))
                xmax = int(round(top_xmax[i] * to_draw.shape[1]))
                ymax = int(round(top_ymax[i] * to_draw.shape[0]))

                #include the corner to be published
                box_msg = DL_msgs_box()
                box_msg.xmin = xmin
                box_msg.ymin = ymin
                box_msg.xmax = xmax
                box_msg.ymax = ymax
                box_msg.Class = self.class_names[int(top_label_indices[i])]
                self.detection_index.boxes.append(box_msg)

                # Draw the box on top of the to_draw image

                class_num = int(top_label_indices[i])
                if (self.class_names[class_num] == "person"):
                    cv2.rectangle(to_draw, (xmin, ymin), (xmax, ymax),
                                  self.class_colors[class_num], 2)
                    text = self.class_names[class_num] + " " + ('%.2f' %
                                                                top_conf[i])

                    text_top = (xmin, ymin - 10)
                    text_bot = (xmin + 80, ymin + 5)
                    text_pos = (xmin + 5, ymin)
                    cv2.rectangle(to_draw, text_top, text_bot,
                                  self.class_colors[class_num], -1)
                    cv2.putText(to_draw, text, text_pos,
                                cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 0), 1)
                    #cv2.circle(to_draw, (xmax, ymax),1,self.class_colors[class_num],30);

            self.detection_index.header = std_msgs.msg.Header()
            self.detection_index.header.stamp = rospy.Time.now()
            print(self.detection_index)
            self.box_coordinate_pub.publish(self.detection_index)
            self.detection_index.boxes[:] = []
            #self.detection_index.boxes.clear()
        cv2.imshow("SSD result", to_draw)
        cv2.waitKey(1)

    def main(self):
        rospy.spin()
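
A minimal, hypothetical ROS entry point for the ssdKeras node above; the node name is an assumption (the original constructor has rospy.init_node commented out).

if __name__ == '__main__':
    rospy.init_node("ssd_keras")
    node = ssdKeras()   # loads the model and sets up the subscriber/publisher
    node.main()         # spins until shutdown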
Example #8
class SSD:
	def __init__(self, input_shape = (300, 300, 3)):
		self.num_class = config.NUM_CLASSES
		self.input_tensor = tf.placeholder(tf.float32, [None, input_shape[0], input_shape[1], input_shape[2]])
		self.label_tensor = tf.placeholder(tf.float32, [None, 7308, 4 + config.NUM_CLASSES + 8])
		self.predicts = self.build(input_shape, config.NUM_CLASSES)
		self.input_shape = input_shape
		self.global_step = tf.train.create_global_step()
		var_list = tf.global_variables()
		var_list = [var for var in var_list if "Adam" not in var.name]
		self.saver = tf.train.Saver(var_list, max_to_keep=1)
		self.bbox_util = BBoxUtility(self.num_class)

	def build(self, input_shape, num_classes):
		img_size = (input_shape[1], input_shape[0])
		#300
		conv1_1 = tf.layers.conv2d(self.input_tensor, 64, 3, name = "conv1_1", padding = "same", activation = activation)
		self.conv1_1 = conv1_1
		conv1_2 = tf.layers.conv2d(conv1_1, 64, 3, name = "conv1_2", padding = "same", activation = activation)
		pool1 = tf.layers.max_pooling2d(conv1_2, pool_size = 2, strides = 2, padding = "same")
		#150
		conv2_1 = tf.layers.conv2d(pool1, 128, 3, name = "conv2_1", padding = "same", activation = activation)
		conv2_2 = tf.layers.conv2d(conv2_1, 128, 3, name = "conv2_2", padding = "same", activation = activation)
		pool2 = tf.layers.max_pooling2d(conv2_2, pool_size = 2, strides = 2, padding = "same")
		#75
		conv3_1 = tf.layers.conv2d(pool2, 256, 3, name = "conv3_1", padding = "same", activation = activation)
		conv3_2 = tf.layers.conv2d(conv3_1, 256, 3, name = "conv3_2", padding = "same", activation = activation)
		conv3_3 = tf.layers.conv2d(conv3_2, 256, 3, name = "conv3_3", padding = "same", activation = activation)
		pool3 = tf.layers.max_pooling2d(conv3_3, pool_size = 2, strides = 2, padding = "same")
		#38
		conv4_1 = tf.layers.conv2d(pool3, 512, 3, name = "conv4_1", padding = "same", activation = activation)
		conv4_2 = tf.layers.conv2d(conv4_1, 512, 3, name = "conv4_2", padding = "same", activation = activation)
		conv4_3 = tf.layers.conv2d(conv4_2, 512, 3, name = "conv4_3", padding = "same", activation = activation)
		pool4 = tf.layers.max_pooling2d(conv4_3, pool_size = 2, strides = 2, padding = "same")
		#19
		conv5_1 = tf.layers.conv2d(pool4, 512, 3, name = "conv5_1", padding = "same", activation = activation)
		conv5_2 = tf.layers.conv2d(conv5_1, 512, 3, name = "conv5_2", padding = "same", activation = activation)
		conv5_3 = tf.layers.conv2d(conv5_2, 512, 3, name = "conv5_3", padding = "same", activation = activation)
		pool5 = tf.layers.max_pooling2d(conv5_3, pool_size = 3, strides = 1, padding = "same")
		#19
		fc6_kernel = tf.get_variable(name = "fc6/kernel", shape = (3, 3, 512, 1024), initializer = tf.truncated_normal_initializer(stddev=0.1))
		fc6_bias = tf.get_variable(name = "fc6/bias", shape = [1024], initializer = tf.truncated_normal_initializer(stddev = 0.1))
		fc6 = tf.nn.atrous_conv2d(pool5, fc6_kernel, rate = 6, padding = "SAME", name = "fc6")
		fc6 = tf.nn.bias_add(fc6, fc6_bias)
		fc6 = activation(fc6)

		fc7 = tf.layers.conv2d(fc6, 1024, 1, name = "fc7", padding = "same", activation = activation)

		conv6_1 = tf.layers.conv2d(fc7, 256, 1, name = "conv6_1", padding = "same", activation = activation)
		conv6_2 = tf.layers.conv2d(conv6_1, 512, 3, name = "conv6_2", strides = (2,2), padding = "same", activation = activation)
		#10
		conv7_1 = tf.layers.conv2d(conv6_2, 128, 1, name = "conv7_1", padding = "same", activation = activation)
		conv7_2 = tf.keras.layers.ZeroPadding2D()(conv7_1)
		conv7_2 = tf.layers.conv2d(conv7_2, 256, 3, name = "conv7_2", padding = "valid", strides = (2,2), activation = activation)
		#5
		conv8_1 = tf.layers.conv2d(conv7_2, 128, 1, name = "conv8_1", padding = "same", activation = activation)
		conv8_2 = tf.layers.conv2d(conv8_1, 256, 3, name = "conv8_2", padding = "same", strides = (2,2), activation = activation)
		#3
		pool6 = tf.keras.layers.GlobalAveragePooling2D(name='pool6')(conv8_2)
		#1
		num_priors = 3
		conv4_3_norm = self.normalize_layer(conv4_3, 20, 512, "conv4_3_norm")
		conv4_3_norm_mbox_loc = tf.layers.conv2d(conv4_3_norm, num_priors * 4, 3, name = "conv4_3_norm_mbox_loc", padding = "same")
		conv4_3_norm_mbox_loc_flat = tf.layers.flatten(conv4_3_norm_mbox_loc)
		name = "conv4_3_norm_mbox_conf"
		if num_classes!=21:
			name+="_"+str(num_classes)
		conv4_3_norm_mbox_conf = tf.layers.conv2d(conv4_3_norm, num_priors * num_classes, 3, name = name, padding = "same")
		conv4_3_norm_mbox_conf_flat = tf.layers.flatten(conv4_3_norm_mbox_conf)
		shape = [0, 38, 38, 512]
		conv4_3_norm_mbox_priorbox = self.priorBox_layer(conv4_3_norm, shape, img_size, 30.0, aspect_ratios=[2], variances=[0.1, 0.1, 0.2, 0.2], name='conv4_3_norm_mbox_priorbox')

		num_priors = 6
		fc7_mbox_loc = tf.layers.conv2d(fc7, num_priors * 4, 3, name = "fc7_mbox_loc", padding = "same")
		fc7_mbox_loc_flat = tf.layers.flatten(fc7_mbox_loc)
		name = "fc7_mbox_conf"
		if num_classes!=21:
			name+="_"+str(num_classes)	
		fc7_mbox_conf = tf.layers.conv2d(fc7, num_priors * num_classes, 3, name = name, padding = "same")
		fc7_mbox_conf_flat = tf.layers.flatten(fc7_mbox_conf)
		shape = [0, 19, 19, 1024]
		fc7_mbox_priorbox = self.priorBox_layer(fc7, shape, img_size, 60.0, max_size=114.0, aspect_ratios=[2, 3], variances=[0.1, 0.1, 0.2, 0.2], name='fc7_mbox_priorbox')

		num_priors = 6
		conv6_2_mbox_loc = tf.layers.conv2d(conv6_2, num_priors * 4, 3, name = "conv6_2_mbox_loc", padding = "same")
		conv6_2_mbox_loc_flat = tf.layers.flatten(conv6_2_mbox_loc)
		name = "conv6_2_mbox_conf"
		if num_classes!=21:
			name+="_"+str(num_classes)	
		conv6_2_mbox_conf = tf.layers.conv2d(conv6_2, num_priors * num_classes, 3, name = name, padding = "same")
		conv6_2_mbox_conf_flat = tf.layers.flatten(conv6_2_mbox_conf)
		shape = [0, 10, 10, 256]
		conv6_2_mbox_priorbox = self.priorBox_layer(conv6_2, shape, img_size, 114.0, max_size=168.0, aspect_ratios=[2, 3], variances=[0.1, 0.1, 0.2, 0.2], name='conv6_2_mbox_priorbox')

		num_priors = 6
		conv7_2_mbox_loc = tf.layers.conv2d(conv7_2, num_priors * 4, 3, name = "conv7_2_mbox_loc", padding = "same")
		conv7_2_mbox_loc_flat = tf.layers.flatten(conv7_2_mbox_loc)
		name = "conv7_2_mbox_conf"
		if num_classes!=21:
			name+="_"+str(num_classes)		
		conv7_2_mbox_conf = tf.layers.conv2d(conv7_2, num_priors * num_classes, 3, name = name, padding = "same")
		conv7_2_mbox_conf_flat = tf.layers.flatten(conv7_2_mbox_conf)
		shape = [0, 5, 5, 256]
		conv7_2_mbox_priorbox = self.priorBox_layer(conv7_2, shape, img_size, 168.0, max_size=222.0, aspect_ratios=[2, 3], variances=[0.1, 0.1, 0.2, 0.2], name='conv7_2_mbox_priorbox')

		num_priors = 6
		conv8_2_mbox_loc = tf.layers.conv2d(conv8_2, num_priors * 4, 3, name = "conv8_2_mbox_loc", padding = "same")
		conv8_2_mbox_loc_flat = tf.layers.flatten(conv8_2_mbox_loc)
		name = "conv8_2_mbox_conf"
		if num_classes!=21:
			name+="_"+str(num_classes)		
		conv8_2_mbox_conf = tf.layers.conv2d(conv8_2, num_priors * num_classes, 3, name = name, padding = "same")
		conv8_2_mbox_conf_flat = tf.layers.flatten(conv8_2_mbox_conf)
		shape = [0, 3, 3, 256]
		conv8_2_mbox_priorbox = self.priorBox_layer(conv8_2, shape, img_size, 222.0, max_size=276.0, aspect_ratios=[2, 3], variances=[0.1, 0.1, 0.2, 0.2], name='conv8_2_mbox_priorbox')

		num_priors = 6
		pool6_mbox_loc_flat = tf.layers.dense(pool6, units = num_priors * 4, name='pool6_mbox_loc_flat')
		name = "pool6_mbox_conf_flat"
		if num_classes!=21:
			name+="_"+str(num_classes)	
		pool6_mbox_conf_flat = tf.layers.dense(pool6, units = num_priors * num_classes, name=name)
		shape = [0, 1, 1, 256]
		pool6_mbox_priorbox = self.priorBox_layer(tf.reshape(pool6, (-1, 1, 1, 256)), shape, img_size, 276.0, max_size=330.0, aspect_ratios=[2, 3], variances=[0.1, 0.1, 0.2, 0.2], name='pool6_mbox_priorbox')

		mbox_loc = tf.concat([conv4_3_norm_mbox_loc_flat, 
							fc7_mbox_loc_flat, 
							conv6_2_mbox_loc_flat,
							conv7_2_mbox_loc_flat,
							conv8_2_mbox_loc_flat,
							pool6_mbox_loc_flat], axis = 1)

		mbox_conf = tf.concat([conv4_3_norm_mbox_conf_flat,
							fc7_mbox_conf_flat,
							conv6_2_mbox_conf_flat,
							conv7_2_mbox_conf_flat,
							conv8_2_mbox_conf_flat,
							pool6_mbox_conf_flat], axis = 1)

		mbox_priorbox = tf.concat([conv4_3_norm_mbox_priorbox,
								fc7_mbox_priorbox,
								conv6_2_mbox_priorbox,
								conv7_2_mbox_priorbox,
								conv8_2_mbox_priorbox,
								pool6_mbox_priorbox], axis=1)
		mbox_priorbox = tf.cast(mbox_priorbox, tf.float32)

		num_boxes = tf.shape(mbox_loc)[-1]//4
		mbox_loc = tf.reshape(mbox_loc, (-1, num_boxes, 4))
		mbox_conf = tf.reshape(mbox_conf, (-1, num_boxes, num_classes))
		mbox_conf = tf.nn.softmax(mbox_conf)
		predictions = tf.concat([mbox_loc, mbox_conf, mbox_priorbox], axis = 2)

		return predictions

	def normalize_layer(self, net, init_scale, shape=512, name = None):
		init_scale = init_scale * np.ones(shape)
		scale = tf.Variable(init_scale, name = name, dtype = tf.float32)
		return scale * tf.nn.l2_normalize(net, 3)

	def priorBox_layer(self, net, input_shape, img_size, min_size, max_size=None, aspect_ratios=None, flip=True, variances=[0.1], clip=True, name = None):
		aspect_ratios_ = [1.0]
		if max_size:
			if max_size < min_size:
				raise Exception('max_size must be greater than min_size.')
			aspect_ratios_.append(1.0)
		if aspect_ratios:
			for ar in aspect_ratios:
				if ar in aspect_ratios_: continue
				aspect_ratios_.append(ar)
				if flip:
					aspect_ratios_.append(1.0 / ar)
		variances = np.array(variances)
		layer_width = input_shape[2]
		layer_height = input_shape[1]
		img_width = img_size[0]
		img_height = img_size[1]
		box_widths = []
		box_heights = []
		for ar in aspect_ratios_:
			if ar == 1 and len(box_widths) == 0:
				box_widths.append(min_size)
				box_heights.append(min_size)
			elif ar == 1 and len(box_widths) > 0:
				box_widths.append(np.sqrt(min_size * max_size))
				box_heights.append(np.sqrt(min_size * max_size))
			elif ar != 1:
				box_widths.append(min_size * np.sqrt(ar))
				box_heights.append(min_size / np.sqrt(ar))
		box_widths = 0.5 * np.array(box_widths)
		box_heights = 0.5 * np.array(box_heights)
		step_x = img_width / layer_width
		step_y = img_height / layer_height
		linx = np.linspace(0.5 * step_x, img_width - 0.5 * step_x, layer_width)
		liny = np.linspace(0.5 * step_y, img_height - 0.5 * step_y, layer_height)

		centers_x, centers_y = np.meshgrid(linx, liny)
		centers_x = centers_x.reshape(-1, 1)
		centers_y = centers_y.reshape(-1, 1)
		num_priors_ = len(aspect_ratios_)
		prior_boxes = np.concatenate((centers_x, centers_y), axis=1)
		prior_boxes = np.tile(prior_boxes, (1, 2 * num_priors_))
		prior_boxes[:, ::4] -= box_widths
		prior_boxes[:, 1::4] -= box_heights
		prior_boxes[:, 2::4] += box_widths
		prior_boxes[:, 3::4] += box_heights
		prior_boxes[:, ::2] /= img_width
		prior_boxes[:, 1::2] /= img_height
		prior_boxes = prior_boxes.reshape(-1, 4)
		if clip:
			prior_boxes = np.minimum(np.maximum(prior_boxes, 0.0), 1.0)
		num_boxes = len(prior_boxes)
		if len(variances) == 1:
			variances = np.ones((num_boxes, 4)) * variances[0]
		elif len(variances) == 4:
			variances = np.tile(variances, (num_boxes, 1))
		else:
			raise Exception('Must provide one or four variances.')
		prior_boxes = np.concatenate((prior_boxes, variances), axis=1)
		prior_boxes_tensor = tf.expand_dims(tf.Variable(prior_boxes, name = name), 0)
		pattern = [tf.shape(net)[0], 1, 1]
		prior_boxes_tensor = tf.tile(prior_boxes_tensor, pattern)
		return prior_boxes_tensor

	def restore(self, sess):
		checkpoint = tf.train.latest_checkpoint(FLAGS.checkpoint)
		if checkpoint:
			print("restore from: " + checkpoint)
			self.saver.restore(sess, checkpoint)
		### if you don't want to use these pretrained weights and don't want to install h5py, you can comment this block out ##
		elif os.path.exists('weights_SSD300.hdf5'):
			print("restore from pretrained weights")
			tf_variables = {}
			ops = []
			for variables in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES):
				if "Adam" in variables.name or "RMS" in variables.name: continue
				key = variables.name.split("/")[0].split(":")[0]
				if key not in tf_variables:
					tf_variables[key] = [variables]
				else:
					tf_variables[key].append(variables)
			with h5py.File('weights_SSD300.hdf5','r') as f:
				for k in f.keys():
					if k in tf_variables:
						nn = 0
						for kk in f[k].keys():
							a = np.array(f[k][kk])
							ops.append(tf_variables[k][nn].assign(a))
							nn+=1
			sess.run(ops)
		######################################## end ####################################################################
		elif os.path.exists("vgg16.npy"):
			print("restore from vgg weights.")
			vgg = np.load("vgg16.npy", encoding='latin1').item()
			ops = []
			vgg_dict = ["conv1_1", "conv1_2", "conv2_1", "conv2_2", "conv3_1", "conv3_2", "conv3_3", "conv4_1", "conv4_2", "conv4_3",
			"conv5_1","conv5_2","conv5_3"]
			tf_variables = {}
			for variables in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES):
				if "Adam" or "RMS" in variables.name: continue
				key = variables.name.split("/")[0].split(":")[0]
				if key not in vgg_dict: continue
				if key not in tf_variables:
					tf_variables[key] = [variables]
					ops.append(variables.assign(vgg[key][0]))
				else:
					tf_variables[key].append(variables)
					ops.append(variables.assign(vgg[key][1]))
			sess.run(ops)
		else:
			print("train from scratch.")

	def train(self):
		self.loss = MultiboxLoss(self.num_class, neg_pos_ratio=2.0).compute_loss(self.label_tensor, self.predicts)
		self.loss_avg = tf.reduce_mean(self.loss)
		
		learning_rate = tf.train.exponential_decay(config.lr, self.global_step, 10000 ,0.9, True, name='learning_rate')
		self.train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.loss, global_step = self.global_step)
		self.train_loss_summary = tf.summary.scalar("loss_train", self.loss_avg)
		self.val_loss_summary = tf.summary.scalar("loss_val", self.loss_avg)
		self.writer = tf.summary.FileWriter(FLAGS.checkpoint)

		priors = pickle.load(open('prior_boxes_ssd300.pkl', 'rb'))
		self.bbox_util = BBoxUtility(self.num_class, priors)

		gt = pickle.load(open(FLAGS.label_file, 'rb'))
		keys = sorted(gt.keys())
		num_train = int(round(0.8 * len(keys)))
		train_keys = keys[:num_train]
		val_keys = keys[num_train:]

		gen = Generator(gt, self.bbox_util, config.BATCH_SIZE, FLAGS.images_dir,
		                train_keys, val_keys,
		                (self.input_shape[0], self.input_shape[1]))#, do_crop=False, saturation_var = 0, brightness_var = 0, contrast_var = 0, lighting_std = 0, hflip_prob = 0, vflip_prob = 0)
		c = tf.ConfigProto()
		c.gpu_options.allow_growth = True
		with tf.Session(config=c) as sess:
			sess.run(tf.global_variables_initializer())
			self.writer.add_graph(sess.graph)
			self.restore(sess)
			for inputs, labels in gen.generate(True):
				_, lo, step, summary = sess.run([self.train_op, self.loss_avg, self.global_step, self.train_loss_summary], feed_dict = {self.input_tensor: inputs, self.label_tensor: labels})
				sys.stdout.write("train loss: %d %.3f \r"%(step, lo))
				sys.stdout.flush()
				self.writer.add_summary(summary, step)
				if step % config.save_step == config.save_step - 1:
					self.saver.save(sess, os.path.join(FLAGS.checkpoint, "ckpt"), global_step=self.global_step)
					print("saved")
				if step % config.snapshot_step == 0:
					val_in, val_la = next(gen.generate(False))
					lo, s, preds = sess.run([self.loss_avg, self.train_loss_summary, self.predicts], feed_dict = {self.input_tensor: val_in, self.label_tensor: val_la})
					self.writer.add_summary(s, step)
					print("val loss:", step, lo)
					images = [np.array(val_in[v]) for v in range(val_in.shape[0])]
					self.paint_imgs(preds, images)

		print("Train finished. Checkpoint saved in", FLAGS.checkpoint)

	def predict(self):
		inputs = []
		images = []
		file_name = []
		file_list = os.listdir(FLAGS.images_dir)
		for file in file_list:
			img_path = os.path.join(FLAGS.images_dir, file)
			img = cv2.imread(img_path)
			images.append(img.copy())
			img = cv2.resize(img, (300, 300)).astype(np.float32)
			inputs.append(img)
			file_name.append(file)
		inputs = np.array(inputs)
		inputs = preprocess_input(np.array(inputs))

		c = tf.ConfigProto()
		c.gpu_options.allow_growth = True
		with tf.Session(config=c) as sess:
			init = tf.global_variables_initializer()
			sess.run(init)
			self.restore(sess)
			#todo batch
			preds = sess.run(self.predicts, feed_dict = {self.input_tensor: inputs})
			self.paint_imgs(preds, images, file_name)
		print("Finished. Images saved in " + FLAGS.eval_output_dir)

	def paint_imgs(self, preds, images, file_name=None):
		results = self.bbox_util.detection_out(preds)
		for j, img in enumerate(images):
			# Parse the outputs.
			det_label = results[j][:, 0]
			det_conf = results[j][:, 1]
			det_xmin = results[j][:, 2]
			det_ymin = results[j][:, 3]
			det_xmax = results[j][:, 4]
			det_ymax = results[j][:, 5]

			# Get detections with confidence higher than config.visual_threshold.
			top_indices = [i for i, conf in enumerate(det_conf) if conf >= config.visual_threshold]

			top_conf = det_conf[top_indices]
			top_label_indices = det_label[top_indices].tolist()
			top_xmin = det_xmin[top_indices]
			top_ymin = det_ymin[top_indices]
			top_xmax = det_xmax[top_indices]
			top_ymax = det_ymax[top_indices]

			for i in range(top_conf.shape[0]):
				xmin = int(round(top_xmin[i] * img.shape[1]))
				ymin = int(round(top_ymin[i] * img.shape[0]))
				xmax = int(round(top_xmax[i] * img.shape[1]))
				ymax = int(round(top_ymax[i] * img.shape[0]))
				score = top_conf[i]
				label = int(top_label_indices[i])
				label_name = config.CLASS_NAMES[label - 1]
				display_txt = '{:0.2f}, {}'.format(score, label_name)
				coords = (xmin, ymin), xmax-xmin+1, ymax-ymin+1
				cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (255,0,0), 2)
				cv2.putText(img, display_txt, (xmin, ymin), cv2.FONT_HERSHEY_COMPLEX, 0.5, (255,255,64), 1)
			if not file_name:
				name = str(j)+".jpg"
			else:
				name = file_name[j]
			cv2.imwrite(os.path.join(FLAGS.eval_output_dir, name), img)
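
A hypothetical driver for the TensorFlow SSD class above; it assumes FLAGS and config are set up as in the surrounding project.

ssd = SSD(input_shape=(300, 300, 3))
ssd.train()      # trains and writes checkpoints/summaries to FLAGS.checkpoint
# ssd.predict()  # or: restore a checkpoint and write annotated images to FLAGS.eval_output_dir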
Example #9
def classify():

    class_colors = makeClassColors()

    voc_classes = [
        'Prescription', 'Bicycle', 'Bird', 'Boat', 'Bottle', 'Bus', 'Car',
        'Cat', 'Chair', 'Cow', 'Diningtable', 'Dog', 'Horse', 'Motorbike',
        'Person', 'Pottedplant', 'Sheep', 'Sofa', 'Train', 'Tvmonitor'
    ]
    NUM_CLASSES = len(voc_classes) + 1

    input_shape = (300, 300, 3)
    model = SSD300(input_shape, num_classes=NUM_CLASSES)
    #model.load_weights('weights_SSD300.hdf5', by_name=True)
    weights_file = "./checkpoints/weights.00-1.25.hdf5"
    model.load_weights(weights_file, by_name=True)

    bbox_util = BBoxUtility(NUM_CLASSES)

    target_dir = "/Users/donchan/Documents/myData/miyuki/camera/None"
    #target_dir = "/Volumes/m1124/FTP/073010"
    #target_dir = "./pics"
    # load original image
    #files = glob.glob("/Volumes/m1124/FTP/073010/*.jpg")

    files = os.listdir(target_dir)
    np.random.shuffle(files)
    files = [os.path.join(target_dir, f) for f in files if ".jpg" in f]
    files = files[:10]

    logging.info("- " * 40)
    logging.info(files)
    logging.info("- " * 40)
    # build pipeline images for classification (original image size)
    pipeline_images = [mpimg.imread(file) for file in files]

    # load images for prediction (shrunk to 300 x 300)
    image_load_ops = lambda x: image.load_img(x, target_size=(300, 300))
    image_array_ops = lambda x: image.img_to_array(x)

    inputs = []
    for x in files:
        img = image.load_img(x, target_size=(300, 300))
        img = image.img_to_array(img)
        inputs.append(img.copy())
    #inputs = list( map(image_load_ops, files) )
    #inputs = list( map(image_array_ops, inputs) )

    # run the Keras SSD model to detect and classify objects in the images
    logging.info(" keras model starting..... ")
    inputs = preprocess_input(np.array(inputs))
    preds = model.predict(inputs, batch_size=1, verbose=1)
    results = bbox_util.detection_out(preds)

    logging.info("")
    logging.info("Now classification for every images.")
    for i, img in enumerate(pipeline_images):
        # Parse the outputs.
        to_draw = img.copy()

        det_label = results[i][:, 0]
        det_conf = results[i][:, 1]
        det_xmin = results[i][:, 2]
        det_ymin = results[i][:, 3]
        det_xmax = results[i][:, 4]
        det_ymax = results[i][:, 5]

        # Get detections with confidence higher than 0.5.
        top_indices = [i for i, conf in enumerate(det_conf) if conf >= 0.5]

        top_conf = det_conf[top_indices]
        top_label_indices = det_label[top_indices].tolist()
        top_xmin = det_xmin[top_indices]
        top_ymin = det_ymin[top_indices]
        top_xmax = det_xmax[top_indices]
        top_ymax = det_ymax[top_indices]

        colors = plt.cm.hsv(np.linspace(0, 1, 21)).tolist()

        plt.imshow(img / 255.)
        currentAxis = plt.gca()
        prescription_label_name = 0

        for j in range(top_conf.shape[0]):
            xmin = int(round(top_xmin[j] * img.shape[1]))
            ymin = int(round(top_ymin[j] * img.shape[0]))
            xmax = int(round(top_xmax[j] * img.shape[1]))
            ymax = int(round(top_ymax[j] * img.shape[0]))
            score = top_conf[j]
            label = int(top_label_indices[j])
            label_name = voc_classes[label - 1]
            if label_name == "Prescription":
                prescription_label_name = 1

            display_txt = '{:0.2f}, {}'.format(score, label_name)
            coords = (xmin, ymin), xmax - xmin + 1, ymax - ymin + 1
            color = colors[label]
            logging.info("object NO: %d %s" % ((j + 1), label_name))
            logging.info("rectangle info: %s" % (coords, ))
            #logging.info(label_name,color)
            currentAxis.add_patch(
                plt.Rectangle(*coords,
                              fill=False,
                              edgecolor=color,
                              linewidth=2))
            currentAxis.text(xmin,
                             ymin,
                             display_txt,
                             bbox={
                                 'facecolor': color,
                                 'alpha': 0.5
                             })
            cv2.rectangle(to_draw, (xmin, ymin), (xmax, ymax),
                          class_colors[label], 2)

        if prescription_label_name == 1:
            cv2.imwrite(os.path.join("./results", str(i) + '.jpg'), to_draw)

        plt.show()
Example #10
    # Capture frame-by-frame
    ret, img = cap.read()
    st = time.time()

    resized = cv2.resize(img, im_size)
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
    to_draw = cv2.resize(resized,
                         (int(input_shape[0] * imgar) * 3, input_shape[1] * 3))

    # Use model to predict
    inputs = [image.img_to_array(rgb)]
    tmp_inp = np.array(inputs)
    x = preprocess_input(tmp_inp)
    y = model.predict(x)

    results = bbox_util.detection_out(y)

    if len(results) > 0 and len(results[0]) > 0:
        # Interpret output, only one frame is used
        det_label = results[0][:, 0]
        det_conf = results[0][:, 1]
        det_xmin = results[0][:, 2]
        det_ymin = results[0][:, 3]
        det_xmax = results[0][:, 4]
        det_ymax = results[0][:, 5]

        top_indices = [
            i for i, conf in enumerate(det_conf) if conf >= conf_thresh
        ]

        top_conf = det_conf[top_indices]
Пример #11
0
class TLClassifier(object):
    def __init__(self):
        #TODO load classifier
        NUM_CLASSES = 3 + 1
        input_shape = (300, 300, 3)

        # "prior boxes" in the paper
        priors = pickle.load(open('prior_boxes_ssd300.pkl', 'rb'))
        self.bbox_util = BBoxUtility(NUM_CLASSES, priors)

        self.model = SSD300(input_shape, num_classes=NUM_CLASSES)
        self.model.load_weights('weights.180314.hdf5', by_name=True)

    def get_classification(self, img):
        """Determines the color of the traffic light in the image

        Args:
            img (cv::Mat): image containing the traffic light
            assumed 3D numpy.array (800, 600, 3)
            bgr8: CV_8UC3, color image with blue-green-red color order

        Returns:
            int: ID of traffic light color (specified in styx_msgs/TrafficLight)

        """

        img = imresize(img, (300, 300))
        # convert color-order from cv2 to Pillow
        #B, G, R = img.T
        #img = np.array((R, G, B)).T

        img = image.img_to_array(img)
        inputs = np.reshape(img,
                            (1, 300, 300, 3))  # 'inputs' expects this size

        inputs = preprocess_input(np.array(inputs))
        preds = self.model.predict(inputs, batch_size=1, verbose=0)
        results = self.bbox_util.detection_out(preds)

        det_label = results[0][:, 0]
        det_conf = results[0][:, 1]
        det_xmin = results[0][:, 2]
        det_ymin = results[0][:, 3]
        det_xmax = results[0][:, 4]
        det_ymax = results[0][:, 5]

        # Get detections with confidence >= 0.8
        top_indices = [j for j, conf in enumerate(det_conf) if conf >= 0.8]
        top_conf = det_conf[top_indices]
        top_label_indices = det_label[top_indices].tolist()
        if top_label_indices == []:
            return TrafficLight.UNKNOWN, 0, 0, 0, 0, 0

        top_xmin = det_xmin[top_indices][0]
        top_ymin = det_ymin[top_indices][0]
        top_xmax = det_xmax[top_indices][0]
        top_ymax = det_ymax[top_indices][0]
        score = top_conf[0]

        # assume only one signal detected
        label = int(top_label_indices[0])
        if label == 0:
            return TrafficLight.UNKNOWN, 0, 0, 0, 0, 0
        elif label == 1:
            return TrafficLight.RED, score, top_xmin, top_ymin, top_xmax, top_ymax
        elif label == 2:
            return TrafficLight.YELLOW, score, top_xmin, top_ymin, top_xmax, top_ymax
        elif label == 3:
            return TrafficLight.GREEN, score, top_xmin, top_ymin, top_xmax, top_ymax
        else:
            return TrafficLight.UNKNOWN, score, top_xmin, top_ymin, top_xmax, top_ymax
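
A minimal usage sketch for the classifier above, assuming a BGR frame read with cv2 and the styx_msgs TrafficLight constants it already references; the image path is illustrative:

import cv2

classifier = TLClassifier()
frame = cv2.imread('sample_frame.jpg')  # hypothetical test image
state, score, xmin, ymin, xmax, ymax = classifier.get_classification(frame)
if state == TrafficLight.RED:
    print('red light detected with confidence {:.2f}'.format(score))
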
Пример #12
0
class UseSSD:
    def __init__(self):
        self.image_width = 300
        self.image_height = 300

        self.voc_classes = [
            'Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle', 'Bus', 'Car',
            'Cat', 'Chair', 'Cow', 'Diningtable', 'Dog', 'Horse', 'Motorbike',
            'Person', 'Pottedplant', 'Sheep', 'Sofa', 'Train', 'Tvmonitor'
        ]
        self.NUM_CLASSES = len(self.voc_classes) + 1

        self.model = SSD300((self.image_height, self.image_width, 3),
                            num_classes=self.NUM_CLASSES)
        self.model.load_weights('weights_SSD300.hdf5', by_name=True)
        self.bbox_util = BBoxUtility(self.NUM_CLASSES)

    def normalize(self, img_array):
        return (img_array - np.mean(img_array)) / np.std(img_array) * 16 + 64

    def process_img(self, img_filepath, confidence, save_dirpath):

        # Original image
        with load_img(img_filepath) as img_orig:
            img_orig_array = img_to_array(img_orig)

        # Original image, normalized the same way as the analysis input
        img_orig_array_normalized = self.normalize(img_orig_array)

        # Image for analysis (resized to the model input size)
        with load_img(img_filepath,
                      target_size=(self.image_height,
                                   self.image_width)) as img:
            img_array = img_to_array(img)
            img_array = self.normalize(img_array)

        img_array = np.expand_dims(img_array, axis=0)
        img_array = preprocess_input(img_array)

        preds = self.model.predict(img_array, batch_size=1, verbose=1)
        results = self.bbox_util.detection_out(preds)

        if len(results) <= 0:
            return

        det_label = results[0][:, 0]
        det_conf = results[0][:, 1]
        det_xmin = results[0][:, 2]
        det_ymin = results[0][:, 3]
        det_xmax = results[0][:, 4]
        det_ymax = results[0][:, 5]

        top_indices = [
            i for i, conf in enumerate(det_conf) if conf >= confidence
        ]

        top_conf = det_conf[top_indices]

        top_label_indices = det_label[top_indices].tolist()
        top_xmin = det_xmin[top_indices]
        top_ymin = det_ymin[top_indices]
        top_xmax = det_xmax[top_indices]
        top_ymax = det_ymax[top_indices]

        filename = os.path.basename(img_filepath)
        fname, ext = os.path.splitext(filename)

        for i in range(top_conf.shape[0]):
            label = int(top_label_indices[i])
            label_name = self.voc_classes[label - 1]

            print('%s: %.8f' % (label_name, top_conf[i]))

            xmin = int(round(top_xmin[i] * img_orig_array.shape[1]))
            ymin = int(round(top_ymin[i] * img_orig_array.shape[0]))
            xmax = int(round(top_xmax[i] * img_orig_array.shape[1]))
            ymax = int(round(top_ymax[i] * img_orig_array.shape[0]))

            # Bin the confidence into 10-point ranges (e.g. 0.73 -> 70) so each
            # crop is saved under a directory like '<label_name>/70_80'.
            acc = top_conf[i] * 100 // 10 * 10
            dir_name = '%s/%s/%02d_%02d' % (save_dirpath, label_name, acc,
                                            acc + 10)
            os.makedirs(dir_name, exist_ok=True)

            target_img_array = img_orig_array[ymin:ymax, xmin:xmax]
            with array_to_img(target_img_array) as target_img:
                target_img.save('%s/%s_%.8f.jpg' %
                                (dir_name, fname, top_conf[i]))

            target_img_array_normalized = img_orig_array_normalized[ymin:ymax,
                                                                    xmin:xmax]
            with array_to_img(
                    target_img_array_normalized) as target_img_normalized:
                target_img_normalized.save('%s/%s_%.8f_normalized.jpg' %
                                           (dir_name, fname, top_conf[i]))
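
A minimal usage sketch for the UseSSD class above; the file and directory paths are illustrative, not from the original:

detector = UseSSD()
# Crops of every detection with confidence >= 0.6 are written under
# ./crops/<class name>/<confidence decile>/ as both raw and normalized images.
detector.process_img('sample.jpg', confidence=0.6, save_dirpath='./crops')
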
Пример #13
0
def create_files_for_evaluation(args, n_images=200):
    NUM_CLASSES = 21
    THRESHOLD = 0.6
    CLASSES = [
        'Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle', 'Bus', 'Car', 'Cat',
        'Chair', 'Cow', 'Diningtable', 'Dog', 'Horse', 'Motorbike', 'Person',
        'Pottedplant', 'Sheep', 'Sofa', 'Train', 'Tvmonitor'
    ]

    with open(args.path_to_settings, 'r') as fp:
        sets = yaml.safe_load(fp)

    input_shape = (sets['img_height'], sets['img_width'], 3)

    priors = pickle.load(
        open(
            os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                'priorFiles/prior_boxes_ssd300MobileNetV2_224_224.pkl'), 'rb'))

    np.set_printoptions(suppress=True)

    bbox_util = BBoxUtility(NUM_CLASSES, priors)

    config = tf.compat.v1.ConfigProto()

    inputs = []
    images = []
    result_detections = []
    result_images = []
    annotation_files = []

    print('Preparing {} files for evaluation.'.format(n_images))

    with open(
            os.path.join(sets['dataset_dir'],
                         'VOC2007/ImageSets/Main/test.txt'), 'r') as annot_f:
        for annotation in tqdm(list(annot_f)[:n_images]):
            try:
                img_path = os.path.join(
                    sets['dataset_dir'], 'VOC2007/JPEGImages/'
                ) + annotation.split(' ')[0].strip() + '.jpg'
                img = image.load_img(img_path,
                                     target_size=(input_shape[0],
                                                  input_shape[1]))
                img = image.img_to_array(img)
                result_images.append(img_path)
                images.append(img)
                inputs.append(img.copy())
                annotation_files.append(annotation)
            except Exception as e:
                print('Error while opening file.', e)

    with tf.compat.v1.Session(config=config) as s:
        tf_inference = restore_tf_checkpoint(sets, s, args.model_checkpoints)
        inputs = preprocess_input(np.array(inputs))
        img_per_batch = 5
        results = []
        start_index = 0
        print('Start computing batches')

        for end_index in tqdm(
                range(img_per_batch, inputs.shape[0] + 1, img_per_batch)):
            if not args.model_checkpoints:
                preds = tf_inference.predict(inputs[start_index:end_index, :])
            else:
                preds = tf_inference['sess'].run(
                    fetches=tf_inference['out'],
                    feed_dict={
                        tf_inference['in']: inputs[start_index:end_index, :]
                    })
            results.extend(bbox_util.detection_out(preds))
            start_index = end_index

        for i, img in tqdm(enumerate(images)):
            # Parse the outputs.
            det_label = results[i][:, 0]
            det_conf = results[i][:, 1]
            det_xmin = results[i][:, 2]
            det_ymin = results[i][:, 3]
            det_xmax = results[i][:, 4]
            det_ymax = results[i][:, 5]

            # Get detections with confidence higher than 0.6.
            top_indices = [
                i for i, conf in enumerate(det_conf) if conf >= THRESHOLD
            ]

            top_conf = det_conf[top_indices]
            top_label_indices = det_label[top_indices].tolist()
            top_xmin = det_xmin[top_indices]
            top_ymin = det_ymin[top_indices]
            top_xmax = det_xmax[top_indices]
            top_ymax = det_ymax[top_indices]

            detections = []
            for i in range(top_conf.shape[0]):
                '''
                xmin = int(round(top_xmin[i] * img.shape[1]))
                ymin = int(round(top_ymin[i] * img.shape[0]))
                xmax = int(round(top_xmax[i] * img.shape[1]))
                ymax = int(round(top_ymax[i] * img.shape[0]))
                '''
                xmin = top_xmin[i]
                ymin = top_ymin[i]
                xmax = top_xmax[i]
                ymax = top_ymax[i]

                score = top_conf[i]
                label = int(top_label_indices[i])
                label_name = CLASSES[label - 1]
                detections.append([
                    '{:.2f}'.format(xmin), '{:.2f}'.format(ymin),
                    '{:.2f}'.format(xmax), '{:.2f}'.format(ymax), label_name,
                    '{:.2f}'.format(score)
                ])
            result_detections.append(detections)

        print('Test images: {}'.format(len(result_images)))

        model_predictions = []
        MODEL_PREDICTION_PATH = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'model_evaluation/model_prediction/')
        predicted_images = []

        for index, image_filename in tqdm(enumerate(result_images)):
            image_name = os.path.basename(image_filename)
            path_elements = image_name[:-4]
            predicted_images.append(image_name[:-4])
            annot_dir = os.path.join(MODEL_PREDICTION_PATH)
            os.makedirs(annot_dir, exist_ok=True)
            annot_name = '{}.txt'.format(path_elements)
            annot_filename = os.path.join(annot_dir, annot_name)
            with open(annot_filename, 'w') as output_f:
                for d in result_detections[index]:
                    left, top, right, bottom, class_name, score = d
                    model_predictions.append(
                        (class_name, score, left, top, right, bottom))
                    output_f.write('{} {} {} {} {} {}\n'.format(
                        class_name, score, left, top, right, bottom))

        GROUND_TRUTH_LABELS = os.path.join(sets['dataset_dir'],
                                           'VOC2007/Annotations')
        GROUND_TRUTH_PATH = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'model_evaluation/ground_truth/')

        for f in glob(GROUND_TRUTH_PATH + '*'):
            os.remove(f)
        filenames = os.listdir(GROUND_TRUTH_LABELS)
        ground_images = []

        for filename in tqdm(filenames):
            if filename[:-4] not in predicted_images:
                continue
            ground_images.append(filename[:-4])
            tree = ElementTree.parse(
                os.path.join(GROUND_TRUTH_LABELS + '/{}'.format(filename)))
            root = tree.getroot()
            bounding_boxes = []
            one_hot_classes = []
            size_tree = root.find('size')
            width = float(size_tree.find('width').text)
            height = float(size_tree.find('height').text)
            for object_tree in root.findall('object'):
                for bounding_box in object_tree.iter('bndbox'):
                    xmin = float(bounding_box.find('xmin').text) / width
                    ymin = float(bounding_box.find('ymin').text) / height
                    xmax = float(bounding_box.find('xmax').text) / width
                    ymax = float(bounding_box.find('ymax').text) / height
                    class_name = object_tree.find('name').text.title()
                bounding_box = [class_name, xmin, ymin, xmax, ymax]
                bounding_boxes.append(bounding_box)

            with open(
                    os.path.join(GROUND_TRUTH_PATH,
                                 filename.replace('xml', 'txt')), 'w+') as f:
                for p in bounding_boxes:
                    f.write(' '.join([str(s) for s in p]) + "\n")

        print('Completed eval preparation')
        assert len(ground_images) == len(predicted_images)
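
The files written above pair one prediction file per test image with a ground-truth file of the same base name, both using normalized coordinates. A sketch of the resulting line formats (the file name and values are illustrative):

model_evaluation/model_prediction/000012.txt, one detection per line:
    Car 0.94 0.12 0.35 0.58 0.81    (class, score, xmin, ymin, xmax, ymax)

model_evaluation/ground_truth/000012.txt, one object per line:
    Car 0.10 0.33 0.60 0.83         (class, xmin, ymin, xmax, ymax)
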
Пример #14
0
class SSDPrecitor(PredictorBase):
    """
    SSDの識別器
    Arguments:
        modelfile: モデルファイルパス
        shape: SSD識別器に入力する際のモデルサイズ(width, height, channels). デフォルトは (300, 300, 3)
        num_classes: モデルの分類数. デフォルトは 21
        conf_thresh: 検出結果の閾値
    """
    def __init__(self,
                 modelfile,
                 shape=(300, 300, 3),
                 num_classes=21,
                 conf_thresh=0.6):

        self.input_shape = shape
        self.num_classes = num_classes
        self.conf_thresh = conf_thresh

        # Build the model
        model = SSD(shape, num_classes=num_classes)
        model.load_weights(modelfile)
        self.model = model

        # Bounding-box decoding utility
        self.bbox_util = BBoxUtility(self.num_classes)

    def predict(self, src):
        """
        SSDにより、入力画像からオブジェクトを識別する
        :param src: 入力画像
        :return: ラベルID, スコア, Boxデータ(floatなので注意!!)
        """
        height, width, channels = src.shape
        # Preprocessing
        x = self._preprocess(src)
        # Inference
        y = self.model.predict(x)
        # Post-processing
        results = self._decodebox(y)
        # Output
        if results.shape[0] > 0:
            results[:, 2] = results[:, 2] * width
            results[:, 3] = results[:, 3] * height
            results[:, 4] = results[:, 4] * width
            results[:, 5] = results[:, 5] * height
            return [(int(x[0]), x[1], x[2:6]) for x in results]

        return []

    def _preprocess(self, src):
        """
        入力された画像に対して前処理を行う
        :param src: 入力画像
        :return: 300x300にリサイズし、BGR->RGBに変換した画像
        """
        im_size = (self.input_shape[0], self.input_shape[1])
        resized = cv2.resize(src, im_size)
        rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
        inputs = [image.img_to_array(rgb)]
        return preprocess_input(np.array(inputs))

    def _decodebox(self, preds):
        """
        識別処理後の後処理
        :param preds: SSDモデルの識別結果
        :return: confが閾値以上のボックスのみ抽出
        """
        conf_thresh = self.conf_thresh

        # ボックス抽出 **これで下記の情報が取得する**
        #   0: label
        #   1: conf
        # 2~5: bbox(xmin, ymin, xmax, ymax)
        box_results = self.bbox_util.detection_out(preds)
        result = np.array([])
        if len(box_results) > 0 and len(box_results[0]) > 0:
            box_result = box_results[0]
            # Take the indices of detections whose score exceeds the threshold
            top_indices = np.where(box_result[:, 1] > self.conf_thresh)[0]
            result = box_result[top_indices]
            return result
        else:
            return result
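
A minimal usage sketch for the SSDPrecitor class above, assuming an OpenCV BGR image and a weights file path (both illustrative):

import cv2

predictor = SSDPrecitor('weights_SSD300.hdf5', conf_thresh=0.6)
frame = cv2.imread('sample.jpg')  # hypothetical input image
for label_id, score, box in predictor.predict(frame):
    xmin, ymin, xmax, ymax = [int(v) for v in box]  # box values are floats
    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
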
Пример #15
0
def test_on_video(model,
                  name,
                  experiment,
                  videopath,
                  outvideopath,
                  classnames,
                  batch_size=32,
                  input_shape=(480, 640, 3),
                  soft=False,
                  width=480,
                  height=640,
                  conf_thresh=0.75,
                  csv_conf_thresh=0.75):
    """ Applies a trained SSD model to a video
    
    Arguments:
    model           -- the SSD model, e.g. from get_model
    name            -- name of dataset
    experiment      -- name of training run
    videopath       -- path to input video
    outvideopath    -- path to output video showing the detections
    classnames      -- list of all the classes
    batch_size      -- number of images processed in parallel, lower this if you get out-of-memory errors
    input_shape     -- size of images fed to SSD
    soft            -- Whether to do soft NMS or normal NMS
    width           -- Width to scale detections with (can be set to 1 if detections are already on right scale)
    height          -- Height to scale detections with (can be set to 1 if detections are already on right scale)
    conf_thresh     -- Detections with confidences below this are not shown in output video. Set to negative to not visualize confidences.
    csv_conf_thresh -- Detections with confidences below this are ignored. This should be same as conf_thresh unless conf_thresh is negative.
    
    """
    masker = Masker(name)

    num_classes = len(classnames) + 1
    colors = class_colors(num_classes)

    make_vid = True
    suffix = outvideopath.split('.')[-1]
    if suffix == 'csv':
        make_vid = False
        csvpath = outvideopath
    else:
        csvpath = outvideopath.replace('.{}'.format(suffix), '.csv')

    print_flush('Generating priors')
    im_in = np.random.random(
        (1, input_shape[1], input_shape[0], input_shape[2]))
    priors = model.predict(im_in, batch_size=1)[0, :, -8:]
    bbox_util = BBoxUtility(num_classes, priors)

    vid = io.get_reader(videopath)
    if make_vid:
        outvid = io.get_writer(outvideopath, fps=30)

    inputs = []
    frames = []

    all_detections = []
    for i, frame in enumerate(vid):
        frame = masker.mask(frame)
        resized = cv2.resize(frame, (input_shape[0], input_shape[1]))

        frames.append(frame.copy())
        inputs.append(resized)

        if len(inputs) == batch_size:
            inputs = np.array(inputs).astype(np.float64)
            inputs = preprocess_input(inputs)

            preds = model.predict(inputs, batch_size=batch_size, verbose=0)
            results = bbox_util.detection_out(preds, soft=soft)

            for result, frame, frame_number in zip(results, frames,
                                                   range(i - batch_size, i)):
                result = [
                    r if len(r) > 0 else np.zeros((1, 6)) for r in result
                ]
                raw_detections = pd.DataFrame(np.vstack(result),
                                              columns=[
                                                  'class_index', 'confidence',
                                                  'xmin', 'ymin', 'xmax',
                                                  'ymax'
                                              ])

                rescale(raw_detections, 'xmin', width)
                rescale(raw_detections, 'xmax', width)
                rescale(raw_detections, 'ymin', height)
                rescale(raw_detections, 'ymax', height)
                rescale(raw_detections, 'class_index', 1)

                ci = raw_detections['class_index']
                cn = [classnames[int(x) - 1] for x in ci]
                raw_detections['class_name'] = cn

                raw_detections['frame_number'] = (frame_number + 2)
                all_detections.append(raw_detections[
                    raw_detections.confidence > csv_conf_thresh])

                if make_vid:
                    frame = draw(frame,
                                 raw_detections,
                                 colors,
                                 conf_thresh=conf_thresh)
                    outvid.append_data(frame)

            frames = []
            inputs = []

        if i % (10 * batch_size) == 0:
            print_flush(i)

    detections = pd.concat(all_detections)

    detections.to_csv(csvpath)
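
A minimal call sketch for test_on_video above; the dataset name, run name and paths are assumptions, and the model is assumed to come from get_model as the docstring suggests:

classnames = get_classnames('mydataset')   # hypothetical dataset name
# 'model' is an SSD Keras model for this dataset, e.g. obtained via get_model.
test_on_video(model, 'mydataset', 'run0',
              videopath='videos/cam1.mkv',         # illustrative paths
              outvideopath='detections/cam1.csv',  # a .csv suffix skips the output video
              classnames=classnames,
              batch_size=16)
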
Пример #16
0
class TLClassifier(object):
    def __init__(self):
        NUM_CLASSES = 3 + 1
        input_shape = (300, 300, 3)

        #config_string = rospy.get_param("/traffic_light_config")
        #self.config = yaml.load(config_string)
        #self.stop_line_positions = self.config['stop_line_positions']

        # get path to resources
        #path_to_resources = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..', '..', '..', 'tlc')
        # "prior boxes" in the paper
        #priors = pickle.load(open(os.path.join(path_to_resources, 'prior_boxes_ssd300.pkl'), 'rb'))
        priors = pickle.load(open('prior_boxes_ssd300.pkl', 'rb'))
        self.bbox_util = BBoxUtility(NUM_CLASSES, priors)

        # Traffic Light Classifier model and its weights
        self.model = SSD300(input_shape, num_classes=NUM_CLASSES)
        #self.model.load_weights(os.path.join(path_to_resources, self.config['classifier_weights_file']), by_name=True)
        #self.model.load_weights('weights.180314.hdf5', by_name=True)
        self.model.load_weights('checkpoints/weights.07-0.70.hdf5',
                                by_name=True)

        # Warm-up prediction on a dummy input to avoid a TensorFlow ValueError
        # on the first real call.
        dummy = np.zeros((1, 300, 300, 3))
        _ = self.model.predict(dummy, batch_size=1, verbose=0)

        self.is_in_progress = False
        self.last_result = TrafficLight.UNKNOWN

    def get_classification(self, img):
        """Determines the color of the traffic light in the image

        Args:
            img (cv::Mat): image containing the traffic light
            assumed 3D numpy.array (800, 600, 3) with bgr8: CV_8UC3, color image

        Returns:
            int: ID of traffic light color (specified in styx_msgs/TrafficLight)
        """

        #if self.is_in_progress:
        #    return self.last_result, 0, 0, 0, 0, 0
        #self.is_in_progress = True

        # adjust img arg for the model
        pilImg = Image.fromarray(np.uint8(img)).resize((300, 300))
        img = np.array(pilImg)
        img = image.img_to_array(img)
        inputs = np.reshape(img,
                            (1, 300, 300, 3))  # 'inputs' expects this size

        # prediction
        inputs = preprocess_input(np.array(inputs))
        preds = self.model.predict(inputs, batch_size=1, verbose=0)
        results = self.bbox_util.detection_out(preds)

        if results is None or len(results) == 0 or len(results[0]) == 0:
            self.last_result = TrafficLight.UNKNOWN
            self.is_in_progress = False
            return self.last_result, 0, 0, 0, 0, 0

        det_label = results[0][:, 0]
        det_conf = results[0][:, 1]
        det_xmin = results[0][:, 2]
        det_ymin = results[0][:, 3]
        det_xmax = results[0][:, 4]
        det_ymax = results[0][:, 5]

        # Get detections
        top_indices = [j for j, conf in enumerate(det_conf) if conf >= 0.6]
        top_label_indices = det_label[top_indices].tolist()

        if top_label_indices == []:
            return TrafficLight.UNKNOWN, 0, 0, 0, 0, 0
        top_conf = det_conf[top_indices]

        top_xmin = det_xmin[top_indices][0]
        top_ymin = det_ymin[top_indices][0]
        top_xmax = det_xmax[top_indices][0]
        top_ymax = det_ymax[top_indices][0]
        score = top_conf[0]

        # return the first signal detected (the empty case was already handled above)
        label = int(top_label_indices[0])
        #print "Found label " + str(label) + " at " + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        if label == 0:
            return TrafficLight.UNKNOWN, 0, 0, 0, 0, 0
        elif label == 1:
            return TrafficLight.RED, score, top_xmin, top_ymin, top_xmax, top_ymax
        elif label == 2:
            return TrafficLight.YELLOW, score, top_xmin, top_ymin, top_xmax, top_ymax
        elif label == 3:
            return TrafficLight.GREEN, score, top_xmin, top_ymin, top_xmax, top_ymax
        else:
            return TrafficLight.UNKNOWN, score, top_xmin, top_ymin, top_xmax, top_ymax

Пример #17
0
def main(dataset, run, input_shape, seq_start, seq_stop, videopath,
         conf_thresh, i_seq, outname, batch_size):

    print_flush("> Predicting...")
    classes = get_classnames(dataset)
    masker = Masker(dataset)

    input_shape = parse_resolution(input_shape)

    num_classes = len(classes) + 1
    model = get_model(dataset, run, input_shape, num_classes, verbose=False)
    priors = get_priors(model, input_shape)
    bbox_util = BBoxUtility(num_classes, priors)

    width = input_shape[0]
    height = input_shape[1]

    inputs = []
    outputs = []
    old_frame = None

    with io.get_reader(videopath) as vid:
        vlen = len(vid)
        for i_in_seq in range(seq_start, seq_stop):
            if i_in_seq < vlen:
                frame = vid.get_data(i_in_seq)
                frame = masker.mask(frame)
                old_frame = frame
            else:
                frame = old_frame

            resized = cv2.resize(frame, (width, height))
            inputs.append(resized)

            if len(inputs) == batch_size:
                inputs2 = np.array(inputs)
                inputs2 = inputs2.astype(np.float32)
                inputs2 = preprocess_input(inputs2)

                y = model.predict_on_batch(inputs2)
                outputs.append(y)

                inputs = []

    preds = np.vstack(outputs)

    print_flush("> Processing...")
    all_detections = []
    seq_len = seq_stop - seq_start

    for i in range(seq_len):
        frame_num = i + seq_start

        if frame_num < vlen:
            pred = preds[i, :]
            pred = pred.reshape(1, pred.shape[0], pred.shape[1])
            results = bbox_util.detection_out(pred, soft=False)

            detections = process_results(results, width, height, classes,
                                         conf_thresh, frame_num)
            all_detections.append(detections)

    dets = pd.concat(all_detections)

    # For the first sequence we open the file in write mode, and afterwards in
    # append mode. This way the output is still overwritten if the script is
    # run multiple times.
    open_mode = 'a'
    include_header = False
    if i_seq == 0:
        open_mode = 'w'
        include_header = True

    print_flush("> Writing to {} ...".format(outname))
    with open(outname, open_mode) as f:
        dets.to_csv(f, header=include_header)
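
The open-mode logic above lets several sequence chunks of the same video share one CSV file. A sketch of the intended per-chunk invocation, assuming the remaining arguments (dataset, run, total_frames, etc.) are already defined; the chunk length is illustrative:

seq_len = 500
for i_seq, seq_start in enumerate(range(0, total_frames, seq_len)):
    main(dataset, run, input_shape, seq_start, seq_start + seq_len,
         videopath, conf_thresh, i_seq, outname, batch_size)
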
Пример #18
0
class PPM:

    cars = 0
    model = None
    img_path = '/tmp/ppm.jpg'
    bbox_util = None
    conf_limit = 0.6

    def __init__(self, conf_limit=0.6):
        self.conf_limit = conf_limit
        np.set_printoptions(suppress=True)
        config = tf.ConfigProto()
        #config.gpu_options.per_process_gpu_memory_fraction = 0.45
        set_session(tf.Session(config=config))

        self.voc_classes = [
            'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car',
            'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike',
            'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
        ]
        NUM_CLASSES = len(self.voc_classes) + 1
        self.bbox_util = BBoxUtility(NUM_CLASSES)

        input_shape = (300, 300, 3)
        self.model = SSD300(input_shape, num_classes=NUM_CLASSES)
        self.model.load_weights('weights_SSD300.hdf5', by_name=True)

    def read_cars(self):
        inputs = []
        images = []
        img = image.load_img(self.img_path, target_size=(300, 300))
        img = image.img_to_array(img)
        images.append(imread(self.img_path))
        inputs.append(img.copy())
        inputs = preprocess_input(np.array(inputs))

        preds = self.model.predict(inputs, batch_size=1, verbose=0)

        results = self.bbox_util.detection_out(preds)
        if results is None or len(results[0]) == 0:
            return 0

        i = 0
        img = images[0]

        # Parse the outputs.
        det_label = results[i][:, 0]
        det_conf = results[i][:, 1]
        det_xmin = results[i][:, 2]
        det_ymin = results[i][:, 3]
        det_xmax = results[i][:, 4]
        det_ymax = results[i][:, 5]

        # Get detections with confidence higher than conf_limit.
        top_indices = [
            i for i, conf in enumerate(det_conf) if conf >= self.conf_limit
        ]

        top_conf = det_conf[top_indices]
        top_label_indices = det_label[top_indices].tolist()
        top_xmin = det_xmin[top_indices]
        top_ymin = det_ymin[top_indices]
        top_xmax = det_xmax[top_indices]
        top_ymax = det_ymax[top_indices]

        cars = 0

        for i in range(top_conf.shape[0]):
            xmin = int(round(top_xmin[i] * img.shape[1]))
            ymin = int(round(top_ymin[i] * img.shape[0]))
            xmax = int(round(top_xmax[i] * img.shape[1]))
            ymax = int(round(top_ymax[i] * img.shape[0]))
            score = top_conf[i]
            label = int(top_label_indices[i])
            label_name = self.voc_classes[label - 1]
            print('Label: ' + label_name)
            if label_name == 'car':
                cars += 1
            display_txt = '{:0.2f}, {}'.format(score, label_name)
            coords = (xmin, ymin), xmax - xmin + 1, ymax - ymin + 1

        return cars
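
A minimal usage sketch for the PPM class above; it reads the image at img_path (/tmp/ppm.jpg by default) and returns the number of detected cars:

ppm = PPM(conf_limit=0.6)
ppm.img_path = '/tmp/ppm.jpg'  # default value, shown here for clarity
print('cars in frame:', ppm.read_cars())
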
Пример #19
0
class TLClassifier(object):
    def __init__(self):
        NUM_CLASSES = 3 + 1
        input_shape = (300, 300, 3)

        config_string = rospy.get_param("/traffic_light_config")
        self.config = yaml.load(config_string)
        self.stop_line_positions = self.config['stop_line_positions']

        # get path to resources
        path_to_resources = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), '..', '..', '..', '..',
            'tlc')
        # "prior boxes" in the paper
        priors = pickle.load(
            open(os.path.join(path_to_resources, 'prior_boxes_ssd300.pkl'),
                 'rb'))
        self.bbox_util = BBoxUtility(NUM_CLASSES, priors)

        # Traffic Light Classifier model and its weights
        self.model = SSD300(input_shape, num_classes=NUM_CLASSES)
        print(self.model.summary())
        self.model.load_weights(os.path.join(
            path_to_resources, self.config['classifier_weights_file']),
                                by_name=True)

        # Warm-up prediction on a dummy input to avoid a TensorFlow ValueError
        # on the first real call.
        dummy = np.zeros((1, 300, 300, 3))
        _ = self.model.predict(dummy, batch_size=1, verbose=0)

        self.capture_images = False
        self.image_counts = {0: 0, 1: 0, 2: 0, 4: 0}

        self.last_classification = None

    def get_classification(self, imgInput, light_state):
        """Determines the color of the traffic light in the image

        Args:
            img (cv::Mat): image containing the traffic light
            assumed 3D numpy.array (800, 600, 3) with bgr8: CV_8UC3, color image

        Returns:
            int: ID of traffic light color (specified in styx_msgs/TrafficLight)
        """

        # adjust img arg for the model
        pilImg = Image.fromarray(np.uint8(imgInput)).resize((300, 300))
        img = np.array(pilImg)
        img = image.img_to_array(img)
        inputs = np.reshape(img,
                            (1, 300, 300, 3))  # 'inputs' expects this size

        # prediction
        inputs = preprocess_input(np.array(inputs))
        preds = self.model.predict(inputs, batch_size=1, verbose=0)
        results = self.bbox_util.detection_out(preds)

        if results is None or len(results) == 0 or len(results[0]) == 0:
            if self.last_classification is not None:
                if (datetime.datetime.utcnow() -
                        self.last_classification[1]).total_seconds() > 3.5:
                    self.last_classification = None
                else:
                    return self.last_classification[0]
            self.save_image(imgInput, light_state)
            return TrafficLight.UNKNOWN

        det_label = results[0][:, 0]
        det_conf = results[0][:, 1]

        # Get detections with confidence >= 0.8
        top_indices = [j for j, conf in enumerate(det_conf) if conf >= 0.8]
        top_label_indices = det_label[top_indices].tolist()

        # return the first signal detected
        if top_label_indices == []:
            if self.last_classification is not None:
                if (datetime.datetime.utcnow() -
                        self.last_classification[1]).total_seconds() > 3.5:
                    self.last_classification = None
                else:
                    return self.last_classification[0]
            self.save_image(imgInput, light_state)
            return TrafficLight.UNKNOWN
        label = int(top_label_indices[0])
        #print "Found label " + str(label) + " at " + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        if label == 1:
            self.last_classification = (TrafficLight.RED,
                                        datetime.datetime.utcnow())
        elif label == 2:
            self.last_classification = (TrafficLight.YELLOW,
                                        datetime.datetime.utcnow())
        elif label == 3:
            self.last_classification = (TrafficLight.GREEN,
                                        datetime.datetime.utcnow())
        else:
            if self.last_classification is not None:
                if (datetime.datetime.utcnow() -
                        self.last_classification[1]).total_seconds() > 3.5:
                    self.last_classification = None
                else:
                    return self.last_classification[0]
            return TrafficLight.UNKNOWN

        return self.last_classification[0]

    def save_image(self, image, state):
        if self.capture_images and self.image_counts[state] < 100:
            # Save to disk
            path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                "capture", str(state))
            if (not os.path.isdir(path)):
                os.makedirs(path)
            name = str(time.time()) + '.png'
            cv2.imwrite(os.path.join(path, name), image)
            self.image_counts[state] += 1
Пример #20
0
def run_camera(input_shape, model, video_path, image_path_ori,
               image_path_crop):
    num_classes = 21
    conf_thresh = 0.5
    input_shape = input_shape
    bbox_util = BBoxUtility(num_classes)

    class_colors = []
    for i in range(0, num_classes):
        hue = 255 * i / num_classes
        col = np.zeros((1, 1, 3)).astype("uint8")
        col[0][0][0] = hue
        col[0][0][1] = 128  # Saturation
        col[0][0][2] = 255  # Value
        cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
        col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]), int(cvcol[0][0][2]))
        class_colors.append(col)

    class_list = os.listdir(video_path)
    for action in class_list:
        all_action = os.listdir(video_path + action)
        for sample in all_action:
            print(video_path + action + '/' + sample)
            name = sample.split('.')[0]
            if not os.path.exists(image_path_ori + action + '/' + name):
                os.mkdir(image_path_ori + action + '/' + name)
            if not os.path.exists(image_path_crop + action + '/' + name):
                os.mkdir(image_path_crop + action + '/' + name)
            vid = cv2.VideoCapture(video_path + action + '/' + sample)

            # Compute aspect ratio of video
            vidw = vid.get(cv2.cv.CV_CAP_PROP_FRAME_WIDTH)
            vidh = vid.get(cv2.cv.CV_CAP_PROP_FRAME_HEIGHT)
            frame_length = vid.get(cv2.cv.CV_CAP_PROP_FRAME_COUNT)
            # vidar = vidw / vidh
            frame_count = 0
            for n in range(int(frame_length)):
                retval, orig_image = vid.read()
                if not retval:
                    print("Done!")
                    return

                im_size = (input_shape[0], input_shape[1])
                resized = cv2.resize(orig_image, im_size)
                rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

                inputs = [image.img_to_array(rgb)]
                tmp_inp = np.array(inputs)
                x = preprocess_input(tmp_inp)

                y = model.predict(x)

                results = bbox_util.detection_out(y)

                if len(results) > 0 and len(results[0]) > 0:
                    det_label = results[0][:, 0]
                    det_conf = results[0][:, 1]
                    det_xmin = results[0][:, 2]
                    det_ymin = results[0][:, 3]
                    det_xmax = results[0][:, 4]
                    det_ymax = results[0][:, 5]

                    top_indices = [
                        i for i, conf in enumerate(det_conf)
                        if conf >= conf_thresh
                    ]

                    top_conf = det_conf[top_indices]
                    top_label_indices = det_label[top_indices].tolist()
                    top_xmin = det_xmin[top_indices]
                    top_ymin = det_ymin[top_indices]
                    top_xmax = det_xmax[top_indices]
                    top_ymax = det_ymax[top_indices]

                    if 15 not in top_label_indices:
                        pass
                    else:
                        for i in range(top_conf.shape[0]):
                            xmin = int(round((top_xmin[i] * vidw) * 0.9))
                            ymin = int(round((top_ymin[i] * vidh) * 0.9))
                            xmax = int(round(
                                (top_xmax[i] * vidw) *
                                1.1)) if int(round(
                                    (top_xmax[i] * vidw) *
                                    1.1)) <= vidw else int(
                                        round(top_xmax[i] * vidw))
                            ymax = int(round(
                                (top_ymax[i] * vidh) *
                                1.1)) if int(round(
                                    (top_ymax[i] * vidh) *
                                    1.1)) <= vidh else int(
                                        round(top_ymax[i] * vidh))

                            # save frames
                            class_num = int(top_label_indices[i])
                            if class_num == 15:
                                frame = cv2.cvtColor(orig_image,
                                                     cv2.COLOR_BGR2GRAY)
                                cv2.imwrite(
                                    image_path_ori + action + '/' + name +
                                    str(10000 + frame_count) + '.jpg', frame)
                                cropImage = frame[ymin:ymax, xmin:xmax]
                                cropImage = cv2.resize(cropImage, (64, 64))
                                cv2.imwrite(
                                    image_path_crop + action + '/' + name +
                                    str(10000 + frame_count) + '.jpg',
                                    cropImage)
                                frame_count += 1
Пример #21
0
    def objct_recognition(self, img_paths):
        """
        ssdを用いた物体検出
        """
        """各種設定"""
        self.img_paths = img_paths
        plt.rcParams['figure.figsize'] = (8, 8)  # グラフサイズ(インチ
        # http://d.hatena.ne.jp/nishiohirokazu/20111121/1321849806
        plt.rcParams['image.interpolation'] = 'nearest'  # 補完アルゴル設定
        np.set_printoptions(suppress=True)  # 出力の圧縮=Trues
        config = tf.ConfigProto()
        # プロセス辺りのGPU占有率
        config.gpu_options.per_process_gpu_memory_fraction = 0.45
        set_session(tf.Session(config=config))
        """各種設定"""
        """認識クラス一覧"""
        voc_classes = [
            'Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle', 'Bus', 'Car',
            'Cat', 'Chair', 'Cow', 'Diningtable', 'Dog', 'Horse', 'Motorbike',
            'Person', 'Pottedplant', 'Sheep', 'Sofa', 'Train', 'Tvmonitor'
        ]
        NUM_CLASSES = len(voc_classes) + 1
        """認識クラス一覧"""
        """認識関連の設定・読み込み"""
        input_shape = (300, 300, 3)  # 入力画像サイズ
        model = SSD300(input_shape, num_classes=NUM_CLASSES)  # モデル確保
        model.load_weights('weights_SSD300.hdf5', by_name=True)  # モデル読み込み
        bbox_util = BBoxUtility(NUM_CLASSES)  # バウンディングボックスクラス?
        """認識関連の設定・読み込み"""
        """画像読み込み"""
        inputs = []
        images = []
        for img_path in self.img_paths:
            img = image.load_img(img_path, target_size=(300, 300))  # load image
            img = image.img_to_array(img)  # convert to array
            images.append(imread(img_path))
            inputs.append(img.copy())
        """Image loading"""

        inputs = preprocess_input(np.array(inputs))  # preprocessing
        preds = model.predict(inputs, batch_size=1, verbose=1)  # inference
        self.results = bbox_util.detection_out(preds)  # decode detections

        # Loop over the input images
        for i_, img in enumerate(images):
            # Record the image shape
            self.shapes.append([img.shape[1], img.shape[0]])

            # Split the result columns
            if len(self.results[i_]) == 0:
                # Skip this image if there are no detections
                continue
            det_label = self.results[i_][:, 0]
            det_conf = self.results[i_][:, 1]
            det_xmin = self.results[i_][:, 2]
            det_ymin = self.results[i_][:, 3]
            det_xmax = self.results[i_][:, 4]
            det_ymax = self.results[i_][:, 5]

            # Keep detections with confidence of at least 0.6
            top_indices = [
                i_ for i_, conf in enumerate(det_conf) if conf >= 0.6
            ]

            top_conf = det_conf[top_indices]
            top_label_indices = det_label[top_indices].tolist()
            top_xmin = det_xmin[top_indices]
            top_ymin = det_ymin[top_indices]
            top_xmax = det_xmax[top_indices]
            top_ymax = det_ymax[top_indices]

            colors = plt.cm.hsv(np.linspace(0, 1, 21)).tolist()

            plt.imshow(img / 255.)  # show the image
            currentAxis = plt.gca()

            # Draw each detection
            for i in range(top_conf.shape[0]):
                xmin = int(round(top_xmin[i] * img.shape[1]))
                ymin = int(round(top_ymin[i] * img.shape[0]))
                xmax = int(round(top_xmax[i] * img.shape[1]))
                ymax = int(round(top_ymax[i] * img.shape[0]))
                score = top_conf[i]
                label = int(top_label_indices[i])
                label_name = voc_classes[label - 1]
                display_txt = '{:0.2f}, {}'.format(score, label_name)
                coords = (xmin, ymin), xmax - xmin + 1, ymax - ymin + 1
                color = colors[label]

                currentAxis.add_patch(
                    plt.Rectangle(*coords,
                                  fill=False,
                                  edgecolor=color,
                                  linewidth=2))
                currentAxis.text(xmin,
                                 ymin,
                                 display_txt,
                                 bbox={
                                     'facecolor': color,
                                     'alpha': 0.5
                                 })

                # print(xmin, ymin, xmax, ymax)

            # Save the figure
            #plt.savefig("./data/recognition_imgs/"+os.path.basename(img_paths[i_]))
            plt.savefig(os.path.basename(img_paths[i_]))

            plt.close()

        import gc
        gc.collect()  # free memory
Пример #22
0
    with tf.compat.v1.Session(config=config) as s:
        tf_inference = restore_tf_checkpoint(sets, s)
        inputs = preprocess_input(np.array(inputs))
        img_per_batch = 5
        results = []
        start_index = 0

        for end_index in tqdm(
                range(img_per_batch, inputs.shape[0] + 1, img_per_batch)):
            preds = tf_inference['sess'].run(
                fetches=tf_inference['out'],
                feed_dict={
                    tf_inference['in']: inputs[start_index:end_index, :]
                })
            results.extend(bbox_util.detection_out(preds))
            start_index = end_index

        for i, img in tqdm(enumerate(images)):
            # Parse the outputs.
            det_label = results[i][:, 0]
            det_conf = results[i][:, 1]
            det_xmin = results[i][:, 2]
            det_ymin = results[i][:, 3]
            det_xmax = results[i][:, 4]
            det_ymax = results[i][:, 5]

            # Get detections with confidence higher than 0.6.
            top_indices = [
                i for i, conf in enumerate(det_conf) if conf >= THRESHOLD
            ]
Пример #23
0
def predict(model, img):
    inputs = []

    plt.cla()

    img = image.img_to_array(img)
    img = np.asarray(img)
    inputs.append(img.copy())
    inputs = np.asarray(inputs)
    inputs = preprocess_input(inputs)
    preds = model.predict(inputs, batch_size=1, verbose=1)
    bbox_util = BBoxUtility(NUM_CLASSES)
    results = bbox_util.detection_out(preds)

    # Parse the outputs.
    det_label = results[0][:, 0]
    det_conf = results[0][:, 1]
    det_xmin = results[0][:, 2]
    det_ymin = results[0][:, 3]
    det_xmax = results[0][:, 4]
    det_ymax = results[0][:, 5]

    top_indices = [i for i, conf in enumerate(det_conf) if conf >= 0.6]  #0.6

    top_conf = det_conf[top_indices]
    top_label_indices = det_label[top_indices].tolist()
    top_xmin = det_xmin[top_indices]
    top_ymin = det_ymin[top_indices]
    top_xmax = det_xmax[top_indices]
    top_ymax = det_ymax[top_indices]

    colors = plt.cm.hsv(np.linspace(0, 1, 21)).tolist()
    plt.imshow(img / 255.)
    currentAxis = plt.gca()

    money_total = 0
    money_num_list = [10, 100, 5]

    for i in range(top_conf.shape[0]):
        xmin = int(round(top_xmin[i] * img.shape[1]))
        ymin = int(round(top_ymin[i] * img.shape[0]))
        xmax = int(round(top_xmax[i] * img.shape[1]))
        ymax = int(round(top_ymax[i] * img.shape[0]))
        score = top_conf[i]
        label = int(top_label_indices[i])
        label_name = voc_classes[label - 1]
        display_txt = '{:0.2f}, {}'.format(score, label_name)
        coords = (xmin, ymin), xmax - xmin + 1, ymax - ymin + 1
        color = colors[label]
        currentAxis.add_patch(
            plt.Rectangle(*coords, fill=False, edgecolor=color, linewidth=2))
        currentAxis.text(xmin,
                         ymin,
                         display_txt,
                         bbox={
                             'facecolor': color,
                             'alpha': 0.5
                         })

        money_total = money_total + money_num_list[label - 1]
    plt.title(f'Total:{money_total} yen')
    canvas = FigureCanvasAgg(currentAxis.figure)
    buf = io.BytesIO()
    plt.savefig(buf)
    buf.seek(0)
    return buf
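
In the snippet above, money_num_list maps the class labels (in the same order as voc_classes) to coin values of 10, 100 and 5 yen, so the displayed total is simply a sum over all detections. A tiny worked example, assuming two detections of the first class and one of the second (hypothetical labels):

money_num_list = [10, 100, 5]
detected_labels = [1, 1, 2]  # 1-based class labels, as in the loop above
money_total = sum(money_num_list[label - 1] for label in detected_labels)
print(money_total)  # -> 120
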
Пример #24
0
def run_camera(input_shape, model):
    num_classes = 21
    conf_thresh = 0.5
    bbox_util = BBoxUtility(num_classes)
    vid = cv2.VideoCapture(0)
    sleep(1.0)
    # Compute aspect ratio of video
    vidw = vid.get(cv2.CAP_PROP_FRAME_WIDTH)
    vidh = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)
    trackers = Tracker()
    while True:
        ret, origin_image = vid.read()
        frame = origin_image
        if not ret:
            print("Done!")
            return None
        im_size = (input_shape[0], input_shape[1])
        resized = cv2.resize(frame, im_size)
        rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

        inputs = [image.img_to_array(rgb)]
        tmp_inp = np.array(inputs)
        x = preprocess_input(tmp_inp)
        y = model.predict(x)
        results = bbox_util.detection_out(y)
        if len(results) > 0 and len(results[0]) > 0:
            det_label = results[0][:, 0]
            det_conf = results[0][:, 1]
            det_xmin = results[0][:, 2]
            det_ymin = results[0][:, 3]
            det_xmax = results[0][:, 4]
            det_ymax = results[0][:, 5]

            top_indices = [i for i, conf in enumerate(det_conf) if conf >= conf_thresh]

            top_conf = det_conf[top_indices]
            top_label_indices = det_label[top_indices].tolist()
            top_xmin = det_xmin[top_indices]
            top_ymin = det_ymin[top_indices]
            top_xmax = det_xmax[top_indices]
            top_ymax = det_ymax[top_indices]

            if 15 not in top_label_indices:
                pass
            else:
                trackers.bbox = []
                trackers.features_current = []
                trackers.index = []
                for i in range(top_conf.shape[0]):
                    class_num = int(top_label_indices[i])
                    if class_num == 15:
                        xmin = int(round((top_xmin[i] * vidw) * 0.9))
                        ymin = int(round((top_ymin[i] * vidh) * 0.9))
                        xmax = int(round((top_xmax[i] * vidw) * 1.1)) if int(round(
                            (top_xmax[i] * vidw) * 1.1)) <= vidw else int(round(
                            top_xmax[i] * vidw))
                        ymax = int(round((top_ymax[i] * vidh) * 1.1)) if int(round(
                            (top_ymax[i] * vidh) * 1.1)) <= vidh else int(round(top_ymax[i] * vidh))
                        trackers.bbox.append([xmin, ymin, xmax, ymax])
                        trackers.features_current.append(
                            Extract_feature(cv2.resize(frame[ymin:ymax, xmin:xmax, :], (32, 32))))
                if trackers.features_previous is None:
                    trackers.index.extend(range(len(trackers.bbox)))
                    for j in range(len(trackers.features_current)):
                        cv2.rectangle(frame, (int(trackers.bbox[j][0]), int(trackers.bbox[j][1])),
                                      (int(trackers.bbox[j][2]), int(trackers.bbox[j][3])), (255, 0, 0), 2)
                        cv2.putText(frame, "person: {}".format(trackers.index[j] + 1),
                                    (trackers.bbox[j][0] + 10, trackers.bbox[j][1] + 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 1)
                else:
                    trackers.match()
                    trackers.update()
                    for j in range(len(trackers.features_current)):
                        cv2.rectangle(frame, (int(trackers.bbox[j][0]), int(trackers.bbox[j][1])),
                                      (int(trackers.bbox[j][2]), int(trackers.bbox[j][3])), (255, 0, 0), 2)
                        cv2.putText(frame, "person: {}".format(trackers.index[j] + 1),
                                    (trackers.bbox[j][0] + 10, trackers.bbox[j][1] + 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 1)
        cv2.imshow('tracking', frame)
        if cv2.waitKey(5) & 0xFF == ord('q'):
            break
Пример #25
0
img_path = './pics/car_cat2.jpg'
img = image.load_img(img_path, target_size=(300, 300))
img = image.img_to_array(img)
images.append(imread(img_path))
inputs.append(img.copy())
inputs = preprocess_input(np.array(inputs))


# In[5]:

preds = model.predict(inputs, batch_size=1, verbose=1)


# In[6]:

results = bbox_util.detection_out(preds)


# In[8]:

for i, img in enumerate(images):
    # Parse the outputs.
    det_label = results[i][:, 0]
    det_conf = results[i][:, 1]
    det_xmin = results[i][:, 2]
    det_ymin = results[i][:, 3]
    det_xmax = results[i][:, 4]
    det_ymax = results[i][:, 5]

    # Get detections with confidence higher than 0.6.
    top_indices = [i for i, conf in enumerate(det_conf) if conf >= 0.6]
Пример #26
0
class SSDPipeline(object):

    def __init__(self):

        voc_classes = ['Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle',
               'Bus', 'Car', 'Cat', 'Chair', 'Cow', 'Diningtable',
               'Dog', 'Horse','Motorbike', 'Person', 'Pottedplant',
               'Sheep', 'Sofa', 'Train', 'Tvmonitor']
        NUM_CLASSES = len(voc_classes) + 1

        input_shape=(300, 300, 3)
        self.model = SSD300(input_shape, num_classes=NUM_CLASSES)
        weights_file = "./checkpoints/weights.10-2.85.hdf5"        
        #weights_file = "./checkpoints/weights.39-1.61_ubuntu.hdf5"

        self.model.load_weights(weights_file, by_name=True)
        self.bbox_util = BBoxUtility(NUM_CLASSES)

    def loadImage(self,video_path):

        vid = cv2.VideoCapture(video_path)
        vidw = vid.get(3) # CV_CAP_PROP_FRAME_WIDTH
        vidh = vid.get(4) # CV_CAP_PROP_FRAME_HEIGHT

        print(vidw,vidh)
        input_shape = (300,300,3)
        vidar = vidw/vidh
        #print(vidar)
        return vidar

    def setClassColors(self):

        self.class_colors = []
        self.class_names = ["background", "Prescription", "None", "title", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"];
        NUM_CLASSES = len(self.class_names)

        for i in range(0, NUM_CLASSES):
            # This can probably be written in a more elegant manner
            hue = 255*i/NUM_CLASSES
            col = np.zeros((1,1,3)).astype("uint8")
            col[0][0][0] = hue
            col[0][0][1] = 128 # Saturation
            col[0][0][2] = 255 # Value
            cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
            col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]), int(cvcol[0][0][2]))
            self.class_colors.append(col) 
        
    def pipeline(self,orig_image):
        
        start_frame = 0

        # Manually adjustable parameter: for binary classification, use a
        # threshold higher than 0.5.
        conf_thresh = 0.50

        accum_time = 0
        curr_fps = 0
        fps = "FPS: ??"
        prev_time = timer()

        vidh, vidw, _ = orig_image.shape
        vidar = vidw/vidh

        input_shape = (300,300,3)
        display_shape = (600,600,3)
        
        im_size = (input_shape[0], input_shape[1])   
        resized = cv2.resize(orig_image, im_size)
        to_draw = cv2.resize(resized, (int(input_shape[0]*vidar), input_shape[1]))
        #to_draw = cv2.resize(resized, (int(display_shape[0]*vidar), display_shape[1]))

        #to_draw = orig_image.copy()

        # Use model to predict 
        inputs = [image.img_to_array(resized)]
        tmp_inp = np.array(inputs)
        x = preprocess_input(tmp_inp)
        y = self.model.predict(x)
        
        #preds = model.predict(inputs, batch_size=1, verbose=1)
        results = self.bbox_util.detection_out(y)
        
        if len(results) > 0 and len(results[0]) > 0:
            # Interpret output, only one frame is used 
            det_label = results[0][:, 0]
            det_conf = results[0][:, 1]
            det_xmin = results[0][:, 2]
            det_ymin = results[0][:, 3]
            det_xmax = results[0][:, 4]
            det_ymax = results[0][:, 5]

            top_indices = [i for i, conf in enumerate(det_conf) if conf >= conf_thresh]

            top_conf = det_conf[top_indices]
            top_label_indices = det_label[top_indices].tolist()
            top_xmin = det_xmin[top_indices]
            top_ymin = det_ymin[top_indices]
            top_xmax = det_xmax[top_indices]
            top_ymax = det_ymax[top_indices]


            classes = []
            probs = []
            for i in range(top_conf.shape[0]):
                xmin = int(round(top_xmin[i] * to_draw.shape[1]))
                ymin = int(round(top_ymin[i] * to_draw.shape[0]))
                xmax = int(round(top_xmax[i] * to_draw.shape[1]))
                ymax = int(round(top_ymax[i] * to_draw.shape[0]))

                # Draw the box on top of the to_draw image
                class_num = int(top_label_indices[i])
                
                #  Skip boxes wider than half of the screen width.
                if ( abs(xmax-xmin) > to_draw.shape[1] / 2. ):
                    continue 

                classes.append(self.class_names[class_num])
                probs.append(top_conf[i])

                cv2.rectangle(to_draw, (xmin, ymin), (xmax, ymax), 
                            self.class_colors[class_num], 2)
                text = self.class_names[class_num] + " " + ('%.2f' % top_conf[i])

                text_top = (xmin, ymin-10)
                text_bot = (xmin + 80, ymin + 5)
                text_pos = (xmin + 5, ymin)
                cv2.rectangle(to_draw, text_top, text_bot, self.class_colors[class_num], -1)
                cv2.putText(to_draw, text, text_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0,0,0), 1)
                
        # Calculate FPS
        # This computes FPS for everything, not just the model's execution 
        # which may or may not be what you want
        #curr_time = timer()
        #exec_time = curr_time - prev_time
        #prev_time = curr_time
        #accum_time = accum_time + exec_time
        #curr_fps = curr_fps + 1
        #if accum_time > 1:
        #    accum_time = accum_time - 1
        #    fps = "FPS: " + str(curr_fps)
        #    curr_fps = 0

        # Draw FPS in top left corner
        #cv2.rectangle(to_draw, (0,0), (50, 17), (255,255,255), -1)
        #cv2.putText(to_draw, fps, (3,10), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0,0,0), 1)

        #print("object NO:", i+1)
        #print("rectangle info: ", coords)
        
        
        return to_draw, classes, probs
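A minimal driver for SSDPipeline, assuming the checkpoint referenced in __init__ exists; note that setClassColors() must be called before pipeline(), since pipeline() reads self.class_names and self.class_colors. File names below are placeholders.

# Hypothetical usage sketch for SSDPipeline.
import cv2

if __name__ == '__main__':
    ssd = SSDPipeline()
    ssd.setClassColors()                    # builds self.class_names / self.class_colors
    frame = cv2.imread('sample_frame.jpg')  # any BGR image
    drawn, classes, probs = ssd.pipeline(frame)
    for name, p in zip(classes, probs):
        print('{}: {:.2f}'.format(name, p))
    cv2.imwrite('sample_frame_det.jpg', drawn)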
Пример #27
0
    def frames():
        video_path = 0 
        start_frame = 0 
        conf_thresh = 0.6
        input_shape = (480,300,3)
        class_names = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
        NUM_CLASSES = len(class_names)
        num_classes=NUM_CLASSES
        class_colors = []
        for i in range(0, num_classes):
            hue = 255*i/num_classes
            col = np.zeros((1,1,3)).astype("uint8")
            col[0][0][0] = hue
            col[0][0][1] = 128 # Saturation
            col[0][0][2] = 255 # Value
            cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
            col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]), int(cvcol[0][0][2]))
            class_colors.append(col) 
        bbox_util = BBoxUtility(num_classes)
        model = SSD(input_shape, num_classes=NUM_CLASSES)
        model.load_weights('weights_SSD300.hdf5') 

        INTERVAL = 33    # wait time (ms) passed to cv2.waitKey
        FRAME_RATE = 20  # fps
        ORG_WINDOW_NAME = "org"
        #GRAY_WINDOW_NAME = "gray"
        #OUT_FILE_NAME = "real_SSD_result.mp4"
        
        vid = cv2.VideoCapture(Camera.video_source)
        width, height = input_shape[0], input_shape[1]  #input_shape
        """
        out = cv2.VideoWriter(OUT_FILE_NAME, \
                      cv_fourcc('M', 'P', '4', 'V'), \
                      FRAME_RATE, \
                      (width, height), \
                      True)
        """
        if not vid.isOpened():
            raise IOError(("Couldn't open video file or webcam. If you're "
            "trying to open a webcam, make sure you video_path is an integer!"))
        
        vidw = vid.get(cv2.CAP_PROP_FRAME_WIDTH)
        vidh = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)
        vidar = vidw/vidh
        """
        if start_frame > 0:
            vid.set(cv2.CAP_PROP_POS_MSEC, start_frame)
        """    
        accum_time = 0
        curr_fps = 0
        fps = "FPS: ??"
        prev_time = timer()
        start_time=prev_time
        #cv2.namedWindow(ORG_WINDOW_NAME)
        
        while True:
            retval, orig_image = vid.read()
            if not retval:
                print("Done!")
                return
                
            im_size = (input_shape[1], input_shape[0])  
            resized = cv2.resize(orig_image, im_size)
            rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
           
            to_draw = cv2.resize(resized, (int(input_shape[1]*vidar), input_shape[0]))
            
            inputs = [image.img_to_array(rgb)]  #rgb
            tmp_inp = np.array(inputs)
            x = preprocess_input(tmp_inp)
            y = model.predict(x)
            
            results = bbox_util.detection_out(y)
            
            if len(results) > 0 and len(results[0]) > 0:
                det_label = results[0][:, 0]
                det_conf = results[0][:, 1]
                det_xmin = results[0][:, 2]
                det_ymin = results[0][:, 3]
                det_xmax = results[0][:, 4]
                det_ymax = results[0][:, 5]

                top_indices = [i for i, conf in enumerate(det_conf) if conf >= conf_thresh]

                top_conf = det_conf[top_indices]
                top_label_indices = det_label[top_indices].tolist()
                top_xmin = det_xmin[top_indices]
                top_ymin = det_ymin[top_indices]
                top_xmax = det_xmax[top_indices]
                top_ymax = det_ymax[top_indices]

                for i in range(top_conf.shape[0]):
                    xmin = int(round(top_xmin[i] * to_draw.shape[1]))
                    ymin = int(round(top_ymin[i] * to_draw.shape[0]))
                    xmax = int(round(top_xmax[i] * to_draw.shape[1]))
                    ymax = int(round(top_ymax[i] * to_draw.shape[0]))

                    class_num = int(top_label_indices[i])
                    cv2.rectangle(to_draw, (xmin, ymin), (xmax, ymax), 
                              class_colors[class_num], 2)   #to_draw
                    text = class_names[class_num] + " " + ('%.2f' % top_conf[i])

                    text_top = (xmin, ymin-10)
                    text_bot = (xmin + 80, ymin + 5)
                    text_pos = (xmin + 5, ymin)
                    cv2.rectangle(to_draw, text_top, text_bot, class_colors[class_num], -1)  #to_draw
                    cv2.putText(to_draw, text, text_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0,0,0), 1)  #to_draw
                    print(text," ")
            curr_time = timer()
            exec_time = curr_time - prev_time
            prev_time = curr_time
            accum_time = accum_time + exec_time
            curr_fps = curr_fps + 1
            if accum_time > 1:
                accum_time = accum_time - 1
                fps = "FPS: " + str(curr_fps)
                curr_fps = 0
            
            cv2.rectangle(to_draw, (0,0), (50, 17), (255,255,255), -1)  #to_draw
            cv2.putText(to_draw, fps, (3,10), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0,0,0), 1) #to_draw
            #yield cv2.imencode('.jpg', to_draw)[1].tobytes()
            to_draw = cv2.resize(to_draw, (int(input_shape[0]*1), input_shape[1]))
            #cv2.imshow(ORG_WINDOW_NAME, to_draw)  #to_draw
            #out.write(to_draw)  #add to_draw
            
            if cv2.waitKey(INTERVAL)>= 0:   # & 0xFF == ord('q'):
                break
            #elif curr_time-start_time>=60:
            #    break
            yield cv2.imencode('.jpg', to_draw)[1].tobytes()
        vid.release()   #add
        #out.release()   #add
        cv2.destroyAllWindows() #add
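The frames() generator above yields JPEG-encoded bytes, the pattern used by MJPEG streaming servers (e.g. a Flask video feed); Camera is assumed to be the enclosing class exposing video_source, which the snippet does not show. A hedged sketch of a consumer that simply writes the first few frames to disk:

# Hypothetical consumer of the frames() generator.
import itertools

for idx, jpg_bytes in enumerate(itertools.islice(Camera.frames(), 5)):
    with open('frame_{:02d}.jpg'.format(idx), 'wb') as f:
        f.write(jpg_bytes)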
Пример #28
0
class Detectors:

    def __init__(self):
        # Restore the face detection model and the age/gender detection model
        self.age_detector = load_model("transfer_Xception_29.h5")
        NUM_CLASSES = 2
        input_shape = (300, 300, 3)
        priors = pickle.load(open('prior_boxes_ssd300.pkl', 'rb'))
        self.bbox_util = BBoxUtility(NUM_CLASSES, priors)
        self.face_detector = SSD300(input_shape, num_classes=NUM_CLASSES)
        self.face_detector.load_weights('weights.05-3.15.hdf5', by_name=True)


    def age_detect(self, input):
        # A leading batch (N) dimension is required at the front of the input
        input_add = input
        age_predict = self.age_detector.predict(input_add)
        # Convert the age back from the sigmoid output (0-1); 1.0 is mapped to 116 years
        age = np.round(age_predict[0]*116).astype(int)
        # Gender
        gender = np.zeros([age_predict[1].shape[0],1],dtype=str)
        for i in range(age_predict[1].shape[0]):
            # Gender is decided like: [0.2, 0.8] -> F, [0.6, 0.4] -> M
            if 0.5 <= age_predict[1][i][0]:
                gender[i] = 'M'
            else:
                gender[i] = 'F'

        return age, gender  # return values are unified as arrays


    def face_detect(self, img_path, display=False):

        inputs, images, resize_imgs, bb_coordinate = [], [], [], []

        img = image.load_img(img_path, target_size=(300, 300))
        img = image.img_to_array(img)

        if '/' in img_path:
            img_original = image.load_img(img_path)
            img_original = image.img_to_array(img_original)
        else:
            # when the image was fetched from S3
            img_original = np.array(image.load_img(img_path))


        images.append(img_original)
        inputs.append(img)
        inputs = preprocess_input(np.array(inputs))

        # predict
        preds = self.face_detector.predict(inputs, batch_size=1, verbose=0)
        results = self.bbox_util.detection_out(preds)

        for i, img in enumerate(images):
            # Parse the outputs.
            det_label = results[i][:, 0]
            det_conf = results[i][:, 1]
            det_xmin = results[i][:, 2]
            det_ymin = results[i][:, 3]
            det_xmax = results[i][:, 4]
            det_ymax = results[i][:, 5]

            top_indices = [i for i, conf in enumerate(det_conf) if conf >= 0.3]

            top_conf = det_conf[top_indices]
            top_label_indices = det_label[top_indices].tolist()
            top_xmin = det_xmin[top_indices]
            top_ymin = det_ymin[top_indices]
            top_xmax = det_xmax[top_indices]
            top_ymax = det_ymax[top_indices]

            for i in range(top_conf.shape[0]):
                xmin = int(round(top_xmin[i] * img.shape[1]))
                ymin = int(round(top_ymin[i] * img.shape[0]))
                xmax = int(round(top_xmax[i] * img.shape[1]))
                ymax = int(round(top_ymax[i] * img.shape[0]))
                score = top_conf[i]
                label = int(top_label_indices[i])

                bb_coordinate.append(np.array([xmin, ymin, xmax, ymax]))
                detect_img = img_original[ymin:ymax, xmin: xmax, :]
                detect_img = cv2.resize(detect_img,(200, 200))
                resize_imgs.append(detect_img)

        # Return the array of resized crops and, for each box, the top-left (x, y) and bottom-right (x, y) in the original image
        return np.array(resize_imgs), np.array(bb_coordinate), img_original
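A hedged end-to-end sketch combining the two detectors above: crop faces with face_detect(), then estimate age and gender on the resized crops. The image path is a placeholder, and any preprocessing the age/gender model was trained with would still need to be applied to the crops.

# Hypothetical usage of Detectors; the weight files referenced in __init__ must exist.
detectors = Detectors()
faces, boxes, original = detectors.face_detect('group_photo.jpg')
if len(faces) > 0:
    ages, genders = detectors.age_detect(faces)   # faces already carries a batch dimension
    for box, age, gender in zip(boxes, ages, genders):
        print('face at {}: age {}, gender {}'.format(box.tolist(), int(age[0]), gender[0]))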
def run_camera(input_shape, model, save_path, frame_number):
    num_classes = 21
    conf_thresh = 0.4
    bbox_util = BBoxUtility(num_classes)

    class_colors = []
    for i in range(0, num_classes):
        hue = 255 * i / num_classes
        col = np.zeros((1, 1, 3)).astype("uint8")
        col[0][0][0] = hue
        col[0][0][1] = 128  # Saturation
        col[0][0][2] = 255  # Value
        cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
        col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]), int(cvcol[0][0][2]))
        class_colors.append(col)

    vid = cv2.VideoCapture(0)

    # Compute aspect ratio of video
    vidw = vid.get(cv2.CAP_PROP_FRAME_WIDTH)
    vidh = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)
    # vidar = vidw / vidh
    samples = os.listdir(save_path)
    sample_count = len(samples)
    empty_count = 0
    image_stack = []
    while True:
        retval, orig_image = vid.read()
        if not retval:
            print("Done!")
            return None

        im_size = (input_shape[0], input_shape[1])
        resized = cv2.resize(orig_image, im_size)
        rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

        inputs = [image.img_to_array(rgb)]
        tmp_inp = np.array(inputs)
        x = preprocess_input(tmp_inp)

        y = model.predict(x)

        results = bbox_util.detection_out(y)
        if len(results) > 0 and len(results[0]) > 0:
            det_label = results[0][:, 0]
            det_conf = results[0][:, 1]
            det_xmin = results[0][:, 2]
            det_ymin = results[0][:, 3]
            det_xmax = results[0][:, 4]
            det_ymax = results[0][:, 5]

            top_indices = [
                i for i, conf in enumerate(det_conf) if conf >= conf_thresh
            ]

            top_conf = det_conf[top_indices]
            top_label_indices = det_label[top_indices].tolist()
            top_xmin = det_xmin[top_indices]
            top_ymin = det_ymin[top_indices]
            top_xmax = det_xmax[top_indices]
            top_ymax = det_ymax[top_indices]

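            # Class index 15 corresponds to 'person' in the VOC label order (background = 0).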
            if 15 not in top_label_indices:
                empty_count += 1
                if empty_count == 4:
                    image_stack = []
                    empty_count = 0
            else:
                empty_count = 0
                for i in range(top_conf.shape[0]):
                    xmin = int(round((top_xmin[i] * vidw) * 0.9))
                    ymin = int(round((top_ymin[i] * vidh) * 0.9))
                    xmax = int(round(
                        (top_xmax[i] * vidw) *
                        1.1)) if int(round(
                            (top_xmax[i] * vidw) * 1.1)) <= vidw else int(
                                round(top_xmax[i] * vidw))
                    ymax = int(round(
                        (top_ymax[i] * vidh) *
                        1.1)) if int(round(
                            (top_ymax[i] * vidh) * 1.1)) <= vidh else int(
                                round(top_ymax[i] * vidh))

                    # save frames
                    class_num = int(top_label_indices[i])
                    if class_num == 15:
                        cv2.rectangle(orig_image, (xmin, ymin), (xmax, ymax),
                                      class_colors[class_num], 2)
                        frame = orig_image
                        if len(image_stack) < frame_number:
                            image_stack.append(frame[ymin:ymax, xmin:xmax, :])
                        if len(image_stack) == frame_number:
                            image_stack.pop(0)
                            image_stack.append(frame[ymin:ymax, xmin:xmax, :])
        cv2.imshow("SSD result", orig_image)
        if cv2.waitKey(5) & 0xFF == ord('s'):
            if len(image_stack) == frame_number:
                if not os.path.exists(save_path + str(sample_count + 1)):
                    os.mkdir(save_path + str(sample_count + 1))
                for pic in range(frame_number):
                    cv2.imwrite(
                        save_path + str(sample_count + 1) + '/' +
                        str(1000 + pic) + '.jpg', image_stack[pic])
                    print('saving ' + save_path + str(sample_count + 1) + '/' +
                          str(1000 + pic) + '.jpg')
                image_stack = []
                empty_count = 0
                sample_count += 1
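run_camera() expects an already-built SSD model; a hedged sketch of a caller, assuming the 21-class VOC weights used elsewhere in this collection and an existing save directory (all paths are placeholders):

# Hypothetical entry point for run_camera().
if __name__ == '__main__':
    input_shape = (300, 300, 3)
    model = SSD300(input_shape, num_classes=21)
    model.load_weights('weights_SSD300.hdf5', by_name=True)
    # save_path must already exist; press 's' to dump the buffered person crops
    # into a new numbered sub-folder.
    run_camera(input_shape, model, save_path='./samples/', frame_number=8)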
Пример #30
0
class VideoTest(object):
    """ Class for testing a trained SSD model on a video file and show the# {{{
        result in a window. Class is designed so that one VideoTest object 
        can be created for a model, and the same object can then be used on 
        multiple videos and webcams.      
        
        Arguments:
            class_names: A list of strings, each containing the name of a class.
                         The first name should be that of the background class
                         which is not used.
                         
            model:       An SSD model. It should already be trained for 
                         images similar to the video to test on.
                         
            input_shape: The shape that the model expects for its input, 
                         as a tuple, for example (300, 300, 3)    
                         
            bbox_util:   An instance of the BBoxUtility class in ssd_utils.py
                         The BBoxUtility needs to be instantiated with 
                         the same number of classes as the length of        
                         class_names.
    
    """# }}}

    def __init__(self, class_names, model, input_shape, confidence):  # {{{
        self.class_names = class_names
        self.num_classes = len(class_names)
        self.model = model
        self.input_shape = input_shape
        self.confidence = confidence
        self.bbox_util = BBoxUtility(self.num_classes)
        self.next_ID = 0
        # Create unique and somewhat visually distinguishable bright
        # colors for the different classes.
        self.class_colors = []
        for i in range(0, self.num_classes):
            # This can probably be written in a more elegant manner
            hue = 255 * i / self.num_classes
            col = np.zeros((1, 1, 3)).astype("uint8")
            col[0][0][0] = hue
            col[0][0][1] = 128  # Saturation
            col[0][0][2] = 255  # Value
            cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
            col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]),
                   int(cvcol[0][0][2]))
            self.class_colors.append(col)  # }}}

    def run(self, video_path=0, start_frame=0, conf_thresh=0):
        """ Runs the test on a video (or webcam)             # {{{
        
        # Arguments
        video_path: A file path to a video to be tested on. Can also be a number, 
                    in which case the webcam with the same number (i.e. 0) is 
                    used instead
                    
        start_frame: The number of the first frame of the video to be processed
                     by the network. 
                     
        conf_thresh: Threshold of confidence. Any boxes with lower confidence 
                     are not visualized.
                    
        """

        vid = cv2.VideoCapture(video_path)
        if not vid.isOpened():
            raise IOError((
                "Couldn't open video file or webcam. If you're "
                "trying to open a webcam, make sure your video_path is an integer!"
            ))  # }}}

        # Compute aspect ratio of video     # {{{
        #msvidw = vid.get(cv2.cv.CV_CAP_PROP_FRAME_WIDTH)
        #vidh = vid.get(cv2.cv.CV_CAP_PROP_FRAME_HEIGHT)
        vidw = vid.get(cv2.CAP_PROP_FRAME_WIDTH)
        vidh = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)
        vidar = vidw / vidh  # }}}

        # Skip frames until reaching start_frame# {{{
        if start_frame > 0:
            vid.set(cv2.CAP_PROP_POS_MSEC, start_frame)

        accum_time = 0
        video_time = 0
        curr_fps = 0
        fps = "FPS: ??"
        prev_time = timer()  # }}}

        gx, gy, gt, gid = [], [], [], []

        hsv = [[int(np.random.rand() * 100), 255, 255] for i in range(255)]
        for i in range(len(hsv)):
            hsv[i][0] = (29 * i) % 100
        #color = np.random.rand(1024,3)
        color = []
        for i in range(len(hsv)):
            color.append(hsv2rgb(hsv[i][0], hsv[i][1], hsv[i][2]))
            color[i][0] = float(color[i][0] / 255)
            color[i][1] = float(color[i][1] / 255)
            color[i][2] = float(color[i][2] / 255)

        #4 point designation
        w = 4.3
        h = 5.4

        pts1 = np.float32([[650, 298], [1275, 312], [494, 830], [1460, 845]])
        pts1 *= self.input_shape[1] / vidh
        pts2 = np.float32([[0, 0], [w, 0], [0, h], [w, h]])

        Homography = cv2.getPerspectiveTransform(pts1, pts2)
        Homography2 = cv2.getPerspectiveTransform(pts2, pts1)

        dt = 1 / vid.get(cv2.CAP_PROP_FPS)

        trackers = []

        pub_gauss1 = rospy.Publisher('gauss1',
                                     PoseWithCovarianceStamped,
                                     queue_size=10)
        pub_gauss2 = rospy.Publisher('gauss2',
                                     PoseWithCovarianceStamped,
                                     queue_size=10)
        pub_gauss3 = rospy.Publisher('gauss3',
                                     PoseWithCovarianceStamped,
                                     queue_size=10)
        pub_markers = rospy.Publisher('markers', MarkerArray, queue_size=10)
        rospy.init_node('tracker', anonymous=True)
        r = rospy.Rate(10)

        gauss1 = PoseWithCovarianceStamped()
        gauss2 = PoseWithCovarianceStamped()
        gauss3 = PoseWithCovarianceStamped()
        gauss1.header.frame_id = "map"
        gauss2.header.frame_id = "map"
        gauss3.header.frame_id = "map"

        markers = MarkerArray()
        plots = []

        while not rospy.is_shutdown():
            retval, orig_image = vid.read()  # {{{
            if not retval:
                print("Done!")
                break
                #return
            im_size = (self.input_shape[0], self.input_shape[1])  #(300,300)
            resized = cv2.resize(orig_image, im_size)
            rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

            # Reshape to original aspect ratio for later visualization
            # The resized version is used, to visualize what kind of resolution
            # the network has to work with.
            to_draw = cv2.resize(
                resized,
                (int(self.input_shape[0] * vidar), self.input_shape[1]))

            # Use model to predict
            inputs = [image.img_to_array(rgb)]
            tmp_inp = np.array(inputs)
            X = preprocess_input(tmp_inp)

            Y = self.model.predict(X)

            # This line creates a new TensorFlow device every time. Is there a
            # way to avoid that?
            results = self.bbox_util.detection_out(Y)  # }}}
            new_datas = []
            if len(results) > 0 and len(results[0]) > 0:
                # Interpret output, only one frame is used
                det_label = results[0][:, 0]
                det_conf = results[0][:, 1]
                det_xmin = results[0][:, 2]
                det_ymin = results[0][:, 3]
                det_xmax = results[0][:, 4]
                det_ymax = results[0][:, 5]

                # top_indices = [i for i, conf in enumerate(det_conf) if conf >= conf_thresh]
                top_indices = [
                    i for i, conf in enumerate(det_conf)
                    if conf >= self.confidence
                ]

                top_conf = det_conf[top_indices]
                top_label_indices = det_label[top_indices].tolist()
                top_xmin = det_xmin[top_indices]
                top_ymin = det_ymin[top_indices]
                top_xmax = det_xmax[top_indices]
                top_ymax = det_ymax[top_indices]

                #Bbox
                for i in range(top_conf.shape[0]):
                    xmin = int(round(top_xmin[i] * to_draw.shape[1]))
                    ymin = int(round(top_ymin[i] * to_draw.shape[0]))
                    xmax = int(round(top_xmax[i] * to_draw.shape[1]))
                    ymax = int(round(top_ymax[i] * to_draw.shape[0]))

                    # Draw the box on top of the to_draw image
                    class_num = int(top_label_indices[i])
                    if ((self.class_names[class_num] == 'person') &
                        (top_conf[i] >= 0.9)):  #0.6#0.9#0.996
                        cv2.rectangle(to_draw, (xmin, ymin), (xmax, ymax),
                                      self.class_colors[class_num], 2)
                        text = self.class_names[class_num] + " " + (
                            '%.2f' % top_conf[i])

                        text_top = (xmin, ymin - 10)
                        text_bot = (xmin + 80, ymin + 5)
                        text_pos = (xmin + 5, ymin)
                        cv2.rectangle(to_draw, text_top, text_bot,
                                      self.class_colors[class_num], -1)

                        #print(text , '%.2f' % video_time , ( (xmin+xmax)/2, ymax ) )
                        cv2.putText(to_draw, text, text_pos,
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 0),
                                    1)
                        cv2.circle(to_draw, ((xmin + xmax) // 2, ymax), 3,
                                   (0, 0, 255), -1)

                        imagepoint = [[(xmin + xmax) / 2], [ymax], [1]]
                        groundpoint = np.dot(Homography, imagepoint)
                        groundpoint = (groundpoint / groundpoint[2]).tolist()
                        groundpoint[0] = groundpoint[0][0]
                        groundpoint[1] = groundpoint[1][0]
                        groundpoint[2] = groundpoint[2][0]

                        # if((0<=groundpoint[0]) & (groundpoint[0]<=w) & (0<=groundpoint[1]) & (groundpoint[1]<=h)):
                        #     print(text , '%.2f' % video_time , ('%.2f' % groundpoint[0] , '%.2f' % groundpoint[1]) )
                        #     gx.append(groundpoint[0])
                        #     gy.append(groundpoint[1])
                        #     gt.append(video_time)
                        #     new_datas.append([gx[-1],gy[-1],gt[-1],0])
                        print(text, '%.2f' % video_time,
                              ('%.2f' % groundpoint[0],
                               '%.2f' % groundpoint[1]))
                        gx.append(groundpoint[0])
                        gy.append(groundpoint[1])
                        gt.append(video_time)
                        new_datas.append([gx[-1], gy[-1], gt[-1], 0])
            # motion update
            for i in range(len(trackers)):
                trackers[i].kf_motion()
            # measurement update
            for i in range(len(trackers)):
                for j in range(len(new_datas)):
                    if (trackers[i].in_error_ellipse(
                            trackers[i].x - new_datas[j][0],
                            trackers[i].y - new_datas[j][1])):
                        trackers[i].kf_measurement_update(
                            new_datas[j][0], new_datas[j][1])
                        trackers[i].update(trackers[i].x, trackers[i].y,
                                           video_time)
                        # plot(trackers[i].x,trackers[i].y,i,trackers[i].col,Homography2,to_draw,plots)
                        gid.append(trackers[i].ID)
                        new_datas[j][3] = 1
                plot(trackers[i].x, trackers[i].y, i, trackers[i].col,
                     Homography2, to_draw, plots)

            # ROS pose with covariance# {{{
            if (len(trackers)):
                gauss1.pose.pose.position.x = trackers[0].x
                gauss1.pose.pose.position.y = trackers[0].y
                theta = m.atan(trackers[0].vy / trackers[0].vx)
                q = tf.transformations.quaternion_from_euler(0, 0, theta)
                gauss1.pose.pose.orientation.x = q[0]
                gauss1.pose.pose.orientation.y = q[1]
                gauss1.pose.pose.orientation.z = q[2]
                gauss1.pose.pose.orientation.w = q[3]
                gauss1.pose.covariance = np.zeros(36)
                gauss1.pose.covariance[0] = trackers[0].P[0, 0]
                gauss1.pose.covariance[1] = trackers[0].P[0, 1]
                gauss1.pose.covariance[6] = trackers[0].P[1, 0]
                gauss1.pose.covariance[7] = trackers[0].P[1, 1]
                pub_gauss1.publish(gauss1)
            if (len(trackers) > 1):
                gauss2.pose.pose.position.x = trackers[1].x
                gauss2.pose.pose.position.y = trackers[1].y
                theta = m.atan(trackers[1].vy / trackers[1].vx)
                q = tf.transformations.quaternion_from_euler(0, 0, theta)
                gauss2.pose.pose.orientation.x = q[0]
                gauss2.pose.pose.orientation.y = q[1]
                gauss2.pose.pose.orientation.z = q[2]
                gauss2.pose.pose.orientation.w = q[3]
                gauss2.pose.covariance = np.zeros(36)
                gauss2.pose.covariance[0] = trackers[1].P[0, 0]
                gauss2.pose.covariance[1] = trackers[1].P[0, 1]
                gauss2.pose.covariance[6] = trackers[1].P[1, 0]
                gauss2.pose.covariance[7] = trackers[1].P[1, 1]
                pub_gauss2.publish(gauss2)
            if (len(trackers) > 2):
                gauss3.pose.pose.position.x = trackers[2].x
                gauss3.pose.pose.position.y = trackers[2].y
                theta = m.atan(trackers[2].vy / trackers[2].vx)
                q = tf.transformations.quaternion_from_euler(0, 0, theta)
                gauss3.pose.pose.orientation.x = q[0]
                gauss3.pose.pose.orientation.y = q[1]
                gauss3.pose.pose.orientation.z = q[2]
                gauss3.pose.pose.orientation.w = q[3]
                gauss3.pose.covariance = np.zeros(36)
                gauss3.pose.covariance[0] = trackers[2].P[0, 0]
                gauss3.pose.covariance[1] = trackers[2].P[0, 1]
                gauss3.pose.covariance[6] = trackers[2].P[1, 0]
                gauss3.pose.covariance[7] = trackers[2].P[1, 1]
                pub_gauss3.publish(gauss3)  # }}}

            #scores = [[0 for i in range(len(new_datas))] for j in range(len(trackers))]
            #for i in range(len(trackers)):
            #    trackers[i].kf_motion()
            #    for j in range(len(new_datas)):
            #        scores[i][j] = tracker[i].pro_dens_2d(new_datas[j][0],new_datas[j][1])

            #generate new tracker
            for i in range(len(new_datas)):
                if (new_datas[i][3] == 0):
                    newdetec = len(gx) - len(new_datas) + i
                    trackers.append(
                        Tracker(self.next_ID, gx[newdetec], gy[newdetec],
                                video_time, dt))
                    gid.append(self.next_ID)
                    plot(trackers[self.next_ID].x, trackers[self.next_ID].y,
                         self.next_ID, trackers[self.next_ID].col, Homography2,
                         to_draw, plots)
                    self.next_ID += 1

            # Calculate FPS# {{{
            # This computes FPS for everything, not just the model's execution
            # which may or may not be what you want
            curr_time = timer()
            exec_time = curr_time - prev_time
            prev_time = curr_time
            accum_time = accum_time + exec_time
            video_time = video_time + 1 / vid.get(cv2.CAP_PROP_FPS)
            curr_fps = curr_fps + 1
            if accum_time > 1:
                accum_time = accum_time - 1
                fps = "FPS: " + str(curr_fps)
                curr_fps = 0  # }}}

            # Draw FPS in top left corner# {{{
            cv2.rectangle(to_draw, (0, 0), (50, 17), (255, 255, 255), -1)
            cv2.putText(to_draw, fps, (3, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.35,
                        (0, 0, 0), 1)  # }}}

            for i in range(len(plots)):
                cv2.circle(to_draw, (plots[i][0], plots[i][1]), 3,
                           (plots[i][2][0] * 255, plots[i][2][1] * 255,
                            plots[i][2][2] * 255), -1)

            for i in range(len(gx)):
                marker = Marker()
                marker.header.frame_id = "map"
                marker.header.stamp = rospy.Time.now()
                marker.ns = "basic_shapes"
                marker.id = i
                marker.type = 2  #sphere
                marker.action = Marker.ADD
                marker.pose.position.x = gx[i]
                marker.pose.position.y = gy[i]
                marker.pose.position.z = 0.
                marker.pose.orientation.x = 0.
                marker.pose.orientation.y = 0.
                marker.pose.orientation.z = 0.
                marker.pose.orientation.w = 1.
                marker.scale.x = 0.1
                marker.scale.y = 0.1
                marker.scale.z = 0.1
                marker.color.r = color[gid[i]][2]
                marker.color.g = color[gid[i]][1]
                marker.color.b = color[gid[i]][0]
                marker.color.a = 1.
                marker.lifetime = rospy.Duration(0)

                markers.markers.append(marker)
            #print len(markers.markers)
            pub_markers.publish(markers)
            del markers.markers[:]

            # draw a square# {{{
            cv2.line(to_draw, (int(pts1[0][0]), int(pts1[0][1])),
                     (int(pts1[1][0]), int(pts1[1][1])), (100, 200, 100),
                     thickness=2)
            cv2.line(to_draw, (int(pts1[0][0]), int(pts1[0][1])),
                     (int(pts1[2][0]), int(pts1[2][1])), (100, 200, 100),
                     thickness=2)
            cv2.line(to_draw, (int(pts1[3][0]), int(pts1[3][1])),
                     (int(pts1[1][0]), int(pts1[1][1])), (100, 200, 100),
                     thickness=2)
            cv2.line(to_draw, (int(pts1[3][0]), int(pts1[3][1])),
                     (int(pts1[2][0]), int(pts1[2][1])), (100, 200, 100),
                     thickness=2)  # }}}# }}}

            cv2.imshow("SSD result", to_draw)
            cv2.waitKey(10)
            r.sleep()

        #create graph# {{{
        #fig = plt.figure()
        #ax=Axes3D(fig)
        #color = np.random.rand(len(trackers),3)
        #for i in range(len(gx)):
        #    iro = (color[gid[i]][2],color[gid[i]][1],color[gid[i]][0])
        #    ax.scatter(gx[i],gy[i],gt[i],s=5,c=iro)
        #ax.scatter(gx, gy, gt, s=5, c="blue")
        #ax.set_xlabel('x')
        #ax.set_ylabel('y')
        #ax.set_zlabel('t')
        #plt.show()# }}}

        cv2.destroyAllWindows()
        vid.release()

        return
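A hedged launcher for the ROS-enabled VideoTest above, assuming a running roscore, the Tracker class and plotting helpers that the original script imports, and the standard VOC-trained SSD300 weights; the video path is a placeholder.

# Hypothetical driver for the tracking VideoTest (the constructor takes a confidence argument).
if __name__ == '__main__':
    class_names = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle",
                   "bus", "car", "cat", "chair", "cow", "diningtable", "dog",
                   "horse", "motorbike", "person", "pottedplant", "sheep", "sofa",
                   "train", "tvmonitor"]
    input_shape = (300, 300, 3)
    model = SSD300(input_shape, num_classes=len(class_names))
    model.load_weights('weights_SSD300.hdf5', by_name=True)
    vt = VideoTest(class_names, model, input_shape, confidence=0.6)
    vt.run('pedestrians.mp4')   # placeholder video path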
Пример #31
0
images.append(imread(img_path))
inputs.append(img.copy())
img_path = './pics/1.png'
img = image.load_img(img_path, target_size=(300, 300))
img = image.img_to_array(img)
images.append(imread(img_path))
inputs.append(img.copy())
inputs = preprocess_input(np.array(inputs))

# In[5]:

preds = model.predict(inputs, batch_size=1, verbose=1)

# In[6]:

results = bbox_util.detection_out(preds)

# In[8]:
count = 0
for i, img in enumerate(images):
    # Parse the outputs.
    det_label = results[i][:, 0]
    det_conf = results[i][:, 1]
    det_xmin = results[i][:, 2]
    det_ymin = results[i][:, 3]
    det_xmax = results[i][:, 4]
    det_ymax = results[i][:, 5]

    # Get detections with confidence higher than the threshold (0.3 here, originally 0.6).
    top_indices = [i for i, conf in enumerate(det_conf) if conf >= 0.3]
    def create_model_prediction(self, n_images=400):
        priors = pickle.load(
            open(
                os.path.join(
                    os.path.dirname(os.path.realpath(__file__)),
                    'priorFiles/prior_boxes_ssd300MobileNetV2_224_224.pkl'),
                'rb'))

        np.set_printoptions(suppress=True)

        bbox_util = BBoxUtility(NUM_CLASSES, priors)

        inputs = []
        images = []
        result_images = []
        annotation_files = []

        print('Prepare : {} files for evaluation. '.format(n_images))
        input_shape = (self.sets['img_height'], self.sets['img_width'], 3)

        with open(
                os.path.join(self.sets['dataset_dir'],
                             'VOC2007/ImageSets/Main/test.txt'),
                'r') as annot_f:
            for annotation in tqdm(list(annot_f)[:n_images]):
                try:
                    img_path = os.path.join(
                        self.sets['dataset_dir'], 'VOC2007/JPEGImages/'
                    ) + annotation.split(' ')[0].strip() + '.jpg'
                    img = image.load_img(img_path,
                                         target_size=(input_shape[0],
                                                      input_shape[1]))
                    img = image.img_to_array(img)
                    result_images.append(img_path)
                    images.append(img)
                    inputs.append(img.copy())
                    annotation_files.append(annotation)
                except Exception as e:
                    print('Error while opening file.', e)

        result_detections = []

        # inputs = preprocess_input(np.array(inputs)[:, :, :, ::-1], mode="tf")
        inputs = np.array(inputs)
        inputs = preprocess_input(inputs)

        print('inputs: {}'.format(inputs.shape))

        results = []
        for img in tqdm(inputs):
            # self.m._model.set_input(input_name, tvm.nd.array(img))
            # self.m._model.run()
            tvm_output = self.m.predict_on_batch(img)
            # ftimer = m.module.time_evaluator("run", ctx, number=1, repeat=1)
            # prof_res = np.array(ftimer().results) * 1000  # convert to millisecond

            tvm_output = self.m._model.get_output(0)
            img_result = bbox_util.detection_out(tvm_output.asnumpy())
            results.append(img_result)

        results = np.array(results)
        results = np.squeeze(results, axis=1)
        print('results: {}'.format(results.shape))

        for i, img in tqdm(enumerate(images)):
            det_label = results[i][:, 0]
            det_conf = results[i][:, 1]
            det_xmin = results[i][:, 2]
            det_ymin = results[i][:, 3]
            det_xmax = results[i][:, 4]
            det_ymax = results[i][:, 5]

            top_indices = [
                i for i, conf in enumerate(det_conf) if conf >= THRESHOLD
            ]

            top_conf = det_conf[top_indices]
            top_label_indices = det_label[top_indices].tolist()
            top_xmin = det_xmin[top_indices]
            top_ymin = det_ymin[top_indices]
            top_xmax = det_xmax[top_indices]
            top_ymax = det_ymax[top_indices]

            detections = []
            for i in range(top_conf.shape[0]):
                '''
                xmin = int(round(top_xmin[i] * img.shape[1]))
                ymin = int(round(top_ymin[i] * img.shape[0]))
                xmax = int(round(top_xmax[i] * img.shape[1]))
                ymax = int(round(top_ymax[i] * img.shape[0]))
                '''
                xmin = top_xmin[i]
                ymin = top_ymin[i]
                xmax = top_xmax[i]
                ymax = top_ymax[i]

                score = top_conf[i]
                label = int(top_label_indices[i])
                label_name = CLASSES[label - 1]
                detections.append([
                    '{:.2f}'.format(xmin), '{:.2f}'.format(ymin),
                    '{:.2f}'.format(xmax), '{:.2f}'.format(ymax), label_name,
                    '{:.2f}'.format(score)
                ])
            result_detections.append(detections)

        print('Test images: {}'.format(len(result_images)))
        print('result_detections: {}'.format(len(result_detections)))

        model_predictions = []
        MODEL_PREDICTION_PATH = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'model_evaluation/model_prediction/')
        predicted_images = []

        for index, image_filename in tqdm(enumerate(result_images)):
            image_name = os.path.basename(image_filename)
            path_elements = image_name[:-4]
            predicted_images.append(image_name[:-4])
            annot_dir = os.path.join(MODEL_PREDICTION_PATH)
            os.makedirs(annot_dir, exist_ok=True)
            annot_name = '{}.txt'.format(path_elements)
            annot_filename = os.path.join(annot_dir, annot_name)
            with open(annot_filename, 'w') as output_f:
                for d in result_detections[index]:
                    left, top, right, bottom, class_name, score = d
                    model_predictions.append(
                        (class_name, score, left, top, right, bottom))
                    output_f.write('{} {} {} {} {} {}\n'.format(
                        class_name, score, left, top, right, bottom))

        GROUND_TRUTH_LABELS = os.path.join(self.sets['dataset_dir'],
                                           'VOC2007/Annotations')
        GROUND_TRUTH_PATH = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'model_evaluation/ground_truth/')

        for f in glob(GROUND_TRUTH_PATH + '*'):
            os.remove(f)
        filenames = os.listdir(GROUND_TRUTH_LABELS)
        ground_images = []

        for filename in tqdm(filenames):
            if filename[:-4] not in predicted_images:
                continue
            ground_images.append(filename[:-4])
            tree = ElementTree.parse(
                os.path.join(GROUND_TRUTH_LABELS + '/{}'.format(filename)))
            root = tree.getroot()
            bounding_boxes = []
            one_hot_classes = []
            size_tree = root.find('size')
            width = float(size_tree.find('width').text)
            height = float(size_tree.find('height').text)
            for object_tree in root.findall('object'):
                for bounding_box in object_tree.iter('bndbox'):
                    xmin = float(bounding_box.find('xmin').text) / width
                    ymin = float(bounding_box.find('ymin').text) / height
                    xmax = float(bounding_box.find('xmax').text) / width
                    ymax = float(bounding_box.find('ymax').text) / height
                    class_name = object_tree.find('name').text.title()
                bounding_box = [class_name, xmin, ymin, xmax, ymax]
                bounding_boxes.append(bounding_box)

            with open(
                    os.path.join(GROUND_TRUTH_PATH,
                                 filename.replace('xml', 'txt')), 'w+') as f:
                for p in bounding_boxes:
                    f.write(' '.join([str(s) for s in p]) + "\n")

        print('Completed eval preparation')
        assert len(ground_images) == len(predicted_images)
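create_model_prediction() writes one text file per image: predictions go to model_evaluation/model_prediction/ as 'class score xmin ymin xmax ymax' lines, and ground truth goes to model_evaluation/ground_truth/ as 'class xmin ymin xmax ymax' lines, with coordinates normalized by image size. A hedged sketch that parses one prediction file back into tuples:

# Hypothetical reader for a prediction file produced above.
def read_predictions(txt_path):
    """Parse 'class score xmin ymin xmax ymax' lines back into tuples."""
    detections = []
    with open(txt_path) as f:
        for line in f:
            cls, score, xmin, ymin, xmax, ymax = line.split()
            detections.append((cls, float(score),
                               float(xmin), float(ymin), float(xmax), float(ymax)))
    return detections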
Пример #33
0
class VideoTest(object):
    """ Class for testing a trained SSD model on a video file and show the
        result in a window. Class is designed so that one VideoTest object 
        can be created for a model, and the same object can then be used on 
        multiple videos and webcams.
        
        Arguments:
            class_names: A list of strings, each containing the name of a class.
                         The first name should be that of the background class
                         which is not used.
                         
            model:       An SSD model. It should already be trained for 
                         images similar to the video to test on.
                         
            input_shape: The shape that the model expects for its input, 
                         as a tuple, for example (300, 300, 3)    
                         
            bbox_util:   An instance of the BBoxUtility class in ssd_utils.py
                         The BBoxUtility needs to be instantiated with 
                         the same number of classes as the length of        
                         class_names.
    
    """
    
    def __init__(self, class_names, model, input_shape):
        self.class_names = class_names
        self.num_classes = len(class_names)
        self.model = model
        self.input_shape = input_shape
        self.bbox_util = BBoxUtility(self.num_classes)
        
        # Create unique and somewhat visually distinguishable bright
        # colors for the different classes.
        self.class_colors = []
        for i in range(0, self.num_classes):
            # This can probably be written in a more elegant manner
            hue = 255*i/self.num_classes
            col = np.zeros((1,1,3)).astype("uint8")
            col[0][0][0] = hue
            col[0][0][1] = 128 # Saturation
            col[0][0][2] = 255 # Value
            cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
            col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]), int(cvcol[0][0][2]))
            self.class_colors.append(col) 
        
    def run(self, video_path = 0, start_frame = 0, conf_thresh = 0.6):
        """ Runs the test on a video (or webcam)
        
        # Arguments
        video_path: A file path to a video to be tested on. Can also be a number, 
                    in which case the webcam with the same number (i.e. 0) is 
                    used instead
                    
        start_frame: The number of the first frame of the video to be processed
                     by the network. 
                     
        conf_thresh: Threshold of confidence. Any boxes with lower confidence 
                     are not visualized.
                    
        """
    
        vid = cv2.VideoCapture(video_path)
        if not vid.isOpened():
            raise IOError(("Couldn't open video file or webcam. If you're "
            "trying to open a webcam, make sure you video_path is an integer!"))
        
        # Compute aspect ratio of video     
        vidw = vid.get(cv2.cv.CV_CAP_PROP_FRAME_WIDTH)
        vidh = vid.get(cv2.cv.CV_CAP_PROP_FRAME_HEIGHT)
        vidar = vidw/vidh
        
        # Skip frames until reaching start_frame
        if start_frame > 0:
            vid.set(cv2.cv.CV_CAP_PROP_POS_MSEC, start_frame)
            
        accum_time = 0
        curr_fps = 0
        fps = "FPS: ??"
        prev_time = timer()
            
        while True:
            retval, orig_image = vid.read()
            if not retval:
                print("Done!")
                return
                
            im_size = (self.input_shape[0], self.input_shape[1])    
            resized = cv2.resize(orig_image, im_size)
            rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
            
            # Reshape to original aspect ratio for later visualization
            # The resized version is used, to visualize what kind of resolution
            # the network has to work with.
            to_draw = cv2.resize(resized, (int(self.input_shape[0]*vidar), self.input_shape[1]))
            
            # Use model to predict 
            inputs = [image.img_to_array(rgb)]
            tmp_inp = np.array(inputs)
            x = preprocess_input(tmp_inp)
            
            y = self.model.predict(x)
            
            
            # This line creates a new TensorFlow device every time. Is there a 
            # way to avoid that?
            results = self.bbox_util.detection_out(y)
            
            if len(results) > 0 and len(results[0]) > 0:
                # Interpret output, only one frame is used 
                det_label = results[0][:, 0]
                det_conf = results[0][:, 1]
                det_xmin = results[0][:, 2]
                det_ymin = results[0][:, 3]
                det_xmax = results[0][:, 4]
                det_ymax = results[0][:, 5]

                top_indices = [i for i, conf in enumerate(det_conf) if conf >= conf_thresh]

                top_conf = det_conf[top_indices]
                top_label_indices = det_label[top_indices].tolist()
                top_xmin = det_xmin[top_indices]
                top_ymin = det_ymin[top_indices]
                top_xmax = det_xmax[top_indices]
                top_ymax = det_ymax[top_indices]

                for i in range(top_conf.shape[0]):
                    xmin = int(round(top_xmin[i] * to_draw.shape[1]))
                    ymin = int(round(top_ymin[i] * to_draw.shape[0]))
                    xmax = int(round(top_xmax[i] * to_draw.shape[1]))
                    ymax = int(round(top_ymax[i] * to_draw.shape[0]))

                    # Draw the box on top of the to_draw image
                    class_num = int(top_label_indices[i])
                    cv2.rectangle(to_draw, (xmin, ymin), (xmax, ymax), 
                                  self.class_colors[class_num], 2)
                    text = self.class_names[class_num] + " " + ('%.2f' % top_conf[i])

                    text_top = (xmin, ymin-10)
                    text_bot = (xmin + 80, ymin + 5)
                    text_pos = (xmin + 5, ymin)
                    cv2.rectangle(to_draw, text_top, text_bot, self.class_colors[class_num], -1)
                    cv2.putText(to_draw, text, text_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0,0,0), 1)
            
            # Calculate FPS
            # This computes FPS for everything, not just the model's execution 
            # which may or may not be what you want
            curr_time = timer()
            exec_time = curr_time - prev_time
            prev_time = curr_time
            accum_time = accum_time + exec_time
            curr_fps = curr_fps + 1
            if accum_time > 1:
                accum_time = accum_time - 1
                fps = "FPS: " + str(curr_fps)
                curr_fps = 0
            
            # Draw FPS in top left corner
            cv2.rectangle(to_draw, (0,0), (50, 17), (255,255,255), -1)
            cv2.putText(to_draw, fps, (3,10), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0,0,0), 1)
            
            cv2.imshow("SSD result", to_draw)
            cv2.waitKey(10)
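This variant is essentially the stock video test demo: the constructor takes only (class_names, model, input_shape) and the confidence threshold is passed to run(). A hedged driver, reusing the 21-entry VOC class_names list shown in the earlier examples:

# Hypothetical driver for this VideoTest variant; class_names is the VOC list defined above.
model = SSD300((300, 300, 3), num_classes=len(class_names))
model.load_weights('weights_SSD300.hdf5', by_name=True)
VideoTest(class_names, model, (300, 300, 3)).run(video_path=0, conf_thresh=0.6)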
Пример #34
0
def predict_to_coord(model_weights,
                     image_path,
                     band=[4, 3, 2],
                     conf_threshold=0.5):
    '''
    Input :
        model_weights    trained model weights
        image_path       image to run prediction on, e.g. r'...\to_xiangyu\geo_.tif'
        band             selected bands; the first band is numbered 1
        conf_threshold   confidence threshold applied to the output detections

    Output :
        Prj_id        EPSG code of the image's geographic reference system
        result_items  list of (row index, column index, confidence, coordinates)
                      tuples, one per detection
    '''
    voc_classes = ['popp']
    NUM_CLASSES = len(voc_classes) + 1

    input_shape = (300, 300, 3)
    model = SSD300(input_shape, num_classes=NUM_CLASSES)

    #model.load_weights('./checkpoints_330/SSD_4_100_0.0002.hdf5', by_name=True)
    model.load_weights(model_weights, by_name=True)
    bbox_util = BBoxUtility(NUM_CLASSES)

    #dataset = gdal.Open(r'C:\Liuxiangyu\毕业实验\基础数据\to_xiangyu\geo_.tif')
    dataset = gdal.Open(image_path)

    GeoTransform = dataset.GetGeoTransform()
    x0, y0 = GeoTransform[0], GeoTransform[3]  # top-left corner coordinates of the image
    x_pixel, y_pixel = GeoTransform[1], GeoTransform[5]  # pixel size (resolution) in x and y

    Prj_id = int(dataset.GetProjection().split('"')[-2])  # projection EPSG code, e.g. 4326

    im_width = dataset.RasterXSize  # number of columns in the raster
    im_height = dataset.RasterYSize  # number of rows in the raster
    im_band = dataset.RasterCount
    #im_data = dataset.ReadAsArray(0,0,im_width,im_height)

    #get shapelike(width, height, band)
    #im_data = np.swapaxes(im_data,0,1)
    #im_data = np.swapaxes(im_data,2,1)

    if np.max(band) > im_band:

        raise Exception('the requested band index exceeds the number of bands in the image')


#    if im_band == 5:
#        im_datas =im_data[...,band]
#
#    elif im_band == 4:
#        im_datas =im_data[...,[2,1,0]]
#    else:
#        raise Exception('only can open rasters which has 4 or 5 bands')
    band = [b - 1 for b in band]
    #im_datas =im_data[...,band]

    #input_size =300
    y_num = int(im_height / overlap)
    x_num = int(im_width / overlap)
    #    x_num =math.ceil(im_width/input_size)
    #    y_num =math.ceil(im_height/input_size)
    result_items = []
    for row in range(y_num):
        for column in range(x_num):

            y_st, x_st = overlap * row, overlap * column

            if im_width >= x_st + cut_size:
                if im_height >= y_st + cut_size:
                    im_data = dataset.ReadAsArray(x_st, y_st, cut_size,
                                                  cut_size)
                else:
                    im_data = dataset.ReadAsArray(x_st, y_st, cut_size,
                                                  im_height - y_st)
            else:
                if im_height >= y_st + cut_size:
                    im_data = dataset.ReadAsArray(x_st, y_st, im_width - x_st,
                                                  cut_size)
                else:
                    im_data = dataset.ReadAsArray(x_st, y_st, im_width - x_st,
                                                  im_height - y_st)
            im_data[im_data > 65534] = 0

            x = np.swapaxes(im_data, 0, 1)
            x = np.swapaxes(x, 2, 1)
            im_data = x[..., band]
            #            im_data = im_datas[row*input_size:(row+1)*input_size, column*input_size:(column+1)*input_size, :]
            im_data_max = im_data.max()
            im_data_min = im_data.min()
            inputs = []
            images = []
            mask = im_data.copy()
            mask[mask > 0] = 1
            # Fraction of valid pixels; tiles that are mostly empty are skipped below.
            data_radio = np.sum(mask) / (3 * cut_size**2)
            if data_radio > 0.4:

                input_data = (im_data - im_data_min) / (im_data_max -
                                                        im_data_min) * 255
                input_data = cv2.resize(input_data, (300, 300))
                images.append(input_data)
                inputs.append(input_data)

                inputs = preprocess_input(np.array(inputs))
                preds = model.predict(inputs, batch_size=1, verbose=1)
                results = bbox_util.detection_out(preds)

                for i, img in enumerate(images):
                    # Parse the outputs.
                    #det_label =results[i][:, 0]
                    try:

                        det_conf = results[i][:, 1]
                        det_xmin = results[i][:, 2]
                        det_ymin = results[i][:, 3]
                        det_xmax = results[i][:, 4]
                        det_ymax = results[i][:, 5]

                        # Get detections with confidence higher than 0.6.
                        top_indices = [
                            i for i, conf in enumerate(det_conf)
                            if conf >= conf_threshold
                        ]

                        top_conf = det_conf[top_indices]
                        #top_label_indices = det_label[top_indices].tolist()
                        top_xmin = det_xmin[top_indices]
                        top_ymin = det_ymin[top_indices]
                        top_xmax = det_xmax[top_indices]
                        top_ymax = det_ymax[top_indices]
                    except:
                        print('this tile produced no detections, skipping')
                        continue

                    for i in range(top_conf.shape[0]):
                        #                        xmin = int(round(top_xmin[i] * img.shape[1]))
                        #                        ymin = int(round(top_ymin[i] * img.shape[0]))
                        #                        xmax = int(round(top_xmax[i] * img.shape[1]))
                        #                        ymax = int(round(top_ymax[i] * img.shape[0]))
                        score = top_conf[i]

                        rel_xmin = x0 + x_pixel * cut_size * (top_xmin[i] +
                                                              column)
                        rel_ymin = y0 + y_pixel * cut_size * (
                            top_ymin[i] + row)  # y_pixel is negative here, i.e. points downward
                        rel_xmax = x0 + x_pixel * cut_size * (top_xmax[i] +
                                                              column)
                        rel_ymax = y0 + y_pixel * cut_size * (
                            top_ymax[i] + row)  # y_pixel is negative here, i.e. points downward

                        coods = ((rel_xmin, rel_ymin), (rel_xmax, rel_ymin),
                                 (rel_xmax, rel_ymax), (rel_xmin, rel_ymax),
                                 (rel_xmin, rel_ymin))
                        item = (row, column, score, coods)  # (row index, column index, confidence, coordinates)
                        result_items.append(item)
    del dataset

    return Prj_id, result_items
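A hedged sketch of calling predict_to_coord() and dumping the geo-referenced boxes to CSV; the weight and image paths are placeholders, and the module-level overlap and cut_size values that the function relies on must already be defined.

# Hypothetical caller for predict_to_coord(); one detection per CSV row.
import csv

epsg, items = predict_to_coord('SSD_popp_weights.hdf5', 'scene_geo.tif',
                               band=[4, 3, 2], conf_threshold=0.5)
print('EPSG:', epsg)
with open('detections.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    for row, column, score, coords in items:
        writer.writerow([row, column, '{:.3f}'.format(score), coords])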