Example #1
File: Dataset.py Project: ftsai231/yolo_v3
    def parse_annotations(self, annotation, id):
        image_path = './train_car_person/' + str(id) + '.jpg'
        image = np.array(cv2.imread(image_path))
        bboxes = []

        for ann in annotation['annotations']:
            if ann['image_id'] == id:
                x_top_left = ann['bbox'][0]
                y_top_left = ann['bbox'][1]
                w = ann['bbox'][2]
                h = ann['bbox'][3]
                c = ann['category_id']
                # keep only persons (COCO category 1) and cars (category 3),
                # remapping them to class ids 0 and 1
                if c != 1 and c != 3:
                    continue
                elif c == 1:
                    c = 0
                else:
                    c = 1

                x_bottom_right = x_top_left + w
                y_bottom_right = y_top_left + h
                bboxes.append([
                    int(x_top_left), int(y_top_left),
                    int(x_bottom_right), int(y_bottom_right), int(c)
                ])

        bboxes = np.array(bboxes)
        # print("bboxes before preprocess: ", bboxes)
        image, bboxes = utils.image_preprocess(
            image, [self.train_input_size, self.train_input_size],
            np.copy(bboxes))
        # print("bboxes after preprocess: ", bboxes)
        return image, bboxes
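All of these parse routines delegate the resize step to utils.image_preprocess, which is not shown on this page. As a reference, a letterbox-style preprocess commonly used in YOLOv3 pipelines looks roughly like the sketch below; the gray padding value, the [0, 1] normalization, and the box remapping are assumptions, not necessarily this project's exact code.

import cv2
import numpy as np

def image_preprocess(image, target_size, gt_boxes=None):
    # hypothetical letterbox sketch of a YOLOv3-style preprocess
    ih, iw = target_size
    h, w, _ = image.shape

    # scale so the image fits inside the target while keeping aspect ratio
    scale = min(iw / w, ih / h)
    nw, nh = int(scale * w), int(scale * h)
    image_resized = cv2.resize(image, (nw, nh))

    # pad with neutral gray to the square target size, then normalize
    image_padded = np.full((ih, iw, 3), 128.0)
    dw, dh = (iw - nw) // 2, (ih - nh) // 2
    image_padded[dh:nh + dh, dw:nw + dw, :] = image_resized
    image_padded = image_padded / 255.0

    if gt_boxes is None:
        return image_padded

    # map the box corners into the padded coordinate frame
    gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * scale + dw
    gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * scale + dh
    return image_padded, gt_boxes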
Example #2
File: dataset.py Project: geekjr/quickai
    def parse_annotation(self, annotation):
        line = annotation.split()
        image_path = line[0]
        if not os.path.exists(image_path):
            raise KeyError("%s does not exist ... " % image_path)
        image = cv2.imread(image_path)
        if self.dataset_type == "converted_coco":
            bboxes = np.array(
                [list(map(int, box.split(","))) for box in line[1:]])
        elif self.dataset_type == "yolo":
            height, width, _ = image.shape
            bboxes = np.array(
                [list(map(float, box.split(","))) for box in line[1:]])
            bboxes = bboxes * np.array([width, height, width, height, 1])
            bboxes = bboxes.astype(np.int64)

        if self.data_aug:
            image, bboxes = self.random_horizontal_flip(
                np.copy(image), np.copy(bboxes))
            image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes))
            image, bboxes = self.random_translate(np.copy(image),
                                                  np.copy(bboxes))

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image, bboxes = utils.image_preprocess(
            np.copy(image),
            [self.train_input_size, self.train_input_size],
            np.copy(bboxes),
        )
        return image, bboxes
Example #3
    def parse_annotation(self, annotation, mAP=False):
        if TRAIN_LOAD_IMAGES_TO_RAM:
            img_path = annotation[0]
            image = annotation[2]
        else:
            img_path = annotation[0]
            image = cv2.imread(img_path)

        bboxes = np.array(
            [list(map(int, box.split(','))) for box in annotation[1]])

        if self.data_aug:
            image, bboxes = self.random_horizontal_flip(np.copy(image),
                                                        np.copy(bboxes))
            image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes))
            image, bboxes = self.random_translate(np.copy(image),
                                                  np.copy(bboxes))

        if mAP:
            return image, bboxes

        image, bboxes = image_preprocess(np.copy(image),
                                         [self.input_sizes, self.input_sizes],
                                         np.copy(bboxes))

        return image, bboxes
Example #4
File: Predict.py Project: ftsai231/yolo_v3
    def predict(self):
        np.set_printoptions(threshold=np.inf)
        image_path = './414162.jpg'
        image = np.array(cv2.imread(image_path))
        image_shape = image.shape
        print("image_shape: ", image_shape)
        image = np.copy(image)
        image_data = utils.image_preprocess(image,
                                            [self.input_size, self.input_size])
        image_data = image_data[np.newaxis, ...]

        pred_bbox = self.sess.run([self.pred_bbox],
                                  feed_dict={
                                      self.input: image_data,
                                      self.training: False
                                  })
        pred_bbox = np.array(pred_bbox[0])
        pred_bbox = utils.postprocess_boxes(pred_bbox, image_shape, 416, 0.5)
        print("pred_bbox shape: ", pred_bbox.shape)

        pred_bbox = utils.nms(pred_bbox, 0.45)
        print("pred_bbox after: ", pred_bbox)

        image = utils.draw_bbox(image, pred_bbox, show_label=True)
        cv2.imwrite('./test.jpg', image)
Example #5
    def detect_image(self,
                     image_path=None,
                     output_path=None,
                     input_size=416,
                     show=False,
                     score_threshold=0.3,
                     iou_threshold=0.45,
                     rectangle_colors=''):
        if image_path is not None:

            original_image = cv2.imread(image_path)
            original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

            image_data = image_preprocess(np.copy(original_image),
                                          [input_size, input_size])
            image_data = tf.expand_dims(image_data, 0)

            # the tiny model gives output at two different scales
            pred_bbox = self.tiny_YoloV3.predict(image_data)
            print(pred_bbox[0].shape)
            print(pred_bbox[1].shape)
            pred_bbox = [
                tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox
            ]
            pred_bbox = tf.concat(pred_bbox, axis=0)
            # print(pred_bbox)

            bboxes = postprocess_boxes(pred_bbox, original_image, input_size,
                                       score_threshold)
            print(bboxes.shape)
            bboxes = nms(bboxes, iou_threshold, method='nms')
            print(bboxes[0].shape)
            print(len(bboxes))

            image = draw_bbox(original_image,
                              bboxes,
                              CLASSES=self.CLASSES,
                              rectangle_colors=rectangle_colors)

            # print(image.shape)
            if output_path is not None:
                cv2.imwrite(output_path, image)
            if show:
                # show the image and hold the window open until a key is pressed
                cv2.imshow("predicted image", image)
                cv2.waitKey(0)
                # close the window after the key press
                cv2.destroyAllWindows()

            return image
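Both predict (Example #4) and detect_image above hand the flattened predictions to postprocess_boxes. A plausible sketch of that step, following the detect_image call signature and assuming rows of [x, y, w, h, objectness, class probabilities...] in the letterboxed input frame; the project's own helper may differ in details such as clipping invalid boxes.

import numpy as np

def postprocess_boxes(pred_bbox, original_image, input_size, score_threshold):
    # hypothetical sketch, not necessarily the project's exact code
    pred_bbox = np.array(pred_bbox)
    pred_xywh = pred_bbox[:, 0:4]
    pred_conf = pred_bbox[:, 4]
    pred_prob = pred_bbox[:, 5:]

    # (center x, center y, w, h) -> (x1, y1, x2, y2)
    pred_coor = np.concatenate(
        [pred_xywh[:, :2] - pred_xywh[:, 2:] * 0.5,
         pred_xywh[:, :2] + pred_xywh[:, 2:] * 0.5], axis=-1)

    # undo the letterbox scale and padding back to original image coordinates
    org_h, org_w = original_image.shape[:2]
    resize_ratio = min(input_size / org_w, input_size / org_h)
    dw = (input_size - resize_ratio * org_w) / 2
    dh = (input_size - resize_ratio * org_h) / 2
    pred_coor[:, 0::2] = (pred_coor[:, 0::2] - dw) / resize_ratio
    pred_coor[:, 1::2] = (pred_coor[:, 1::2] - dh) / resize_ratio

    # keep boxes whose best class score clears the threshold
    classes = np.argmax(pred_prob, axis=-1)
    scores = pred_conf * pred_prob[np.arange(len(pred_coor)), classes]
    mask = scores > score_threshold
    return np.concatenate([pred_coor[mask], scores[mask, None],
                           classes[mask, None]], axis=-1)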
Example #6
    def parse_annotation(self, annotation):
        line = annotation.split()
        image_path = line[0]
        if not os.path.exists(image_path):
            raise KeyError("%s does not exist" % image_path)

        image = np.array(cv2.imread(image_path))
        bboxes = np.array([list(map(lambda x: int(float(x)), box.split(',')))
                           for box in line[1:]])

        if self.data_aug:
            image, bboxes = self.random_horizontal_flip(np.copy(image),
                                                        np.copy(bboxes))
            image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes))
            image, bboxes = self.random_translate(np.copy(image),
                                                  np.copy(bboxes))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image, bboxes = utils.image_preprocess(
            np.copy(image), [self.train_input_size, self.train_input_size],
            np.copy(bboxes))
        return image, bboxes
Example #7
def telemetry(sid, data):
    # The current steering angle of the car
    steering_angle = data["steering_angle"]
    # The current throttle of the car
    throttle = data["throttle"]
    # The current speed of the car
    speed = data["speed"]
    # The current image from the center camera of the car
    imgString = data["image"]
    image = Image.open(BytesIO(base64.b64decode(imgString)))
    image_pre = np.asarray(image)
    new_image = utils.image_preprocess(image_pre, 64, 64)

    transformed_image_array = new_image[None, :, :, :]
    # This model currently assumes that the features of the model are just the images. Feel free to change this.
    steering_angle = float(
        model.predict(transformed_image_array, batch_size=1))
    # The driving model currently just outputs a constant throttle. Feel free to edit this.
    throttle = 0.05
    print(steering_angle, throttle)
    send_control(steering_angle, throttle)
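Note that this project's utils.image_preprocess takes separate height and width arguments rather than a [size, size] list, so it is a different helper from the YOLO examples above. A minimal sketch of such a resize-and-normalize variant; the cropping and color-space handling are assumptions.

import cv2
import numpy as np

def image_preprocess(image, height, width):
    # hypothetical sketch: resize to the model's input resolution and
    # scale pixel values to [0, 1]; the real helper may additionally
    # crop the hood/horizon or change color space before resizing
    resized = cv2.resize(image, (width, height))
    return resized.astype(np.float32) / 255.0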
Example #8
return_tensors = utils.read_pb_return_tensors(graph, pb_file, return_elements)

with tf.Session(graph=graph) as sess:
    vid = cv2.VideoCapture(video_path)
    success, frame = vid.read()
    size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    # VideoWriter_fourcc selects the video codec; 20 is the playback frame rate
    # fourcc = cv2.VideoWriter_fourcc(*'DIVX')
    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
    out = cv2.VideoWriter('output_3.mp4', fourcc, 20.0, size)
    num_frame = 0

    while success:
        frame_size = frame.shape[:2]
        image_data = utils.image_preprocess(np.copy(frame),
                                            [input_size, input_size])
        image_data = image_data[np.newaxis, ...]

        pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
            [return_tensors[1], return_tensors[2], return_tensors[3]],
            feed_dict={return_tensors[0]: image_data})

        pred_bbox = np.concatenate([
            np.reshape(pred_sbbox, (-1, 5 + num_classes)),
            np.reshape(pred_mbbox, (-1, 5 + num_classes)),
            np.reshape(pred_lbbox, (-1, 5 + num_classes))
        ],
                                   axis=0)

        bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size,
                                         0.4)
Example #9
import cv2
import numpy as np
import utils
import tensorflow as tf
from PIL import Image

return_elements = ["input/input_data:0", "yolo_v3_model/pred_sbbox/concat_2:0", "yolo_v3_model/pred_mbbox/concat_2:0",
                     "yolo_v3_model/pred_lbbox/concat_2:0"]
pb_file         = "./yolov3_coco.pb"
image_path      = "./576527.jpg"
num_classes     = 2
input_size      = 416
graph           = tf.Graph()

original_image = cv2.imread(image_path)
original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
original_image_size = original_image.shape[:2]
image_data = utils.image_preprocess(np.copy(original_image), [input_size, input_size])
image_data = image_data[np.newaxis, ...]

return_tensors = utils.read_pb_return_tensors(graph, pb_file, return_elements)

with tf.Session(graph=graph) as sess:
    pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
        [return_tensors[1], return_tensors[2], return_tensors[3]],
        feed_dict={return_tensors[0]: image_data})

print("pred_bbox: ", pred_sbbox)
print("pred_bbox shape: ", np.array(pred_sbbox).shape)

pred_bbox = np.concatenate([np.reshape(pred_sbbox, (-1, 5 + num_classes)),
                            np.reshape(pred_mbbox, (-1, 5 + num_classes)),
                            np.reshape(pred_lbbox, (-1, 5 + num_classes))], axis=0)
Example #10
    def detect_video(self,
                     video_path,
                     output_path=None,
                     input_size=416,
                     show=False,
                     score_threshold=0.3,
                     iou_threshold=0.45,
                     rectangle_colors=''):
        times = []
        vid = cv2.VideoCapture(video_path)

        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*'XVID')
        # output_path must be .mp4; skip the writer when no output is requested
        out = None
        if output_path is not None:
            out = cv2.VideoWriter(output_path, codec, fps, (width, height))

        while True:
            _, img = vid.read()
            if img is None:
                break  # end of the video stream

            original_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            image_data = image_preprocess(np.copy(original_image),
                                          [input_size, input_size])
            image_data = tf.expand_dims(image_data, 0)

            t1 = time.time()
            pred_bbox = self.tiny_YoloV3.predict(image_data)
            t2 = time.time()

            pred_bbox = [
                tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox
            ]
            pred_bbox = tf.concat(pred_bbox, axis=0)

            bboxes = postprocess_boxes(pred_bbox, original_image, input_size,
                                       score_threshold)
            bboxes = nms(bboxes, iou_threshold, method='nms')

            times.append(t2 - t1)
            times = times[-20:]

            ms = sum(times) / len(times) * 1000
            fps = 1000 / ms

            print("Time: {:.2f}ms, {:.1f} FPS".format(ms, fps))

            image = draw_bbox(original_image,
                              bboxes,
                              CLASSES=self.CLASSES,
                              rectangle_colors=rectangle_colors)
            image = cv2.putText(image, "FPS: {:.1f}".format(fps), (0, 30),
                                cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255),
                                2)

            if output_path is not None:
                out.write(image)
            if show:
                cv2.imshow('output', image)
                if cv2.waitKey(25) & 0xFF == ord("q"):
                    cv2.destroyAllWindows()
                    break

        cv2.destroyAllWindows()
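detect_video, like Examples #4 and #5, filters the decoded boxes with nms. For reference, a minimal greedy per-class NMS over rows of [x1, y1, x2, y2, score, class] could look like the sketch below; this is an assumption about the helper's behavior, and the real implementation may also support soft-NMS for other method values.

import numpy as np

def nms(bboxes, iou_threshold, method='nms'):
    # greedy per-class non-maximum suppression (sketch)
    best = []
    for cls in np.unique(bboxes[:, 5]):
        boxes = bboxes[bboxes[:, 5] == cls]
        while len(boxes) > 0:
            i = np.argmax(boxes[:, 4])  # highest-confidence box wins
            keep = boxes[i]
            best.append(keep)
            boxes = np.delete(boxes, i, axis=0)

            # IoU of the kept box against the remaining candidates
            x1 = np.maximum(keep[0], boxes[:, 0])
            y1 = np.maximum(keep[1], boxes[:, 1])
            x2 = np.minimum(keep[2], boxes[:, 2])
            y2 = np.minimum(keep[3], boxes[:, 3])
            inter = np.maximum(0.0, x2 - x1) * np.maximum(0.0, y2 - y1)
            area_keep = (keep[2] - keep[0]) * (keep[3] - keep[1])
            area_rest = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
            iou = inter / (area_keep + area_rest - inter + 1e-9)
            boxes = boxes[iou < iou_threshold]  # drop heavy overlaps
    return best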
Example #11
    def load_data(self, path, resize_height, resize_width, normalization):
        image = image_preprocess(path, resize_height, resize_width,
                                 normalization)
        return image
Example #12
def _main_():

    input_path = args.input

    uff_fpath = 'TensorRT/uff/{}.uff'.format(config.NET_BASENAME)
    engine_fpath = 'TensorRT/engines/{}.trt'.format(config.NET_BASENAME)

    if os.path.exists(engine_fpath):
        engine = tensorNet.createTrtFromPlan(engine_fpath)
    else:
        if os.path.exists(uff_fpath):
            engine = tensorNet.createTrtFromUFF(uff_fpath,
                                                config.INPUT_TENSOR_NAMES[0],
                                                'activation_1/Sigmoid')
            tensorNet.saveEngine(engine, engine_fpath)
        else:
            print('No .uff file!')
            exit(1)

    image_paths = []
    if os.path.isdir(input_path):
        for inp_file in os.listdir(input_path):
            image_paths += [os.path.join(input_path, inp_file)]
    else:
        image_paths += [input_path]

    image_paths = [
        inp_file for inp_file in image_paths
        if os.path.splitext(inp_file)[1].lower() in ('.jpg', '.jpeg', '.png', '.ppm')
    ]

    processing_count = 0
    sum_time = 0

    network_input_shp = (config.NETWORK_INPUT_W, config.NETWORK_INPUT_H,
                         config.NETWORK_INPUT_C)

    render_mode = True

    for image_path in tqdm(image_paths):
        image = cv2.imread(image_path)

        start_time = time.time()

        image_h, image_w, _ = image.shape

        input_img = image_preprocess(image, network_input_shp)

        # Convert 2 CHW
        image_chw = np.moveaxis(input_img, -1, 0)
        image_chw = np.ascontiguousarray(image_chw, dtype=np.float32)

        tensorNet.inference(engine, image_chw)

        mask_result = np.zeros((160, 320, 1), dtype=np.float32)
        print(mask_result.shape)
        tensorNet.getOutput(engine, 0, mask_result)

        sum_time += time.time() - start_time
        processing_count += 1

        if render_mode:
            cv2.imshow('result', np.uint8(mask_result))
            cv2.imshow('input', image)

            if cv2.waitKey(0) == 27:
                break  # esc to quit

    fps = processing_count / sum_time
    print('Result: {}'.format(fps))
Example #13
def get_mAP(Yolo,
            dataset,
            score_threshold=0.25,
            iou_threshold=0.50,
            TEST_INPUT_SIZE=TEST_INPUT_SIZE):
    MINOVERLAP = 0.5  # default value (defined in the PASCAL VOC2012 challenge)
    NUM_CLASS = read_class_names(TRAIN_CLASSES)

    ground_truth_dir_path = 'mAP/ground-truth'
    if os.path.exists(ground_truth_dir_path):
        shutil.rmtree(ground_truth_dir_path)

    if not os.path.exists('mAP'): os.mkdir('mAP')
    os.mkdir(ground_truth_dir_path)

    print(f'\ncalculating mAP{int(iou_threshold*100)}...\n')

    gt_counter_per_class = {}
    for index in range(dataset.num_samples):
        ann_dataset = dataset.annotations[index]

        original_image, bbox_data_gt = dataset.parse_annotation(
            ann_dataset, True)

        if len(bbox_data_gt) == 0:
            bboxes_gt = []
            classes_gt = []
        else:
            bboxes_gt, classes_gt = bbox_data_gt[:, :4], bbox_data_gt[:, 4]
        ground_truth_path = os.path.join(ground_truth_dir_path,
                                         str(index) + '.txt')
        num_bbox_gt = len(bboxes_gt)

        bounding_boxes = []
        for i in range(num_bbox_gt):
            class_name = NUM_CLASS[classes_gt[i]]
            xmin, ymin, xmax, ymax = list(map(str, bboxes_gt[i]))
            bbox = xmin + " " + ymin + " " + xmax + " " + ymax
            bounding_boxes.append({
                "class_name": class_name,
                "bbox": bbox,
                "used": False
            })

            # count that object
            if class_name in gt_counter_per_class:
                gt_counter_per_class[class_name] += 1
            else:
                # if class didn't exist yet
                gt_counter_per_class[class_name] = 1
        with open(f'{ground_truth_dir_path}/{str(index)}_ground_truth.json',
                  'w') as outfile:
            json.dump(bounding_boxes, outfile)

    gt_classes = list(gt_counter_per_class.keys())
    # sort the classes alphabetically
    gt_classes = sorted(gt_classes)
    n_classes = len(gt_classes)

    times = []
    json_pred = [[] for i in range(n_classes)]
    for index in range(dataset.num_samples):
        ann_dataset = dataset.annotations[index]

        image_name = ann_dataset[0].split('/')[-1]
        original_image, bbox_data_gt = dataset.parse_annotation(
            ann_dataset, True)

        image = image_preprocess(np.copy(original_image),
                                 [TEST_INPUT_SIZE, TEST_INPUT_SIZE])
        image_data = image[np.newaxis, ...].astype(np.float32)

        t1 = time.time()
        if YOLO_FRAMEWORK == "tf":
            pred_bbox = Yolo.predict(image_data)
        elif YOLO_FRAMEWORK == "trt":
            batched_input = tf.constant(image_data)
            result = Yolo(batched_input)
            pred_bbox = []
            for key, value in result.items():
                value = value.numpy()
                pred_bbox.append(value)

        t2 = time.time()

        times.append(t2 - t1)

        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)

        bboxes = postprocess_boxes(pred_bbox, original_image, TEST_INPUT_SIZE,
                                   score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')

        for bbox in bboxes:
            coor = np.array(bbox[:4], dtype=np.int32)
            score = bbox[4]
            class_ind = int(bbox[5])
            class_name = NUM_CLASS[class_ind]
            score = '%.4f' % score
            xmin, ymin, xmax, ymax = list(map(str, coor))
            bbox = xmin + " " + ymin + " " + xmax + " " + ymax
            json_pred[gt_classes.index(class_name)].append({
                "confidence": str(score),
                "file_id": str(index),
                "bbox": str(bbox)
            })

    ms = sum(times) / len(times) * 1000
    fps = 1000 / ms

    for class_name in gt_classes:
        json_pred[gt_classes.index(class_name)].sort(
            key=lambda x: float(x['confidence']), reverse=True)
        with open(f'{ground_truth_dir_path}/{class_name}_predictions.json',
                  'w') as outfile:
            json.dump(json_pred[gt_classes.index(class_name)], outfile)

    # Calculate the AP for each class
    sum_AP = 0.0
    ap_dictionary = {}
    # open file to store the results
    with open("mAP/results.txt", 'w') as results_file:
        results_file.write("# AP and precision/recall per class\n")
        count_true_positives = {}
        for class_index, class_name in enumerate(gt_classes):
            count_true_positives[class_name] = 0
            # Load predictions of that class
            predictions_file = f'{ground_truth_dir_path}/{class_name}_predictions.json'
            predictions_data = json.load(open(predictions_file))

            # Assign predictions to ground truth objects
            nd = len(predictions_data)
            tp = [0] * nd  # creates an array of zeros of size nd
            fp = [0] * nd
            for idx, prediction in enumerate(predictions_data):
                file_id = prediction["file_id"]
                # assign prediction to ground truth object if any
                #   open ground-truth with that file_id
                gt_file = f'{ground_truth_dir_path}/{str(file_id)}_ground_truth.json'
                ground_truth_data = json.load(open(gt_file))
                ovmax = -1
                gt_match = -1
                # load prediction bounding-box
                # bounding box of the prediction
                bb = [float(x) for x in prediction["bbox"].split()]
                for obj in ground_truth_data:
                    # look for a class_name match
                    if obj["class_name"] == class_name:
                        # bounding box of the ground truth
                        bbgt = [float(x) for x in obj["bbox"].split()]
                        bi = [
                            max(bb[0], bbgt[0]),
                            max(bb[1], bbgt[1]),
                            min(bb[2], bbgt[2]),
                            min(bb[3], bbgt[3])
                        ]
                        iw = bi[2] - bi[0] + 1
                        ih = bi[3] - bi[1] + 1
                        if iw > 0 and ih > 0:
                            # compute overlap (IoU) = area of intersection / area of union
                            ua = ((bb[2] - bb[0] + 1) * (bb[3] - bb[1] + 1)
                                  + (bbgt[2] - bbgt[0] + 1) * (bbgt[3] - bbgt[1] + 1)
                                  - iw * ih)
                            ov = iw * ih / ua
                            if ov > ovmax:
                                ovmax = ov
                                gt_match = obj

                # assign the prediction as true positive, duplicate, or false positive
                if ovmax >= MINOVERLAP:
                    if not bool(gt_match["used"]):
                        # true positive
                        tp[idx] = 1
                        gt_match["used"] = True
                        count_true_positives[class_name] += 1
                        # update the ".json" file
                        with open(gt_file, 'w') as f:
                            f.write(json.dumps(ground_truth_data))
                    else:
                        # false positive (multiple detection)
                        fp[idx] = 1
                else:
                    # false positive
                    fp[idx] = 1

            # compute precision/recall
            cumsum = 0
            for idx, val in enumerate(fp):
                fp[idx] += cumsum
                cumsum += val
            cumsum = 0
            for idx, val in enumerate(tp):
                tp[idx] += cumsum
                cumsum += val
            #print(tp)
            rec = tp[:]
            for idx, val in enumerate(tp):
                rec[idx] = float(tp[idx]) / gt_counter_per_class[class_name]
            #print(rec)
            prec = tp[:]
            for idx, val in enumerate(tp):
                prec[idx] = float(tp[idx]) / (fp[idx] + tp[idx])
            #print(prec)

            ap, mrec, mprec = voc_ap(rec, prec)
            sum_AP += ap
            text = f"{ap * 100:.3f}% = {class_name} AP"

            rounded_prec = ['%.3f' % elem for elem in prec]
            rounded_rec = ['%.3f' % elem for elem in rec]
            # Write to results.txt
            results_file.write(text + "\n Precision: " + str(rounded_prec) +
                               "\n Recall   : " + str(rounded_rec) + "\n\n")

            print(text)
            ap_dictionary[class_name] = ap

        results_file.write("\n# mAP of all classes\n")
        mAP = sum_AP / n_classes

        text = "mAP = {:.3f}%, {:.2f} FPS".format(mAP * 100, fps)
        results_file.write(text + "\n")
        print(text)

        return mAP * 100
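Example #13 leaves voc_ap undefined on this page. A minimal sketch of the standard VOC-style all-point interpolation that matches the ap, mrec, mprec = voc_ap(rec, prec) call above; a reference implementation, not necessarily the one this project uses.

def voc_ap(rec, prec):
    # pad the curves with sentinel values
    mrec = [0.0] + list(rec) + [1.0]
    mprec = [0.0] + list(prec) + [0.0]

    # make precision monotonically decreasing (precision envelope)
    for i in range(len(mprec) - 2, -1, -1):
        mprec[i] = max(mprec[i], mprec[i + 1])

    # integrate precision over the recall steps where recall changes
    ap = 0.0
    for i in range(1, len(mrec)):
        if mrec[i] != mrec[i - 1]:
            ap += (mrec[i] - mrec[i - 1]) * mprec[i]
    return ap, mrec, mprec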