def __init__(self, prior_boxes, dataset_name='VOC2007',
                 box_scale_factors=[.1, .1, .2, .2],
                 background_index=0, lower_probability_threshold=.1,
                 iou_threshold=.2, class_names=None):
        """Store the detection settings and build per-class drawing helpers.

        # Arguments
            prior_boxes: Stored unchanged on the instance.
            dataset_name: Only used to look up the class names (through
                `get_class_names`) when `class_names` is None.
            box_scale_factors: Stored unchanged on the instance.
            background_index: Stored unchanged on the instance.
            lower_probability_threshold: Stored unchanged on the instance.
            iou_threshold: Stored unchanged on the instance.
            class_names: Optional sequence of class names; when None the
                names are taken from `get_class_names(dataset_name)`.
        """
        # NOTE: the default `box_scale_factors` list is created once at
        # definition time and is therefore shared by every instance that
        # relies on the default -- it must not be mutated in place.
        self.prior_boxes = prior_boxes
        self.box_scale_factors = box_scale_factors
        self.background_index = background_index
        self.iou_threshold = iou_threshold
        self.lower_probability_threshold = lower_probability_threshold

        # Fall back to the dataset's own class names when none were given.
        self.class_names = class_names
        if self.class_names is None:
            self.class_names = get_class_names(dataset_name)
        self.num_classes = len(self.class_names)

        # One colour per class: sample the HSV colormap at evenly spaced
        # points in [0, 1] and scale the result by 255.
        sample_points = np.linspace(0, 1, self.num_classes)
        colormap_rows = plt.cm.hsv(sample_points).tolist()
        self.colors = np.asarray(colormap_rows) * 255

        # Integer index -> class name lookup.
        self.arg_to_class = dict(enumerate(self.class_names))
        self.font = cv2.FONT_HERSHEY_SIMPLEX
# Evaluation script fragment: loads the trained SSD300 model and starts a
# per-class loop over the ground-truth data. `dataset_name` and `data_prefix`
# are not defined in this fragment and must come from earlier code.
image_prefix = '../datasets/VOCtest/VOCdevkit/VOC2007/JPEGImages/'
weights_path = '../trained_models/SSD300_weights.hdf5'
model = SSD300(weights_path=weights_path)
# from utils.wtf import flip_l2_weights
# model = flip_l2_weights(model)
prior_boxes = create_prior_boxes()
# Elements 1 and 2 of the model's input shape (presumably height and width
# for a channels-last model -- TODO confirm).
input_shape = model.input_shape[1:3]
class_threshold = .1
iou_threshold = .45

# Presumably meant to collect one average precision per class; nothing is
# appended to it in the visible part of the loop.
average_precisions = []
# range(1, 21)
# Visit class indices 1..20; index 0 is always paired with the current class
# when `selected_classes` is built below.
for ground_truth_class_arg in range(1, 21):
    labels = []
    scores = []
    class_names = get_class_names(dataset_name)
    # ground_truth_class_arg = class_arg
    # Two-element selection: the first class name plus the class under
    # evaluation.
    selected_classes = [class_names[0]] + [class_names[ground_truth_class_arg]]
    num_ground_truth_boxes = 0
    class_decoder = get_arg_to_class(class_names)
    num_classes = len(class_names)
    # Load the ground truth restricted to the selected classes.
    data_manager = DataManager(dataset_name, selected_classes, data_prefix,
                               image_prefix)
    ground_truth_data = data_manager.load_data()
    difficult_data_flags = data_manager.parser.difficult_objects

    # Sort the keys so images are visited in a deterministic order.
    image_names = sorted(list(ground_truth_data.keys()))
    # print('Number of images found:', len(image_names))
    for image_name in image_names:
        ground_truth_sample = ground_truth_data[image_name]
        # Rebinds the module-level `image_prefix` to the data manager's value;
        # the remainder of this loop body is not visible in this fragment.
        image_prefix = data_manager.image_prefix
from utils.evaluation import record_detections
from utils.datasets import get_class_names
from models.ssd import SSD300
# from tf_eval import evaluate_detection_results_pascal_voc
import pickle


# Settings for recording detections: dataset locations, the trained SSD300
# weights file and the two IoU thresholds handed to `record_detections`.
dataset_name = 'VOC2007'
data_prefix = '../datasets/VOCdevkit/VOC2007/Annotations/'
image_prefix = '../datasets/VOCdevkit/VOC2007/JPEGImages/'
weights_path = '../trained_models/SSD300_weights.hdf5'
iou_threshold = .5
iou_nms_threshold = .45
# class_names = ['background', 'aeroplane', 'bird']
class_names = get_class_names()
model = SSD300(weights_path=weights_path)
data_records = record_detections(model, dataset_name, data_prefix,
                                 image_prefix, class_names, iou_threshold,
                                 iou_nms_threshold)
# Write through a context manager so the file is flushed and closed
# deterministically, even if pickling fails, instead of leaking the handle.
with open('data_records.pkl', 'wb') as records_file:
    pickle.dump(data_records, records_file)

"""
data_records = pickle.load(open('data_records.pkl', 'rb'))
class_ids = list(range(len(class_names)))
class_ids = [int(class_id) for class_id in class_ids]
categories = []
for class_id, class_name in zip(class_ids, class_names):
    category = dict = {}
    category['id'] = class_id
    category['name'] = class_name
    categories.append(category)
import time
import argparse
import numpy as np
import pickle

if sys.version_info[0] == 2:
    import xml.etree.cElementTree as ET
else:
    import xml.etree.ElementTree as ET


def str2bool(v):
    """Return True when `v`, lower-cased, is "yes", "true", "t" or "1".

    Any other string yields False. `v` must provide a `lower()` method.
    """
    truthy_spellings = ("yes", "true", "t", "1")
    normalized = v.lower()
    return normalized in truthy_spellings


# Class names for VOC2007 with the first entry dropped (presumably the
# background class -- confirm against `get_class_names`).
labelmap = get_class_names('VOC2007')
labelmap = labelmap[1:]
# Independent copy, so later changes to `labelmap` do not affect VOC_CLASSES.
VOC_CLASSES = labelmap.copy()
weights_path = '../trained_models/ssd_300_VOC0712.pth'
parser = argparse.ArgumentParser(description='Single Shot MultiBox Detection')
voc_root = '../datasets/VOCdevkit'
# The bare string literal below holds a disabled earlier version of the
# --trained_model argument; as an expression statement it has no effect.
"""
parser.add_argument('--trained_model',
                    default='weights/ssd300_mAP_77.43_v2.pth',
                    type=str, help='Trained state_dict file path to open')
"""

# Active definition: the default model path is `weights_path` defined above.
parser.add_argument('--trained_model',
                    default=weights_path,
                    type=str, help='Trained state_dict file path to open')
parser.add_argument('--save_folder', default='eval/', type=str,
Exemplo n.º 5
0
def _main(args):
    """Detect objects in every image of a folder and compute trajectories.

    Steps, in order:
      1. Parse the oxts and timestamp files with `parse_oxts`.
      2. Build an SSD300 model, create its prior boxes and load the weights
         from `args.model_path`.
      3. For each readable image in `args.input_path` (visited in sorted
         file-name order), run `predict`. For detections of a tracked class
         scoring above 0.2, compute coordinates (`computeCoordinates`) and a
         histogram (`histogram`), log them and collect them for the frame.
      4. Append each frame's detections to the module-level `data_frames`
         list and, when the module-level `saveImages` flag is truthy, draw
         the boxes and save the frame into `args.output_path`.
      5. Call `calculate_trajectories` and print timing information.

    Args:
        args: Object with the attributes `input_path`, `output_path`,
            `oxts`, `time_stamps` and `model_path` (presumably an argparse
            namespace -- confirm against the caller).
    """
    # Per-frame detections are accumulated in this module-level list.
    global data_frames

    start_time = timer()
    input_path = os.path.expanduser(args.input_path)
    output_path = os.path.expanduser(args.output_path)

    if not os.path.exists(output_path):
        print('Creating output path {}'.format(output_path))
        # makedirs (rather than mkdir) so that an output path whose parent
        # directories do not exist yet is created instead of raising.
        os.makedirs(output_path)

    logging.basicConfig(filename=output_path + "/tracking.log", level=logging.DEBUG)

    # Parse car positions and angles. Paths starting with '..' are taken
    # relative to the input folder.
    print("Parsing timestamps and oxts files...")
    if args.oxts.startswith('..'):
        parse_oxts(input_path + "/" + args.oxts, input_path + "/" + args.time_stamps)
    else:
        parse_oxts(args.oxts, args.time_stamps)
    print("Done. Data acquired.")

    dataset_name = 'VOC2007'
    NUM_CLASSES = 21

    weights_filename = args.model_path
    model = SSD300(num_classes=NUM_CLASSES)
    prior_boxes = create_prior_boxes(model)
    model.load_weights(weights_filename)

    # drawing stuff

    # Generate colors for drawing bounding boxes.
    hsv_tuples = [(x / NUM_CLASSES, 1., 1.)
                  for x in range(NUM_CLASSES)]
    colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
    colors = list(
        map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
            colors))
    random.seed(10101)  # Fixed seed for consistent colors across runs.
    random.shuffle(colors)  # Shuffle colors to decorrelate adjacent classes.
    random.seed(None)  # Reset seed to default.

    class_names = get_class_names(dataset_name)
    box_scale_factors = [.1, .1, .2, .2]  # maybe adjust to our image size
    background_index = 0
    lower_probability_threshold = .1
    iou_threshold = .2
    num_classes = len(class_names)  # should be equal to NUM_CLASSES
    arg_to_class = dict(zip(list(range(num_classes)), class_names))

    frame_idx = 0

    # os.listdir returns entries in arbitrary order, but `frame_idx` is passed
    # to computeCoordinates and the frames are appended to `data_frames` in
    # visiting order, so iterate in sorted (file-name) order to keep the frame
    # sequence deterministic.
    for image_file in sorted(os.listdir(input_path)):
        # Skip anything that is not a recognisable image (or is a directory).
        try:
            image_type = imghdr.what(os.path.join(input_path, image_file))
            if not image_type:
                print("frame dropped")
                continue
        except IsADirectoryError:
            print("frame dropped")
            continue
        image = Image.open(os.path.join(input_path, image_file))
        image_data = np.array(image)

        selected_boxes = predict(model, image_data, prior_boxes,
                                 image_data.shape[0:2], num_classes,
                                 lower_probability_threshold,
                                 iou_threshold,
                                 background_index,
                                 box_scale_factors)
        if selected_boxes is not None:
            # Columns 0-3 hold the box corners, the remaining columns the
            # per-class scores.
            x_mins = selected_boxes[:, 0]
            y_mins = selected_boxes[:, 1]
            x_maxs = selected_boxes[:, 2]
            y_maxs = selected_boxes[:, 3]
            classes = selected_boxes[:, 4:]
            num_boxes = len(selected_boxes)
        else:
            # With zero boxes the per-box loop below never runs, so the
            # column arrays above are not needed.
            num_boxes = 0
            print("frame dropped, no boxes")

        print('Found {} boxes for {}'.format(num_boxes, image_file))

        font = ImageFont.truetype(
            font='font/FiraMono-Medium.otf',
            size=11)  # np.floor(3e-2 * image.size[1] + 0.5).astype('int32')
        thickness = (image_data.shape[0] + image_data.shape[1]) // 300

        logging.info("Img: " + str(image_file))

        boxes_data = []

        for i in range(num_boxes):
            xmin = int(x_mins[i])
            ymin = int(y_mins[i])
            xmax = int(x_maxs[i])
            ymax = int(y_maxs[i])
            box_class_scores = classes[i]
            label_class = np.argmax(box_class_scores)
            score = box_class_scores[label_class]
            predicted_class = arg_to_class[label_class]

            box = [ymin, xmin, ymax, xmax]
            box = [max(0, v) for v in box]  # sometimes it's negative.

            # log positions

            # NOTE(review): because this starts as an empty array rather than
            # None, untracked classes take the "with coordinates" label branch
            # below -- confirm whether that is intended.
            obj_coord = np.array([])

            # object and classes to track
            if predicted_class in ["person", "bicycle", "car", "motorbike", "bus", "train", "truck"] and score > 0.2:
                # Buses and trucks are tracked as generic cars.
                # (The original compared against "bruck", a typo that could
                # never match, so trucks were never folded into "car".)
                if predicted_class in ["bus", "truck"]:  # vehicle
                    predicted_class = "car"
                obj_coord = computeCoordinates(box, frame_idx)
                if obj_coord is not None:
                    hist = histogram(image_data, box)
                    # create data and store it
                    boxes_data.append({
                        'predicted_class': predicted_class,
                        'score': float(score),
                        'coord': obj_coord,
                        'hist': hist
                        })
                    logging.info(predicted_class + " :" + str(obj_coord) + " | " + str(np.linalg.norm(obj_coord)))

            # end log positions
            if saveImages:
                if obj_coord is not None:
                    label = '{} {:.2f} {} {:.2f}'.format(predicted_class, score, str(obj_coord), np.linalg.norm(obj_coord))
                else:
                    label = '{} {:.2f}'.format(predicted_class, score)
                draw = ImageDraw.Draw(image)
                # NOTE(review): ImageDraw.textsize was removed in Pillow 10;
                # switch to textbbox if the Pillow dependency is upgraded.
                label_size = draw.textsize(label, font)

                top, left, bottom, right = box
                top = max(0, np.floor(top + 0.5).astype('int32'))
                left = max(0, np.floor(left + 0.5).astype('int32'))
                bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
                right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
                print(label, (left, top), (right, bottom))

                # Put the label above the box when there is room, otherwise
                # just inside its top edge.
                if top - label_size[1] >= 0:
                    text_origin = np.array([left, top - label_size[1]])
                else:
                    text_origin = np.array([left, top + 1])

                # Draw nested 1-pixel rectangles to get the wanted thickness.
                for offset in range(thickness):
                    draw.rectangle(
                        [left + offset, top + offset, right - offset, bottom - offset],
                        outline=colors[label_class])
                draw.rectangle(
                    [tuple(text_origin), tuple(text_origin + label_size)],
                    fill=colors[label_class])
                draw.text(text_origin, label, fill=(0, 0, 0), font=font)
                del draw

        frame_idx += 1
        data_frames.append(boxes_data)
        if saveImages:
            image.save(os.path.join(output_path, image_file), quality=80)

    now = timer()
    start_trj_time = timer()
    print("Time elapsed CNN: " + str(now - start_time) + " seconds")
    print("Calculating trajectories...")
    calculate_trajectories()

    now = timer()
    print("Done. Time elapsed: " + str(now - start_trj_time) + " seconds\n\n")
    print("Total time elapsed: " + str(now - start_time) + " seconds")
Exemplo n.º 6
0
    if target_size is not None:
        image_array = imresize(image_array, target_size)
    return image_array, (height, width)


# parameters
# Settings for building and loading both a PyTorch and a Keras SSD model
# against the VOC2007 data.
trained_weights_path = '../trained_models/ssd_300_VOC0712.pth'
input_size = 300
num_classes = 21
iou_threshold = .5
lower_probability_threshold = .01
background_index = 0
dataset_name = 'VOC2007'
data_prefix = '../datasets/VOCdevkit/VOC2007/Annotations/'
image_prefix = '../datasets/VOCdevkit/VOC2007/JPEGImages/'
selected_classes = get_class_names(dataset_name)
prior_boxes = create_prior_boxes()

# loading pytorch model
# NOTE(review): the network is built with the 'train' argument although only
# weights are loaded here -- confirm this is the intended phase.
pytorch_ssd = build_ssd('train', input_size, num_classes)
pytorch_ssd.load_weights(trained_weights_path)

# loading keras model
weights_path = '../trained_models/SSD300_weights.hdf5'
# The Keras model is constructed under the CPU device scope.
with tf.device('/cpu:0'):
    model = SSD300(weights_path=weights_path)

image_path = '../images/boys.jpg'
# Data manager over every class name of the dataset.
data_manager = DataManager(dataset_name, selected_classes, data_prefix,
                           image_prefix)
# import torch.utils.data as data

from data import VOCDetection, BaseTransform
from ssd import build_ssd

import sys
import os
import time
import argparse
import numpy as np
import pickle
# import cv2

# Locations of the trained weights and of the VOC devkit root.
trained_model_path = '../trained_models/SSD300_weights.hdf5'
voc_root = '../datasets/VOCdevkit/'
labelmap = get_class_names()
# Every class name except the first one (presumably the background class --
# confirm against `get_class_names`).
VOC_CLASSES = labelmap[1:]

if sys.version_info[0] == 2:
    import xml.etree.cElementTree as ET
else:
    import xml.etree.ElementTree as ET


def str2bool(v):
    """Interpret a string as a boolean flag.

    Returns True only when the lower-cased `v` equals one of "yes", "true",
    "t" or "1"; every other string gives False.
    """
    lowered = v.lower()
    for accepted in ("yes", "true", "t", "1"):
        if lowered == accepted:
            return True
    return False


class AnnotationTransform(object):
    def __init__(self, class_to_ind=None, keep_difficult=False):
        self.class_to_ind = class_to_ind or dict(