Exemplo n.º 1
0
    def decode_netout(self, netout, obj_threshold=0.3, nms_threshold=0.3):
        """Convert a raw network output tensor into a list of bounding boxes.

        Args:
            netout: numpy array indexed as ``[row, col, box, channel]``.
                Channels 0-3 are the raw x, y, w, h values, channel 4 the raw
                objectness and channels 5+ the raw class scores. The array is
                left untouched; decoding happens on a copy.
            obj_threshold: class scores that do not exceed this value are
                zeroed, and boxes whose ``get_score()`` does not exceed it
                are dropped from the result.
            nms_threshold: IoU at or above which a lower-ranked box has its
                score for the current class set to zero.

        Returns:
            list of ``BoundBox``: the boxes that survive thresholding and
            non-maximum suppression, with x/y/w/h divided by the grid size.
        """
        # Decode a private copy: the statements below overwrite channels 4+
        # in place, so working on the caller's array would alter it and make
        # a second call on the same array apply sigmoid/softmax twice.
        netout = netout.copy()

        grid_h, grid_w, nb_box = netout.shape[:3]

        # objectness -> sigmoid; class scores -> objectness * softmax, with
        # every score that does not exceed the threshold zeroed out
        netout[..., 4] = self.sigmoid(netout[..., 4])
        netout[..., 5:] = netout[..., 4][..., np.newaxis] * self.softmax(
            netout[..., 5:])
        netout[..., 5:] *= netout[..., 5:] > obj_threshold

        boxes = []
        for row in range(grid_h):
            for col in range(grid_w):
                for b in range(nb_box):
                    # channels 5+ are the (already thresholded) class scores
                    classes = netout[row, col, b, 5:]
                    if not np.sum(classes) > 0:
                        continue

                    # channels 0-3 are the raw x, y, w, h
                    x, y, w, h = netout[row, col, b, :4]

                    # centre: cell offset plus sigmoid, divided by grid size
                    x = (col + self.sigmoid(x)) / grid_w
                    y = (row + self.sigmoid(y)) / grid_h
                    # size: anchor prior times exp, divided by grid size
                    w = self.anchors[2 * b + 0] * np.exp(w) / grid_w
                    h = self.anchors[2 * b + 1] * np.exp(h) / grid_h
                    confidence = netout[row, col, b, 4]

                    boxes.append(BoundBox(x, y, w, h, confidence, classes))

        # suppress non-maximal boxes, one class at a time
        for c in range(self.nb_class):
            # box indices ordered by descending score for class c
            order = np.argsort([box.classes[c] for box in boxes])[::-1]

            for rank, index_i in enumerate(order):
                if boxes[index_i].classes[c] == 0:
                    continue

                for index_j in order[rank + 1:]:
                    if self.bbox_iou(boxes[index_i],
                                     boxes[index_j]) >= nms_threshold:
                        boxes[index_j].classes[c] = 0

        # keep only the boxes whose score still exceeds obj_threshold
        return [box for box in boxes if box.get_score() > obj_threshold]
Exemplo n.º 2
0
    def decode_netout(self, netout, obj_threshold=0.3, nms_threshold=0.3):
        """Decode the network output into thresholded, NMS-filtered boxes.

        Args:
            netout: numpy array indexed as ``[row, col, box, channel]``;
                channels 0-3 hold raw x, y, w, h, channel 4 raw objectness,
                channels 5+ raw class scores. It is not modified.
            obj_threshold: minimum score; class scores that do not exceed it
                are zeroed and boxes whose ``get_score()`` does not exceed
                it are discarded.
            nms_threshold: IoU at or above which the lower-ranked of two
                boxes loses its score for the class being processed.

        Returns:
            list of ``BoundBox`` objects that remain after filtering.
        """
        # Work on a copy so the caller's array is not overwritten (decoding
        # the same array twice would otherwise apply the sigmoid twice).
        netout = netout.copy()

        grid_h, grid_w, nb_box = netout.shape[:3]

        boxes = []

        # decode the output by the network
        netout[..., 4] = self.sigmoid(netout[..., 4])
        netout[..., 5:] = netout[..., 4][..., np.newaxis] * self.softmax(netout[..., 5:])
        netout[..., 5:] *= netout[..., 5:] > obj_threshold

        for row in range(grid_h):
            for col in range(grid_w):
                for b in range(nb_box):
                    # elements from index 5 onwards are the class scores
                    classes = netout[row, col, b, 5:]

                    if np.sum(classes) > 0:
                        # first 4 elements are x, y, w, and h
                        x, y, w, h = netout[row, col, b, :4]

                        # cell offset plus sigmoid, divided by the grid size
                        x = (col + self.sigmoid(x)) / grid_w
                        y = (row + self.sigmoid(y)) / grid_h
                        # anchor prior times exp, divided by the grid size
                        w = self.anchors[2 * b + 0] * np.exp(w) / grid_w
                        h = self.anchors[2 * b + 1] * np.exp(h) / grid_h
                        confidence = netout[row, col, b, 4]

                        boxes.append(BoundBox(x, y, w, h, confidence, classes))

        # suppress non-maximal boxes
        for c in range(self.nb_class):
            sorted_indices = list(reversed(np.argsort([box.classes[c] for box in boxes])))

            # ``range`` instead of ``xrange`` so this runs on Python 3 too
            for i in range(len(sorted_indices)):
                index_i = sorted_indices[i]

                if boxes[index_i].classes[c] == 0:
                    continue

                for j in range(i + 1, len(sorted_indices)):
                    index_j = sorted_indices[j]

                    if self.bbox_iou(boxes[index_i], boxes[index_j]) >= nms_threshold:
                        boxes[index_j].classes[c] = 0

        # remove the boxes whose score does not exceed obj_threshold
        boxes = [box for box in boxes if box.get_score() > obj_threshold]

        return boxes
Exemplo n.º 3
0
    def __init__(
            self,
            instances,
            anchors,
            labels,
            downsample=32,  # ratio between network input's size and network output's size, 32 for YOLOv3
            max_box_per_image=30,
            batch_size=1,
            min_net_size=320,
            max_net_size=608,
            shuffle=True,
            jitter=True,
            norm=None):
        """Store the generator settings and build the anchor boxes.

        ``anchors`` is read as a flat sequence of (width, height) pairs; a
        trailing unpaired value is ignored. ``min_net_size`` and
        ``max_net_size`` are rounded down to a multiple of ``downsample``.
        When ``shuffle`` is true, ``instances`` is shuffled in place.
        """

        def _round_down(size):
            # largest multiple of the downsample ratio not exceeding size
            return (size // downsample) * downsample

        self.instances = instances
        self.labels = labels
        self.batch_size = batch_size
        self.max_box_per_image = max_box_per_image
        self.downsample = downsample
        self.min_net_size = _round_down(min_net_size)
        self.max_net_size = _round_down(max_net_size)
        self.shuffle, self.jitter, self.norm = shuffle, jitter, norm

        # one zero-origin box per (width, height) pair
        self.anchors = [
            BoundBox(0, 0, width, height)
            for width, height in zip(anchors[0::2], anchors[1::2])
        ]

        # initial network input size
        self.net_h = self.net_w = 416

        if shuffle:
            np.random.shuffle(self.instances)
def get_ground_truth(boxes):
    """Build the ground-truth tensor for one image from its boxes.

    Args:
        boxes: iterable of ``(x, y, w, h)`` boxes. Presumably pixel units
            with (x, y) the top-left corner -- TODO confirm against caller.

    Returns:
        ``np.float32`` array of shape
        ``(grid_h, grid_w, num_box, 4 + 1 + num_classes)``. For each box the
        cell containing its centre and the best-matching anchor get: channel
        0 set to 1.0, channels 1-4 set to the box (centre x, centre y, w, h
        in grid units) and channel 5 set to 1.0.
    """
    gt = np.zeros((grid_h, grid_w, num_box, 4 + 1 + num_classes),
                  dtype=np.float32)

    for bbox in boxes:
        bx, by, bw, bh = bbox
        # box centre expressed in grid cells
        center_x = (bx + bw / 2.) / float(image_w / grid_w)
        center_y = (by + bh / 2.) / float(image_h / grid_h)
        # Clamp the cell index to the grid: a box centred exactly on the
        # right/bottom image edge would otherwise index one past the end,
        # and a negative centre would silently wrap around.
        cell_x = int(np.clip(np.floor(center_x), 0.0, (grid_w - 1)))
        cell_y = int(np.clip(np.floor(center_y), 0.0, (grid_h - 1)))
        center_w = bw / grid_size
        center_h = bh / grid_size
        box = [center_x, center_y, center_w, center_h]

        # find the anchor that best predicts this box (first one wins ties)
        best_anchor = -1
        max_iou = -1

        # compare shapes only: both boxes sit at the origin
        shifted_box = BoundBox(0, 0, center_w, center_h)

        for i, anchor in enumerate(anchor_boxes):
            iou = bbox_iou(shifted_box, anchor)

            if max_iou < iou:
                best_anchor = i
                max_iou = iou

        # channel 0: object flag, 1-4: box, 5: first class slot
        gt[cell_y, cell_x, best_anchor, 0] = 1.0
        gt[cell_y, cell_x, best_anchor, 1:5] = box
        gt[cell_y, cell_x, best_anchor, 5] = 1.0
    return gt
Exemplo n.º 5
0
    def __init__(self,
                 img_files,
                 config,
                 batch_size,
                 shuffle=False,
                 jitter=True,
                 norm=None):
        """Record the file list and config, and derive event/batch counts.

        Reads ``EVTS_PER_FILE``, ``LABELS``, ``GRID_W``, ``GRID_H`` and
        ``ANCHORS`` from ``config``. ``ANCHORS`` is consumed as a flat
        sequence of (width, height) pairs.
        """
        self.generator = None

        self.config = config
        self.filelist = img_files
        self.batch_size = batch_size

        events_per_file = config['EVTS_PER_FILE']
        self.evts_per_file = events_per_file
        # total number of events over all files, and batches per pass
        self.nevts = len(img_files) * events_per_file
        self.nbatches = int(self.nevts * (1. / self.batch_size))

        self.num_classes = len(config['LABELS'])
        self.num_grid_x, self.num_grid_y = config['GRID_W'], config['GRID_H']

        self.shuffle, self.jitter, self.norm = shuffle, jitter, norm

        anchor_values = config['ANCHORS']
        self.anchors = [
            BoundBox(0, 0, anchor_values[k], anchor_values[k + 1])
            for k in range(0, 2 * int(len(anchor_values) // 2), 2)
        ]
        '''
    def __init__(self,
                 config,
                 images_dir,
                 images,
                 annotations,
                 shuffle=True,
                 jitter=False,
                 norm=None):
        """Keep the dataset handles and copy model settings from ``config``.

        Everything under ``config["model"]`` that this generator needs
        (image size, box counts, grid count, input size, anchors) is copied
        onto the instance. ``anchors`` is consumed as a flat sequence of
        (width, height) pairs.
        """
        self.config = config
        self.images = images
        self.images_dir = images_dir
        self.annotations = annotations
        self.shuffle, self.jitter, self.norm = shuffle, jitter, norm

        model_cfg = self.config["model"]
        # height and width both come from the single "image_size" entry
        self.image_height = self.image_width = model_cfg["image_size"]
        self.true_box_buffer = model_cfg["nb_box"]
        self.number_of_grids = model_cfg["horizontal_grids"]
        self.box = model_cfg["box"]
        self.input_image_width = model_cfg["input_image_width"]
        self.input_image_height = model_cfg["input_image_height"]

        anchor_values = config["model"]["anchors"]
        self.anchors = [
            BoundBox(0, 0, width, height)
            for width, height in zip(anchor_values[0::2], anchor_values[1::2])
        ]
Exemplo n.º 7
0
    def __init__(self,
                 config,
                 image_fps,
                 annotations,
                 shuffle=True,
                 jitter=True,
                 norm=None):
        '''Set up a generator over image file paths and their annotations.

        config: dictionary of constants; only ``config['ANCHORS']`` is read
            here, as a flat sequence of (width, height) pairs
        image_fps: list of image file paths; shuffled in place when
            ``shuffle`` is true
        annotations: labels for the images (presumably keyed by file path
            -- verify against the caller)
        shuffle, jitter, norm: stored unchanged on the instance
        '''
        self.generator = None

        self.config = config
        self.image_fps = image_fps
        self.image_annotations = annotations
        self.ORIG_SIZE = 1024

        self.shuffle, self.jitter, self.norm = shuffle, jitter, norm

        anchor_values = config['ANCHORS']
        pair_count = int(len(anchor_values) // 2)
        self.anchors = []
        for pair in range(pair_count):
            width, height = anchor_values[2 * pair], anchor_values[2 * pair + 1]
            self.anchors.append(BoundBox(0, 0, width, height))

        if shuffle:
            np.random.shuffle(self.image_fps)
def to_bboxes(bboxes):
    """Wrap each ``(x, y, w, h)`` item as ``BoundBox(x, y, x + w, y + h)``.

    Returns a new list in the same order as ``bboxes``.
    """
    from utils import BoundBox
    return [BoundBox(x, y, x + w, y + h) for x, y, w, h in bboxes]
Exemplo n.º 9
0
    def __init__(self, images, config, shuffle=True, jitter=True, norm=None):
        """Store the dataset and config and build the augmentation pipeline.

        Args:
            images: the dataset entries; shuffled in place when ``shuffle``
                is true.
            config: mapping; ``config['ANCHORS']`` is read as a flat
                sequence of (width, height) pairs.
            shuffle, jitter, norm: stored unchanged on the instance.
        """
        self.generator = None

        self.images = images
        self.config = config

        self.shuffle = shuffle
        self.jitter = jitter
        self.norm = norm

        anchor_values = config['ANCHORS']
        self.anchors = [
            BoundBox(0, 0, anchor_values[2 * i], anchor_values[2 * i + 1])
            for i in range(int(len(anchor_values) // 2))
        ]

        # Candidate augmenters; SomeOf((0, 5)) below picks between 0 and 5
        # of them per image and runs them in random order.
        candidates = [
            # exactly one kind of blur
            iaa.OneOf([
                iaa.GaussianBlur((0, 3.0)),  # sigma between 0 and 3.0
                iaa.AverageBlur(k=(2, 7)),  # local means, kernel size 2 to 7
                iaa.MedianBlur(k=(3, 11)),  # local medians, kernel size 3 to 11
            ]),
            iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5)),
            iaa.AdditiveGaussianNoise(
                loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5),
            iaa.OneOf([
                # randomly remove 1% to 10% of the pixels
                iaa.Dropout((0.01, 0.1), per_channel=0.5),
            ]),
            # brightness: add -10 to 10 to the original value
            iaa.Add((-10, 10), per_channel=0.5),
            # brightness: multiply by 0.5 to 1.5
            iaa.Multiply((0.5, 1.5), per_channel=0.5),
            # improve or worsen the contrast
            iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5),
        ]

        self.aug_pipe = iaa.Sequential(
            [iaa.SomeOf((0, 5), candidates, random_order=True)],
            random_order=True)

        if shuffle:
            np.random.shuffle(self.images)
Exemplo n.º 10
0
def _main_(args):
    """Run YOLOv3 person detection over every video folder and dump results.

    For each sub-folder of the hard-coded data folder, every ``*jpg`` image
    is passed through darknet and one text line per confident 'person'
    detection is written to ``det_mot(before_ft)/<video_name>.txt``.
    ``args`` is not used.
    """
    # --- data to be detected -------------------------------------------
    data_folder = "/home/peng/data/sort_data/images/"
    video_folders_list = sorted(glob.glob(data_folder + '*'))
    sort_nicely(video_folders_list)

    # --- model and trained weights (original YOLOv3 / COCO) ------------
    dn.set_gpu(0)
    net = dn.load_net("cfg/yolov3.cfg", "yolov3.weights", 0)
    meta = dn.load_meta("cfg/coco.data")

    # --- predict bounding boxes ----------------------------------------
    for video_folder in video_folders_list:
        video_name = basename(video_folder)
        print("Processing %s." % video_name)

        image_paths = sorted(glob.glob(os.path.join(video_folder, '*jpg')))
        sort_nicely(image_paths)

        # Remember to modify the following output path
        out_path = 'det_mot(before_ft)/' + video_name + '.txt'
        with open(out_path, 'w') as out_file:
            # frame numbers in the output are 1-based
            for frame_no, image_path in enumerate(tqdm(image_paths), start=1):
                detections = dn.detect(net,
                                       meta,
                                       image_path,
                                       thresh=0.45,
                                       nms=0.5)

                for r in detections:
                    # keep only confident 'person' detections
                    if not (r[0] == 'person' and r[1] > 0.88):
                        continue

                    box = BoundBox(r[2][0], r[2][1], r[2][2], r[2][3], r[1],
                                   r[0])
                    # convert the box centre to its top-left corner
                    left = box.x - box.w / 2
                    top = box.y - box.h / 2
                    print('%d,-1,%.2f,%.2f,%.2f,%.2f,%.6f,-1,-1,-1' %
                          (frame_no, left, top, box.w, box.h, box.c),
                          file=out_file)
 def __init__(self, images, config, shuffle=True, augment=True, norm=None):
     """Store the inputs and build anchors from ``config['ANCHORS']``.

     ``augment`` is kept under the attribute name ``jitter``. The anchor
     list is read as flat (width, height) pairs.
     """
     self.images, self.config = images, config
     self.shuffle = shuffle
     self.norm = norm
     self.jitter = augment

     anchor_values = config['ANCHORS']
     self.anchors = []
     for k in range(int(len(anchor_values) // 2)):
         self.anchors.append(
             BoundBox(0, 0, anchor_values[2 * k], anchor_values[2 * k + 1]))

     # running position counter, starts at zero
     self.idx = 0
Exemplo n.º 12
0
def to_bboxes(annos):
    """Convert annotation dicts into ``BoundBox`` objects.

    Each annotation supplies ``'category_id'`` and ``'bbox'`` (x, y, w, h).
    The resulting box is ``BoundBox(x, y, x + w, y + h, 1.0, classes)``
    where ``classes`` is a one-hot ``float32`` vector of length
    ``num_classes`` with the slot ``catId2idx[category_id]`` set.
    """
    from utils import BoundBox

    def _convert(anno):
        # one-hot class vector first, then the box geometry
        one_hot = np.zeros((num_classes, ), np.float32)
        one_hot[catId2idx[anno['category_id']]] = 1.0
        x, y, w, h = anno['bbox']
        return BoundBox(x, y, x + w, y + h, 1.0, one_hot)

    return [_convert(anno) for anno in annos]
Exemplo n.º 13
0
    def __init__(self, images, config, shuffle=True, jitter=True, norm=None):
        """Store the dataset and config and assemble the augmentation pipeline.

        ``config['ANCHORS']`` is read as flat (width, height) pairs. When
        ``shuffle`` is true, ``images['images_with_annotations']`` is
        shuffled in place.
        """
        self.generator = None

        self.images, self.config = images, config
        self.shuffle, self.jitter, self.norm = shuffle, jitter, norm

        anchor_values = config['ANCHORS']
        self.anchors = [
            BoundBox(0, 0, width, height)
            for width, height in zip(anchor_values[0::2], anchor_values[1::2])
        ]

        def sometimes(aug):
            # wrapper with probability 1.0
            return iaa.Sometimes(1., aug)

        # pool of augmenters; 0 to 5 of them are drawn per image
        pool = [
            iaa.OneOf([
                iaa.GaussianBlur((0, 2.0)),
                iaa.AverageBlur(k=(2, 5)),
                iaa.MedianBlur(k=(1, 7)),
            ]),
            iaa.Sharpen(alpha=(0, 0.5), lightness=(0.75, 1.5)),
            # one of two edge detectors
            sometimes(
                iaa.OneOf([
                    iaa.EdgeDetect(alpha=(0, 0.5)),
                    iaa.DirectedEdgeDetect(alpha=(0, 0.5),
                                           direction=(0.0, 1.0)),
                ])),
            iaa.AdditiveGaussianNoise(
                loc=0, scale=(0.0, 0.005 * 255), per_channel=0.5),
            iaa.Add((-10, 10), per_channel=0.5),
            iaa.Multiply((0.8, 1.2), per_channel=0.5),
            iaa.ContrastNormalization((0.5, 1.5), per_channel=0.5),
            iaa.Grayscale(alpha=(0.0, 0.5)),
            sometimes(
                iaa.ElasticTransformation(alpha=(0.5, 3.5), sigma=0.25)),
        ]
        some_of_pool = iaa.SomeOf((0, 5), pool, random_order=True)
        self.aug_pipe = iaa.Sequential([some_of_pool], random_order=True)

        if shuffle:
            np.random.shuffle(self.images['images_with_annotations'])
Exemplo n.º 14
0
    def __init__(self, images, config, shuffle=True, augment=True, norm=None):
        """Build the anchor boxes, then defer to the parent initializer.

        ``config['ANCHORS']`` is read as flat (width, height) pairs. The
        anchors are assigned before the parent ``__init__`` runs.
        """
        anchor_values = config['ANCHORS']
        pair_count = int(len(anchor_values) // 2)
        self.anchors = [
            BoundBox(0, 0, anchor_values[2 * k], anchor_values[2 * k + 1])
            for k in range(pair_count)
        ]

        parent_kwargs = dict(shuffle=shuffle, augment=augment, norm=norm)
        super(BatchGenerator, self).__init__(images, config, **parent_kwargs)
Exemplo n.º 15
0
    def __init__(self,
                 images,
                 config,
                 shuffle=True,
                 jitter=True,
                 norm=None,
                 flipflop=True,
                 shoechanger=True,
                 zeropad=True):
        """Store options, preload the 'badshoes' images and build anchors.

        When ``flipflop`` or ``shoechanger`` is set, every file in
        ``imgs/more_badshoes`` is read with ``cv2.imread`` into
        ``self.badshoes``. ``config['ANCHORS']`` is read as flat
        (width, height) pairs. ``images`` is shuffled in place when
        ``shuffle`` is true.
        """
        self.generator = None

        self.flipflop, self.shoechanger = flipflop, shoechanger
        if self.flipflop or self.shoechanger:
            self.badshoes = [
                cv2.imread('imgs/more_badshoes/' + file_name)
                for file_name in os.listdir('imgs/more_badshoes')
            ]

        self.zeropad = zeropad

        self.images, self.config = images, config
        self.shuffle, self.jitter, self.norm = shuffle, jitter, norm

        anchor_values = config['ANCHORS']
        self.anchors = [
            BoundBox(0, 0, width, height)
            for width, height in zip(anchor_values[0::2], anchor_values[1::2])
        ]

        # class index -> class name, and the inverse lookup
        self.labels_to_names = dict(enumerate([
            'goodhelmet',
            'LP',
            'goodshoes',
            'badshoes',
            'badhelmet',
            'person',
        ]))
        self.names_to_labels = {
            name: label
            for label, name in self.labels_to_names.items()
        }

        if shuffle:
            np.random.shuffle(self.images)
Exemplo n.º 16
0
    def __init__(self, images, config, shuffle=True, augment=True, norm=None):
        """Store the dataset and config and assemble the augmentation pipeline.

        ``config['ANCHORS']`` is read as flat (width, height) pairs.
        ``images`` is shuffled in place when ``shuffle`` is true.
        """
        self.generator = None

        self.images, self.config = images, config
        self.shuffle, self.augment, self.norm = shuffle, augment, norm

        self.counter = 0

        anchor_values = config['ANCHORS']
        self.anchors = []
        for k in range(int(len(anchor_values) // 2)):
            self.anchors.append(
                BoundBox(0, 0, anchor_values[2 * k], anchor_values[2 * k + 1]))

        def sometimes(aug):
            # wrapper with probability 0.5
            return iaa.Sometimes(0.5, aug)

        # first step: a default Affine wrapped in the 0.5-probability helper
        affine_step = sometimes(iaa.Affine())

        # second step: 0 to 4 of these pixel-level augmenters, random order
        pixel_ops = [
            iaa.GaussianBlur((0, 2.0)),  # sigma between 0 and 2.0
            iaa.AdditiveGaussianNoise(
                loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5),
            iaa.Dropout((0.01, 0.1), per_channel=0.5),  # drop 1% to 10% of pixels
            iaa.Add((-10, 10), per_channel=0.5),  # brightness offset -10 to 10
            iaa.Multiply((0.8, 1.2), per_channel=0.5),
            iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5),
        ]
        pixel_step = iaa.SomeOf((0, 4), pixel_ops, random_order=True)

        self.aug_pipe = iaa.Sequential([affine_step, pixel_step],
                                       random_order=True)

        if shuffle:
            np.random.shuffle(self.images)
Exemplo n.º 17
0
 def create_bboxes(self, annotations):
     """Rescale annotations to the network input size and wrap them as boxes.

     Each annotation's ``x``/``width`` is scaled by ``IMAGE_W / ORIG_SIZE``
     and its ``y``/``height`` by ``IMAGE_H / ORIG_SIZE``, then turned into
     ``BoundBox(x, y, x + width, y + height)``.

     NOTE(review): the annotations are rescaled in place, so passing the
     same objects twice scales them twice. This is kept because callers may
     read the scaled fields afterwards -- confirm.

     Args:
         annotations: iterable of objects with mutable numeric ``x``, ``y``,
             ``width`` and ``height`` attributes.

     Returns:
         list of ``BoundBox``, one per annotation, in input order.
     """
     scale_x = self.config['IMAGE_W'] / self.config['ORIG_SIZE']
     scale_y = self.config['IMAGE_H'] / self.config['ORIG_SIZE']

     bboxes = []
     for annotation in annotations:
         # horizontal quantities use the width ratio, vertical ones the
         # height ratio (width/height previously had these swapped)
         annotation.x *= scale_x
         annotation.y *= scale_y
         annotation.width *= scale_x
         annotation.height *= scale_y
         bboxes.append(
             BoundBox(annotation.x, annotation.y,
                      annotation.x + annotation.width,
                      annotation.y + annotation.height))
     return bboxes
Exemplo n.º 18
0
    def __init__(self, images, 
                       config, 
                       shuffle=True, 
                       jitter=True, 
                       norm=None):
        """Store the dataset and config and assemble the augmentation pipeline.

        ``config['ANCHORS']`` is read as flat (width, height) pairs.
        ``images`` is shuffled in place when ``shuffle`` is true.
        """
        self.generator = None

        self.images, self.config = images, config
        self.shuffle, self.jitter, self.norm = shuffle, jitter, norm

        anchor_values = config['ANCHORS']
        self.anchors = [
            BoundBox(0, 0, width, height)
            for width, height in zip(anchor_values[0::2], anchor_values[1::2])
        ]

        # augmenters come from https://github.com/aleju/imgaug
        def sometimes(aug):
            # wrapper with probability 0.5
            return iaa.Sometimes(0.5, aug)

        # 0 to 5 of these are drawn per image and applied in random order
        pool = [
            sometimes(iaa.OneOf([
                iaa.EdgeDetect(alpha=(0, 0.7)),
            ])),
            iaa.Add((-10, 10), per_channel=0.5),  # brightness offset -10 to 10
            iaa.Multiply((0.5, 1.5), per_channel=0.5),  # brightness factor 0.5 to 1.5
            iaa.ContrastNormalization((0.5, 1.0), per_channel=0.5),  # contrast factor 0.5 to 1.0
        ]
        some_of_pool = iaa.SomeOf((0, 5), pool, random_order=True)
        self.aug_pipe = iaa.Sequential([some_of_pool], random_order=True)

        if shuffle:
            np.random.shuffle(self.images)
Exemplo n.º 19
0
    def __init__(self, images, config, shuffle=True, norm=None):
        """Store the inputs and build anchors from ``config['ANCHORS']``.

        The anchor list is read as flat (width, height) pairs. ``images``
        is shuffled in place when ``shuffle`` is true.
        """
        self.generator = None

        self.images, self.config = images, config
        self.shuffle, self.norm = shuffle, norm

        anchor_values = config['ANCHORS']
        self.anchors = []
        for k in range(int(len(anchor_values) // 2)):
            width, height = anchor_values[2 * k], anchor_values[2 * k + 1]
            self.anchors.append(BoundBox(0, 0, width, height))

        if shuffle:
            np.random.shuffle(self.images)
Exemplo n.º 20
0
    def __init__(self, images,
                       config,
                       jitter=True,
                       norm=None):
        """Store the dataset and config and build the augmentation pipeline.

        Args:
            images: the dataset entries, stored as given.
            config: mapping; ``config['ANCHORS']`` is read as a flat
                sequence of (width, height) pairs.
            jitter, norm: stored unchanged on the instance.
        """
        self.images = images
        self.config = config

        self.jitter = jitter
        self.norm = norm

        anchor_values = config['ANCHORS']
        self.anchors = [
            BoundBox(0, 0, anchor_values[2 * i], anchor_values[2 * i + 1])
            for i in range(int(len(anchor_values) // 2))
        ]

        # Augmenters by https://github.com/aleju/imgaug. SomeOf((0, 5))
        # draws between 0 and 5 of the candidates per image and applies
        # them in random order.
        candidates = [
            # exactly one kind of blur
            iaa.OneOf([
                iaa.GaussianBlur((0, 1.0)),  # sigma between 0 and 1.0
                iaa.AverageBlur(k=(3, 5)),  # local means, kernel size 3 to 5
                iaa.MedianBlur(k=(3, 5)),  # local medians, kernel size 3 to 5
            ]),
            iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5)),
            iaa.AdditiveGaussianNoise(
                loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5),
            # either per-pixel dropout or coarse (blocky) dropout
            iaa.OneOf([
                iaa.Dropout((0.01, 0.1), per_channel=0.5),
                iaa.CoarseDropout(
                    (0.03, 0.15), size_percent=(0.02, 0.05), per_channel=0.2),
            ]),
            # brightness: add -10 to 10 to the original value
            iaa.Add((-10, 10), per_channel=0.5),
            # brightness: multiply by 0.8 to 1.2
            iaa.Multiply((0.8, 1.2), per_channel=0.5),
            # improve or worsen the contrast
            iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5),
        ]

        self.aug_pipe = iaa.Sequential(
            [iaa.SomeOf((0, 5), candidates, random_order=True)],
            random_order=True)
Exemplo n.º 21
0
def get_ground_truth(image_path):
    """Load the ground-truth box for an image from its XML annotation.

    The annotation is looked up as
    ``robot-dataset/annotations/<image file stem>.xml`` and only the first
    ``<xmin>``/``<xmax>``/``<ymin>``/``<ymax>`` values in it are used.

    Side effect: the parsed corner values are also published as the module
    globals ``xmin``, ``xmax``, ``ymin`` and ``ymax``.

    Args:
        image_path: path of the image; both ``\\`` and ``/`` are accepted
            as directory separators.

    Returns:
        ``BoundBox`` with centre and size divided by 416, confidence 1.0
        and class vector ``(0.0, 1.0)``.
    """
    global xmin, xmax, ymin, ymax

    # Strip the directory part for either separator style, then drop the
    # extension; searching only for '\\' broke on POSIX-style paths.
    file_name = image_path.replace('\\', '/').rsplit('/', 1)[-1]
    stem = os.path.splitext(file_name)[0]
    annot_name = os.path.join('robot-dataset', 'annotations', stem + '.xml')

    with open(annot_name) as f:
        annot = f.read()

    def _tag_value(tag):
        # float between the first <tag> and the first </tag>
        opening = '<' + tag + '>'
        start = annot.find(opening) + len(opening)
        return float(annot[start:annot.find('</' + tag + '>')])

    xmin = _tag_value('xmin')
    xmax = _tag_value('xmax')
    ymin = _tag_value('ymin')
    ymax = _tag_value('ymax')

    # centre and size, divided by the fixed 416 image size
    x = np.average((xmin, xmax)) / 416
    y = np.average((ymin, ymax)) / 416
    w = (xmax - xmin) / 416
    h = (ymax - ymin) / 416
    return BoundBox(x, y, w, h, 1.0, np.array((0.0, 1.0)))
Exemplo n.º 22
0
    def __init__(
            self,
            instances,
            anchors,
            labels,
            downsample=32,  # ratio between network input's size and network output's size, 32 for YOLOv3
            max_box_per_image=30,
            batch_size=1,
            min_net_size=320,
            max_net_size=608,
            shuffle=True,
            jitter=True,
            norm=None):
        """Store the generator settings, anchors and label lookup tables.

        Args:
            instances: dataset entries; shuffled in place when ``shuffle``
                is true.
            anchors: flat sequence of (width, height) pairs; a trailing
                unpaired value is ignored.
            labels: stored unchanged.
            downsample: ratio used to round the net sizes down to a
                multiple of it.
            max_box_per_image: stored unchanged.
            batch_size: number of instances per batch (default 1).
            min_net_size, max_net_size: rounded down to a multiple of
                ``downsample`` before being stored.
            shuffle, jitter, norm: stored unchanged.
        """
        self.instances = instances
        # Honour the caller's batch_size; it used to be hard-coded to 1,
        # which silently ignored the argument. The default is still 1.
        self.batch_size = batch_size
        self.labels = labels
        self.downsample = downsample
        self.max_box_per_image = max_box_per_image
        self.min_net_size = (min_net_size // self.downsample) * self.downsample
        self.max_net_size = (max_net_size // self.downsample) * self.downsample
        self.shuffle = shuffle
        self.jitter = jitter
        self.norm = norm
        self.anchors = [
            BoundBox(0, 0, anchors[2 * i], anchors[2 * i + 1])
            for i in range(len(anchors) // 2)
        ]
        # class index -> class name
        self.labels_to_names = {
            0: 'goodhelmet',
            1: 'LP',
            2: 'goodshoes',
            3: 'badshoes',
            4: 'badhelmet',
            5: 'person'
        }
        # class name -> class index (inverse of the table above)
        self.names_to_labels = {
            'goodhelmet': 0,
            'LP': 1,
            'goodshoes': 2,
            'badshoes': 3,
            'badhelmet': 4,
            'person': 5
        }

        if shuffle:
            np.random.shuffle(self.instances)
Exemplo n.º 23
0
def get_ground_truth(coco, imgId):
    """Build the ground-truth tensor for one COCO image.

    All annotations of ``imgId`` are loaded through ``coco``. Each box is
    multiplied by the image size, converted to grid units and written into
    the cell that contains its centre (clamped to the grid), at the anchor
    whose shape overlaps it best.

    Returns:
        ``np.float32`` array of shape
        ``(grid_h, grid_w, num_box, 4 + 1 + num_classes)`` where, per
        assigned slot, channel 0 is 1.0, channels 1-4 hold the box (centre
        x, centre y, w, h) and channel ``5 + class index`` is 1.0.
    """
    gt = np.zeros((grid_h, grid_w, num_box, 4 + 1 + num_classes),
                  dtype=np.float32)
    annotations = coco.loadAnns(ids=coco.getAnnIds(imgIds=[imgId]))

    for anno in annotations:
        category_id = anno['category_id']
        bx, by, bw, bh = anno['bbox']

        # scale the annotation by the image width/height
        bx, by = 1.0 * bx * image_w, 1.0 * by * image_h
        bw, bh = 1.0 * bw * image_w, 1.0 * bh * image_h

        # centre and size in grid units
        center_x = (bx + bw / 2.) / grid_size
        center_y = (by + bh / 2.) / grid_size
        center_w = bw / grid_size
        center_h = bh / grid_size

        # owning cell, clamped so it always lies inside the grid
        cell_x = int(np.clip(np.floor(center_x), 0.0, (grid_w - 1)))
        cell_y = int(np.clip(np.floor(center_y), 0.0, (grid_h - 1)))

        # pick the anchor with the highest IoU; both boxes sit at the
        # origin so only their shapes are compared, first one wins ties
        probe = BoundBox(0, 0, center_w, center_h)
        best_anchor, max_iou = -1, -1
        for idx, anchor in enumerate(anchor_boxes):
            iou = bbox_iou(probe, anchor)
            if iou > max_iou:
                best_anchor, max_iou = idx, iou

        # write object flag, box and one-hot class into the chosen slot
        slot = gt[cell_y, cell_x, best_anchor]
        slot[0] = 1.0
        slot[1:5] = [center_x, center_y, center_w, center_h]
        slot[5 + catId2idx[category_id]] = 1.0
    return gt
def results2dets(results, image_shape):
    """ Convert results of yolo to [x1, y1, x2, y2, confidence]

        Only results labelled 'person' with a confidence above 0.87 are kept.
        The box stored in each result is read as (x, y, w, h) with (x, y) the
        box centre, and is converted to corner coordinates.

        Params:
            results: detected results of YOLO; each item is indexed as
                     (label, confidence, (x, y, w, h))
            image_shape: shape of image (not used by the conversion)
        Return:
            2d array of detected box, shape:(#detected_box, 5). When nothing
            qualifies an empty array of shape (0, 5) is returned, so callers
            can always slice columns.
    """
    seq_dets = []

    for r in results:
        # Enough confidence of person
        if r[0] == 'person' and r[1] > 0.87:
            box = BoundBox(r[2][0], r[2][1], r[2][2], r[2][3], r[1], r[0])
            half_w = box.w / 2
            half_h = box.h / 2
            seq_dets.append([
                box.x - half_w, box.y - half_h, box.x + half_w,
                box.y + half_h, box.c
            ])

    if not seq_dets:
        # np.array([]) would be 1-D with shape (0,); keep the documented
        # two-dimensional layout even when there is no detection.
        return np.empty((0, 5))

    return np.array(seq_dets)
Exemplo n.º 25
0
    def __init__(self, config):
        """Cache the training and model settings found in ``config``.

        Args:
            config: nested mapping with a ``'train'`` section
                (``batch_size``, ``max_box_per_image``) and a ``'model'``
                section (``labels``, ``min_input_size``, ``max_input_size``,
                ``anchors``, ``input_size``).
        """
        self.config = config

        train_cfg = config['train']
        self.batch_size = train_cfg['batch_size']

        model_cfg = config['model']
        self.labels = model_cfg['labels']

        self.down_sample = 32
        self.max_box_per_image = train_cfg['max_box_per_image']

        # network sizes are rounded down to a multiple of the down-sampling
        # factor
        stride = self.down_sample
        self.min_net_size = (model_cfg['min_input_size'] // stride) * stride
        self.max_net_size = (model_cfg['max_input_size'] // stride) * stride

        self.jitter = 0.3
        self.norm = lambda t: t / 255.0

        # the flat anchor list is consumed pairwise; each pair becomes a box
        # placed at the origin
        flat_anchors = model_cfg['anchors']
        self.anchors = [
            BoundBox(0, 0, first, second)
            for first, second in zip(flat_anchors[0::2], flat_anchors[1::2])
        ]

        input_size = model_cfg['input_size']
        self.net_h = input_size
        self.net_w = input_size

        self.idx = 0
Exemplo n.º 26
0
 def __init__(self, images, config, shuffle=True, jitter=True, norm=None):
     """Store the generator settings and build the augmentation pipeline.

     Args:
         images: list of training instances; shuffled in place when
             ``shuffle`` is true.
         config: mapping that provides at least ``'ANCHORS'``, a flat list
             consumed two values at a time.
         shuffle: whether to shuffle ``images`` once at construction.
         jitter: stored as ``self.jitter``.
         norm: stored as ``self.norm``.
     """
     self.generator = None
     self.images = images
     self.config = config
     self.shuffle = shuffle
     self.jitter = jitter
     self.norm = norm

     # one origin-anchored box per consecutive pair of anchor values
     anchor_values = config['ANCHORS']
     self.anchors = [
         BoundBox(0, 0, anchor_values[2 * k], anchor_values[2 * k + 1])
         for k in range(int(len(anchor_values) // 2))
     ]

     # Affine is created without arguments and applied with probability 0.5
     affine_half_the_time = iaa.Sometimes(0.5, iaa.Affine())

     # pool of pixel-level augmenters; 0 to 5 of them are applied per image
     blur = iaa.OneOf([
         iaa.GaussianBlur((0, 3.0)),
         iaa.AverageBlur(k=(2, 7)),
         iaa.MedianBlur(k=(3, 11)),
     ])
     sharpen = iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5))
     noise = iaa.AdditiveGaussianNoise(
         loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5)
     dropout = iaa.OneOf([
         iaa.Dropout((0.01, 0.1), per_channel=0.5),
     ])
     additive = iaa.Add((-10, 10), per_channel=0.5)
     multiplicative = iaa.Multiply((0.5, 1.5), per_channel=0.5)
     contrast = iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5)

     pixel_level = iaa.SomeOf(
         (0, 5),
         [blur, sharpen, noise, dropout, additive, multiplicative, contrast],
         random_order=True,
     )

     self.aug_pipe = iaa.Sequential(
         [affine_half_the_time, pixel_level],
         random_order=True,
     )

     if shuffle:
         np.random.shuffle(self.images)
Exemplo n.º 27
0
    def __init__(self, images, config, shuffle=True, jitter=True, norm=None):
        """Store the generator settings and build the imgaug pipeline.

        Args:
            images: list of training instances; shuffled in place when
                ``shuffle`` is true.
            config: mapping that provides at least ``'ANCHORS'``, a flat list
                consumed two values at a time.
            shuffle: whether to shuffle ``images`` once at construction.
            jitter: stored as ``self.jitter``.
            norm: stored as ``self.norm``.
        """
        self.generator = None

        self.images = images
        self.config = config

        self.shuffle = shuffle
        self.jitter = jitter
        self.norm = norm

        # one origin-anchored box per consecutive pair of anchor values
        anchor_values = config['ANCHORS']
        pair_count = int(len(anchor_values) // 2)
        self.anchors = [
            BoundBox(0, 0, anchor_values[2 * k], anchor_values[2 * k + 1])
            for k in range(pair_count)
        ]

        # Augmenters come from https://github.com/aleju/imgaug.
        # Augmenters created with per_channel=0.5 sample one value per image
        # in 50% of all cases and one value per channel otherwise.

        # Affine is applied with probability 0.5.  No scale, translate,
        # rotate or shear argument is passed to it here.
        geometric = iaa.Sometimes(0.5, iaa.Affine())

        # Less important augmenters: only 0 to 5 of them run per image, since
        # applying all of them would often be too strong.
        light_augmenters = []

        # one of three blurs: gaussian with sigma in [0, 3.0], local mean
        # with kernel size in [2, 7], local median with kernel size in [3, 11]
        light_augmenters.append(
            iaa.OneOf([
                iaa.GaussianBlur((0, 3.0)),
                iaa.AverageBlur(k=(2, 7)),
                iaa.MedianBlur(k=(3, 11)),
            ]))

        # sharpening
        light_augmenters.append(
            iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5)))

        # additive gaussian noise
        light_augmenters.append(
            iaa.AdditiveGaussianNoise(
                loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5))

        # pixel dropout with a rate between 1% and 10%
        light_augmenters.append(
            iaa.OneOf([
                iaa.Dropout((0.01, 0.1), per_channel=0.5),
            ]))

        # brightness: additive shift in [-10, 10], then scaling to 50-150%
        light_augmenters.append(iaa.Add((-10, 10), per_channel=0.5))
        light_augmenters.append(iaa.Multiply((0.5, 1.5), per_channel=0.5))

        # contrast made weaker or stronger
        light_augmenters.append(
            iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5))

        pixel_level = iaa.SomeOf((0, 5), light_augmenters, random_order=True)

        self.aug_pipe = iaa.Sequential([geometric, pixel_level],
                                       random_order=True)

        if shuffle:
            np.random.shuffle(self.images)
Exemplo n.º 28
0
    def __getitem__(self, idx):
        """Build the ``idx``-th batch of inputs and targets.

        Every image of the batch goes through ``self.aug_image``.  Each
        object that has a positive width and height and a name listed in
        ``config['LABELS']`` is encoded into the grid cell holding its
        centre, in the slot of the anchor with the highest IoU against the
        object's width/height.

        The last axis of ``y_batch`` is laid out as
        ``[x, y, w, h, confidence, <CLASS one-hot entries>, pose_x, pose_y,
        pose_z]``.  The pose triple starts right after the class entries,
        i.e. at index ``5 + CLASS`` (index 6 when ``CLASS == 1``).

        Args:
            idx: index of the batch to build.

        Returns:
            ``([x_batch, b_batch], y_batch)`` where ``x_batch`` holds the
            input images, ``b_batch`` holds up to ``TRUE_BOX_BUFFER`` boxes
            per image (the write position wraps around when there are more),
            and ``y_batch`` is the desired network output.
        """
        cfg = self.config
        batch_size = cfg['BATCH_SIZE']

        l_bound = idx * batch_size
        r_bound = (idx + 1) * batch_size

        if r_bound > len(self.images):
            # final batch: move the window back so it still spans batch_size
            # images, re-using images from the previous batch
            r_bound = len(self.images)
            l_bound = r_bound - batch_size

        image_h, image_w = cfg['IMAGE_H'], cfg['IMAGE_W']
        true_box_buffer = cfg['TRUE_BOX_BUFFER']
        grid_h, grid_w = cfg['GRID_H'], cfg['GRID_W']
        nb_class = cfg['CLASS']
        labels = cfg['LABELS']

        # size of one grid cell in pixels
        cell_w = float(image_w) / grid_w
        cell_h = float(image_h) / grid_h

        # first index of the pose triple: it follows the class entries
        pose_offset = 5 + nb_class

        nb_instances = r_bound - l_bound
        x_batch = np.zeros((nb_instances, image_h, image_w, 3))  # input images
        b_batch = np.zeros(
            (nb_instances, 1, 1, 1, true_box_buffer, 4)
        )  # list of TRUE_BOX_BUFFER ground-truth boxes per image
        y_batch = np.zeros(
            (nb_instances, grid_h, grid_w, cfg['BOX'],
             4 + 1 + 3 + nb_class))  # desired network output

        for instance_count, train_instance in enumerate(
                self.images[l_bound:r_bound]):
            # augment input image and fix object's position and size
            img, all_objs = self.aug_image(train_instance, jitter=self.jitter)

            # construct output from object's x, y, w, h
            true_box_index = 0

            for obj in all_objs:
                is_valid = (obj['xmax'] > obj['xmin']
                            and obj['ymax'] > obj['ymin']
                            and obj['name'] in labels)
                if not is_valid:
                    continue

                # object centre, unit: grid cell
                center_x = .5 * (obj['xmin'] + obj['xmax']) / cell_w
                center_y = .5 * (obj['ymin'] + obj['ymax']) / cell_h

                grid_x = int(np.floor(center_x))
                grid_y = int(np.floor(center_y))

                if not (grid_x < grid_w and grid_y < grid_h):
                    # centre falls outside the grid
                    continue

                obj_indx = labels.index(obj['name'])

                # object size, unit: grid cell
                center_w = (obj['xmax'] - obj['xmin']) / cell_w
                center_h = (obj['ymax'] - obj['ymin']) / cell_h

                box = [center_x, center_y, center_w, center_h]

                # find the anchor that best predicts this box
                best_anchor = -1
                max_iou = -1

                shifted_box = BoundBox(0, 0, center_w, center_h)

                for anchor_idx, anchor in enumerate(self.anchors):
                    iou = bbox_iou(shifted_box, anchor)

                    if max_iou < iou:
                        best_anchor = anchor_idx
                        max_iou = iou

                # assign ground truth x, y, w, h, confidence, class probs
                # and pose to y_batch
                y_batch[instance_count, grid_y, grid_x, best_anchor,
                        0:4] = box
                y_batch[instance_count, grid_y, grid_x, best_anchor, 4] = 1.
                y_batch[instance_count, grid_y, grid_x, best_anchor,
                        5 + obj_indx] = 1
                y_batch[instance_count, grid_y, grid_x, best_anchor,
                        pose_offset:] = [
                            obj['pose_x'], obj['pose_y'], obj['pose_z']
                        ]

                # assign the true box to b_batch
                b_batch[instance_count, 0, 0, 0, true_box_index] = box

                true_box_index += 1
                true_box_index = true_box_index % true_box_buffer

            # assign input image to x_batch
            if self.norm is not None:
                x_batch[instance_count] = self.norm(img)
            else:
                # plot image and bounding boxes for sanity check
                # NOTE(review): the drawing calls receive a channel-reversed
                # view of img; confirm that the installed OpenCV draws into
                # img through such a view.
                for obj in all_objs:
                    if obj['xmax'] > obj['xmin'] and obj['ymax'] > obj['ymin']:
                        cv2.rectangle(img[:, :, ::-1],
                                      (obj['xmin'], obj['ymin']),
                                      (obj['xmax'], obj['ymax']), (255, 0, 0),
                                      3)
                        cv2.putText(img[:, :, ::-1], obj['name'],
                                    (obj['xmin'] + 2, obj['ymin'] + 12), 0,
                                    1.2e-3 * img.shape[0], (0, 255, 0), 2)

                x_batch[instance_count] = img

        return [x_batch, b_batch], y_batch
Exemplo n.º 29
0
def _main_(args):
    """Run a trained YOLO model on stored images and save comparison plots.

    The configuration is read as JSON from ``args.conf``, the weights are
    loaded from ``args.weights`` and the images plus their truth boxes are
    read from the NumPy archive at ``args.input`` (entries ``'raw'`` and
    ``'truth'``).  For each processed image the predicted boxes (yellow) and
    the truth boxes (green) are drawn and the figure is written to
    ``out<index>.png``.

    At most the first 10 images are processed; fewer when the archive holds
    fewer images or truth rows.

    Args:
        args: parsed command-line arguments exposing ``conf``, ``weights``
            and ``input``.
    """
    config_path = args.conf
    weights_path = args.weights
    image_path = args.input

    with open(config_path) as config_buffer:
        config = json.load(config_buffer)

    model_cfg = config['model']

    ###############################
    #   Make the model
    ###############################

    yolo = YOLO(backend=model_cfg['backend'],
                input_shape=model_cfg['input_shape'],
                labels=model_cfg['labels'],
                max_box_per_image=model_cfg['max_box_per_image'],
                anchors=model_cfg['anchors'])

    ###############################
    #   Load trained weights
    ###############################

    yolo.load_weights(weights_path)

    ###############################
    #   Predict bounding boxes
    ###############################

    _, ax = plt.subplots(1)

    # Meaning of the box fields, as noted by the original author:
    #   bbox x       -> globe eta
    #   bbox y       -> globe phi
    #   bbox width   -> Gaussian sigma (required to be 3*sigma<pi)
    #   bbox height  -> Gaussian sigma (required to be 3*sigma<pi)

    file_content = np.load(image_path)
    images = file_content['raw']
    truth_boxes = file_content['truth']

    # never index past the data that is actually available
    nb_images = min(10, len(images), len(truth_boxes))

    for image_index in range(nb_images):
        image = images[image_index]
        all_objs = truth_boxes[image_index]

        print(image.shape)
        boxes = yolo.predict(image)
        print(len(boxes), 'boxes are found')
        for i, b in enumerate(boxes):
            print('box:', i, b)
        draw_boxes(image, ax, boxes, model_cfg['labels'], color='y',
                   scale=True)

        obj_boxes = []
        for i, obj in enumerate(all_objs):
            # fields 1..4 of a truth row are read as y, x, h, w (centre and
            # size) and converted to corner coordinates
            y, x, h, w = obj[1:5]
            b = BoundBox(x - w / 2, y - h / 2, x + w / 2, y + h / 2)
            print('box:', i, obj)
            obj_boxes.append(b)
        draw_boxes(image, ax, obj_boxes, model_cfg['labels'], color='g',
                   scale=False)

        # move axis 0 of the image to the end, then sum over it to obtain
        # the 2-D map that is displayed
        axis0_last = np.swapaxes(np.swapaxes(image, 0, 1), 1, 2)
        summed = np.sum(axis0_last, axis=2)

        plt.imshow(summed, aspect='auto', extent=(0, 256, 0, 9600),
                   interpolation='nearest', cmap=cm.jet)
        plt.savefig('out%d.png' % image_index, dpi=200)
Exemplo n.º 30
0
def test_frame_selector():
    """Check which bounding box ``FmFrameSelector`` buffers and selects.

    Frames 0 and 2 each carry several boxes, exactly one of which was built
    with ``label=3`` (the selector is created with ``label_ind=3``); frame 1
    and frames 3..102 carry no boxes.  The test asserts that the label-3 box
    is what ends up in the buffer for frames 0 and 2, and that
    ``select_frame`` returns the frame-0 box both before and after the 100
    empty frames.
    """
    from utils import BoundBox
    skip_rate = 3
    frame_rate = 30
    Ns, Ks, Ts = 70, 30, 35
    print("Ns = %d, Ks = %d, Ts = %d" % (Ns, Ks, Ts))
    N, K = Ns * frame_rate / skip_rate, Ks * frame_rate
    T = Ts * frame_rate / skip_rate
    print("N = %d, K = %d, T = %d" % (N, K, T))

    selector = FmFrameSelector(N, K, T, "", label_ind=3,
                               non_None_threshold=1)
    image = np.zeros((640, 640, 3))
    num_teeth = 6

    def feed(frame_idx, detections):
        # push one frame with its detections into the selector
        selector.update(frame_idx,
                        image,
                        detections,
                        num_teeth,
                        write_selection=False)

    def among_others(target):
        # the target box surrounded by the four boxes shared by both frames
        return [
            BoundBox(0.1, 0.1, 0.2, 0.2, 0.9, 2, label=2),
            BoundBox(0.1, 0.1, 0.2, 0.2, 0.9, 2),
            BoundBox(0.1, 0.1, 0.2, 0.2, 0.9, 1), target,
            BoundBox(0.2, 0.2, 0.4, 0.4, 0.5, 4, label=0)
        ]

    # frame 0: label-3 box built with 0.6 as its fifth argument
    first_match = BoundBox(0.1, 0.1, 0.2, 0.2, 0.6,
                           [0., 0.08, 0., 0.92, 0.], label=3)
    feed(0, among_others(first_match))
    assert selector.bbox_buffer[0] == first_match

    # frame 1: nothing detected
    feed(1, [])

    # frame 2: label-3 box built with 0.22 as its fifth argument
    second_match = BoundBox(0.1, 0.1, 0.2, 0.2, 0.22,
                            [0.05, 0., 0., 0.90, 0.05], label=3)
    feed(2, among_others(second_match))
    assert selector.bbox_buffer[-1] == second_match
    assert len(selector.frame_buffer) == 3
    assert len(selector.bbox_buffer) -\
            selector.bbox_buffer.count(None) == 2

    frame, bbox, ind = selector.select_frame()
    assert bbox == first_match
    assert ind == 0

    # 100 further empty frames must not change the selection
    for frame_idx in range(3, 103):
        feed(frame_idx, [])
    frame, bbox, ind = selector.select_frame()
    assert bbox == first_match
    assert ind == 0
Exemplo n.º 31
0
    def __call__(self, images, annotations, shapes, aug=True):
        """Turn raw images and JSON annotations into one batch of tensors.

        Three target tensors are produced, on grids 1x, 2x and 4x the base
        grid (``net size // self.down_sample``).  Anchor index ``i`` is routed
        to ``[yolo_3, yolo_2, yolo_1][i // 3]`` and occupies slot ``i % 3``
        there.  For every object, all anchors whose IoU with the object's
        width/height exceeds 0.3 are treated as positive; when none does,
        the anchor with the highest IoU is used.

        Args:
            images: iterable of images.
            annotations: iterable of JSON strings, one per image.
            shapes: iterable of shapes; each image is resized to
                ``(shape[1], shape[0])`` before further processing.
            aug: when true, ``self.idx`` is incremented, the network size
                comes from ``self._get_net_size()`` and images go through
                ``self._aug_image``; otherwise the configured input size and
                ``self._raw_image`` are used.

        Returns:
            ``[x_batch, t_batch, yolo_1, yolo_2, yolo_3]``, followed by
            ``images, annotations, shapes`` when ``aug`` is false.
        """
        if aug:
            self.idx += 1
            net_h, net_w = self._get_net_size()
        else:
            net_h = net_w = self.config['model']['input_size']

        base_grid_h = net_h // self.down_sample
        base_grid_w = net_w // self.down_sample

        x_batch = np.zeros((self.batch_size, net_h, net_w, 3),
                           dtype=np.float32)
        t_batch = np.zeros(
            (self.batch_size, 1, 1, 1, self.max_box_per_image, 4),
            dtype=np.float32)

        # one target tensor per output scale
        anchors_per_scale = len(self.anchors) // 3
        cell_depth = 4 + 1 + len(self.labels)
        yolo_1, yolo_2, yolo_3 = [
            np.zeros((self.batch_size, factor * base_grid_h,
                      factor * base_grid_w, anchors_per_scale, cell_depth),
                     dtype=np.float32) for factor in (1, 2, 4)
        ]
        yolos = [yolo_3, yolo_2, yolo_1]

        positive_threshold = 0.3
        # write position in t_batch; it is shared by all images of the batch
        # and wraps around at max_box_per_image
        true_box_index = 0

        for instance_count, (img, ann, shape) in enumerate(
                zip(images, annotations, shapes)):
            ann = json.loads(ann)
            img = cv2.resize(img, (shape[1], shape[0]))

            # augment input image and fix object's position and size
            prepare = self._aug_image if aug else self._raw_image
            img, all_objs = prepare(img, ann, net_h, net_w)

            for obj in all_objs:
                obj_w = obj['xmax'] - obj['xmin']
                obj_h = obj['ymax'] - obj['ymin']
                mid_x = .5 * (obj['xmin'] + obj['xmax'])
                mid_y = .5 * (obj['ymin'] + obj['ymax'])

                # compare shapes only: the object is placed at the origin
                shifted_box = BoundBox(0, 0, obj_w, obj_h)

                best_anchor, best_index, best_iou = None, -1, -1
                positive_anchors = []
                for anchor_index, anchor in enumerate(self.anchors):
                    iou = bbox_iou(shifted_box, anchor)

                    if iou > best_iou:
                        best_anchor = anchor
                        best_index = anchor_index
                        best_iou = iou
                    if iou > positive_threshold:
                        positive_anchors.append([anchor_index, anchor])

                # fall back to the best anchor when none passes the threshold
                if not positive_anchors:
                    positive_anchors.append([best_index, best_anchor])

                for anchor_index, anchor in positive_anchors:
                    # the tensor responsible for this anchor
                    yolo = yolos[anchor_index // 3]
                    grid_h, grid_w = yolo.shape[1:3]

                    # position on that grid: sigma(t_x) + c_x, sigma(t_y) + c_y
                    center_x = mid_x / float(net_w) * grid_w
                    center_y = mid_y / float(net_h) * grid_h

                    # sizes relative to the anchor: t_w, t_h
                    w = np.log(obj_w / float(anchor.xmax))
                    h = np.log(obj_h / float(anchor.ymax))

                    # index of the label
                    obj_indx = self.labels.index(obj['name'])

                    # cell responsible for this object
                    grid_x = int(np.floor(center_x))
                    grid_y = int(np.floor(center_y))

                    # reset the slot, then write x, y, w, h, confidence and
                    # the class entry through a view into the tensor
                    slot = yolo[instance_count, grid_y, grid_x,
                                anchor_index % 3]
                    slot[...] = 0
                    slot[0:4] = [center_x, center_y, w, h]
                    slot[4] = 1.
                    slot[5 + obj_indx] = 1

                    # record the true box (grid-unit centre, raw width/height)
                    t_batch[instance_count, 0, 0, 0, true_box_index] = [
                        center_x, center_y, obj_w, obj_h
                    ]
                    true_box_index = (true_box_index +
                                      1) % self.max_box_per_image

            # assign input image to x_batch; with aug set and no norm
            # function the row is left as zeros
            if not aug:
                x_batch[instance_count] = img
            elif self.norm is not None:
                x_batch[instance_count] = self.norm(img)

        output = [x_batch, t_batch, yolo_1, yolo_2, yolo_3]
        if not aug:
            output += [images, annotations, shapes]
        return output