def __init__(
        self,
        instances,
        anchors,
        labels,
        downsample=32,
        max_box_per_image=30,
        batch_size=1,
        min_net_size=320,
        max_net_size=608,
        shuffle=True,
        jitter=True,
        norm=None):
    """Configure a multi-scale batch generator.

    Args:
        instances: Training instances; shuffled in place when ``shuffle``
            is true.
        anchors: Flat sequence of anchor values, consumed two at a time.
        labels: Class labels, stored as given.
        downsample: Ratio between the network input size and the network
            output size (32 for YOLOv3).
        max_box_per_image: Stored as given.
        batch_size: Stored as given.
        min_net_size: Lower input-size bound, rounded down to a multiple
            of ``downsample``.
        max_net_size: Upper input-size bound, rounded down the same way.
        shuffle: Whether to shuffle ``instances`` right away.
        jitter: Stored as given.
        norm: Stored as given.
    """

    def floor_to_stride(size):
        # Largest multiple of the stride that does not exceed `size`.
        return (size // downsample) * downsample

    self.instances = instances
    self.batch_size = batch_size
    self.labels = labels
    self.downsample = downsample
    self.max_box_per_image = max_box_per_image
    self.min_net_size = floor_to_stride(min_net_size)
    self.max_net_size = floor_to_stride(max_net_size)
    self.shuffle = shuffle
    self.jitter = jitter
    self.norm = norm

    # Consecutive value pairs become origin-anchored boxes.
    self.anchors = [
        BoundBox(0, 0, first, second)
        for first, second in zip(anchors[0::2], anchors[1::2])
    ]

    # Initial network input size.
    self.net_h = 416
    self.net_w = 416

    if shuffle:
        np.random.shuffle(self.instances)
def get_ground_truth(boxes):
    """Build the ground-truth grid tensor for a list of boxes.

    Relies on names defined elsewhere in the module: ``grid_h``, ``grid_w``,
    ``num_box``, ``num_classes``, ``image_w``, ``image_h``, ``grid_size``,
    ``anchor_boxes``, ``bbox_iou`` and ``BoundBox``.

    Args:
        boxes: Iterable of ``(x, y, w, h)`` tuples. The centre is computed
            as ``x + w / 2`` and ``y + h / 2``, so ``x``/``y`` are treated
            as the box's minimum corner.

    Returns:
        A float32 array of shape
        ``(grid_h, grid_w, num_box, 4 + 1 + num_classes)``. For each box,
        the cell containing its centre and the best-matching anchor get
        channel 0 set to 1.0, channels 1:5 set to
        ``[center_x, center_y, w, h]`` in grid units, and channel 5 set
        to 1.0.
    """
    gt = np.zeros((grid_h, grid_w, num_box, 4 + 1 + num_classes),
                  dtype=np.float32)

    for bx, by, bw, bh in boxes:
        # Box centre expressed in grid cells.
        center_x = (bx + bw / 2.) / float(image_w / grid_w)
        center_y = (by + bh / 2.) / float(image_h / grid_h)

        # Clamp to the grid: a centre lying exactly on the right/bottom
        # image edge would otherwise index one past the last cell, and a
        # negative centre would silently wrap around to the far side.
        cell_x = int(np.clip(np.floor(center_x), 0, grid_w - 1))
        cell_y = int(np.clip(np.floor(center_y), 0, grid_h - 1))

        center_w = bw / grid_size
        center_h = bh / grid_size
        box = [center_x, center_y, center_w, center_h]

        # Find the anchor that best predicts this box; only the size
        # matters, so the box is compared anchored at the origin.
        best_anchor = -1
        max_iou = -1
        shifted_box = BoundBox(0, 0, center_w, center_h)
        for index, anchor in enumerate(anchor_boxes):
            iou = bbox_iou(shifted_box, anchor)
            if max_iou < iou:
                best_anchor = index
                max_iou = iou

        # Assign confidence, x, y, w, h and the class slot.
        gt[cell_y, cell_x, best_anchor, 0] = 1.0
        gt[cell_y, cell_x, best_anchor, 1:5] = box
        gt[cell_y, cell_x, best_anchor, 5] = 1.0

    return gt
def __init__(self,
             img_files,
             config,
             batch_size,
             shuffle=False,
             jitter=True,
             norm=None):
    """Configure a generator that reads events from a list of files.

    Args:
        img_files: List of input files; its length times
            ``config['EVTS_PER_FILE']`` gives the total event count.
        config: Dict providing ``EVTS_PER_FILE``, ``LABELS``, ``GRID_W``,
            ``GRID_H`` and ``ANCHORS`` (flat sequence consumed in pairs).
        batch_size: Number of events per batch.
        shuffle: Stored as given.
        jitter: Stored as given.
        norm: Stored as given.
    """
    self.generator = None
    self.config = config
    self.filelist = img_files
    self.evts_per_file = config['EVTS_PER_FILE']
    self.batch_size = batch_size
    self.nevts = len(img_files) * self.evts_per_file

    # Number of full batches. Floor division is used instead of
    # multiplying by the float reciprocal, which can land just below an
    # integer and truncate one batch too low (49 * (1. / 49) < 1).
    self.nbatches = int(self.nevts // self.batch_size)

    self.num_classes = len(config['LABELS'])
    self.num_grid_x = config['GRID_W']
    self.num_grid_y = config['GRID_H']
    self.shuffle = shuffle
    self.jitter = jitter
    self.norm = norm

    # Consecutive ANCHORS value pairs become origin-anchored boxes.
    anchor_values = config['ANCHORS']
    self.anchors = [
        BoundBox(0, 0, anchor_values[2 * i], anchor_values[2 * i + 1])
        for i in range(len(anchor_values) // 2)
    ]
def __init__(self,
             config,
             images_dir,
             images,
             annotations,
             shuffle=True,
             jitter=False,
             norm=None):
    """Store generator settings taken from a nested configuration dict.

    Args:
        config: Dict whose ``"model"`` entry provides ``image_size``,
            ``nb_box``, ``horizontal_grids``, ``box``,
            ``input_image_width``, ``input_image_height`` and ``anchors``.
        images_dir: Stored as given.
        images: Stored as given.
        annotations: Stored as given.
        shuffle: Stored as given.
        jitter: Stored as given.
        norm: Stored as given.
    """
    self.config = config
    self.images = images
    self.images_dir = images_dir
    self.annotations = annotations
    self.shuffle = shuffle
    self.jitter = jitter
    self.norm = norm

    model_cfg = self.config["model"]

    # Height and width are both read from the single "image_size" entry.
    self.image_height = model_cfg["image_size"]
    self.image_width = model_cfg["image_size"]
    self.true_box_buffer = model_cfg["nb_box"]
    self.number_of_grids = model_cfg["horizontal_grids"]
    self.box = model_cfg["box"]
    self.input_image_width = model_cfg["input_image_width"]
    self.input_image_height = model_cfg["input_image_height"]

    # Consecutive anchor value pairs become origin-anchored boxes.
    anchor_values = model_cfg["anchors"]
    self.anchors = [
        BoundBox(0, 0, first, second)
        for first, second in zip(anchor_values[0::2], anchor_values[1::2])
    ]
def __init__(self,
             config,
             image_fps,
             annotations,
             shuffle=True,
             jitter=True,
             norm=None):
    """Create a generator supplying training/validation data in batches.

    Args:
        config: Dictionary of constants; ``config['ANCHORS']`` is read
            here as a flat sequence consumed in pairs.
        image_fps: List of file paths of the images; shuffled in place
            when ``shuffle`` is true.
        annotations: Labels for each image (per the original author,
            presumably a dict indexed by file path; not verifiable here).
        shuffle: Whether to shuffle ``image_fps`` immediately.
        jitter: Stored as given.
        norm: Stored as given.
    """
    self.generator = None
    self.image_fps = image_fps
    self.config = config
    self.shuffle = shuffle
    self.jitter = jitter
    self.norm = norm
    self.ORIG_SIZE = 1024
    self.image_annotations = annotations

    # Consecutive ANCHORS value pairs become origin-anchored boxes.
    anchor_values = config['ANCHORS']
    self.anchors = [
        BoundBox(0, 0, first, second)
        for first, second in zip(anchor_values[0::2], anchor_values[1::2])
    ]

    if shuffle:
        np.random.shuffle(self.image_fps)
def to_bboxes(bboxes):
    """Convert ``(x, y, w, h)`` tuples into ``BoundBox`` objects.

    Each box is passed to ``BoundBox`` as ``(x, y, x + w, y + h)``.

    Args:
        bboxes: Iterable of 4-element ``(x, y, w, h)`` sequences.

    Returns:
        List of ``BoundBox`` instances, in input order.
    """
    from utils import BoundBox

    return [BoundBox(x, y, x + w, y + h) for x, y, w, h in bboxes]
def decode_netout(self, netout, obj_threshold=0.3, nms_threshold=0.3):
    """Decode a raw network output grid into a list of boxes.

    The input array is left untouched; all activations and suppression
    are applied to an internal copy.

    Args:
        netout: Array whose first three axes are
            ``(grid_h, grid_w, nb_box)`` and whose last axis holds
            ``x, y, w, h, confidence, class scores...``.
        obj_threshold: Class scores at or below this value are zeroed,
            and boxes whose ``get_score()`` does not exceed it are dropped.
        nms_threshold: IoU at or above which the lower-scored box of a
            pair has its score for the current class zeroed.

    Returns:
        List of ``BoundBox`` objects with x, y, w, h expressed as a
        fraction of the image width/height.
    """
    # Copy first: the steps below write in place, and mutating the
    # caller's array makes a second decode of the same output wrong.
    netout = netout.copy()
    grid_h, grid_w, nb_box = netout.shape[:3]

    # Decode the output of the network: objectness, then class scores
    # scaled by objectness, then thresholding.
    netout[..., 4] = self.sigmoid(netout[..., 4])
    netout[..., 5:] = netout[..., 4][..., np.newaxis] * self.softmax(
        netout[..., 5:])
    netout[..., 5:] *= netout[..., 5:] > obj_threshold

    boxes = []
    for row in range(grid_h):
        for col in range(grid_w):
            for b in range(nb_box):
                # From the 5th element onwards are the class scores.
                classes = netout[row, col, b, 5:]
                if np.sum(classes) > 0:
                    # First 4 elements are x, y, w and h.
                    x, y, w, h = netout[row, col, b, :4]
                    # Centre position, unit: image width / height.
                    x = (col + self.sigmoid(x)) / grid_w
                    y = (row + self.sigmoid(y)) / grid_h
                    # Size, unit: image width / height. `self.anchors`
                    # is indexed as a flat list of pairs here.
                    w = self.anchors[2 * b + 0] * np.exp(w) / grid_w
                    h = self.anchors[2 * b + 1] * np.exp(h) / grid_h
                    confidence = netout[row, col, b, 4]
                    boxes.append(BoundBox(x, y, w, h, confidence, classes))

    # Suppress non-maximal boxes, one class at a time.
    for c in range(self.nb_class):
        ranked = np.argsort([box.classes[c] for box in boxes])[::-1]
        for position, index_i in enumerate(ranked):
            if boxes[index_i].classes[c] == 0:
                continue
            for index_j in ranked[position + 1:]:
                if self.bbox_iou(boxes[index_i],
                                 boxes[index_j]) >= nms_threshold:
                    boxes[index_j].classes[c] = 0

    # Remove the boxes which are less likely than obj_threshold.
    return [box for box in boxes if box.get_score() > obj_threshold]
def __init__(self, images, config, shuffle=True, jitter=True, norm=None):
    """Store settings, build anchors and the imgaug pipeline.

    Args:
        images: Training images; shuffled in place when ``shuffle`` is
            true.
        config: Dict; ``config['ANCHORS']`` is a flat sequence consumed
            in pairs.
        shuffle: Whether to shuffle ``images`` immediately.
        jitter: Stored as given.
        norm: Stored as given.
    """
    self.generator = None
    self.images = images
    self.config = config
    self.shuffle = shuffle
    self.jitter = jitter
    self.norm = norm

    # Consecutive ANCHORS value pairs become origin-anchored boxes.
    anchor_values = config['ANCHORS']
    self.anchors = [
        BoundBox(0, 0, first, second)
        for first, second in zip(anchor_values[0::2], anchor_values[1::2])
    ]

    # Candidate augmenters; 0 to 5 of them are applied, in random order.
    blur = iaa.OneOf([
        iaa.GaussianBlur((0, 3.0)),  # sigma between 0 and 3.0
        iaa.AverageBlur(k=(2, 7)),  # kernel size between 2 and 7
        iaa.MedianBlur(k=(3, 11)),  # kernel size between 3 and 11
    ])
    sharpen = iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5))
    gaussian_noise = iaa.AdditiveGaussianNoise(
        loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5)
    pixel_dropout = iaa.OneOf([
        iaa.Dropout((0.01, 0.1), per_channel=0.5),
    ])
    brightness_offset = iaa.Add((-10, 10), per_channel=0.5)
    brightness_gain = iaa.Multiply((0.5, 1.5), per_channel=0.5)
    contrast = iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5)

    optional_steps = iaa.SomeOf(
        (0, 5),
        [
            blur,
            sharpen,
            gaussian_noise,
            pixel_dropout,
            brightness_offset,
            brightness_gain,
            contrast,
        ],
        random_order=True)
    self.aug_pipe = iaa.Sequential([optional_steps], random_order=True)

    if shuffle:
        np.random.shuffle(self.images)
def _main_(args):
    """Detect persons in every image sequence and write one text file each.

    For each folder under the hard-coded data directory, every ``*jpg``
    image is run through darknet. Detections labelled ``'person'`` with a
    score above 0.88 are written as comma-separated lines
    ``frame,-1,x1,y1,w,h,score,-1,-1,-1`` to
    ``det_mot(before_ft)/<folder name>.txt``.

    Args:
        args: Unused.
    """
    # --- Prepare data to be detected ---
    # Alternative data set: "/home/peng/data/good_rolo_data/"
    data_folder = "/home/peng/data/sort_data/images/"
    video_folders_list = sorted(glob.glob(data_folder + '*'))
    sort_nicely(video_folders_list)

    # --- Make the model and load trained weights ---
    dn.set_gpu(0)
    # Original YOLOv3 weights. For the aerial model use
    # "yolov3-aerial.weights" together with "cfg/voc.data" instead.
    net = dn.load_net("cfg/yolov3.cfg", "yolov3.weights", 0)
    meta = dn.load_meta("cfg/coco.data")

    # --- Predict bounding boxes ---
    for video_folder in video_folders_list:
        video_name = basename(video_folder)
        print("Processing %s." % video_name)

        image_paths = sorted(glob.glob(os.path.join(video_folder, '*jpg')))
        sort_nicely(image_paths)

        # Remember to modify the following output path.
        output_path = 'det_mot(before_ft)/' + video_name + '.txt'
        with open(output_path, 'w') as out_file:
            for frame_number, image_path in enumerate(tqdm(image_paths),
                                                      start=1):
                results = dn.detect(net, meta, image_path,
                                    thresh=0.45, nms=0.5)
                for r in results:
                    if not (r[0] == 'person' and r[1] > 0.88):
                        continue
                    box = BoundBox(r[2][0], r[2][1], r[2][2], r[2][3],
                                   r[1], r[0])
                    # Centre/size converted to the minimum corner.
                    x1 = (box.x - box.w / 2)
                    y1 = (box.y - box.h / 2)
                    print('%d,-1,%.2f,%.2f,%.2f,%.2f,%.6f,-1,-1,-1' %
                          (frame_number, x1, y1, box.w, box.h, box.c),
                          file=out_file)
def __init__(self, images, config, shuffle=True, augment=True, norm=None):
    """Store generator settings and build the anchor boxes.

    Args:
        images: Stored as given.
        config: Dict; ``config['ANCHORS']`` is a flat sequence consumed
            in pairs.
        shuffle: Stored as given.
        augment: Stored on the instance as ``jitter``.
        norm: Stored as given.
    """
    self.images = images
    self.config = config
    self.shuffle = shuffle
    self.norm = norm
    self.jitter = augment

    # Consecutive ANCHORS value pairs become origin-anchored boxes.
    anchor_values = config['ANCHORS']
    self.anchors = [
        BoundBox(0, 0, first, second)
        for first, second in zip(anchor_values[0::2], anchor_values[1::2])
    ]

    self.idx = 0
def to_bboxes(annos):
    """Convert annotation dicts into ``BoundBox`` objects with one-hot classes.

    Uses the module-level ``num_classes`` and ``catId2idx`` to build the
    class vector.

    Args:
        annos: Iterable of dicts with a ``'category_id'`` and a ``'bbox'``
            entry, the latter holding ``(x, y, w, h)``.

    Returns:
        List of ``BoundBox(x, y, x + w, y + h, 1.0, classes)`` in input
        order.
    """
    from utils import BoundBox

    def one_hot(category_id):
        # float32 vector with a single 1.0 at the category's index.
        vector = np.zeros((num_classes, ), np.float32)
        vector[catId2idx[category_id]] = 1.0
        return vector

    converted = []
    for anno in annos:
        class_vector = one_hot(anno['category_id'])
        x, y, w, h = anno['bbox']
        converted.append(BoundBox(x, y, x + w, y + h, 1.0, class_vector))
    return converted
def __init__(self, images, config, shuffle=True, jitter=True, norm=None):
    """Store settings, build anchors and the imgaug pipeline.

    Args:
        images: Dict-like; its ``'images_with_annotations'`` entry is
            shuffled in place when ``shuffle`` is true.
        config: Dict; ``config['ANCHORS']`` is a flat sequence consumed
            in pairs.
        shuffle: Whether to shuffle immediately.
        jitter: Stored as given.
        norm: Stored as given.
    """
    self.generator = None
    self.images = images
    self.config = config
    self.shuffle = shuffle
    self.jitter = jitter
    self.norm = norm

    # Consecutive ANCHORS value pairs become origin-anchored boxes.
    anchor_values = config['ANCHORS']
    self.anchors = [
        BoundBox(0, 0, first, second)
        for first, second in zip(anchor_values[0::2], anchor_values[1::2])
    ]

    def sometimes(augmenter):
        # The wrapper probability is 1.0 in this pipeline.
        return iaa.Sometimes(1., augmenter)

    # Candidate augmenters; 0 to 5 of them are applied, in random order.
    blur = iaa.OneOf([
        iaa.GaussianBlur((0, 2.0)),
        iaa.AverageBlur(k=(2, 5)),
        iaa.MedianBlur(k=(1, 7)),
    ])
    sharpen = iaa.Sharpen(alpha=(0, 0.5), lightness=(0.75, 1.5))
    edges = sometimes(
        iaa.OneOf([
            iaa.EdgeDetect(alpha=(0, 0.5)),
            iaa.DirectedEdgeDetect(alpha=(0, 0.5), direction=(0.0, 1.0)),
        ]))
    gaussian_noise = iaa.AdditiveGaussianNoise(
        loc=0, scale=(0.0, 0.005 * 255), per_channel=0.5)
    brightness_offset = iaa.Add((-10, 10), per_channel=0.5)
    brightness_gain = iaa.Multiply((0.8, 1.2), per_channel=0.5)
    contrast = iaa.ContrastNormalization((0.5, 1.5), per_channel=0.5)
    grayscale = iaa.Grayscale(alpha=(0.0, 0.5))
    elastic = sometimes(
        iaa.ElasticTransformation(alpha=(0.5, 3.5), sigma=0.25))

    optional_steps = iaa.SomeOf(
        (0, 5),
        [
            blur,
            sharpen,
            edges,
            gaussian_noise,
            brightness_offset,
            brightness_gain,
            contrast,
            grayscale,
            elastic,
        ],
        random_order=True)
    self.aug_pipe = iaa.Sequential([optional_steps], random_order=True)

    if shuffle:
        np.random.shuffle(self.images['images_with_annotations'])
def __init__(self, images, config, shuffle=True, augment=True, norm=None):
    """Build the anchor boxes, then delegate to the parent constructor.

    Args:
        images: Forwarded to the parent constructor.
        config: Dict; ``config['ANCHORS']`` is a flat sequence consumed
            in pairs. Also forwarded to the parent constructor.
        shuffle: Forwarded to the parent constructor.
        augment: Forwarded to the parent constructor.
        norm: Forwarded to the parent constructor.
    """
    anchor_values = config['ANCHORS']
    anchor_boxes = []
    for pair_index in range(int(len(anchor_values) // 2)):
        first = anchor_values[2 * pair_index]
        second = anchor_values[2 * pair_index + 1]
        anchor_boxes.append(BoundBox(0, 0, first, second))
    # Anchors are set before the parent constructor runs.
    self.anchors = anchor_boxes

    super(BatchGenerator, self).__init__(
        images, config, shuffle=shuffle, augment=augment, norm=norm)
def __init__(self,
             images,
             config,
             shuffle=True,
             jitter=True,
             norm=None,
             flipflop=True,
             shoechanger=True,
             zeropad=True):
    """Store settings, preload 'badshoes' images and build anchors.

    Args:
        images: Training images; shuffled in place when ``shuffle`` is
            true.
        config: Dict; ``config['ANCHORS']`` is a flat sequence consumed
            in pairs.
        shuffle: Whether to shuffle ``images`` immediately.
        jitter: Stored as given.
        norm: Stored as given.
        flipflop: Stored; when it or ``shoechanger`` is truthy, every
            file in ``imgs/more_badshoes`` is read with ``cv2.imread``.
        shoechanger: Stored; see ``flipflop``.
        zeropad: Stored as given.
    """
    self.generator = None
    self.flipflop = flipflop
    self.shoechanger = shoechanger

    if self.flipflop or self.shoechanger:
        # Load every file found in the directory, in listing order.
        self.badshoes = [
            cv2.imread('imgs/more_badshoes/' + file_name)
            for file_name in os.listdir('imgs/more_badshoes')
        ]

    self.zeropad = zeropad
    self.images = images
    self.config = config
    self.shuffle = shuffle
    self.jitter = jitter
    self.norm = norm

    # Consecutive ANCHORS value pairs become origin-anchored boxes.
    anchor_values = config['ANCHORS']
    self.anchors = [
        BoundBox(0, 0, first, second)
        for first, second in zip(anchor_values[0::2], anchor_values[1::2])
    ]

    # Class index <-> class name lookup tables.
    self.labels_to_names = {
        0: 'goodhelmet',
        1: 'LP',
        2: 'goodshoes',
        3: 'badshoes',
        4: 'badhelmet',
        5: 'person',
    }
    self.names_to_labels = {
        name: label for label, name in self.labels_to_names.items()
    }

    if shuffle:
        np.random.shuffle(self.images)
def __init__(self, images, config, shuffle=True, augment=True, norm=None):
    """Store settings, build anchors and the imgaug pipeline.

    Args:
        images: Training images; shuffled in place when ``shuffle`` is
            true.
        config: Dict; ``config['ANCHORS']`` is a flat sequence consumed
            in pairs.
        shuffle: Whether to shuffle ``images`` immediately.
        augment: Stored as given.
        norm: Stored as given.
    """
    self.generator = None
    self.images = images
    self.config = config
    self.shuffle = shuffle
    self.augment = augment
    self.norm = norm
    self.counter = 0

    # Consecutive ANCHORS value pairs become origin-anchored boxes.
    anchor_values = config['ANCHORS']
    self.anchors = [
        BoundBox(0, 0, first, second)
        for first, second in zip(anchor_values[0::2], anchor_values[1::2])
    ]

    def sometimes(augmenter):
        # Wrap an augmenter with probability 0.5.
        return iaa.Sometimes(0.5, augmenter)

    # Affine is constructed with default arguments only.
    affine = sometimes(iaa.Affine())

    # Candidate augmenters; 0 to 4 of them are applied, in random order.
    blur = iaa.GaussianBlur((0, 2.0))  # sigma between 0 and 2.0
    gaussian_noise = iaa.AdditiveGaussianNoise(
        loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5)
    pixel_dropout = iaa.Dropout((0.01, 0.1), per_channel=0.5)
    brightness_offset = iaa.Add((-10, 10), per_channel=0.5)
    brightness_gain = iaa.Multiply((0.8, 1.2), per_channel=0.5)
    contrast = iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5)

    optional_steps = iaa.SomeOf(
        (0, 4),
        [
            blur,
            gaussian_noise,
            pixel_dropout,
            brightness_offset,
            brightness_gain,
            contrast,
        ],
        random_order=True)
    self.aug_pipe = iaa.Sequential([affine, optional_steps],
                                   random_order=True)

    if shuffle:
        np.random.shuffle(self.images)
def create_bboxes(self, annotations):
    """Rescale annotations to the network input size and box them.

    Each annotation's ``x``/``width`` is multiplied by
    ``IMAGE_W / ORIG_SIZE`` and its ``y``/``height`` by
    ``IMAGE_H / ORIG_SIZE``. The previous code applied the two factors
    to ``width`` and ``height`` the wrong way round, which distorts
    boxes whenever ``IMAGE_W != IMAGE_H``.

    Args:
        annotations: Iterable of objects with mutable ``x``, ``y``,
            ``width`` and ``height`` attributes.

    Returns:
        List of ``BoundBox(x, y, x + width, y + height)`` built from the
        rescaled values, in input order.
    """
    scale_x = self.config['IMAGE_W'] / self.config['ORIG_SIZE']
    scale_y = self.config['IMAGE_H'] / self.config['ORIG_SIZE']

    bboxes = []
    for annotation in annotations:
        # NOTE(review): the annotations are rescaled in place, as before,
        # so passing the same objects twice scales them twice. Confirm
        # that callers always hand in freshly loaded annotations.
        annotation.x *= scale_x
        annotation.y *= scale_y
        annotation.width *= scale_x
        annotation.height *= scale_y
        bboxes.append(
            BoundBox(annotation.x, annotation.y,
                     annotation.x + annotation.width,
                     annotation.y + annotation.height))
    return bboxes
def __init__(self, images, config, shuffle=True, jitter=True, norm=None):
    """Store settings, build anchors and the imgaug pipeline.

    Args:
        images: Training images; shuffled in place when ``shuffle`` is
            true.
        config: Dict; ``config['ANCHORS']`` is a flat sequence consumed
            in pairs.
        shuffle: Whether to shuffle ``images`` immediately.
        jitter: Stored as given.
        norm: Stored as given.
    """
    self.generator = None
    self.images = images
    self.config = config
    self.shuffle = shuffle
    self.jitter = jitter
    self.norm = norm

    # Consecutive ANCHORS value pairs become origin-anchored boxes.
    anchor_values = config['ANCHORS']
    self.anchors = [
        BoundBox(0, 0, first, second)
        for first, second in zip(anchor_values[0::2], anchor_values[1::2])
    ]

    # Augmenters by https://github.com/aleju/imgaug
    def sometimes(augmenter):
        # Wrap an augmenter with probability 0.5.
        return iaa.Sometimes(0.5, augmenter)

    # Candidate augmenters; 0 to 5 of them are applied, in random order.
    edges = sometimes(iaa.OneOf([
        iaa.EdgeDetect(alpha=(0, 0.7)),
    ]))
    brightness_offset = iaa.Add((-10, 10), per_channel=0.5)
    brightness_gain = iaa.Multiply((0.5, 1.5), per_channel=0.5)
    contrast = iaa.ContrastNormalization((0.5, 1.0), per_channel=0.5)

    optional_steps = iaa.SomeOf(
        (0, 5),
        [edges, brightness_offset, brightness_gain, contrast],
        random_order=True)
    self.aug_pipe = iaa.Sequential([optional_steps], random_order=True)

    if shuffle:
        np.random.shuffle(self.images)
def __init__(self, images, config, shuffle=True, norm=None):
    """Store generator settings and build the anchor boxes.

    Args:
        images: Training images; shuffled in place when ``shuffle`` is
            true.
        config: Dict; ``config['ANCHORS']`` is a flat sequence consumed
            in pairs.
        shuffle: Whether to shuffle ``images`` immediately.
        norm: Stored as given.
    """
    self.generator = None
    self.images = images
    self.config = config
    self.shuffle = shuffle
    self.norm = norm

    # Consecutive ANCHORS value pairs become origin-anchored boxes.
    anchor_values = config['ANCHORS']
    self.anchors = [
        BoundBox(0, 0, first, second)
        for first, second in zip(anchor_values[0::2], anchor_values[1::2])
    ]

    if shuffle:
        np.random.shuffle(self.images)
def __init__(self, images, config, jitter=True, norm=None):
    """Store settings, build anchors and the imgaug pipeline.

    Args:
        images: Stored as given (no shuffling happens here).
        config: Dict; ``config['ANCHORS']`` is a flat sequence consumed
            in pairs.
        jitter: Stored as given.
        norm: Stored as given.
    """
    self.images = images
    self.config = config
    self.jitter = jitter
    self.norm = norm

    # Consecutive ANCHORS value pairs become origin-anchored boxes.
    anchor_values = config['ANCHORS']
    self.anchors = [
        BoundBox(0, 0, first, second)
        for first, second in zip(anchor_values[0::2], anchor_values[1::2])
    ]

    # Augmenters by https://github.com/aleju/imgaug
    # Candidate augmenters; 0 to 5 of them are applied, in random order.
    blur = iaa.OneOf([
        iaa.GaussianBlur((0, 1.0)),  # sigma between 0 and 1.0
        iaa.AverageBlur(k=(3, 5)),  # kernel size between 3 and 5
        iaa.MedianBlur(k=(3, 5)),  # kernel size between 3 and 5
    ])
    sharpen = iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5))
    gaussian_noise = iaa.AdditiveGaussianNoise(
        loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5)
    dropout = iaa.OneOf([
        iaa.Dropout((0.01, 0.1), per_channel=0.5),
        iaa.CoarseDropout((0.03, 0.15), size_percent=(0.02, 0.05),
                          per_channel=0.2),
    ])
    brightness_offset = iaa.Add((-10, 10), per_channel=0.5)
    brightness_gain = iaa.Multiply((0.8, 1.2), per_channel=0.5)
    contrast = iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5)

    optional_steps = iaa.SomeOf(
        (0, 5),
        [
            blur,
            sharpen,
            gaussian_noise,
            dropout,
            brightness_offset,
            brightness_gain,
            contrast,
        ],
        random_order=True)
    self.aug_pipe = iaa.Sequential([optional_steps], random_order=True)
def get_ground_truth(image_path):
    """Load the annotated box for an image from its XML annotation file.

    The annotation is read from
    ``robot-dataset/annotations/<image file name without extension>.xml``.
    Only the first ``<xmin>``, ``<xmax>``, ``<ymin>`` and ``<ymax>`` tags
    in the file are used.

    Side effect: the parsed values are stored in the module-level globals
    ``xmin``, ``xmax``, ``ymin`` and ``ymax``.

    Args:
        image_path: Path of the image. Both ``\\`` and ``/`` separators
            are accepted (previously only ``\\`` was recognised, so POSIX
            paths resolved to a wrong annotation file).

    Returns:
        ``BoundBox`` holding centre x, centre y, width and height, each
        divided by 416, with confidence 1.0 and class vector
        ``[0.0, 1.0]``.
    """
    global xmin, xmax, ymin, ymax

    # Strip directories written with either separator, then the extension.
    file_name = image_path.replace('\\', '/').rsplit('/', 1)[-1]
    stem = os.path.splitext(file_name)[0]
    annot_name = os.path.join('robot-dataset', 'annotations', stem + '.xml')

    with open(annot_name) as f:
        annot = f.read()

    def read_tag(tag):
        # Text between the first <tag> and the first </tag>, as a float.
        opening = '<' + tag + '>'
        closing = '</' + tag + '>'
        return float(annot[annot.find(opening) + len(opening):
                           annot.find(closing)])

    xmin = read_tag('xmin')
    xmax = read_tag('xmax')
    ymin = read_tag('ymin')
    ymax = read_tag('ymax')

    # Centre and size divided by the hard-coded 416.
    x = np.average((xmin, xmax)) / 416
    y = np.average((ymin, ymax)) / 416
    w = (xmax - xmin) / 416
    h = (ymax - ymin) / 416

    box = BoundBox(x, y, w, h, 1.0, np.array((0.0, 1.0)))
    return box
def __init__(
        self,
        instances,
        anchors,
        labels,
        downsample=32,
        max_box_per_image=30,
        batch_size=1,
        min_net_size=320,
        max_net_size=608,
        shuffle=True,
        jitter=True,
        norm=None):
    """Configure a multi-scale batch generator with fixed label tables.

    Args:
        instances: Training instances; shuffled in place when ``shuffle``
            is true.
        anchors: Flat sequence of anchor values, consumed two at a time.
        labels: Class labels, stored as given.
        downsample: Ratio between the network input size and the network
            output size (32 for YOLOv3).
        max_box_per_image: Stored as given.
        batch_size: Accepted but not used; the stored batch size is
            always 1.
        min_net_size: Lower input-size bound, rounded down to a multiple
            of ``downsample``.
        max_net_size: Upper input-size bound, rounded down the same way.
        shuffle: Whether to shuffle ``instances`` right away.
        jitter: Stored as given.
        norm: Stored as given.
    """

    def floor_to_stride(size):
        # Largest multiple of the stride that does not exceed `size`.
        return (size // downsample) * downsample

    self.instances = instances
    # NOTE(review): the `batch_size` argument is ignored here; confirm
    # whether forcing 1 is intentional.
    self.batch_size = 1
    self.labels = labels
    self.downsample = downsample
    self.max_box_per_image = max_box_per_image
    self.min_net_size = floor_to_stride(min_net_size)
    self.max_net_size = floor_to_stride(max_net_size)
    self.shuffle = shuffle
    self.jitter = jitter
    self.norm = norm

    # Consecutive value pairs become origin-anchored boxes.
    self.anchors = [
        BoundBox(0, 0, first, second)
        for first, second in zip(anchors[0::2], anchors[1::2])
    ]

    # Class index <-> class name lookup tables.
    self.labels_to_names = {
        0: 'goodhelmet',
        1: 'LP',
        2: 'goodshoes',
        3: 'badshoes',
        4: 'badhelmet',
        5: 'person',
    }
    self.names_to_labels = {
        name: label for label, name in self.labels_to_names.items()
    }

    if shuffle:
        np.random.shuffle(self.instances)
def get_ground_truth(coco, imgId):
    """Build the ground-truth grid tensor for one COCO image.

    Relies on names defined elsewhere in the module: ``grid_h``, ``grid_w``,
    ``num_box``, ``num_classes``, ``image_w``, ``image_h``, ``grid_size``,
    ``anchor_boxes``, ``catId2idx``, ``bbox_iou`` and ``BoundBox``.

    Args:
        coco: Object exposing ``getAnnIds(imgIds=...)`` and
            ``loadAnns(ids=...)``.
        imgId: Id of the image whose annotations are encoded.

    Returns:
        A float32 array of shape
        ``(grid_h, grid_w, num_box, 4 + 1 + num_classes)``. For each
        annotation, the (clipped) cell containing its centre and the
        best-matching anchor get channel 0 set to 1.0, channels 1:5 set
        to ``[center_x, center_y, w, h]`` in grid units, and the class
        channel ``5 + catId2idx[category_id]`` set to 1.0.
    """
    gt = np.zeros((grid_h, grid_w, num_box, 4 + 1 + num_classes),
                  dtype=np.float32)

    annotations = coco.loadAnns(ids=coco.getAnnIds(imgIds=[imgId]))
    for anno in annotations:
        category_id = anno['category_id']
        bx, by, bw, bh = anno['bbox']

        # Scale the stored bbox values by the image size.
        bx = 1.0 * bx * image_w
        by = 1.0 * by * image_h
        bw = 1.0 * bw * image_w
        bh = 1.0 * bh * image_h

        # Box centre and size in grid cells.
        center_x = (bx + bw / 2.) / grid_size
        center_y = (by + bh / 2.) / grid_size
        center_w = bw / grid_size
        center_h = bh / grid_size

        # Responsible cell, clamped to the grid.
        cell_x = int(np.clip(np.floor(center_x), 0.0, (grid_w - 1)))
        cell_y = int(np.clip(np.floor(center_y), 0.0, (grid_h - 1)))

        # Pick the anchor with the highest IoU against the origin-anchored
        # box; the first one wins on ties.
        best_anchor = -1
        max_iou = -1
        shifted_box = BoundBox(0, 0, center_w, center_h)
        for anchor_index, anchor in enumerate(anchor_boxes):
            iou = bbox_iou(shifted_box, anchor)
            if iou > max_iou:
                best_anchor = anchor_index
                max_iou = iou

        # Confidence, box and one-hot class.
        target = gt[cell_y, cell_x, best_anchor]
        target[0] = 1.0
        target[1:5] = [center_x, center_y, center_w, center_h]
        target[5 + catId2idx[category_id]] = 1.0

    return gt
def results2dets(results, image_shape):
    """Convert YOLO results to ``[x1, y1, x2, y2, confidence]`` rows.

    Only results labelled ``'person'`` with a score above 0.87 are kept.

    Args:
        results: Detected results of YOLO; each item is indexed as
            ``(label, score, (x, y, w, h))``.
        image_shape: Shape of the image (unused).

    Returns:
        2-D array of detected boxes, shape ``(#detected_box, 5)``; an
        empty 1-D array when nothing qualifies.
    """
    person_rows = []
    for r in results:
        label, score = r[0], r[1]
        # Skip anything that is not a sufficiently confident person.
        if label != 'person' or not score > 0.87:
            continue
        box = BoundBox(r[2][0], r[2][1], r[2][2], r[2][3], score, label)
        half_w = box.w / 2
        half_h = box.h / 2
        person_rows.append([
            box.x - half_w,
            box.y - half_h,
            box.x + half_w,
            box.y + half_h,
            box.c,
        ])
    return np.array(person_rows)
def __init__(self, config):
    """Configure the batch builder from a nested configuration dict.

    Args:
        config: Dict with a ``'train'`` section (``batch_size``,
            ``max_box_per_image``) and a ``'model'`` section (``labels``,
            ``min_input_size``, ``max_input_size``, ``anchors``,
            ``input_size``).
    """
    self.config = config
    train_cfg = config['train']
    model_cfg = config['model']

    self.batch_size = train_cfg['batch_size']
    self.labels = model_cfg['labels']
    self.down_sample = 32
    self.max_box_per_image = train_cfg['max_box_per_image']

    # Size bounds rounded down to a multiple of the down-sampling factor.
    stride = self.down_sample
    self.min_net_size = (model_cfg['min_input_size'] // stride) * stride
    self.max_net_size = (model_cfg['max_input_size'] // stride) * stride

    self.jitter = 0.3
    # Normalisation divides its input by 255.0.
    self.norm = lambda t: t / 255.0

    # Consecutive anchor value pairs become origin-anchored boxes.
    anchor_values = model_cfg['anchors']
    self.anchors = [
        BoundBox(0, 0, first, second)
        for first, second in zip(anchor_values[0::2], anchor_values[1::2])
    ]

    self.net_h = model_cfg['input_size']
    self.net_w = model_cfg['input_size']
    self.idx = 0
def __init__(self, images, config, shuffle=True, jitter=True, norm=None):
    """Store settings, build anchors and the imgaug pipeline.

    Args:
        images: Training images; shuffled in place when ``shuffle`` is
            true.
        config: Dict; ``config['ANCHORS']`` is a flat sequence consumed
            in pairs.
        shuffle: Whether to shuffle ``images`` immediately.
        jitter: Stored as given.
        norm: Stored as given.
    """
    self.generator = None
    self.images = images
    self.config = config
    self.shuffle = shuffle
    self.jitter = jitter
    self.norm = norm

    # Consecutive ANCHORS value pairs become origin-anchored boxes.
    anchor_values = config['ANCHORS']
    self.anchors = [
        BoundBox(0, 0, first, second)
        for first, second in zip(anchor_values[0::2], anchor_values[1::2])
    ]

    def sometimes(augmenter):
        # Wrap an augmenter with probability 0.5.
        return iaa.Sometimes(0.5, augmenter)

    # Affine is constructed with default arguments only.
    affine = sometimes(iaa.Affine())

    # Candidate augmenters; 0 to 5 of them are applied, in random order.
    blur = iaa.OneOf([
        iaa.GaussianBlur((0, 3.0)),
        iaa.AverageBlur(k=(2, 7)),
        iaa.MedianBlur(k=(3, 11)),
    ])
    sharpen = iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5))
    gaussian_noise = iaa.AdditiveGaussianNoise(
        loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5)
    pixel_dropout = iaa.OneOf([
        iaa.Dropout((0.01, 0.1), per_channel=0.5),
    ])
    brightness_offset = iaa.Add((-10, 10), per_channel=0.5)
    brightness_gain = iaa.Multiply((0.5, 1.5), per_channel=0.5)
    contrast = iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5)

    optional_steps = iaa.SomeOf(
        (0, 5),
        [
            blur,
            sharpen,
            gaussian_noise,
            pixel_dropout,
            brightness_offset,
            brightness_gain,
            contrast,
        ],
        random_order=True)
    self.aug_pipe = iaa.Sequential([affine, optional_steps],
                                   random_order=True)

    if shuffle:
        np.random.shuffle(self.images)
def __init__(self, images, config, shuffle=True, jitter=True, norm=None):
    """Store settings, build anchors and the imgaug pipeline.

    Args:
        images: Training images; shuffled in place when ``shuffle`` is
            true.
        config: Dict; ``config['ANCHORS']`` is a flat sequence consumed
            in pairs.
        shuffle: Whether to shuffle ``images`` immediately.
        jitter: Stored as given.
        norm: Stored as given.
    """
    self.generator = None
    self.images = images
    self.config = config
    self.shuffle = shuffle
    self.jitter = jitter
    self.norm = norm

    # Consecutive ANCHORS value pairs become origin-anchored boxes.
    anchor_values = config['ANCHORS']
    self.anchors = [
        BoundBox(0, 0, first, second)
        for first, second in zip(anchor_values[0::2], anchor_values[1::2])
    ]

    # Augmenters by https://github.com/aleju/imgaug
    def sometimes(augmenter):
        # Wrap an augmenter with probability 0.5.
        return iaa.Sometimes(0.5, augmenter)

    # Affine is constructed with default arguments only. Flips, crops and
    # the scale/translate/rotate/shear options are intentionally left
    # disabled, as are superpixels, emboss, edge detection, coarse
    # dropout, invert, grayscale, elastic and piecewise-affine steps.
    affine = sometimes(iaa.Affine())

    # Candidate augmenters; 0 to 5 of them are applied, in random order.
    blur = iaa.OneOf([
        iaa.GaussianBlur((0, 3.0)),  # sigma between 0 and 3.0
        iaa.AverageBlur(k=(2, 7)),  # kernel size between 2 and 7
        iaa.MedianBlur(k=(3, 11)),  # kernel size between 3 and 11
    ])
    sharpen = iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5))
    gaussian_noise = iaa.AdditiveGaussianNoise(
        loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5)
    pixel_dropout = iaa.OneOf([
        iaa.Dropout((0.01, 0.1), per_channel=0.5),
    ])
    brightness_offset = iaa.Add((-10, 10), per_channel=0.5)
    brightness_gain = iaa.Multiply((0.5, 1.5), per_channel=0.5)
    contrast = iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5)

    optional_steps = iaa.SomeOf(
        (0, 5),
        [
            blur,
            sharpen,
            gaussian_noise,
            pixel_dropout,
            brightness_offset,
            brightness_gain,
            contrast,
        ],
        random_order=True)
    self.aug_pipe = iaa.Sequential([affine, optional_steps],
                                   random_order=True)

    if shuffle:
        np.random.shuffle(self.images)
def __getitem__(self, idx):
    """Assemble batch ``idx``: input images, true boxes and targets.

    The target layout per anchor is ``x, y, w, h, confidence``, then
    ``CLASS`` one-hot class slots, then the three pose values
    ``pose_x, pose_y, pose_z``. The pose used to be written to ``6:``,
    which only fits when ``CLASS == 1``; it is now placed right after the
    class slots, which is the same position for ``CLASS == 1``.

    Args:
        idx: Index of the batch.

    Returns:
        ``([x_batch, b_batch], y_batch)`` where ``x_batch`` holds the
        images, ``b_batch`` the buffer of true boxes and ``y_batch`` the
        desired network output.
    """
    cfg = self.config
    batch_size = cfg['BATCH_SIZE']

    l_bound = idx * batch_size
    r_bound = (idx + 1) * batch_size
    if r_bound > len(self.images):
        # Last batch: slide the window back so it ends on the last image.
        # NOTE(review): l_bound becomes negative when there are fewer
        # images than BATCH_SIZE; the arrays then keep BATCH_SIZE rows
        # and the unused rows stay zero.
        r_bound = len(self.images)
        l_bound = r_bound - batch_size

    n_rows = r_bound - l_bound
    n_classes = cfg['CLASS']
    pose_start = 4 + 1 + n_classes

    # Size of one grid cell in pixels.
    cell_w = float(cfg['IMAGE_W']) / cfg['GRID_W']
    cell_h = float(cfg['IMAGE_H']) / cfg['GRID_H']

    # Input images.
    x_batch = np.zeros((n_rows, cfg['IMAGE_H'], cfg['IMAGE_W'], 3))
    # List of TRUE_BOX_BUFFER ground-truth boxes.
    b_batch = np.zeros((n_rows, 1, 1, 1, cfg['TRUE_BOX_BUFFER'], 4))
    # Desired network output.
    y_batch = np.zeros((n_rows, cfg['GRID_H'], cfg['GRID_W'], cfg['BOX'],
                        4 + 1 + 3 + n_classes))

    for instance_count, train_instance in enumerate(
            self.images[l_bound:r_bound]):
        # Augment input image and fix object's position and size.
        img, all_objs = self.aug_image(train_instance, jitter=self.jitter)

        # Construct output from object's x, y, w, h.
        true_box_index = 0
        for obj in all_objs:
            is_valid = (obj['xmax'] > obj['xmin']
                        and obj['ymax'] > obj['ymin']
                        and obj['name'] in cfg['LABELS'])
            if not is_valid:
                continue

            # Box centre, unit: grid cell.
            center_x = .5 * (obj['xmin'] + obj['xmax'])
            center_x = center_x / cell_w
            center_y = .5 * (obj['ymin'] + obj['ymax'])
            center_y = center_y / cell_h

            grid_x = int(np.floor(center_x))
            grid_y = int(np.floor(center_y))
            if not (grid_x < cfg['GRID_W'] and grid_y < cfg['GRID_H']):
                continue

            obj_indx = cfg['LABELS'].index(obj['name'])

            # Box size, unit: grid cell.
            center_w = (obj['xmax'] - obj['xmin']) / cell_w
            center_h = (obj['ymax'] - obj['ymin']) / cell_h
            box = [center_x, center_y, center_w, center_h]

            # Find the anchor that best predicts this box.
            best_anchor = -1
            max_iou = -1
            shifted_box = BoundBox(0, 0, center_w, center_h)
            for anchor_index, anchor in enumerate(self.anchors):
                iou = bbox_iou(shifted_box, anchor)
                if max_iou < iou:
                    best_anchor = anchor_index
                    max_iou = iou

            # Assign ground truth x, y, w, h, confidence, class and pose.
            target = y_batch[instance_count, grid_y, grid_x, best_anchor]
            target[0:4] = box
            target[4] = 1.
            target[5 + obj_indx] = 1
            target[pose_start:pose_start + 3] = [
                obj['pose_x'], obj['pose_y'], obj['pose_z']
            ]

            # Assign the true box to b_batch; the slot index wraps around.
            b_batch[instance_count, 0, 0, 0, true_box_index] = box
            true_box_index += 1
            true_box_index = true_box_index % cfg['TRUE_BOX_BUFFER']

        # Assign input image to x_batch.
        if self.norm is not None:
            x_batch[instance_count] = self.norm(img)
        else:
            # Plot image and bounding boxes for sanity check.
            for obj in all_objs:
                if obj['xmax'] > obj['xmin'] and obj['ymax'] > obj['ymin']:
                    cv2.rectangle(img[:, :, ::-1],
                                  (obj['xmin'], obj['ymin']),
                                  (obj['xmax'], obj['ymax']),
                                  (255, 0, 0), 3)
                    cv2.putText(img[:, :, ::-1], obj['name'],
                                (obj['xmin'] + 2, obj['ymin'] + 12), 0,
                                1.2e-3 * img.shape[0], (0, 255, 0), 2)
            x_batch[instance_count] = img

    return [x_batch, b_batch], y_batch
def _main_(args):
    """Predict boxes for the first images of a file and save overlay plots.

    Loads the JSON configuration and trained weights, then for up to ten
    images from the input file draws the predicted boxes (yellow) and the
    truth boxes (green) and saves the figure as ``out<index>.png``.

    Args:
        args: Namespace with ``conf`` (JSON config path), ``weights``
            (weights path) and ``input`` (path given to ``np.load``; the
            loaded object must provide ``'raw'`` and ``'truth'`` entries).
    """
    config_path = args.conf
    weights_path = args.weights
    image_path = args.input

    with open(config_path) as config_buffer:
        config = json.load(config_buffer)
    model_cfg = config['model']

    # --- Make the model ---
    yolo = YOLO(backend=model_cfg['backend'],
                input_shape=model_cfg['input_shape'],
                labels=model_cfg['labels'],
                max_box_per_image=model_cfg['max_box_per_image'],
                anchors=model_cfg['anchors'])

    # --- Load trained weights ---
    yolo.load_weights(weights_path)

    # --- Predict bounding boxes ---
    fig, ax = plt.subplots(1)

    # Box field meaning, per the original author's notes:
    #   bbox x      -> globe eta
    #   bbox y      -> globe phi
    #   bbox width  -> Gaussian sigma (required to be 3*sigma<pi)
    #   bbox height -> Gaussian sigma (required to be 3*sigma<pi)
    file_content = np.load(image_path)
    images = file_content['raw']
    truth_boxes = file_content['truth']

    # At most ten images, but never more than the file contains (a fixed
    # range(10) raised IndexError on shorter files).
    for image_index in range(min(10, len(images))):
        image = images[image_index]
        all_objs = truth_boxes[image_index]
        print(image.shape)

        boxes = yolo.predict(image)
        print(len(boxes), 'boxes are found')
        for box_index, predicted in enumerate(boxes):
            print('box:', box_index, predicted)
        draw_boxes(image, ax, boxes, model_cfg['labels'],
                   color='y', scale=True)

        obj_boxes = []
        for obj_index, obj in enumerate(all_objs):
            # Fields 1..4 of a truth row are read as y, x, h, w.
            y, x, h, w = obj[1:5]
            truth = BoundBox(x - w / 2, y - h / 2, x + w / 2, y + h / 2)
            print('box:', obj_index, obj)
            obj_boxes.append(truth)
        draw_boxes(image, ax, obj_boxes, model_cfg['labels'],
                   color='g', scale=False)

        # Move axis 0 to the end and sum over it to get a 2-D map.
        axis0_last = np.swapaxes(np.swapaxes(image, 0, 1), 1, 2)
        summed = np.sum(axis0_last, axis=2)
        plt.imshow(summed, aspect='auto', extent=(0, 256, 0, 9600),
                   interpolation='nearest', cmap=cm.jet)
        plt.savefig('out%d.png' % image_index, dpi=200)
def test_frame_selector():
    """Exercise FmFrameSelector buffering and frame selection.

    Feeds a frame with a label-3 box, an empty frame and another frame
    with a label-3 box, checks the buffers, then checks that the first
    label-3 box is the one selected, both before and after 100 further
    empty frames.
    """
    from utils import BoundBox
    skip_rate = 3
    frame_rate = 30
    # Presumably durations in seconds, converted to frame counts below.
    Ns, Ks, Ts = 70, 30, 35
    print("Ns = %d, Ks = %d, Ts = %d" % (Ns, Ks, Ts))
    # N and T are divided by skip_rate; K is not.
    N, K = Ns * frame_rate / skip_rate, Ks * frame_rate
    T = Ts * frame_rate / skip_rate
    print("N = %d, K = %d, T = %d" % (N, K, T))
    non_None_threshold = 1
    fm_frame_selector = FmFrameSelector(N, K, T, "", label_ind=3, non_None_threshold=non_None_threshold)
    i = 0
    image = np.zeros((640, 640, 3))
    num_teeth = 6

    # Frame 0: the only box with label=3 is expected to be buffered.
    corr_bbox1 = BoundBox(0.1, 0.1, 0.2, 0.2, 0.6, [0., 0.08, 0., 0.92, 0.], label=3)
    bboxes = [
        BoundBox(0.1, 0.1, 0.2, 0.2, 0.9, 2, label=2),
        BoundBox(0.1, 0.1, 0.2, 0.2, 0.9, 2),
        BoundBox(0.1, 0.1, 0.2, 0.2, 0.9, 1),
        corr_bbox1,
        BoundBox(0.2, 0.2, 0.4, 0.4, 0.5, 4, label=0)
    ]
    fm_frame_selector.update(i, image, bboxes, num_teeth, write_selection=False)
    assert fm_frame_selector.bbox_buffer[0] == corr_bbox1

    # Frame 1: no boxes at all.
    i += 1
    bboxes = []
    fm_frame_selector.update(i, image, bboxes, num_teeth, write_selection=False)

    # Frame 2: another label=3 box, with a lower score than the first.
    i += 1
    corr_bbox2 = BoundBox(0.1, 0.1, 0.2, 0.2, 0.22, [0.05, 0., 0., 0.90, 0.05], label=3)
    bboxes = [
        BoundBox(0.1, 0.1, 0.2, 0.2, 0.9, 2, label=2),
        BoundBox(0.1, 0.1, 0.2, 0.2, 0.9, 2),
        BoundBox(0.1, 0.1, 0.2, 0.2, 0.9, 1),
        corr_bbox2,
        BoundBox(0.2, 0.2, 0.4, 0.4, 0.5, 4, label=0)
    ]
    fm_frame_selector.update(i, image, bboxes, num_teeth, write_selection=False)
    assert fm_frame_selector.bbox_buffer[-1] == corr_bbox2
    assert len(fm_frame_selector.frame_buffer) == 3
    # Two of the three buffered entries hold a box; the empty frame is None.
    assert len(fm_frame_selector.bbox_buffer) -\
        fm_frame_selector.bbox_buffer.count(None) == 2

    # The first label-3 box is the one selected.
    frame, bbox, ind = fm_frame_selector.select_frame()
    assert bbox == corr_bbox1
    assert ind == 0

    # Feed 100 more empty frames.
    for j in range(100):
        i += 1
        bboxes = []
        fm_frame_selector.update(i, image, bboxes, num_teeth, write_selection=False)
    # NOTE(review): the source formatting was lost; this final selection
    # check is placed after the loop here. Confirm it was not meant to
    # run inside the loop on every iteration.
    frame, bbox, ind = fm_frame_selector.select_frame()
    assert bbox == corr_bbox1
    assert ind == 0
def __call__(self, images, annotations, shapes, aug=True):
    """Build one batch of network inputs and three-scale YOLO targets.

    Args:
        images: iterable of image arrays; each one is first resized with
            ``cv2.resize`` to the size given by the matching ``shapes`` entry.
        annotations: iterable of JSON strings; each is decoded and handed to
            ``self._aug_image`` / ``self._raw_image`` together with its image.
        shapes: iterable of indexable sizes; ``shape[0]`` is used as the
            height and ``shape[1]`` as the width of the resize target.
        aug: when True, ``self.idx`` is incremented, the network size comes
            from ``self._get_net_size()`` and images go through
            ``self._aug_image``; when False, the fixed
            ``self.config['model']['input_size']`` and ``self._raw_image``
            are used instead.

    Returns:
        ``[x_batch, t_batch, yolo_1, yolo_2, yolo_3]``; when ``aug`` is False
        the original ``images``, ``annotations`` and ``shapes`` are appended.
        ``yolo_1``/``yolo_2``/``yolo_3`` are the targets on the 1x/2x/4x
        multiples of the base grid (``net size // self.down_sample``).

    Raises:
        ValueError: if an object's ``'name'`` is not in ``self.labels``.
    """
    # get image input size (NOTE(review): the original comment says it
    # changes every 10 batches -- presumably driven by self.idx inside
    # _get_net_size; confirm there)
    if aug:
        self.idx += 1
        net_h, net_w = self._get_net_size()
    else:
        net_h = net_w = self.config['model']['input_size']

    base_grid_h = net_h // self.down_sample
    base_grid_w = net_w // self.down_sample

    # The anchors are split evenly over the three output scales. Both the
    # tensor shapes and the index arithmetic below use this one value, so
    # they stay consistent for anchor counts other than 9 (the original
    # hard-coded "// 3" and "% 3" for the indices).
    anchors_per_scale = len(self.anchors) // 3
    target_depth = 4 + 1 + len(self.labels)

    x_batch = np.zeros((self.batch_size, net_h, net_w, 3), dtype=np.float32)
    t_batch = np.zeros(
        (self.batch_size, 1, 1, 1, self.max_box_per_image, 4),
        dtype=np.float32)

    # initialize the outputs, one tensor per grid resolution
    def _blank_target(factor):
        return np.zeros((self.batch_size,
                         factor * base_grid_h,
                         factor * base_grid_w,
                         anchors_per_scale,
                         target_depth),
                        dtype=np.float32)

    yolo_1 = _blank_target(1)
    yolo_2 = _blank_target(2)
    yolo_3 = _blank_target(4)
    # Anchor index 0 maps to the finest grid (yolo_3), the last to yolo_1.
    yolos = [yolo_3, yolo_2, yolo_1]

    # not only the max-IoU anchor but every anchor above this IoU is positive
    positive_threshold = 0.3

    # Deliberately NOT reset per image: the slot counter runs across the
    # whole batch and wraps at max_box_per_image, as in the original.
    true_box_index = 0

    for instance_count, (img, ann, shape) in enumerate(
            zip(images, annotations, shapes)):
        ann = json.loads(ann)
        img = cv2.resize(img, (shape[1], shape[0]))

        # augment input image and fix object's position and size
        if aug:
            img, all_objs = self._aug_image(img, ann, net_h, net_w)
        else:
            img, all_objs = self._raw_image(img, ann, net_h, net_w)

        for obj in all_objs:
            obj_w = obj['xmax'] - obj['xmin']
            obj_h = obj['ymax'] - obj['ymin']
            obj_cx = .5 * (obj['xmin'] + obj['xmax'])
            obj_cy = .5 * (obj['ymin'] + obj['ymax'])
            obj_indx = self.labels.index(obj['name'])

            # Compare shapes only: both boxes are anchored at the origin.
            shifted_box = BoundBox(0, 0, obj_w, obj_h)

            best_index, best_anchor, best_iou = -1, None, -1
            positive_anchors = []
            for index, anchor in enumerate(self.anchors):
                iou = bbox_iou(shifted_box, anchor)
                if best_iou < iou:
                    best_index, best_anchor, best_iou = index, anchor, iou
                if iou > positive_threshold:
                    positive_anchors.append((index, anchor))

            # Always keep at least the best-matching anchor.
            if not positive_anchors:
                positive_anchors.append((best_index, best_anchor))

            for anchor_index, anchor in positive_anchors:
                # determine the yolo output responsible for this anchor
                scale_index, anchor_slot = divmod(anchor_index,
                                                  anchors_per_scale)
                yolo = yolos[scale_index]
                grid_h, grid_w = yolo.shape[1:3]

                # position of the box centre in grid units
                center_x = obj_cx / float(net_w) * grid_w  # sigma(t_x) + c_x
                center_y = obj_cy / float(net_h) * grid_h  # sigma(t_y) + c_y

                # log-ratio of box size to anchor size
                w = np.log(obj_w / float(anchor.xmax))  # t_w
                h = np.log(obj_h / float(anchor.ymax))  # t_h

                # cell responsible for this object
                grid_x = int(np.floor(center_x))
                grid_y = int(np.floor(center_y))

                # reset the slot, then write x, y, w, h, confidence, class
                cell = yolo[instance_count, grid_y, grid_x, anchor_slot]
                cell[...] = 0
                cell[0:4] = [center_x, center_y, w, h]
                cell[4] = 1.
                cell[5 + obj_indx] = 1

                # record the true box (grid-unit centre, pixel-unit size)
                t_batch[instance_count, 0, 0, 0, true_box_index] = [
                    center_x, center_y, obj_w, obj_h
                ]
                true_box_index = (true_box_index + 1) % self.max_box_per_image

        # assign input image to x_batch. Normalisation is applied only on
        # the augmented path; otherwise the image is stored as-is. The
        # original left x_batch all zeros when aug=True and self.norm was
        # None, because no branch covered that case.
        if aug and self.norm is not None:
            x_batch[instance_count] = self.norm(img)
        else:
            x_batch[instance_count] = img

    output = [x_batch, t_batch, yolo_1, yolo_2, yolo_3]
    if not aug:
        output += [images, annotations, shapes]
    return output