def read_image_and_gt(img_files, gt_files, config):
    '''
    Transform images and return the transformed images and labels
    :param img_files: list of image file paths for a batch
    :param gt_files: list of ground-truth file paths for a batch
    :param config: config dict containing various hyperparameters
    :return: images and annotations
    '''
    labels = []
    bboxes = []
    deltas = []
    aidxs = []

    # loads annotations from file
    def load_annotation(gt_file):
        with open(gt_file, 'r') as f:
            lines = f.readlines()

        annotations = []

        # each line is an annotation bounding box
        for line in lines:
            obj = line.strip().split(' ')

            # get class; if the class is not listed, skip the box
            try:
                cls = config.CLASS_TO_IDX[obj[0].lower().strip()]

                # get coordinates
                xmin = float(obj[4])
                ymin = float(obj[5])
                xmax = float(obj[6])
                ymax = float(obj[7])

                # check for valid bounding boxes
                assert 0.0 <= xmin <= xmax, \
                    'Invalid bounding box x-coord xmin {} or xmax {} at {}'.format(
                        xmin, xmax, gt_file)
                assert 0.0 <= ymin <= ymax, \
                    'Invalid bounding box y-coord ymin {} or ymax {} at {}'.format(
                        ymin, ymax, gt_file)

                # transform to center point + width and height representation
                x, y, w, h = bbox_transform_inv([xmin, ymin, xmax, ymax])

                annotations.append([x, y, w, h, cls])
            except (KeyError, IndexError, ValueError, AssertionError):
                print(obj)
                continue

        return annotations

    # init tensor of images
    imgs = np.zeros((config.BATCH_SIZE, config.IMAGE_HEIGHT, config.IMAGE_WIDTH,
                     config.N_CHANNELS))

    img_idx = 0

    # iterate files
    for img_name, gt_name in zip(img_files, gt_files):

        # open img
        img = cv2.imread(img_name).astype(np.float32, copy=False)

        # scale image
        img = cv2.resize(img, (config.IMAGE_WIDTH, config.IMAGE_HEIGHT))

        # normalize: subtract the mean and divide by the standard deviation
        img = (img - np.mean(img)) / np.std(img)

        # store height and width (the image has already been resized at this point)
        orig_h, orig_w, _ = [float(v) for v in img.shape]

        # load annotations
        annotations = load_annotation(gt_name)

        # split into classes and boxes
        labels_per_file = [a[4] for a in annotations]
        bboxes_per_file = np.array([a[0:4] for a in annotations])

        # dynamic data augmentation
        img = np.asarray(img)
        bboxes_per_file = [
            fix_bbox(*img.shape[:2], bbox) for bbox in bboxes_per_file
        ]
        annotations = {
            'image': img,
            'bboxes': bboxes_per_file,
            'labels': labels_per_file
        }
        augmented = _aug(config)(**annotations)
        img = augmented['image']
        bboxes_per_file = np.array(augmented['bboxes'])
        labels_per_file = augmented['labels']

        # and store
        imgs[img_idx] = img
        img_idx += 1

        # scale annotations to the network input size
        x_scale = config.IMAGE_WIDTH / orig_w
        y_scale = config.IMAGE_HEIGHT / orig_h

        # scale boxes
        bboxes_per_file[:, 0::2] = bboxes_per_file[:, 0::2] * x_scale
        bboxes_per_file[:, 1::2] = bboxes_per_file[:, 1::2] * y_scale

        bboxes.append(bboxes_per_file)

        aidx_per_image, delta_per_image = [], []
        aidx_set = set()

        # iterate all bounding boxes for a file
        for i in range(len(bboxes_per_file)):

            # compute overlaps of bounding boxes and anchor boxes
            overlaps = batch_iou(config.ANCHOR_BOX, bboxes_per_file[i])

            # anchor box index
            aidx = len(config.ANCHOR_BOX)

            # sort for biggest overlaps
            for ov_idx in np.argsort(overlaps)[::-1]:
                # when overlap is zero break
                if overlaps[ov_idx] <= 0:
                    break
                # if a free anchor is found, take it and break
                if ov_idx not in aidx_set:
                    aidx_set.add(ov_idx)
                    aidx = ov_idx
                    break

            # if no free anchor with positive overlap was found, fall back to the free
            # anchor with the smallest squared distance
            if aidx == len(config.ANCHOR_BOX):
                dist = np.sum(np.square(bboxes_per_file[i] - config.ANCHOR_BOX), axis=1)
                for dist_idx in np.argsort(dist):
                    if dist_idx not in aidx_set:
                        aidx_set.add(dist_idx)
                        aidx = dist_idx
                        break

            # compute deltas for regression
            box_cx, box_cy, box_w, box_h = bboxes_per_file[i]
            delta = [0] * 4
            delta[0] = (box_cx - config.ANCHOR_BOX[aidx][0]) / config.ANCHOR_BOX[aidx][2]
            delta[1] = (box_cy - config.ANCHOR_BOX[aidx][1]) / config.ANCHOR_BOX[aidx][3]
            delta[2] = np.log(box_w / config.ANCHOR_BOX[aidx][2])
            delta[3] = np.log(box_h / config.ANCHOR_BOX[aidx][3])

            aidx_per_image.append(aidx)
            delta_per_image.append(delta)

        deltas.append(delta_per_image)
        aidxs.append(aidx_per_image)
        labels.append(labels_per_file)

    # transform this batch's annotations into a form we can feed into the model
    label_indices, bbox_indices, box_delta_values, mask_indices, box_values = \
        [], [], [], [], []

    aidx_set = set()

    # iterate batch
    for i in range(len(labels)):
        # and annotations
        for j in range(len(labels[i])):
            if (i, aidxs[i][j]) not in aidx_set:
                aidx_set.add((i, aidxs[i][j]))
                label_indices.append([i, aidxs[i][j], labels[i][j]])
                mask_indices.append([i, aidxs[i][j]])
                bbox_indices.extend([[i, aidxs[i][j], k] for k in range(4)])
                box_delta_values.extend(deltas[i][j])
                box_values.extend(bboxes[i][j])

    # transform them into dense matrices
    input_mask = np.reshape(
        sparse_to_dense(mask_indices, [config.BATCH_SIZE, config.ANCHORS],
                        [1.0] * len(mask_indices)),
        [config.BATCH_SIZE, config.ANCHORS, 1])

    box_delta_input = sparse_to_dense(bbox_indices,
                                      [config.BATCH_SIZE, config.ANCHORS, 4],
                                      box_delta_values)

    box_input = sparse_to_dense(bbox_indices,
                                [config.BATCH_SIZE, config.ANCHORS, 4],
                                box_values)

    labels = sparse_to_dense(
        label_indices, [config.BATCH_SIZE, config.ANCHORS, config.CLASSES],
        [1.0] * len(label_indices))

    # concatenate outputs
    Y = np.concatenate((input_mask, box_input, box_delta_input, labels),
                       axis=-1).astype(np.float32)

    return imgs, Y
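
# Illustrative sketch (not part of the original pipeline): read_image_and_gt() encodes each
# ground-truth box relative to its matched anchor as
#   [(cx - a_cx) / a_w, (cy - a_cy) / a_h, log(w / a_w), log(h / a_h)].
# The hypothetical helper below shows the inverse mapping, assuming `anchors` is an (N, 4)
# array of [cx, cy, w, h] rows like config.ANCHOR_BOX. It is a sketch for clarity only and
# is not called anywhere in this module.
def _decode_deltas_example(pred_deltas, anchors):
    anchors = np.asarray(anchors, dtype=np.float32)
    pred_deltas = np.asarray(pred_deltas, dtype=np.float32)
    cx = anchors[:, 0] + pred_deltas[:, 0] * anchors[:, 2]  # undo x-offset normalization
    cy = anchors[:, 1] + pred_deltas[:, 1] * anchors[:, 3]  # undo y-offset normalization
    w = anchors[:, 2] * np.exp(pred_deltas[:, 2])            # undo log width scaling
    h = anchors[:, 3] * np.exp(pred_deltas[:, 3])            # undo log height scaling
    return np.stack([cx, cy, w, h], axis=1)
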
def read_image_and_gt_with_original(img_files, gt_files, config):
    '''
    Transform images and return the transformed images and labels, together with the
    images that were only resized
    :param img_files: list of image file paths for a batch
    :param gt_files: list of ground-truth file paths for a batch
    :param config: config dict containing various hyperparameters
    :return: images, annotations and resized-only images
    '''
    labels = []
    bboxes = []
    deltas = []
    aidxs = []

    # loads annotations from file
    def load_annotation(gt_file):
        with open(gt_file, 'r') as f:
            lines = f.readlines()

        annotations = []

        # each line is an annotation bounding box
        for line in lines:
            obj = line.strip().split(' ')

            # get class; if the class is not listed, skip the box
            try:
                cls = config.CLASS_TO_IDX[obj[0].lower().strip()]

                # get coordinates
                xmin = float(obj[4])
                ymin = float(obj[5])
                xmax = float(obj[6])
                ymax = float(obj[7])

                # check for valid bounding boxes
                assert 0.0 <= xmin <= xmax, \
                    'Invalid bounding box x-coord xmin {} or xmax {} at {}'.format(
                        xmin, xmax, gt_file)
                assert 0.0 <= ymin <= ymax, \
                    'Invalid bounding box y-coord ymin {} or ymax {} at {}'.format(
                        ymin, ymax, gt_file)

                # transform to center point + width and height representation
                x, y, w, h = bbox_transform_inv([xmin, ymin, xmax, ymax])

                annotations.append([x, y, w, h, cls])
            except (KeyError, IndexError, ValueError, AssertionError):
                continue

        return annotations

    imgs = np.zeros((config.BATCH_SIZE, config.IMAGE_HEIGHT, config.IMAGE_WIDTH,
                     config.N_CHANNELS))
    imgs_only_resized = np.zeros((config.BATCH_SIZE, config.IMAGE_HEIGHT,
                                  config.IMAGE_WIDTH, config.N_CHANNELS))

    img_idx = 0

    # iterate files
    for img_name, gt_name in zip(img_files, gt_files):

        # open img
        img = cv2.imread(img_name).astype(np.float32, copy=False)

        # store original height and width (before resizing)
        orig_h, orig_w, _ = [float(v) for v in img.shape]

        # scale image
        img = cv2.resize(img, (config.IMAGE_WIDTH, config.IMAGE_HEIGHT))
        imgs_only_resized[img_idx] = img

        # normalize: subtract the mean and divide by the standard deviation
        img = (img - np.mean(img)) / np.std(img)

        # load annotations
        annotations = load_annotation(gt_name)

        # split into classes and boxes
        labels_per_file = [a[4] for a in annotations]
        bboxes_per_file = np.array([a[0:4] for a in annotations])

        # TODO enable dynamic data augmentation
        """
        if config.DATA_AUGMENTATION:
            assert mc.DRIFT_X >= 0 and mc.DRIFT_Y > 0, \
                'mc.DRIFT_X and mc.DRIFT_Y must be >= 0'

            if mc.DRIFT_X > 0 or mc.DRIFT_Y > 0:
                # Ensures that the gt bounding box is not cut out of the image
                max_drift_x = min(gt_bbox[:, 0] - gt_bbox[:, 2] / 2.0 + 1)
                max_drift_y = min(gt_bbox[:, 1] - gt_bbox[:, 3] / 2.0 + 1)
                assert max_drift_x >= 0 and max_drift_y >= 0, 'bbox out of image'

                dy = np.random.randint(-mc.DRIFT_Y, min(mc.DRIFT_Y + 1, max_drift_y))
                dx = np.random.randint(-mc.DRIFT_X, min(mc.DRIFT_X + 1, max_drift_x))

                # shift bbox
                gt_bbox[:, 0] = gt_bbox[:, 0] - dx
                gt_bbox[:, 1] = gt_bbox[:, 1] - dy

                # distort image
                orig_h -= dy
                orig_w -= dx
                orig_x, dist_x = max(dx, 0), max(-dx, 0)
                orig_y, dist_y = max(dy, 0), max(-dy, 0)

                distorted_im = np.zeros(
                    (int(orig_h), int(orig_w), 3)).astype(np.float32)
                distorted_im[dist_y:, dist_x:, :] = im[orig_y:, orig_x:, :]
                im = distorted_im

            # Flip image with 50% probability
            if np.random.randint(2) > 0.5:
                im = im[:, ::-1, :]
                gt_bbox[:, 0] = orig_w - 1 - gt_bbox[:, 0]
        """

        # and store
        imgs[img_idx] = np.asarray(img)
        img_idx += 1

        # scale annotations to the network input size
        x_scale = config.IMAGE_WIDTH / orig_w
        y_scale = config.IMAGE_HEIGHT / orig_h

        # scale boxes
        bboxes_per_file[:, 0::2] = bboxes_per_file[:, 0::2] * x_scale
        bboxes_per_file[:, 1::2] = bboxes_per_file[:, 1::2] * y_scale

        bboxes.append(bboxes_per_file)

        aidx_per_image, delta_per_image = [], []
        aidx_set = set()

        # iterate all bounding boxes for a file
        for i in range(len(bboxes_per_file)):

            # compute overlaps of bounding boxes and anchor boxes
            overlaps = batch_iou(config.ANCHOR_BOX, bboxes_per_file[i])

            # anchor box index
            aidx = len(config.ANCHOR_BOX)

            # sort for biggest overlaps
            for ov_idx in np.argsort(overlaps)[::-1]:
                # when overlap is zero break
                if overlaps[ov_idx] <= 0:
                    break
                # if a free anchor is found, take it and break
                if ov_idx not in aidx_set:
                    aidx_set.add(ov_idx)
                    aidx = ov_idx
                    break

            # if no free anchor with positive overlap was found, fall back to the free
            # anchor with the smallest squared distance
            if aidx == len(config.ANCHOR_BOX):
                dist = np.sum(np.square(bboxes_per_file[i] - config.ANCHOR_BOX), axis=1)
                for dist_idx in np.argsort(dist):
                    if dist_idx not in aidx_set:
                        aidx_set.add(dist_idx)
                        aidx = dist_idx
                        break

            # compute deltas for regression
            box_cx, box_cy, box_w, box_h = bboxes_per_file[i]
            delta = [0] * 4
            delta[0] = (box_cx - config.ANCHOR_BOX[aidx][0]) / config.ANCHOR_BOX[aidx][2]
            delta[1] = (box_cy - config.ANCHOR_BOX[aidx][1]) / config.ANCHOR_BOX[aidx][3]
            delta[2] = np.log(box_w / config.ANCHOR_BOX[aidx][2])
            delta[3] = np.log(box_h / config.ANCHOR_BOX[aidx][3])

            aidx_per_image.append(aidx)
            delta_per_image.append(delta)

        deltas.append(delta_per_image)
        aidxs.append(aidx_per_image)
        labels.append(labels_per_file)

    # transform this batch's annotations into a form we can feed into the model
    label_indices, bbox_indices, box_delta_values, mask_indices, box_values = \
        [], [], [], [], []

    aidx_set = set()

    # iterate batch
    for i in range(len(labels)):
        # and annotations
        for j in range(len(labels[i])):
            if (i, aidxs[i][j]) not in aidx_set:
                aidx_set.add((i, aidxs[i][j]))
                label_indices.append([i, aidxs[i][j], labels[i][j]])
                mask_indices.append([i, aidxs[i][j]])
                bbox_indices.extend([[i, aidxs[i][j], k] for k in range(4)])
                box_delta_values.extend(deltas[i][j])
                box_values.extend(bboxes[i][j])

    # transform them into dense matrices
    input_mask = np.reshape(
        sparse_to_dense(mask_indices, [config.BATCH_SIZE, config.ANCHORS],
                        [1.0] * len(mask_indices)),
        [config.BATCH_SIZE, config.ANCHORS, 1])

    box_delta_input = sparse_to_dense(bbox_indices,
                                      [config.BATCH_SIZE, config.ANCHORS, 4],
                                      box_delta_values)

    box_input = sparse_to_dense(bbox_indices,
                                [config.BATCH_SIZE, config.ANCHORS, 4],
                                box_values)

    labels = sparse_to_dense(
        label_indices, [config.BATCH_SIZE, config.ANCHORS, config.CLASSES],
        [1.0] * len(label_indices))

    # concatenate outputs
    Y = np.concatenate((input_mask, box_input, box_delta_input, labels),
                       axis=-1).astype(np.float32)

    return imgs, Y, imgs_only_resized
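
# Hedged usage sketch (illustrative only; `train_img_files`, `train_gt_files` and the
# attributes of `config` used above, such as BATCH_SIZE, IMAGE_WIDTH, IMAGE_HEIGHT,
# N_CHANNELS, ANCHORS, ANCHOR_BOX, CLASSES and CLASS_TO_IDX, are assumed to be provided
# elsewhere in the project):
#
#   batch_imgs = train_img_files[:config.BATCH_SIZE]
#   batch_gts = train_gt_files[:config.BATCH_SIZE]
#   imgs, Y = read_image_and_gt(batch_imgs, batch_gts, config)
#   # Y has shape (BATCH_SIZE, ANCHORS, 1 + 4 + 4 + CLASSES):
#   # [anchor mask | gt box (cx, cy, w, h) | regression deltas | one-hot class labels]
#
# read_image_and_gt_with_original() returns the same (imgs, Y) plus the batch of images
# that were only resized (no mean/std normalization), which can be useful for visualization.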