def test_rpn_training(self):
    """
    Regression test for train_rpn on a VGG16-based RPN.

    Trains on the single image '000005' from the bundled 'VOC_test' data,
    writes the kernel weights of layer 'block5_conv3' to a temporary HDF5
    file and compares that file to a stored reference with the external
    ``h5diff`` command. The test passes when ``h5diff`` exits with code 0.
    """
    # setup: locations of the fixture data, relative to this test module
    here = os.path.abspath(os.path.dirname(__file__))
    data_dir = os.path.join(here, os.pardir, 'test_data')
    voc_dir = os.path.join(data_dir, 'VOC_test')
    expected_path = os.path.join(data_dir, 'reference_rpn_weights.h5')
    actual_path = os.path.join(data_dir, 'tmp_rpn_weights.h5')

    # setup: model, training data and optimizer
    anchors = get_anchors(anchor_scales=[128, 256, 512])
    model_rpn = vgg16_rpn(vgg16_base(), anchors_per_loc=len(anchors))
    sample = extract_img_data(voc_dir, '000005')
    training_manager = RpnTrainingManager(
        vgg.get_conv_rows_cols,
        vgg.STRIDE,
        preprocess_func=vgg.preprocess,
        anchor_dims=anchors,
    )
    optimizer = Adam(lr=0.001)

    # action being tested
    train_rpn(
        model_rpn,
        [sample],
        training_manager,
        optimizer,
        phases=[[1, 0.001]],
    )

    # assertion: dump the trained kernel and diff it against the reference
    trained_kernel = model_rpn.get_layer('block5_conv3').get_weights()[0]
    with h5py.File(actual_path, 'w') as out:
        out.create_dataset('last_layer_weights', data=trained_kernel)

    differ = Popen(['h5diff', expected_path, actual_path],
                   stdout=PIPE, stderr=PIPE)
    differ.communicate()
    self.assertEqual(differ.returncode, 0)
def test_resnet_frcnn_training_phase_2(self):
    """
    Regression test for train_detector_step2 on a ResNet50-based detector.

    Loads stored RPN weights ('r50_rpn_step1.h5'), trains the detector on
    the single resized image '000005' from the bundled 'VOC_test' data,
    writes the kernel weights of layer 'res5c_branch2c' to a temporary
    HDF5 file and compares that file to a stored reference with the
    external ``h5diff`` command. The test passes when ``h5diff`` exits
    with code 0.
    """
    # setup: locations of the fixture data, relative to this test module
    here = os.path.abspath(os.path.dirname(__file__))
    data_dir = os.path.join(here, os.pardir, 'test_data')
    voc_dir = os.path.join(data_dir, 'VOC_test')
    expected_path = os.path.join(data_dir,
                                 'reference_r50_frcnn_step2_weights.h5')
    actual_path = os.path.join(data_dir, 'tmp_r50_frcnn_weights.h5')
    rpn_init_path = os.path.join(data_dir, 'r50_rpn_step1.h5')

    # setup: training data (the second value returned by resize_imgs is
    # not needed here)
    anchors = get_anchors(anchor_scales=[128, 256, 512])
    sample = extract_img_data(voc_dir, '000005')
    training_imgs, _ = resize_imgs([sample])

    # setup: region proposal network initialised from stored weights
    model_rpn = resnet50_rpn(resnet50_base(), anchors_per_loc=len(anchors))
    model_rpn.load_weights(filepath=rpn_init_path)

    # setup: detector, built on its own base model instance
    # NOTE(review): the classifier gets a literal num_rois=64 while the
    # training manager gets NUM_ROIS - confirm the two are meant to agree.
    model_frcnn = resnet50_classifier(
        num_rois=64,
        num_classes=21,
        base_model=resnet50_base(),
    )
    training_manager = DetTrainingManager(
        rpn_model=model_rpn,
        class_mapping=VOC_CLASS_MAPPING,
        num_rois=NUM_ROIS,
        preprocess_func=resnet.preprocess,
        anchor_dims=anchors,
    )
    optimizer = Adam(lr=0.001)

    # action being tested
    train_detector_step2(
        detector=model_frcnn,
        images=training_imgs,
        training_manager=training_manager,
        optimizer=optimizer,
        phases=[[1, 0.0001]],
    )

    # assertion: dump the trained kernel and diff it against the reference
    trained_kernel = model_frcnn.get_layer('res5c_branch2c').get_weights()[0]
    with h5py.File(actual_path, 'w') as out:
        out.create_dataset('last_layer_weights', data=trained_kernel)

    differ = Popen(['h5diff', expected_path, actual_path],
                   stdout=PIPE, stderr=PIPE)
    differ.communicate()
    self.assertEqual(differ.returncode, 0)
def base_paths_to_imgs(base_path_str, img_set='trainval', do_flip=True):
    """
    Load the images of one image set from one or more dataset roots.

    :param base_path_str: string of comma-separated absolute filesystem
        paths. Each path should point to the root directory of an image set
        laid out according to the PASCAL VOC directory structure.
    :param img_set: string, one of 'train', 'val', 'trainval', or 'test'.
    :param do_flip: boolean, whether to append a horizontally flipped copy
        of every loaded image. Used for training but not inference.
    :return: list of shapes.Image objects; when do_flip is true the flipped
        copies follow all of the unflipped images.
    """
    imgs = []
    for root in base_path_str.split(','):
        # Resolve the full name list for this root before loading anything.
        names = get_img_names_from_set(root, img_set)
        for name in names:
            imgs.append(extract_img_data(root, name))

    if not do_flip:
        return imgs

    # Build the flipped copies first so the list is not extended while it
    # is being iterated.
    mirrored = [img.horizontal_flip() for img in imgs]
    imgs.extend(mirrored)
    return imgs
def voc_eval(voc_path, det_file, imageset_path, cls_name, ovthresh=0.5):
    """
    Score the detections of one object class against ground truth.

    Ground-truth boxes are loaded through extract_img_data for every image
    named in the image set file. Detections are visited in order of
    decreasing confidence and each one is marked as a true or false
    positive according to its best overlap (intersection over union) with
    the ground-truth boxes of the same image.

    :param voc_path: dataset root, passed unchanged to extract_img_data.
    :param det_file: path to a text file with one detection per line,
        fields separated by single spaces: image name, confidence, then the
        box coordinates (the first four are used as xmin, ymin, xmax, ymax).
        Blank lines are ignored and the file may be empty.
    :param imageset_path: path to a text file with one image name per line.
    :param cls_name: class to evaluate; ground-truth boxes are selected by
        comparing their obj_cls attribute to this value.
    :param ovthresh: a detection only counts as a match when its best
        overlap is strictly greater than this value.
    :return: tuple (rec, prec, ap) - cumulative recall and precision arrays
        with one entry per detection, and the value returned by
        voc_ap(rec, prec, use_07_metric=True).
    :raises KeyError: if a detection names an image that is not listed in
        the image set file.
    """
    with open(imageset_path, 'r') as f:
        imagenames = [line.strip() for line in f.readlines()]

    gt_boxes_by_imagename = {}
    for i, imagename in enumerate(imagenames):
        if i % 100 == 0:
            print('Reading annotation for image {}/{}'.format(
                i, len(imagenames)))
        img = extract_img_data(voc_path, imagename)
        gt_boxes_by_imagename[imagename] = img.gt_boxes

    # extract gt objects for this class
    class_recs = {}
    npos = 0
    for imagename in imagenames:
        cls_boxes = [
            box for box in gt_boxes_by_imagename[imagename]
            if box.obj_cls == cls_name
        ]
        # assumes box.corners uses the same (xmin, ymin, xmax, ymax) layout
        # as the detection file - TODO confirm
        bbox = np.array([box.corners for box in cls_boxes])
        # Builtin bool instead of the np.bool alias, which was removed from
        # NumPy in 1.24.
        difficult = np.array(
            [box.difficult for box in cls_boxes]).astype(bool)
        # Only boxes not flagged difficult count towards the recall
        # denominator.
        npos += int(np.sum(~difficult))
        class_recs[imagename] = {
            'bbox': bbox,
            'difficult': difficult,
            'det': [False] * len(cls_boxes),
        }

    # read dets, skipping blank lines (e.g. a trailing newline at the end
    # of the file) that would otherwise fail to parse
    with open(det_file, 'r') as f:
        splitlines = [
            line.strip().split(' ') for line in f if line.strip()
        ]

    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    BB = np.array([[float(z) for z in x[2:]] for x in splitlines])

    nd = len(image_ids)
    if nd > 0:
        # sort by confidence, highest first. With no detections BB is a
        # 1-D empty array and the two-axis index below would raise.
        sorted_ind = np.argsort(-confidence)
        BB = BB[sorted_ind, :]
        image_ids = [image_ids[x] for x in sorted_ind]

    # go down dets and mark TPs and FPs
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    for d in range(nd):
        R = class_recs[image_ids[d]]
        bb = BB[d, :].astype(float)
        ovmax = -np.inf
        BBGT = R['bbox'].astype(float)

        if BBGT.size > 0:
            # compute overlaps
            # intersection
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih

            # union
            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                   (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                   (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

        if ovmax > ovthresh:
            # A match with a difficult box is neither a TP nor an FP.
            if not R['difficult'][jmax]:
                if not R['det'][jmax]:
                    tp[d] = 1.
                    # Each ground-truth box can be claimed only once.
                    R['det'][jmax] = True
                else:
                    # Duplicate detection of an already matched box.
                    fp[d] = 1.
        else:
            fp[d] = 1.

    # compute precision recall
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    # NOTE(review): when npos is 0 and there are detections this divides by
    # zero and NumPy yields nan/inf with a warning, as before.
    rec = tp / float(npos)
    # avoid divide by zero in case the first detection matches a difficult
    # ground truth
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    # NOTE(review): with no detections rec and prec are empty arrays -
    # presumably voc_ap accepts these; confirm.
    ap = voc_ap(rec, prec, use_07_metric=True)

    return rec, prec, ap