def test_rpn_training(self):
    # setup
    anchors = get_anchors(anchor_scales)
    anchors_per_loc = len(anchors)
    root_dir = os.getcwd()
    ref_weights_path = os.path.join(root_dir, 'reference_rpn_weights.h5')
    tmp_weights_path = os.path.join(root_dir, 'tmp_rpn_weights.h5')
    train_images = make_image_object(os.path.join(root_dir, 'data'), codeTesting=True)
    processed_imgs, _ = resize_imgs(train_images, min_size=resize_min, max_size=resize_max)
    base_model = Models.vgg16_base(weight_regularizer=Models.WEIGHT_REGULARIZER,
                                   bias_regularizer=Models.BIAS_REGULARIZER)
    rpn_model = Models.vgg16_rpn(base_model,
                                 weight_regularizer=Models.WEIGHT_REGULARIZER,
                                 bias_regularizer=Models.BIAS_REGULARIZER,
                                 anchors_per_loc=anchors_per_loc)
    preprocess_func = Models.vgg_preprocess
    get_conv_rows_cols_func = Models.vgg_get_conv_rows_cols
    stride = Models.VGG_Stride
    training_manager = RpnTrainingManager(get_conv_rows_cols_func, stride,
                                          preprocess_func=preprocess_func,
                                          anchor_dims=anchors)

    # action being tested
    rpn_model = train_rpn(rpn_model, processed_imgs, training_manager, optimizer,
                          phases=[[1, 0.001]])
    print("Testing Done")
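# All of these snippets start from get_anchors(anchor_scales). As a point of
# reference, here is a minimal sketch of what such a helper conventionally
# returns in a Faster R-CNN setup: one (height, width) pair per
# scale/aspect-ratio combination, so 3 scales x 3 ratios gives the usual 9
# anchors per location. This is a hypothetical illustration, not this
# project's actual implementation; its ratios and output layout may differ.
import numpy as np

def get_anchors_sketch(anchor_scales, anchor_ratios=((1, 1), (1, 2), (2, 1))):
    """Return an (N, 2) array of anchor (height, width) dims; N = anchors_per_loc."""
    dims = []
    for scale in anchor_scales:            # e.g. [128, 256, 512]
        for ratio_h, ratio_w in anchor_ratios:
            dims.append((scale * ratio_h, scale * ratio_w))
    return np.array(dims)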
def test_resnet_frcnn_training_phase_2(self):
    # setup
    anchors = get_anchors(anchor_scales=[128, 256, 512])
    anchors_per_loc = len(anchors)
    cur_dir = os.path.abspath(os.path.dirname(__file__))
    test_dir = os.path.join(cur_dir, os.pardir, 'test_data')
    base_dir = os.path.join(test_dir, 'VOC_test')
    ref_weights_path = os.path.join(test_dir, 'reference_r50_frcnn_step2_weights.h5')
    tmp_weights_path = os.path.join(test_dir, 'tmp_r50_frcnn_weights.h5')
    rpn_weights_path = os.path.join(test_dir, 'r50_rpn_step1.h5')
    img = extract_img_data(base_dir, '000005')
    training_imgs, resized_ratios = resize_imgs([img])
    model_rpn = resnet50_rpn(resnet50_base(), anchors_per_loc=anchors_per_loc)
    model_rpn.load_weights(filepath=rpn_weights_path)
    model_frcnn = resnet50_classifier(num_rois=64, num_classes=21, base_model=resnet50_base())
    class_mapping = VOC_CLASS_MAPPING
    training_manager = DetTrainingManager(rpn_model=model_rpn, class_mapping=class_mapping,
                                          num_rois=NUM_ROIS, preprocess_func=resnet.preprocess,
                                          anchor_dims=anchors)
    optimizer = Adam(lr=0.001)

    # action being tested
    train_detector_step2(detector=model_frcnn, images=training_imgs,
                         training_manager=training_manager, optimizer=optimizer,
                         phases=[[1, 0.0001]])

    # assertion
    last_layer_weights = model_frcnn.get_layer('res5c_branch2c').get_weights()[0]
    with h5py.File(tmp_weights_path, 'w') as file:
        file.create_dataset('last_layer_weights', data=last_layer_weights)
    process = Popen(['h5diff', ref_weights_path, tmp_weights_path], stdout=PIPE, stderr=PIPE)
    process.communicate()
    self.assertEqual(process.returncode, 0)
def test_rpn_training(self):
    # setup
    anchors = get_anchors(anchor_scales=[128, 256, 512])
    anchors_per_loc = len(anchors)
    model_rpn = vgg16_rpn(vgg16_base(), anchors_per_loc=anchors_per_loc)
    cur_dir = os.path.abspath(os.path.dirname(__file__))
    test_dir = os.path.join(cur_dir, os.pardir, 'test_data')
    base_dir = os.path.join(test_dir, 'VOC_test')
    ref_weights_path = os.path.join(test_dir, 'reference_rpn_weights.h5')
    tmp_weights_path = os.path.join(test_dir, 'tmp_rpn_weights.h5')
    image = extract_img_data(base_dir, '000005')
    training_manager = RpnTrainingManager(vgg.get_conv_rows_cols, vgg.STRIDE,
                                          preprocess_func=vgg.preprocess, anchor_dims=anchors)
    optimizer = Adam(lr=0.001)

    # action being tested
    train_rpn(model_rpn, [image], training_manager, optimizer, phases=[[1, 0.001]])

    # assertion
    last_layer_weights = model_rpn.get_layer('block5_conv3').get_weights()[0]
    with h5py.File(tmp_weights_path, 'w') as file:
        file.create_dataset('last_layer_weights', data=last_layer_weights)
    process = Popen(['h5diff', ref_weights_path, tmp_weights_path], stdout=PIPE, stderr=PIPE)
    process.communicate()
    self.assertEqual(process.returncode, 0)
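# The two tests above repeat the same save-one-layer-then-h5diff assertion. A
# small helper could factor that out; this is a hypothetical refactoring
# sketch, not something the test suite currently defines.
def assert_layer_weights_match(test_case, model, layer_name, ref_path, tmp_path):
    """Write one layer's kernel to tmp_path and h5diff it against a reference file."""
    weights = model.get_layer(layer_name).get_weights()[0]
    with h5py.File(tmp_path, 'w') as file:
        file.create_dataset('last_layer_weights', data=weights)
    process = Popen(['h5diff', ref_path, tmp_path], stdout=PIPE, stderr=PIPE)
    process.communicate()  # h5diff exits 0 only when the datasets match
    test_case.assertEqual(process.returncode, 0)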
def train_rpn_step1():
    root_dir = os.getcwd()
    path = os.path.join(root_dir, 'data')
    train_images = make_image_object(path, codeTesting=False)
    print("Done making image Objects")
    anchors = get_anchors(anchor_scales)
    anchors_per_loc = len(anchors)
    processed_imgs, resized_ratios = resize_imgs(train_images, min_size=resize_min,
                                                 max_size=resize_max)
    stride, get_conv_rows_cols_func, preprocess_func, rpn_model = None, None, None, None
    if network == "vgg16":
        base_model = Models.vgg16_base(weight_regularizer=Models.WEIGHT_REGULARIZER,
                                       bias_regularizer=Models.BIAS_REGULARIZER)
        rpn_model = Models.vgg16_rpn(base_model,
                                     weight_regularizer=Models.WEIGHT_REGULARIZER,
                                     bias_regularizer=Models.BIAS_REGULARIZER,
                                     anchors_per_loc=anchors_per_loc)
        preprocess_func = Models.vgg_preprocess
        get_conv_rows_cols_func = Models.vgg_get_conv_rows_cols
        stride = Models.VGG_Stride
    elif network == "resnet50":
        base_model = Models.resnet50_base(weight_regularizer=Models.WEIGHT_REGULARIZER,
                                          bias_regularizer=Models.BIAS_REGULARIZER)
        rpn_model = Models.resnet50_rpn(base_model,
                                        weight_regularizer=Models.WEIGHT_REGULARIZER,
                                        bias_regularizer=Models.BIAS_REGULARIZER,
                                        anchors_per_loc=anchors_per_loc)
        preprocess_func = Models.resnet50_preprocess
        get_conv_rows_cols_func = Models.resnet50_get_conv_rows_cols
        stride = Models.ResNet_Stride
    save_weights_dest = "models/rpn_weights_{}_step1.h5".format(network)
    save_model_dest = "models/rpn_model_{}_step1.h5".format(network)
    training_manager = RpnTrainingManager(get_conv_rows_cols_func, stride,
                                          preprocess_func=preprocess_func, anchor_dims=anchors)
    rpn_model = train_rpn(rpn_model, processed_imgs, training_manager, optimizer,
                          phases=phases, save_frequency=2000,
                          save_weights_dest=save_weights_dest,
                          save_model_dest=save_model_dest)
    rpn_model.save_weights(save_weights_dest)
    print('Saved {} rpn weights to {}'.format(network, save_weights_dest))
    rpn_model.save(save_model_dest)
    print('Saved {} rpn model to {}'.format(network, save_model_dest))
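# Every call to train_rpn() in these scripts passes phases as a list of
# [num_epochs, learning_rate] pairs (e.g. phases=[[1, 0.001]]). A minimal
# sketch of how a trainer might consume that convention -- an assumption about
# the interface, not the project's actual loop; train_one_epoch is a
# hypothetical callback standing in for the real per-epoch training pass.
import keras.backend as K

def run_phases_sketch(model, images, train_one_epoch, phases):
    for num_epochs, learning_rate in phases:
        K.set_value(model.optimizer.lr, learning_rate)  # old-style Keras lr attribute
        for _ in range(num_epochs):
            train_one_epoch(model, images)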
def build_proposal(self):
    self.anchor_list = get_anchors(self.feature_input, self.im_info,
                                   self.anchor_ratio, self.base_anchors)
    self.rpn_layer()
    return self.proposal_layer()
    default='.')
parser.add_argument('--det_threshold', dest='det_threshold',
                    help='Minimum confidence level (from 0 to 1) needed to output a detection',
                    default=DEFAULT_DET_THRESHOLD)
args = parser.parse_args()
det_threshold = float(args.det_threshold)
test_imgs = base_paths_to_imgs(args.voc_path, img_set=args.img_set, do_flip=False)
anchor_scales = anchor_scales_from_str(args.anchor_scales)
anchors = get_anchors(anchor_scales)
anchors_per_loc = len(anchors)
print("num test_imgs: ", len(test_imgs))
class_mapping = KITTI_CLASS_MAPPING if args.kitti else VOC_CLASS_MAPPING
num_classes = len(class_mapping)
if args.network == 'vgg16':
    # no need to freeze or regularize the rpn because we're not training it
    model_rpn = vgg.rpn_from_h5(args.step3_model_path, anchors_per_loc=anchors_per_loc)
    model_det = vgg.det_from_h5(args.step4_model_path, num_classes=num_classes)
    stride = vgg.STRIDE
else:
    model_rpn = resnet.rpn_from_h5(args.step3_model_path, anchors_per_loc=anchors_per_loc)
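# anchor_scales_from_str() is assumed here to turn the CLI string (e.g.
# "128,256,512") into a list of ints. A plausible sketch, shown only to make
# the expected argument format concrete -- hypothetical, not the real helper:
def anchor_scales_from_str_sketch(scales_str):
    return [int(scale) for scale in scales_str.split(',')]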
def train_rpn_det():
    """
    ## NOTE: Make NMS use 2k proposals at train time
    ## NOTE: DEBUGGING script containing all of the print statements
    """
    root_dir = os.getcwd()
    path = os.path.join(root_dir, 'data')
    train_images = make_image_object(path, codeTesting=False)
    print("Done making image Objects")
    anchors = get_anchors(anchor_scales)
    anchors_per_loc = len(anchors)
    processed_imgs, resized_ratios = resize_imgs(train_images, min_size=resize_min,
                                                 max_size=resize_max)
    num_classes = 2
    class_mapping = {'Table': 0, 'bg': 1}

    # Create the record.csv file to record losses, accuracy and mAP
    record_df = pd.DataFrame(columns=[
        'mean_overlapping_bboxes', 'class_acc', 'loss_rpn_cls', 'loss_rpn_regr',
        'loss_class_cls', 'loss_class_regr', 'curr_loss', 'mAP'
    ])

    preprocess_func = Models.vgg_preprocess
    get_conv_rows_cols_func = Models.vgg_get_conv_rows_cols
    stride = Models.VGG_Stride

    # Working with VGG only.
    # RPN model: input=Input(shape=(None, None, 3)), outputs=[x_class, x_regr, base_model.output]
    base_model = Models.vgg16_base(weight_regularizer=Models.WEIGHT_REGULARIZER,
                                   bias_regularizer=Models.BIAS_REGULARIZER)
    rpn_model = Models.vgg16_rpn(base_model, include_conv=False,
                                 weight_regularizer=Models.WEIGHT_REGULARIZER,
                                 bias_regularizer=Models.BIAS_REGULARIZER,
                                 anchors_per_loc=anchors_per_loc)

    # Detector model: inputs=[base_model.input, roi_input], outputs=[out_class, out_reg]
    detector_base = Models.vgg16_base(weight_regularizer=Models.WEIGHT_REGULARIZER,
                                      bias_regularizer=Models.BIAS_REGULARIZER)
    detector_model = Models.vgg16_classifier(NUM_ROIS, num_classes, detector_base,
                                             weight_regularizer=Models.WEIGHT_REGULARIZER,
                                             bias_regularizer=Models.BIAS_REGULARIZER)

    # # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    # img_input = Input(shape=(None, None, 3))
    # roi_input = Input(shape=(None, 4), name='roi_input')
    # model_all = Model([img_input, roi_input], rpn_model.output[:2] + detector_model.output)

    rpn_save_weights_dest = "models/combinedTraining_rpn_weights_{}.h5".format(network)
    det_save_weights_dest = "models/combinedTraining_detector_weights_{}.h5".format(network)
    rpn_save_model_dest = "models/combinedTraining_rpn_model_{}.h5".format(network)
    det_save_model_dest = "models/combinedTraining_detector_model_{}.h5".format(network)
    record_path = "models/record.csv"

    rpn_training_manager = RpnTrainingManager(get_conv_rows_cols_func, stride,
                                              preprocess_func=preprocess_func,
                                              anchor_dims=anchors)
    det_training_manager = DetTrainingManager(rpn_model=rpn_model, class_mapping=class_mapping,
                                              preprocess_func=preprocess_func, num_rois=NUM_ROIS,
                                              stride=stride, anchor_dims=anchors)

    rpn_model, detector_model = combined_rpn_det_trainer(
        rpn_model, detector_model, processed_imgs, rpn_training_manager, det_training_manager,
        optimizer=optimizer, phases=phases, save_frequency=2000,
        rpn_save_weights_dest=rpn_save_weights_dest,
        det_save_weights_dest=det_save_weights_dest,
        recordCSV=record_df, record_path=record_path)

    # # Weights corresponding to the minimum loss are already saved inside combined_rpn_det_trainer
    # rpn_model.save_weights(rpn_save_weights_dest)
    # print('Saved {} RPN weights to {}'.format(network, rpn_save_weights_dest))
    # detector_model.save_weights(det_save_weights_dest)
    # print('Saved {} DET weights to {}'.format(network, det_save_weights_dest))

    rpn_model.save(rpn_save_model_dest)
    print('Saved {} RPN model to {}'.format(network, rpn_save_model_dest))
    detector_model.save(det_save_model_dest)
    print('Saved {} DET model to {}'.format(network, det_save_model_dest))

    print("\nTraining Complete.")
    print("Plotting Losses")
    plotLosses(record_path, r_epochs=40)
def test_rpn_step1():
    """
    ## NOTE: For evaluation, try different numbers of NMS proposals at test time, e.g. 100, 300, 1k
    """
    imgName = "1_Page1.png"
    anchors = get_anchors(anchor_scales)
    anchors_per_loc = len(anchors)
    root_dir = os.getcwd()
    test_path = os.path.join(root_dir, 'data')
    img = cv2.imread(os.path.join(test_path, imgName))

    # For testing on images with no ground truth (gt_boxes=[])
    img_obj = Image(name=imgName, width=img.shape[1], height=img.shape[0], gt_boxes=[],
                    image_path=os.path.join(test_path, imgName))
    resized_img, resized_ratio = img_obj.resize_within_bounds(min_size=600, max_size=1000)

    rpn_model, preprocess_func = None, None
    if network == "vgg16":
        base_model = Models.vgg16_base(weight_regularizer=Models.WEIGHT_REGULARIZER,
                                       bias_regularizer=Models.BIAS_REGULARIZER)
        rpn_model = Models.vgg16_rpn(base_model,
                                     weight_regularizer=Models.WEIGHT_REGULARIZER,
                                     bias_regularizer=Models.BIAS_REGULARIZER,
                                     anchors_per_loc=anchors_per_loc)
        preprocess_func = Models.vgg_preprocess
        get_conv_rows_cols_func = Models.vgg_get_conv_rows_cols
        stride = Models.VGG_Stride
    elif network == "resnet50":
        base_model = Models.resnet50_base(weight_regularizer=Models.WEIGHT_REGULARIZER,
                                          bias_regularizer=Models.BIAS_REGULARIZER)
        rpn_model = Models.resnet50_rpn(base_model,
                                        weight_regularizer=Models.WEIGHT_REGULARIZER,
                                        bias_regularizer=Models.BIAS_REGULARIZER,
                                        anchors_per_loc=anchors_per_loc)
        preprocess_func = Models.resnet50_preprocess
        get_conv_rows_cols_func = Models.resnet50_get_conv_rows_cols
        stride = Models.ResNet_Stride

    save_weights_dest = "models/rpn_weights_{}_step1.h5".format(network)
    rpn_model.load_weights(save_weights_dest, by_name=True)
    rpn_model.compile(optimizer=optimizer,
                      loss=[cls_loss_rpn(anchors_per_loc=anchors_per_loc),
                            bbreg_loss_rpn(anchors_per_loc=anchors_per_loc)])

    batched_img = np.expand_dims(preprocess_func(resized_img.data), axis=0)
    # [x_class, x_regr] = rpn_model.predict_on_batch(batched_img)

    # Using get_training_input() of train_detector_step2(), writing a custom PredictionToROIs
    rois, rois_prob = RPNsToROIs(rpn_model, batched_img, anchors, stride=stride)
    pred = create_Prediction_Dict(rois, rois_prob)
    print("Region Predictions: {}".format(pred))
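# create_Prediction_Dict() pairs each proposal with its objectness score. A
# minimal sketch of what such a helper might do, assuming rois is an (N, 4)
# array and rois_prob the matching (N,) scores -- hypothetical, shown only to
# make the printed structure concrete:
def create_prediction_dict_sketch(rois, rois_prob):
    return {tuple(int(v) for v in roi): float(prob)
            for roi, prob in zip(rois, rois_prob)}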
def test_rpn_det():
    anchors = get_anchors(anchor_scales)
    anchors_per_loc = len(anchors)
    root_dir = os.getcwd()
    test_path = os.path.join(root_dir, 'data', 'test')
    num_classes = 2
    bbox_threshold = 0.7

    # Invert the class mapping so predicted indices map back to class names
    class_mapping = {'Table': 0, 'bg': 1}
    class_mapping = {v: k for k, v in class_mapping.items()}
    class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping}

    rpn_save_weights_dest = "models/combinedTraining_rpn_weights_{}.h5".format(network)
    det_save_weights_dest = "models/combinedTraining_detector_weights_{}.h5".format(network)

    preprocess_func = Models.vgg_preprocess
    stride = Models.VGG_Stride
    base_model = Models.vgg16_base(weight_regularizer=Models.WEIGHT_REGULARIZER,
                                   bias_regularizer=Models.BIAS_REGULARIZER)
    rpn_model = Models.vgg16_rpn(base_model, include_conv=False,
                                 weight_regularizer=Models.WEIGHT_REGULARIZER,
                                 bias_regularizer=Models.BIAS_REGULARIZER,
                                 anchors_per_loc=anchors_per_loc)
    detector_base = Models.vgg16_base(weight_regularizer=Models.WEIGHT_REGULARIZER,
                                      bias_regularizer=Models.BIAS_REGULARIZER)
    detector_model = Models.vgg16_classifier(NUM_ROIS, num_classes, detector_base,
                                             weight_regularizer=Models.WEIGHT_REGULARIZER,
                                             bias_regularizer=Models.BIAS_REGULARIZER)

    print("RPN model I/P shape {} \nand O/P shape {}".format(rpn_model.inputs, rpn_model.output))
    print("Detector model I/P shape {} \nand O/P shape {}".format(detector_model.inputs,
                                                                  detector_model.outputs))
    print("Loading weights")
    rpn_model.load_weights(rpn_save_weights_dest)
    detector_model.load_weights(det_save_weights_dest)
    rpn_model.compile(optimizer='sgd', loss='mse')
    detector_model.compile(optimizer='sgd', loss='mse')

    print("----------------- RPN MODEL -----------------")
    # print(rpn_model.summary())
    # plot_model(rpn_model, to_file='rpnModel.png', show_shapes=True)
    print("----------------- DET MODEL -----------------")
    # print(detector_model.summary())
    # plot_model(detector_model, to_file="detModel.png", show_shapes=True)

    test_images = [image for image in os.listdir(test_path) if image.endswith(".png")]
    for imgName in test_images:
        print(imgName)
        img = cv2.imread(os.path.join(test_path, imgName))
        img_obj = Image(name=imgName, width=img.shape[1], height=img.shape[0], gt_boxes=[],
                        image_path=os.path.join(test_path, imgName))
        resized_img_obj, resized_ratio = img_obj.resize_within_bounds(min_size=600,
                                                                      max_size=1000)
        batched_img = np.expand_dims(preprocess_func(resized_img_obj.data), axis=0)
        # [Y1, Y2] = rpn_model.predict_on_batch(batched_img)

        # Get bboxes by applying NMS; R.shape = (300, 4)
        R, rois_prob = RPNsToROIs(rpn_model, batched_img, anchors, stride=stride)

        # convert from (x1, y1, x2, y2) to (x1, y1, w, h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        # apply the spatial pyramid pooling to the proposed regions
        bboxes = {}
        probs = {}
        for jk in range(R.shape[0] // NUM_ROIS + 1):  # comes out to 4 + 1
            ROIs = np.expand_dims(R[NUM_ROIS * jk:NUM_ROIS * (jk + 1), :], axis=0)
            if ROIs.shape[1] == 0:
                break  # for images on which the RPN returns zero ROIs
            if jk == R.shape[0] // NUM_ROIS:
                # Pad R if the last batch has fewer than the required NUM_ROIS,
                # since the DET network uses an fc layer with input size (64, 7, 7, 512)
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], NUM_ROIS, curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded
            print("ROIs having shape = {}".format(ROIs.shape))
            print("Batched Image: {}".format(batched_img.shape))

            # P_cls.shape = (1, 64, 2) and P_regr.shape = (1, 64, 4)
            [P_cls, P_regr] = detector_model.predict_on_batch([batched_img, ROIs])

            # Calculate every class's bbox coordinates on the resized image,
            # dropping bboxes assigned to the 'bg' class
            for ii in range(P_cls.shape[1]):
                # skip low-confidence detections and the 'bg' class, whose index
                # is the last one (P_cls.shape[2] - 1, i.e. 1 for 2 classes)
                if np.max(P_cls[0, ii, :]) < bbox_threshold or \
                        np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                    continue
                cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]
                if cls_name not in bboxes:
                    bboxes[cls_name] = []
                    probs[cls_name] = []
                (x, y, w, h) = ROIs[0, ii, :]
                cls_num = np.argmax(P_cls[0, ii, :])
                try:
                    (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                    x, y, w, h = apply_regr(x, y, w, h, tx, ty, tw, th)
                except Exception:
                    # fall back to the unregressed ROI if the regression output is unusable
                    pass
                bboxes[cls_name].append([stride * x, stride * y,
                                         stride * (x + w), stride * (y + h)])
                probs[cls_name].append(np.max(P_cls[0, ii, :]))

        all_dets = []
        for key in bboxes:
            bbox = np.array(bboxes[key])
            new_boxes, new_probs = nms(bbox, np.array(probs[key]), overlap_thresh=0.2)
            for jk in range(new_boxes.shape[0]):
                (x1, y1, x2, y2) = new_boxes[jk, :]

                # Map back to coordinates on the original (un-resized) image
                (real_x1, real_y1, real_x2, real_y2) = get_real_coordinates(resized_ratio,
                                                                            x1, y1, x2, y2)
                cv2.rectangle(img, (real_x1, real_y1), (real_x2, real_y2),
                              (int(class_to_color[key][0]), int(class_to_color[key][1]),
                               int(class_to_color[key][2])), 4)

                textLabel = '{}: {}'.format(key, int(100 * new_probs[jk]))
                all_dets.append((key, 100 * new_probs[jk]))
                (retval, baseLine) = cv2.getTextSize(textLabel, cv2.FONT_HERSHEY_COMPLEX, 1, 1)
                textOrg = (real_x1, real_y1)
                cv2.rectangle(img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                              (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                              (0, 0, 0), 1)
                cv2.rectangle(img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                              (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                              (255, 255, 255), -1)
                cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1)

        print(all_dets)
        plt.figure(figsize=(10, 10))
        plt.grid()
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        plt.show()
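# The loop above leans on two helpers that are not shown here. Hedged sketches
# of what they conventionally do in keras-frcnn-style pipelines -- assumptions
# about this codebase, not its actual definitions:
import math

def get_real_coordinates_sketch(ratio, x1, y1, x2, y2):
    """Undo the resize: map bbox coords on the resized image back to the original."""
    return (int(round(x1 / ratio)), int(round(y1 / ratio)),
            int(round(x2 / ratio)), int(round(y2 / ratio)))

def apply_regr_sketch(x, y, w, h, tx, ty, tw, th):
    """Apply (tx, ty, tw, th) regression deltas to an (x, y, w, h) ROI."""
    cx, cy = x + w / 2.0, y + h / 2.0              # ROI center
    cx1, cy1 = tx * w + cx, ty * h + cy            # shifted center
    w1, h1 = math.exp(tw) * w, math.exp(th) * h    # rescaled width/height
    x1, y1 = cx1 - w1 / 2.0, cy1 - h1 / 2.0        # back to top-left corner
    return int(round(x1)), int(round(y1)), int(round(w1)), int(round(h1))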