Example #1
def predict(args_):
    path = args_.path
    with open(
            '/media/private/Ci/log/plane/frcnn/vgg-adam-18-08-23-1/config.pickle',
            'rb') as f_in:
        cfg = pickle.load(f_in)
    cfg.use_horizontal_flips = False
    cfg.use_vertical_flips = False
    cfg.rot_90 = False

    class_mapping = cfg.class_mapping
    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

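    # invert the mapping: predicted class index -> class name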
    class_mapping = {v: k for k, v in class_mapping.items()}
    input_shape_img = (None, None, 3)
    input_shape_features = (None, None, 512)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(cfg.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)
    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               cfg.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)
    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)
    model_classifier.summary()
    model_rpn.summary()
    cfg.model_path = '/media/private/Ci/log/plane/frcnn/vgg-adam-18-08-23-1/loss/loss-4.0496-rpnc-3.3445-rpnr-0.4483-cls-0.1388-cr-0.1180.hdf5'

    print('Loading weights from {}'.format(cfg.model_path))
    model_rpn.load_weights(cfg.model_path, by_name=True)
    model_classifier.load_weights(cfg.model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    if os.path.isdir(path):
        for idx, img_name in enumerate(sorted(os.listdir(path))):
            if not img_name.lower().endswith(
                ('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
                continue
            print(img_name)
            predict_single_image(os.path.join(path, img_name), model_rpn,
                                 model_classifier_only, cfg, class_mapping)
    elif os.path.isfile(path):
        print('predict image from {}'.format(path))
        predict_single_image(path, model_rpn, model_classifier_only, cfg,
                             class_mapping)
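
# Note: predict_single_image is not defined in this snippet. A minimal sketch of
# what it plausibly does, assembled from the detection pipeline in Examples #8
# and #15 (format_img, roi_helpers.rpn_to_roi, cv2/np/K and the ROI batching
# loop are assumptions borrowed from there, not part of this example):
def predict_single_image_sketch(img_path, model_rpn, model_classifier_only, cfg,
                                class_mapping):
    img = cv2.imread(img_path)
    X, ratio = format_img(img, cfg)  # resize so the shorter side is ~600 px
    # the RPN model returns objectness scores, box deltas, and the feature map
    [Y1, Y2, F] = model_rpn.predict(X)
    R = roi_helpers.rpn_to_roi(Y1, Y2, cfg, K.image_dim_ordering(),
                               overlap_thresh=0.7)
    # convert proposals from (x1, y1, x2, y2) to (x, y, w, h)
    R[:, 2] -= R[:, 0]
    R[:, 3] -= R[:, 1]
    for jk in range(R.shape[0] // cfg.num_rois + 1):
        ROIs = np.expand_dims(R[cfg.num_rois * jk:cfg.num_rois * (jk + 1), :],
                              axis=0)
        if ROIs.shape[1] == 0:
            break
        [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])
        # ... then threshold, apply_regr and NMS as in Example #8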
Example #2
def load_models_for_test(C, class_mapping):
    if C.network == 'vgg':
        from keras_frcnn import vgg as nn
    elif C.network == 'vgg_lite':
        from keras_frcnn import vgg_lite as nn
    elif C.network == 'mobilenet':
        from keras_frcnn import mobilenet as nn
    elif C.network == 'squeezenet':
        from keras_frcnn import squeezenet as nn
    else:
        import keras_frcnn.resnet as nn

    if C.network == 'resnet50':
        num_features = 1024
    elif C.network == 'vgg' or C.network == 'vgg_lite':
        num_features = 512
    else:  # squeezenet, mobilenet, or any other backbone
        num_features = 512

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (num_features, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    print('Loading weights from {}'.format(C.model_path))
    model_rpn.load_weights(C.model_path, by_name=True)
    model_classifier_only.load_weights(C.model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier_only.compile(optimizer='sgd', loss='mse')

    return model_rpn, model_classifier_only
Example #3
                                             C,
                                             nn.get_img_output_length,
                                             K.common.image_dim_ordering(),
                                             mode='val',
                                             hist_equal=True)

if K.common.image_dim_ordering() == 'th':
    input_shape_img = (3, 800, 800)
else:
    input_shape_img = (800, 800, 3)

img_input = Input(shape=input_shape_img)

# define the RPN, built on the base layers
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
rpn = nn.rpn(nn.nn_base(img_input, trainable=True), num_anchors)

model_rpn = Model(img_input, rpn[:2])
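# rpn[:2] keeps only the class and regression outputs; the third element
# returned by nn.rpn is the shared base feature map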

# load pretrained weights
try:
    if options.load is None:
        print('loading weights from {}'.format(C.base_net_weights))
        model_rpn.load_weights(C.base_net_weights, by_name=True)
except:
    print(
        'Could not load pretrained model weights. Weights can be found in the keras application folder https://github.com/fchollet/keras/tree/master/keras/applications'
    )

# optimizer setup
if options.optimizers == "SGD":
Example #4
print('Num val samples {}'.format(len(val_imgs)))


data_gen_train = data_generators.get_anchor_gt(train_imgs, classes_count, C, nn.get_img_output_length, K.image_dim_ordering(), mode='train')
data_gen_val = data_generators.get_anchor_gt(val_imgs, classes_count, C, nn.get_img_output_length, K.image_dim_ordering(),  mode='val')

if K.image_dim_ordering() == 'th':
	input_shape_img = (3, None, None)
else:
	input_shape_img = (None, None, 3)

img_input = Input(shape=input_shape_img)  # effectively a placeholder for the input image
roi_input = Input(shape=(None, 4))

# define the base network (resnet here, can be VGG, Inception, etc)
shared_layers = nn.nn_base(img_input, trainable=True)  # a function that outputs the base net's feature map

# define the RPN, built on the base layers
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)  # how many anchors are generated per feature-map position
rpn = nn.rpn(shared_layers, num_anchors)

classifier = nn.classifier(shared_layers, roi_input, C.num_rois, nb_classes=len(classes_count), trainable=True)

model_rpn = Model(img_input, rpn[:2])
model_classifier = Model([img_input, roi_input], classifier)

# this is a model that holds both the RPN and the classifier, used to load/save weights for the models
model_all = Model([img_input, roi_input], rpn[:2] + classifier)

try:
	print('loading weights from {}'.format(C.base_net_weights))
Example #5
                                             C,
                                             nn.get_img_output_length,
                                             K.image_dim_ordering(),
                                             mode='val')

if K.image_dim_ordering() == 'th':
    input_shape_img = (3, None, None)
else:
    input_shape_img = (None, None, 3)

img_input = Input(shape=input_shape_img)
roi_input = Input(shape=(None, 4))

# define the base network (resnet here, can be VGG, Inception, etc)
shared_layers = nn.nn_base(
    img_input,
    trainable=False)  # the base network should be fixed (transfer learning)

# define the RPN, built on the base layers
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
rpn = nn.rpn(shared_layers, num_anchors)

classifier = nn.classifier(shared_layers,
                           roi_input,
                           C.num_rois,
                           nb_classes=len(classes_count),
                           trainable=True)

model_rpn = Model(img_input, rpn[:2])
model_classifier = Model([img_input, roi_input], classifier)
Example #6
	num_features = 512

if K.image_dim_ordering() == 'th':
	input_shape_img = (3, None, None)
	input_shape_features = (num_features, None, None)
else:
	input_shape_img = (None, None, 3)
	input_shape_features = (None, None, num_features)


img_input = Input(shape=input_shape_img)
roi_input = Input(shape=(C.num_rois, 4))
feature_map_input = Input(shape=input_shape_features)

# define the base network (resnet here, can be VGG, Inception, etc)
shared_layers = nn.nn_base(img_input)

# define the RPN, built on the base layers
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
rpn_layers = nn.rpn(shared_layers, num_anchors)

classifier = nn.classifier(feature_map_input, roi_input, C.num_rois, nb_classes=len(class_mapping))

model_rpn = Model(img_input, rpn_layers)
model_classifier = Model([feature_map_input, roi_input], classifier)
# model loading
if options.load is None:
  print('Loading weights from {}'.format(C.model_path))
  model_rpn.load_weights(C.model_path, by_name=True)
  model_classifier.load_weights(C.model_path, by_name=True)
else:
Example #7
data_gen_train = data_generators.get_anchor_gt(
    train_imgs, classes_count, C, nn.get_img_output_length, K.image_dim_ordering(), mode='train')
data_gen_val = data_generators.get_anchor_gt(
    val_imgs, classes_count, C, nn.get_img_output_length, K.image_dim_ordering(), mode='val')

if K.image_dim_ordering() == 'th':
    input_shape_img = (3, None, None)
else:
    input_shape_img = (None, None, 3)

img_input = Input(shape=input_shape_img)
roi_input = Input(shape=(None, 4))

# define the base network (resnet here, can be VGG, Inception, etc)
shared_layers = nn.nn_base(img_input, trainable=True)

# define the RPN, built on the base layers
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
rpn = nn.rpn(shared_layers, num_anchors)

classifier = nn.classifier(shared_layers, roi_input, C.num_rois, nb_classes=len(
    classes_count), trainable=True)

model_rpn = Model(img_input, rpn[:2])
model_classifier = Model([img_input, roi_input], classifier)

# this is a model that holds both the RPN and the classifier, used to
# load/save weights for the models
model_all = Model([img_input, roi_input], rpn[:2] + classifier)
Example #8
def test(image_path, mode='test'):
	## get the dict for labels
	class_mapping = C.class_mapping
	if 'bg' not in class_mapping:
		class_mapping['bg'] = len(class_mapping)

	class_mapping = {v: k for k, v in class_mapping.items()}
	print(class_mapping)
	class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping}
	C.num_rois = int(options.num_rois) # 32 for default

	model_path = 'model_frcnn_cat.h5'
	# model_path = 'model_frcnn_cat_tested.h5'
	# model_path = 'model_frcnn (1).h5'

	## shape for input
	input_shape_img = (None, None, 3)
	# input_shape_features = (None, None, num_features)
	## rebuild the model in train_frcnn
	img_input = Input(shape=input_shape_img)
	roi_input = Input(shape=(C.num_rois, 4))
	## Bone network of Vgg16
	shared_layers = nn.nn_base(img_input, trainable=True)
	## network of rpn and classifcation
	num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
	rpn = nn.rpn(shared_layers, num_anchors)
	## classifier outputs [out_class, out_regr]
	classifier = nn.classifier(shared_layers, roi_input, C.num_rois, nb_classes=len(class_mapping), trainable=True)
	## build a model for each network: Model(input, output)
	model_rpn = Model(img_input, rpn[:2])  ## because rpn[2] is base_layers(input)
	model_classifier = Model([img_input, roi_input], classifier)
	model_all = Model([img_input, roi_input], rpn[:2]+classifier)
	print('Loading weights from {}'.format(model_path))
	model_rpn.load_weights(model_path, by_name=True)
	model_classifier.load_weights(model_path, by_name=True)
	model_rpn.compile(optimizer='sgd', loss='mse')
	model_classifier.compile(optimizer='sgd', loss='mse')


	bbox_threshold = 0.7
	image = cv2.imread(image_path)
	## resize make the shorter side to be 600
	## and get the resize ratio
	X, ratio = format_img(image, C)
	## make predict
	[Y1, Y2] = model_rpn.predict(X)
	##
	R = roi_helpers.rpn_to_roi(Y1, Y2, C, overlap_thresh=0.7)
	#X2,Y1,Y2,IouS = roi_helpers.calc_iou(R, img_data, C, class_mapping)
	# convert from (x1,y1,x2,y2) to (x,y,w,h)
	R[:, 2] -= R[:, 0]
	R[:, 3] -= R[:, 1]
	# apply the spatial pyramid pooling to the proposed regions
	bboxes = {}
	probs = {}


	for jk in range(R.shape[0]//C.num_rois + 1):
		## take num_rois ROIs at a time; ROIs has shape (1, num_rois, 4)
		ROIs = np.expand_dims(R[C.num_rois*jk:C.num_rois*(jk+1), :], axis=0)

		if ROIs.shape[1] == 0:
			break

		## when it comes to the last time
		if jk == R.shape[0]//C.num_rois:
			# pad R so the final batch holds exactly num_rois ROIs
			curr_shape = ROIs.shape
			target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
			ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
			ROIs_padded[:, :curr_shape[1], :] = ROIs
			ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
			ROIs = ROIs_padded

		[P_cls, P_reg] = model_classifier.predict([X, ROIs])

		for ii in range(P_cls.shape[1]): ##32

			print('Max value')
			print(np.max(P_cls[0, ii, :]))
			print('label map')
			print(np.argmax(P_cls[0, ii, :]))

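			## skip weak detections and boxes whose argmax is 'bg' (the last index)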
			if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
				continue

			## we get the predict truth
			cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]
			if cls_name not in bboxes:
				bboxes[cls_name] = []
				probs[cls_name] = []

			(x, y, w, h) = ROIs[0, ii, :]

			cls_num = np.argmax(P_cls[0, ii, :])
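			## undo the scaling applied to the regression targets during training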
			try:
				(tx, ty, tw, th) = P_reg[0, ii, 4*cls_num:4*(cls_num+1)]
				tx /= C.classifier_regr_std[0]
				ty /= C.classifier_regr_std[1]
				tw /= C.classifier_regr_std[2]
				th /= C.classifier_regr_std[3]
				x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
			except:
				pass
			## rpn_stride = 16
			bboxes[cls_name].append([C.rpn_stride*x, C.rpn_stride*y, C.rpn_stride*(x+w), C.rpn_stride*(y+h)])
			probs[cls_name].append(np.max(P_cls[0, ii, :]) )

	all_dets = []
	#print(bboxes)

	## show time !
	for key in bboxes:
		bbox = np.array(bboxes[key])

		new_boxes, new_probs = roi_helpers.non_max_suppression_fast(bbox, np.array(probs[key]), overlap_thresh=0.35)

		for jk in range(new_boxes.shape[0]):
			(x1, y1, x2, y2) = new_boxes[jk,:]

			(real_x1, real_y1, real_x2, real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

			cv2.rectangle(image,(real_x1, real_y1), (real_x2, real_y2), (int(class_to_color[key][0]), int(class_to_color[key][1]), int(class_to_color[key][2])),2)

			textLabel = '{}: {}'.format(key,int(100*new_probs[jk]))
			all_dets.append((key,100*new_probs[jk]))

			(retval,baseLine) = cv2.getTextSize(textLabel,cv2.FONT_HERSHEY_COMPLEX,1,1)
			textOrg = (real_x1, real_y1-0)

			cv2.rectangle(image, (textOrg[0] - 5, textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (0, 0, 0), 2)
			cv2.rectangle(image, (textOrg[0] - 5, textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (255, 255, 255), -1)
			cv2.putText(image, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1)

	#print('Elapsed time = {}'.format(time.time() - st))

	return image
Example #9
#### Load model ########
########################
if K.image_dim_ordering() == 'th':
    input_shape_img = (C.channels, None, None)
    input_shape_features = (num_features, None, None)
else:  # tensorflow backend
    input_shape_img = (None, None, C.channels)
    input_shape_features = (None, None, num_features)

img_input = Input(shape=input_shape_img)
roi_input = Input(shape=(None, 4))
feature_map_input = Input(
    shape=input_shape_features)  # used for prediction validation

# define the base network (resnet here, can be VGG, Inception, etc)
shared_layers = nn.nn_base(img_input, trainable=True, channels=C.channels)

# define the RPN, built on the base layers
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
rpn = nn.rpn(shared_layers, num_anchors)

classifier = nn.classifier(shared_layers,
                           roi_input,
                           C.num_rois,
                           nb_classes=len(classes_count),
                           trainable=True)
classifier_only = nn.classifier(feature_map_input,
                                roi_input,
                                C.num_rois,
                                nb_classes=len(classes_count),
                                trainable=True)
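
# classifier_only mirrors classifier but takes the precomputed feature map as
# input, so at prediction time the base network's output can be computed once
# and reused by the classifier head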
Example #10
def load_model(config_path, model_path):
    """
    config_path : config pickle file
    model_path : model .h5 weights file
    """
    for f in [config_path, model_path]:
        if not os.path.isfile(f):
            print("cant find {}".format(f))
            assert 0

    # Load model config file
    with open(config_path, 'rb') as f_in:
        C = pickle.load(f_in)
    # Turn off augmentation at test time
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False

    if C.network == 'resnet50':
        assert 0, "Does not support resnet50 network"
    elif C.network == 'vgg':
        print("Using VGG16 as faster-rcnn base network")
        global num_features
        num_features = 512

    # Define model path & lodel base model (without weights)
    C.model_path = model_path
    print('model path is {}'.format(C.model_path))

    global class_mapping
    global class_to_color
    class_mapping = C.class_mapping
    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)
    class_mapping = {v: k for k, v in class_mapping.items()}
    print(class_mapping)
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }
    global num_rois
    C.num_rois = int(num_rois)

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (num_features, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)
    model_rpn = Model(img_input, rpn_layers)

    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)
    model_classifier = Model([feature_map_input, roi_input], classifier)

    print('Loading model weights from {}'.format(C.model_path))
    model_rpn.load_weights(C.model_path, by_name=True)
    model_classifier.load_weights(C.model_path, by_name=True)
    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')
    print("Done loading model")

    model_dict = {
        'C': C,
        'model_rpn': model_rpn,
        'model_classifier': model_classifier,
        'class_to_color': class_to_color,
    }

    return model_dict
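
# A hypothetical call, for illustration only (both file names are placeholders):
#   models = load_model('config.pickle', 'model_frcnn.h5')
#   model_rpn, C = models['model_rpn'], models['C']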
Example #11
                                             mode='val')

if K.image_dim_ordering() == 'th':
    input_shape_img = (3, None, None)
else:
    input_shape_img = (None, None, 3)

rgb_img_input = Input(shape=input_shape_img)
therm_img_input = Input(shape=input_shape_img)
roi_input = Input(shape=(None, 4))
#print(train_imgs[0])
#print(len(val_imgs))
#raw_input('hello')
# define the base network (resnet here, can be VGG, Inception, etc)
shared_layers = nn.nn_base(
    rgb_img_input, therm_img_input,
    trainable=True)  ### passing a list of inputs instead of one input

# define the RPN, built on the base layers
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
rpn = nn.rpn(shared_layers, num_anchors)

classifier = nn.classifier(shared_layers,
                           roi_input,
                           C.num_rois,
                           nb_classes=len(classes_count),
                           trainable=True)

model_rpn = Model([rgb_img_input, therm_img_input], rpn[:2])
model_classifier = Model([rgb_img_input, therm_img_input, roi_input],
                         classifier)
Example #12
def train_kitti():
    # config for data argument
    cfg = config.Config()

    cfg.use_horizontal_flips = True
    cfg.use_vertical_flips = True
    cfg.rot_90 = True
    cfg.num_rois = 32
    cfg.base_net_weights = os.path.join('./model/', nn.get_weight_path())

    # TODO: the only file should to be change for other data to train
    cfg.model_path = './model/kitti_frcnn_last.hdf5'
    cfg.simple_label_file = 'kitti_simple_label.txt'

    all_images, classes_count, class_mapping = get_data(cfg.simple_label_file)

    # all_images: a list of dicts recording each file path, the image size, the
    # positions of all bounding boxes, and whether the image is for trainval or test:
    # {'filepath': './Datasets/training/image_2/000001.png', 'width': 1242, 'height': 375,
    #  'bboxes': [{'class': 'Truck', 'x1': 599, 'x2': 629, 'y1': 156, 'y2': 189},
    #             {'class': 'Car', 'x1': 387, 'x2': 423, 'y1': 181, 'y2': 203},
    #             {'class': 'Cyclist', 'x1': 676, 'x2': 688, 'y1': 163, 'y2': 193},
    #             {'class': 'DontCare', 'x1': 503, 'x2': 590, 'y1': 169, 'y2': 190},
    #             {'class': 'DontCare', 'x1': 511, 'x2': 527, 'y1': 174, 'y2': 187},
    #             {'class': 'DontCare', 'x1': 532, 'x2': 542, 'y1': 176, 'y2': 185},
    #             {'class': 'DontCare', 'x1': 559, 'x2': 575, 'y1': 175, 'y2': 183}], 'imageset': 'trainval'}

    # classes_count, a dict:
    # {'Pedestrian': 3, 'Truck': 1, 'Car': 29, 'Cyclist': 2, 'DontCare': 29, 'Misc': 1}
    # class_mapping, a dict:
    # {'Pedestrian': 0, 'Truck': 1, 'Car': 2, 'Cyclist': 3, 'DontCare': 4, 'Misc': 5}

    # 'bg' is the background class; hard negative mining sometimes adds background samples that contain no objects at all
    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    cfg.class_mapping = class_mapping
    with open(cfg.config_save_file, 'wb') as config_f:
        pickle.dump(cfg, config_f)
        print(
            'Config has been written to {}, and can be loaded when testing to ensure correct results'
            .format(cfg.config_save_file))

    inv_map = {v: k for k, v in class_mapping.items()}

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))
    random.shuffle(all_images)
    num_imgs = len(all_images)
    train_imgs = [s for s in all_images if s['imageset'] == 'trainval']
    val_imgs = [s for s in all_images if s['imageset'] == 'test']

    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))

    data_gen_train = data_generators.get_anchor_gt(train_imgs,
                                                   classes_count,
                                                   cfg,
                                                   nn.get_img_output_length,
                                                   K.image_dim_ordering(),
                                                   mode='train')
    data_gen_val = data_generators.get_anchor_gt(val_imgs,
                                                 classes_count,
                                                 cfg,
                                                 nn.get_img_output_length,
                                                 K.image_dim_ordering(),
                                                 mode='val')

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(shared_layers,
                               roi_input,
                               cfg.num_rois,
                               nb_classes=len(classes_count),
                               trainable=True)

    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    try:
        print('loading weights from {}'.format(cfg.model_path))
        model_rpn.load_weights(cfg.model_path, by_name=True)
        model_classifier.load_weights(cfg.model_path, by_name=True)
    except Exception as e:
        print(e)
        print(
            'Could not load pretrained model weights. Weights can be found in the keras application folder '
            'https://github.com/fchollet/keras/tree/master/keras/applications')

    optimizer = Adam(lr=1e-5)
    optimizer_classifier = Adam(lr=1e-5)
    model_rpn.compile(optimizer=optimizer,
                      loss=[
                          losses_fn.rpn_loss_cls(num_anchors),
                          losses_fn.rpn_loss_regr(num_anchors)
                      ])
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[
            losses_fn.class_loss_cls,
            losses_fn.class_loss_regr(len(classes_count) - 1)
        ],
        metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    model_all.compile(optimizer='sgd', loss='mae')

    epoch_length = 1000
    num_epochs = int(cfg.num_epochs)
    iter_num = 0

    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()

    best_loss = np.Inf

    class_mapping_inv = {v: k for k, v in class_mapping.items()}
    print('Starting training')

    vis = True

    rpncallback = TensorBoard(log_dir='./logs/rpn',
                              histogram_freq=1,
                              batch_size=1,
                              write_graph=True,
                              write_grads=False,
                              write_images=False,
                              embeddings_freq=0,
                              embeddings_layer_names=None,
                              embeddings_metadata=None,
                              embeddings_data=None,
                              update_freq='epoch')
    classifiercallback = TensorBoard(log_dir='./logs/classifier',
                                     histogram_freq=1,
                                     batch_size=1,
                                     write_graph=True,
                                     write_grads=False,
                                     write_images=False,
                                     embeddings_freq=0,
                                     embeddings_layer_names=None,
                                     embeddings_metadata=None,
                                     embeddings_data=None,
                                     update_freq='epoch')
    allcallback = TensorBoard(log_dir='./logs/all',
                              histogram_freq=1,
                              batch_size=1,
                              write_graph=True,
                              write_grads=False,
                              write_images=False,
                              embeddings_freq=0,
                              embeddings_layer_names=None,
                              embeddings_metadata=None,
                              embeddings_data=None,
                              update_freq='epoch')

    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:

                if len(rpn_accuracy_rpn_monitor
                       ) == epoch_length and cfg.verbose:
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(
                            rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                        .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print(
                            'RPN is not producing bounding boxes that overlap'
                            ' the ground truth boxes. Check RPN settings or keep training.'
                        )

                X, Y, img_data = next(data_gen_train)

                loss_rpn = model_rpn.train_on_batch(X, Y)

                P_rpn = model_rpn.predict_on_batch(X)

                result = roi_helpers.rpn_to_roi(P_rpn[0],
                                                P_rpn[1],
                                                cfg,
                                                K.image_dim_ordering(),
                                                use_regr=True,
                                                overlap_thresh=0.7,
                                                max_boxes=300)
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(
                    result, img_data, cfg, class_mapping)

                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

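                # Y1's last column is the one-hot 'bg' entry: 1 marks a negative
                # (background) ROI, 0 marks a positive one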
                neg_samples = np.where(Y1[0, :, -1] == 1)
                pos_samples = np.where(Y1[0, :, -1] == 0)

                if len(neg_samples) > 0:
                    neg_samples = neg_samples[0]
                else:
                    neg_samples = []

                if len(pos_samples) > 0:
                    pos_samples = pos_samples[0]
                else:
                    pos_samples = []

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append((len(pos_samples)))

                if cfg.num_rois > 1:
                    if len(pos_samples) < cfg.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, cfg.num_rois // 2,
                            replace=False).tolist()
                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            cfg.num_rois - len(selected_pos_samples),
                            replace=False).tolist()
                    except:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            cfg.num_rois - len(selected_pos_samples),
                            replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()
                    # wrap in a list so the ROI axis is kept when slicing below
                    if np.random.randint(0, 2):
                        sel_samples = [random.choice(neg_samples)]
                    else:
                        sel_samples = [random.choice(pos_samples)]

                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]

                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]
                losses[iter_num, 4] = loss_class[3]

                iter_num += 1

                progbar.update(
                    iter_num,
                    [('rpn_cls', np.mean(losses[:iter_num, 0])),
                     ('rpn_regr', np.mean(losses[:iter_num, 1])),
                     ('detector_cls', np.mean(losses[:iter_num, 2])),
                     ('detector_regr', np.mean(losses[:iter_num, 3]))])

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(
                        rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if cfg.verbose:
                        print(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        print(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        print('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() -
                                                        start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if cfg.verbose:
                            print(
                                'Total loss decreased from {} to {}, saving weights'
                                .format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_all.save_weights(cfg.model_path)

                    break

            except Exception as e:
                print('Exception: {}'.format(e))
                # save model
                model_all.save_weights(cfg.model_path)
                continue
    print('Training complete, exiting.')
Example #13
    def __init__(self,
                 num_rois=32,
                 config_filename='config.pickle',
                 network='vgg',
                 write=False,
                 load=None):

        config_output_filename = config_filename
        self.write = write
        with open(config_output_filename, 'rb') as f_in:
            self.C = pickle.load(f_in)

        # choose the backbone network; vgg is the default here
        if network == 'vgg':
            self.C.network = 'vgg16'
            from keras_frcnn import vgg as nn
        elif network == 'resnet50':
            from keras_frcnn import resnet as nn

            self.C.network = 'resnet50'
        elif network == 'vgg19':
            from keras_frcnn import vgg19 as nn

            self.C.network = 'vgg19'
        elif network == 'mobilenetv1':
            from keras_frcnn import mobilenetv1 as nn

            self.C.network = 'mobilenetv1'
        elif network == 'mobilenetv1_05':
            from keras_frcnn import mobilenetv1_05 as nn

            self.C.network = 'mobilenetv1_05'
        elif network == 'mobilenetv1_25':
            from keras_frcnn import mobilenetv1_25 as nn

            self.C.network = 'mobilenetv1_25'
        elif network == 'mobilenetv2':
            from keras_frcnn import mobilenetv2 as nn

            self.C.network = 'mobilenetv2'
        else:
            print('Not a valid model')
            raise ValueError

        # turn off any data augmentation at test time
        self.C.use_horizontal_flips = False
        self.C.use_vertical_flips = False
        self.C.rot_90 = False

        self.class_mapping = self.C.class_mapping

        if 'bg' not in self.class_mapping:
            self.class_mapping['bg'] = len(self.class_mapping)

        self.class_mapping = {v: k for k, v in self.class_mapping.items()}
        print(self.class_mapping)
        self.class_to_color = {
            self.class_mapping[v]: np.random.randint(0, 255, 3)
            for v in self.class_mapping
        }
        self.C.num_rois = num_rois

        if self.C.network == 'resnet50':
            num_features = 1024
        else:
            # may need to fix this up with your backbone..!
            print("backbone is not resnet50. number of features chosen is 512")
            num_features = 512

        if K.image_dim_ordering() == 'th':
            input_shape_img = (3, None, None)
            input_shape_features = (num_features, None, None)
        else:
            input_shape_img = (None, None, 3)
            input_shape_features = (None, None, num_features)

        img_input = Input(shape=input_shape_img)
        roi_input = Input(shape=(self.C.num_rois, 4))
        feature_map_input = Input(shape=input_shape_features)

        # define the base network (resnet here, can be VGG, Inception, etc)
        shared_layers = nn.nn_base(img_input, trainable=True)

        # define the RPN, built on the base layers
        num_anchors = len(self.C.anchor_box_scales) * len(
            self.C.anchor_box_ratios)
        rpn_layers = nn.rpn(shared_layers, num_anchors)

        classifier = nn.classifier(feature_map_input,
                                   roi_input,
                                   self.C.num_rois,
                                   nb_classes=len(self.class_mapping),
                                   trainable=True)

        self.model_rpn = Model(img_input, rpn_layers)
        self.model_classifier_only = Model([feature_map_input, roi_input],
                                           classifier)

        model_classifier = Model([feature_map_input, roi_input], classifier)

        # model loading
        if load is None:
            print('Loading weights from {}'.format(self.C.model_path))
            self.model_rpn.load_weights(self.C.model_path, by_name=True)
            model_classifier.load_weights(self.C.model_path, by_name=True)
        else:
            print('Loading weights from {}'.format(load))
            self.model_rpn.load_weights(load, by_name=True)
            model_classifier.load_weights(load, by_name=True)

        self.model_rpn.compile(optimizer='sgd', loss='mse')
        model_classifier.compile(optimizer='sgd', loss='mse')

        self.bbox_threshold = 0.8
        visualise = True
        self.idx = 0
Example #14
print('Num train samples {}'.format(len(train_imgs)))
print('Num val samples {}'.format(len(val_imgs)))

data_gen_train = data_generators.get_modified_gt(train_imgs, classes_count, C, nn.get_img_output_length, K.common.image_dim_ordering(), mode='train')
data_gen_val = data_generators.get_modified_gt(val_imgs, classes_count, C, nn.get_img_output_length, K.common.image_dim_ordering(), mode='val')

if K.common.image_dim_ordering() == 'th':
	input_shape_img = (3, 800, 800)
else:
	input_shape_img = (800, 800, 3)

img_input = Input(shape=input_shape_img)

# define the modified RPN, built on the base layers
rpn_modified = nn.rpn_modified(nn.nn_base(img_input, trainable=True))

model_rpn = Model(img_input, rpn_modified[:2])

# load pretrained weights
try:
	if options.load is None:
		print('loading weights from {}'.format(C.base_net_weights))
		model_rpn.load_weights(C.base_net_weights, by_name=True)
except:
	print('Could not load pretrained model weights. Weights can be found in the keras application folder https://github.com/fchollet/keras/tree/master/keras/applications')

# optimizer setup
if options.optimizers == "SGD":
	optimizer = SGD(lr=options.lr/100, decay=0.0005, momentum=0.9)
else:
Example #15
def predict(filepath):
    import os
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)

    sys.setrecursionlimit(40000)

    parser = OptionParser()

    num_rois = 10
    config_filename = "config_combine_200.pickle"
    network = "vgg"
    write = True
    load = "models/vgg/combine_200.hdf5"

    config_output_filename = config_filename

    with open(config_output_filename, 'rb') as f_in:
        C = pickle.load(f_in)

    # turn off any data augmentation at test time
    C.network = "vgg16"
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False

    class_mapping = C.class_mapping

    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    class_mapping = {v: k for k, v in class_mapping.items()}
    print(class_mapping)
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }
    C.num_rois = int(num_rois)

    if C.network == 'resnet50':
        num_features = 1024
    elif C.network == "mobilenetv2":
        num_features = 320
    else:
        # may need to fix this up with your backbone..!
        print("backbone is not resnet50. number of features chosen is 512")
        num_features = 512

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (num_features, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(class_mapping))

    model_rpn = Model(img_input, rpn_layers)
    model_classifier = Model([feature_map_input, roi_input], classifier)

    # model loading
    if load is None:
        print('Loading weights from {}'.format(C.model_path))
        model_rpn.load_weights(C.model_path, by_name=True)
        model_classifier.load_weights(C.model_path, by_name=True)
    else:
        print('Loading weights from {}'.format(load))
        model_rpn.load_weights(load, by_name=True)
        model_classifier.load_weights(load, by_name=True)

    #model_rpn.compile(optimizer='adam', loss='mse')
    #model_classifier.compile(optimizer='adam', loss='mse')

    all_imgs = []
    classes = {}
    bbox_threshold = 0.5

    visualise = True
    num_rois = C.num_rois

    st = time.time()
    if "/" in filepath:
        img_name = filepath.split("/")[-1]
    else:
        img_name = filepath.split("\\")[-1]

    img = cv2.imread(filepath)

    # preprocess image
    X, ratio = format_img(img, C)
    img_scaled = (np.transpose(X[0, :, :, :],
                               (1, 2, 0)) + 127.5).astype('uint8')
    if K.image_dim_ordering() == 'tf':
        X = np.transpose(X, (0, 2, 3, 1))
    # get the feature maps and output from the RPN
    [Y1, Y2, F] = model_rpn.predict(X)
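    # Y1: anchor objectness scores, Y2: anchor box deltas, F: base-network
    # feature map, reused by model_classifier so it is not recomputed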

    R = roi_helpers.rpn_to_roi(Y1,
                               Y2,
                               C,
                               K.image_dim_ordering(),
                               overlap_thresh=0.7)
    # print(R.shape)

    # convert from (x1,y1,x2,y2) to (x,y,w,h)
    R[:, 2] -= R[:, 0]
    R[:, 3] -= R[:, 1]

    # apply the spatial pyramid pooling to the proposed regions
    bboxes = {}
    probs = {}
    for jk in range(R.shape[0] // num_rois + 1):
        ROIs = np.expand_dims(R[num_rois * jk:num_rois * (jk + 1), :], axis=0)
        if ROIs.shape[1] == 0:
            break

        if jk == R.shape[0] // num_rois:
            # pad R so the final batch holds exactly num_rois ROIs
            curr_shape = ROIs.shape
            target_shape = (curr_shape[0], num_rois, curr_shape[2])
            ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
            ROIs_padded[:, :curr_shape[1], :] = ROIs
            ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
            ROIs = ROIs_padded

        [P_cls, P_regr] = model_classifier.predict([F, ROIs])
        print(P_cls)

        for ii in range(P_cls.shape[1]):

            if np.max(P_cls[0, ii, :]) < 0.6 or np.argmax(
                    P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                continue

            cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

            if cls_name not in bboxes:
                bboxes[cls_name] = []
                probs[cls_name] = []
            (x, y, w, h) = ROIs[0, ii, :]

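            # 16 is the VGG16 feature stride (cf. C.rpn_stride in Example #8),
            # mapping feature-map coordinates back to the resized input image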
            bboxes[cls_name].append(
                [16 * x, 16 * y, 16 * (x + w), 16 * (y + h)])
            probs[cls_name].append(np.max(P_cls[0, ii, :]))

    all_dets = []

    for key in bboxes:
        # print(key)
        # print(len(bboxes[key]))
        bbox = np.array(bboxes[key])

        new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
            bbox, np.array(probs[key]), overlap_thresh=0.1)
        for jk in range(new_boxes.shape[0]):
            (x1, y1, x2, y2) = new_boxes[jk, :]
            (real_x1, real_y1, real_x2,
             real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

            cv2.rectangle(
                img, (real_x1, real_y1), (real_x2, real_y2),
                (int(class_to_color[key][0]), int(
                    class_to_color[key][1]), int(class_to_color[key][2])), 2)

            textLabel = '{}: {}'.format(key, int(100 * new_probs[jk]))
            all_dets.append((key, 100 * new_probs[jk]))

            (retval, baseLine) = cv2.getTextSize(textLabel,
                                                 cv2.FONT_HERSHEY_COMPLEX, 1,
                                                 1)
            textOrg = (real_x1, real_y1 - 0)

            # cv2.rectangle(img, (textOrg[0] - 5, textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (0, 0, 0), 2)
            # cv2.rectangle(img, (textOrg[0] - 5,textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (255, 255, 255), -1)
            cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1,
                        (0, 0, 255), 1)

    print('Elapsed time = {}'.format(time.time() - st))
    print(all_dets)
    print(bboxes)
    # enable if you want to show pics
    if write:
        if not os.path.isdir("app/static/Deployment/results"):
            os.mkdir("app/static/Deployment/results")
        cv2.imwrite(
            os.path.join('app/static/Deployment/results',
                         '{}'.format(img_name)), img)


# if __name__ == "__main__":
#     filepath = "111.jpg"
#     predict(filepath)
Example #16
def work(num_ep, textEdit, traval_path, weights, dataAug_hf, dataAug_vf,
         dataAug_rot, base_network, curvelist):

    from keras_frcnn import losses as losses
    ##python /run train_frcnn.py -p ./ --input_weight_path ./model_frcnn.hdf5

    sys.setrecursionlimit(40000)

    num_roi = 128
    num_epoch = num_ep
    model_save = "./model_frcnn.hdf5"

    parser = OptionParser()
    parser.add_option("-p",
                      "--path",
                      dest="train_path",
                      help="Path to training data.",
                      default=traval_path)  ###
    parser.add_option("-o",
                      "--parser",
                      dest="parser",
                      help="Parser to use. One of simple or pascal_voc",
                      default="pascal_voc")
    parser.add_option("-n",
                      "--num_rois",
                      type="int",
                      dest="num_rois",
                      help="Number of RoIs to process at once.",
                      default=num_roi)  ###
    parser.add_option("--network",
                      dest="network",
                      help="Base network to use. Supports vgg or resnet50.",
                      default=base_network)  ###resnet50

    parser.add_option(
        "--hf",
        dest="horizontal_flips",
        help="Augment with horizontal flips in training. (Default=false).",
        action="store_true",
        default=dataAug_hf)  ##
    parser.add_option(
        "--vf",
        dest="vertical_flips",
        help="Augment with vertical flips in training. (Default=false).",
        action="store_true",
        default=dataAug_vf)  ##
    parser.add_option(
        "--rot",
        "--rot_90",
        dest="rot_90",
        help="Augment with 90 degree rotations in training. (Default=false).",
        action="store_true",
        default=dataAug_rot)  ##

    parser.add_option("--num_epochs",
                      type="int",
                      dest="num_epochs",
                      help="Number of epochs.",
                      default=num_epoch)  ###2000
    parser.add_option(
        "--config_filename",
        dest="config_filename",
        help=
        "Location to store all the metadata related to the training (to be used when testing).",
        default="config.pickle")

    parser.add_option("--output_weight_path",
                      dest="output_weight_path",
                      help="Output path for weights.",
                      default=model_save)  ###

    parser.add_option(
        "--input_weight_path",
        dest="input_weight_path",
        help=
        "Input path for weights. If not specified, will try to load default weights provided by keras.",
        default=traval_path + weights)

    (options, args) = parser.parse_args()

    if not options.train_path:  # if filename is not given
        parser.error(
            'Error: path to training data must be specified. Pass --path to command line'
        )

    if options.parser == 'pascal_voc':
        from keras_frcnn.pascal_voc_parser import get_data
    elif options.parser == 'simple':
        from keras_frcnn.simple_parser import get_data
    else:
        raise ValueError(
            "Command line option parser must be one of 'pascal_voc' or 'simple'"
        )

    # pass the settings from the command line, and persist them in the config object
    C = config.Config()

    C.use_horizontal_flips = bool(options.horizontal_flips)
    C.use_vertical_flips = bool(options.vertical_flips)
    C.rot_90 = bool(options.rot_90)

    C.model_path = options.output_weight_path
    C.num_rois = int(options.num_rois)

    if options.network == 'vgg':
        C.network = 'vgg'
        from keras_frcnn import vgg as nn
    elif options.network == 'resnet50':
        from keras_frcnn import resnet as nn
        C.network = 'resnet50'
    else:
        print('Not a valid model')
        raise ValueError

    # check if weight path was passed via command line
    if options.input_weight_path:
        C.base_net_weights = options.input_weight_path
    else:
        # set the path to weights based on backend and model
        C.base_net_weights = nn.get_weight_path()

    all_imgs, classes_count, class_mapping = get_data(
        options.train_path)  #######
    #all_imgs, classes_count, class_mapping = get_data(train_path)#######

    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    C.class_mapping = class_mapping

    inv_map = {v: k for k, v in class_mapping.items()}

    print('Training images per class:')
    textEdit.append('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))
    textEdit.append('Num classes (including bg) = {}'.format(
        len(classes_count)))
    config_output_filename = options.config_filename

    with open(config_output_filename, 'wb') as config_f:
        pickle.dump(C, config_f)
        print(
            'Config has been written to {}, and can be loaded when testing to ensure correct results'
            .format(config_output_filename))
        textEdit.append(
            'Config has been written to {}, and can be loaded when testing to ensure correct results'
            .format(config_output_filename))
    random.shuffle(all_imgs)  ## shuffle randomly

    num_imgs = len(all_imgs)

    train_imgs = [s for s in all_imgs if s['imageset'] == 'train']
    val_imgs = [s for s in all_imgs if s['imageset'] == 'val']

    print('Num train samples {}'.format(len(train_imgs)))
    textEdit.append('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))
    textEdit.append('Num val samples {}'.format(len(val_imgs)))

    ##### RPN ground-truth computation
    data_gen_train = data_generators.get_anchor_gt(train_imgs,
                                                   classes_count,
                                                   C,
                                                   nn.get_img_output_length,
                                                   K.image_dim_ordering(),
                                                   mode='train')
    data_gen_val = data_generators.get_anchor_gt(val_imgs,
                                                 classes_count,
                                                 C,
                                                 nn.get_img_output_length,
                                                 K.image_dim_ordering(),
                                                 mode='val')

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(shared_layers,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(classes_count),
                               trainable=True)

    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    try:
        print('loading weights from {}'.format(C.base_net_weights))
        textEdit.append('loading weights from {}'.format(C.base_net_weights))
        model_rpn.load_weights(C.base_net_weights, by_name=True)
        model_classifier.load_weights(C.base_net_weights, by_name=True)
    except:
        print(
            'Could not load pretrained model weights. Weights can be found in the keras application folder '
            'https://github.com/fchollet/keras/tree/master/keras/applications')

    optimizer = Adam(lr=1e-5)
    optimizer_classifier = Adam(lr=1e-5)
    model_rpn.compile(optimizer=optimizer,
                      loss=[
                          losses.rpn_loss_cls(num_anchors),
                          losses.rpn_loss_regr(num_anchors)
                      ])
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[
            losses.class_loss_cls,
            losses.class_loss_regr(len(classes_count) - 1)
        ],
        metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    model_all.compile(optimizer='sgd', loss='mae')
    print(' training')
    epoch_length = 1
    num_epochs = int(options.num_epochs)
    iter_num = 0

    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()

    best_loss = np.Inf

    class_mapping_inv = {v: k for k, v in class_mapping.items()}
    print('Starting training')
    textEdit.append('Starting training')

    vis = True
    save_num = 1

    loss_rpn_cls_list = []
    loss_rpn_regr_list = []
    loss_class_cls_list = []
    loss_class_regr_list = []
    curr_loss_list = []

    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))
        textEdit.append('Epoch {}/{}'.format(epoch_num + 1, num_epochs))
        while True:
            try:

                if len(rpn_accuracy_rpn_monitor) == epoch_length and C.verbose:
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(
                            rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                        .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print(
                            'RPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.'
                        )

                X, Y, img_data = next(data_gen_train)

                loss_rpn = model_rpn.train_on_batch(X, Y)

                P_rpn = model_rpn.predict_on_batch(X)

                R = roi_helpers.rpn_to_roi(P_rpn[0],
                                           P_rpn[1],
                                           C,
                                           K.image_dim_ordering(),
                                           use_regr=True,
                                           overlap_thresh=0.7,
                                           max_boxes=300)
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(
                    R, img_data, C, class_mapping)

                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                neg_samples = np.where(Y1[0, :, -1] == 1)
                pos_samples = np.where(Y1[0, :, -1] == 0)

                if len(neg_samples) > 0:
                    neg_samples = neg_samples[0]
                else:
                    neg_samples = []

                if len(pos_samples) > 0:
                    pos_samples = pos_samples[0]
                else:
                    pos_samples = []

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append(len(pos_samples))

                if C.num_rois > 1:
                    if len(pos_samples) < C.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, C.num_rois // 2,
                            replace=False).tolist()
                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            C.num_rois - len(selected_pos_samples),
                            replace=False).tolist()
                    except:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            C.num_rois - len(selected_pos_samples),
                            replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]

                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]
                losses[iter_num, 4] = loss_class[3]

                iter_num += 1

                # progress-bar and plot data are updated here
                progbar.update(
                    iter_num,
                    [('rpn_cls', np.mean(losses[:iter_num, 0])),
                     ('rpn_regr', np.mean(losses[:iter_num, 1])),
                     ('detector_cls', np.mean(losses[:iter_num, 2])),
                     ('detector_regr', np.mean(losses[:iter_num, 3]))])

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_cls_list.append(loss_rpn_cls)
                    curvelist[1].setData(loss_rpn_cls_list)
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_rpn_regr_list.append(loss_rpn_regr)
                    curvelist[2].setData(loss_rpn_regr_list)
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_cls_list.append(loss_class_cls)
                    curvelist[3].setData(loss_class_cls_list)
                    loss_class_regr = np.mean(losses[:, 3])
                    loss_class_regr_list.append(loss_class_regr)
                    curvelist[4].setData(loss_class_regr_list)
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(
                        rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if C.verbose:
                        print(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        textEdit.append(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        print(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))
                        textEdit.append(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        textEdit.append(
                            'Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        textEdit.append(
                            'Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        textEdit.append('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        print('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        textEdit.append('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() -
                                                        start_time))
                        textEdit.append('Elapsed time: {}'.format(time.time() -
                                                                  start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    curr_loss_list.append(curr_loss)
                    curvelist[0].setData(curr_loss_list)
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if C.verbose:
                            print(
                                'Total loss decreased from {} to {}, saving weights'
                                .format(best_loss, curr_loss))
                            textEdit.append(
                                'Total loss decreased from {} to {}, saving weights'
                                .format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_all.save_weights(C.model_path)
                    break

            except Exception as e:
                print('Exception: {}'.format(e))
                textEdit.append('Exception: {}'.format(e))
                continue
        if epoch_num == save_num * 30:
            model_all.save_weights(C.model_path + str(save_num * 30) + ".hdf5")
            save_num += 1
    print('Training complete, exiting.')
    textEdit.append('Training complete, exiting.')
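The positive/negative sampling step inside the training loop above is easy to lose in the surrounding boilerplate. The following standalone sketch (function name and defaults are illustrative, not part of keras_frcnn) restates the same logic: take at most half of the mini-batch from positive ROIs and fill the remainder with negatives, falling back to sampling with replacement when negatives run short.

import numpy as np

def sample_rois(Y1, num_rois=32):
    # Y1[0, :, -1] == 1 marks background (negative) ROIs and == 0 marks
    # positives, mirroring the np.where() calls in the loops above
    neg = np.where(Y1[0, :, -1] == 1)[0]
    pos = np.where(Y1[0, :, -1] == 0)[0]

    n_pos = min(len(pos), num_rois // 2)
    sel_pos = np.random.choice(pos, n_pos, replace=False).tolist() if n_pos else []

    n_neg = num_rois - len(sel_pos)
    if len(neg) == 0:
        return sel_pos  # degenerate batch: no negatives available
    # sample with replacement only when there are too few negatives
    sel_neg = np.random.choice(neg, n_neg, replace=len(neg) < n_neg).tolist()
    return sel_pos + sel_neg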
示例#17
0
data_gen_train = data_generators.get_anchor_gt(train_imgs, classes_count, C, nn.get_img_output_length,
											   K.image_dim_ordering(), mode='train')
data_gen_val = data_generators.get_anchor_gt(val_imgs, classes_count, C, nn.get_img_output_length,K.image_dim_ordering(), mode='val')

if K.image_dim_ordering() == 'th':
	input_shape_img = (3, None, None)
else:
	input_shape_img = (None, None, 3)  # 3 channels; H and W stay None so any positive image size is accepted

img_input = Input(shape=input_shape_img)  # adds Keras metadata to the raw backend tensor, turning it into a Keras tensor
# img_input = Input(shape=input_shape_img, batch_shape=2)
roi_input = Input(shape=(None, 4))

# define the base network (resnet here, can be VGG, Inception, etc)
shared_layers = nn.nn_base(img_input, trainable=True)  # the concrete network structure: the base feature extractor, e.g. VGG or ResNet

# define the RPN, built on the base layers
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)  # total number of anchors per feature-map position

rpn = nn.rpn(shared_layers,
			 num_anchors)  # the RPN itself; shared_layers holds the feature tensor extracted by VGG/ResNet and feeds the RPN, which returns x_class (objectness classification), x_regr (4*k regression outputs) and base_layers (the feature tensor from the last conv layer)

classifier = nn.classifier(shared_layers, roi_input, C.num_rois, nb_classes=len(classes_count), trainable=True)

# the models below are all constructed from their inputs and outputs
model_rpn = Model(img_input, rpn[:2])  # build the RPN model; its outputs are the two RPN heads (classification and regression)

model_classifier = Model([img_input, roi_input], classifier)  # build the overall network whose output feeds the final classification

# this is a model that holds both the RPN and the classifier, used to load/save weights for the models
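These comments lean on a property of the Keras functional API: models built from the same tensors share the underlying layer objects, so training model_rpn also updates the base layers seen by model_classifier. A toy sketch of that mechanism (layer sizes and names are arbitrary, assuming a Keras 2-style API):

from keras.layers import Conv2D, Dense, GlobalAveragePooling2D, Input
from keras.models import Model

toy_input = Input(shape=(None, None, 3))
shared = Conv2D(64, (3, 3), padding='same', activation='relu',
                name='shared_conv')(toy_input)

# two heads on the same base tensor, analogous to rpn[:2] and the classifier
head_a = Conv2D(9, (1, 1), activation='sigmoid', name='head_a')(shared)
head_b = Dense(10, activation='softmax',
               name='head_b')(GlobalAveragePooling2D()(shared))

model_a = Model(toy_input, head_a)
model_b = Model(toy_input, head_b)

# 'shared_conv' is the same layer object in both models, so a gradient step
# on either model changes the weights seen by the other
assert model_a.get_layer('shared_conv') is model_b.get_layer('shared_conv')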
示例#18
0
                                             classes_count,
                                             C,
                                             nn.get_img_output_length,
                                             K.image_dim_ordering(),
                                             mode='val')

if K.image_dim_ordering() == 'th':
    input_shape_img = (3, None, None)
else:
    input_shape_img = (None, None, 3)

img_input = Input(shape=input_shape_img)
roi_input = Input(shape=(None, 4))

# define the base network (resnet here, can be VGG, Inception, etc)
shared_layers = nn.nn_base(img_input, trainable=False)

# define the RPN, built on the base layers
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
rpn = nn.rpn(shared_layers, num_anchors)

classifier = nn.classifier(shared_layers,
                           roi_input,
                           C.num_rois,
                           nb_classes=len(classes_count),
                           trainable=True)

model_rpn = Model(img_input, rpn[:2])
model_classifier = Model([img_input, roi_input], classifier)

# this is a model that holds both the RPN and the classifier, used to load/save weights for the models
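The recurring note that calc_iou converts boxes from (x1, y1, x2, y2) to (x, y, w, h) boils down to a one-line transformation; a sketch of just that conversion (not the library function itself):

def corners_to_xywh(x1, y1, x2, y2):
    # (x1, y1) is the top-left corner and (x2, y2) the bottom-right;
    # the (x, y, w, h) form keeps the top-left corner plus width and height
    return x1, y1, x2 - x1, y2 - y1

# corners_to_xywh(10, 20, 50, 80) -> (10, 20, 40, 60)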
示例#19
0
def run_train(train_path,
              output_weight_path,
              config_filename,
              parser='simple',
              input_weight_path=None,
              network='resnet50',
              num_rois=32,
              lr=1e-5,
              iters=100,
              num_epochs=100,
              overlap_th=0.7):

    C = config.Config()

    C.model_path = output_weight_path
    C.num_rois = int(num_rois)

    if network == 'vgg':
        C.network = 'vgg'
        from keras_frcnn import vgg as nn
    elif network == 'resnet50':
        from keras_frcnn import resnet as nn
        C.network = 'resnet50'
    else:
        print('Not a valid model')
        raise ValueError

    if parser == 'pascal_voc':
        from keras_frcnn.pascal_voc_parser import get_data
    elif parser == 'simple':
        from keras_frcnn.simple_parser import get_data
    else:
        print('Wrong parser method')
        raise ValueError

    if input_weight_path is not None:
        C.base_net_weights = input_weight_path
    else:
        C.base_net_weights = nn.get_weight_path()

    all_imgs, classes_count, class_mapping = get_data(train_path)

    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    C.class_mapping = class_mapping

    inv_map = {v: k for k, v in class_mapping.items()}

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))

    config_output_filename = config_filename

    with open(config_output_filename, 'wb') as config_f:
        pickle.dump(C, config_f)
        print(
            'Config has been written to {}, and can be loaded when testing to ensure correct results'
            .format(config_output_filename))

    random.shuffle(all_imgs)

    num_imgs = len(all_imgs)

    train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval']
    val_imgs = [s for s in all_imgs if s['imageset'] == 'test']

    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))

    data_gen_train = data_generators.get_anchor_gt(train_imgs,
                                                   classes_count,
                                                   C,
                                                   nn.get_img_output_length,
                                                   K.image_dim_ordering(),
                                                   mode='train')
    data_gen_val = data_generators.get_anchor_gt(val_imgs,
                                                 classes_count,
                                                 C,
                                                 nn.get_img_output_length,
                                                 K.image_dim_ordering(),
                                                 mode='val')

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(shared_layers,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(classes_count),
                               trainable=True)

    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    try:
        print('loading weights from {}'.format(C.base_net_weights))
        model_rpn.load_weights(C.base_net_weights, by_name=True)
        model_classifier.load_weights(C.base_net_weights, by_name=True)
    except:
        print('Could not load pretrained model weights...')

    optimizer = Adam(lr=lr)
    optimizer_classifier = Adam(lr=lr)
    from keras_frcnn import losses as losses
    model_rpn.compile(optimizer=optimizer,
                      loss=[
                          losses.rpn_loss_cls(num_anchors),
                          losses.rpn_loss_regr(num_anchors)
                      ])
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[
            losses.class_loss_cls,
            losses.class_loss_regr(len(classes_count) - 1)
        ],
        metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    model_all.compile(optimizer='sgd', loss='mae')

    epoch_length = int(iters)
    num_epochs = int(num_epochs)
    iter_num = 0

    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()

    best_loss = np.Inf

    class_mapping_inv = {v: k for k, v in class_mapping.items()}
    print('Starting training')

    vis = True

    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:
                if len(rpn_accuracy_rpn_monitor) == epoch_length and C.verbose:
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(
                            rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                        .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print(
                            'RPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.'
                        )

                X, Y, img_data = next(data_gen_train)

                loss_rpn = model_rpn.train_on_batch(X, Y)

                P_rpn = model_rpn.predict_on_batch(X)

                R = roi_helpers.rpn_to_roi(P_rpn[0],
                                           P_rpn[1],
                                           C,
                                           K.image_dim_ordering(),
                                           use_regr=True,
                                           overlap_thresh=overlap_th,
                                           max_boxes=300)
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(
                    R, img_data, C, class_mapping)

                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                neg_samples = np.where(Y1[0, :, -1] == 1)
                pos_samples = np.where(Y1[0, :, -1] == 0)

                if len(neg_samples) > 0:
                    neg_samples = neg_samples[0]
                else:
                    neg_samples = []

                if len(pos_samples) > 0:
                    pos_samples = pos_samples[0]
                else:
                    pos_samples = []

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append(len(pos_samples))

                if C.num_rois > 1:
                    if len(pos_samples) < C.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, C.num_rois // 2,
                            replace=False).tolist()
                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            C.num_rois - len(selected_pos_samples),
                            replace=False).tolist()
                    except:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            C.num_rois - len(selected_pos_samples),
                            replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]

                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]
                losses[iter_num, 4] = loss_class[3]

                iter_num += 1

                progbar.update(
                    iter_num,
                    [('rpn_cls', np.mean(losses[:iter_num, 0])),
                     ('rpn_regr', np.mean(losses[:iter_num, 1])),
                     ('detector_cls', np.mean(losses[:iter_num, 2])),
                     ('detector_regr', np.mean(losses[:iter_num, 3]))])

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(
                        rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if C.verbose:
                        print(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        print(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        print('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() -
                                                        start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if C.verbose:
                            print(
                                'Total loss decreased from {} to {}, saving weights'
                                .format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_all.save_weights(C.model_path)

                    break

            except Exception as e:
                print('Exception: {}'.format(e))
                continue

    print('Training complete, exiting.')
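run_train above wires everything together from a single call; a hypothetical invocation (paths are placeholders, and the annotation format follows the simple parser's one-box-per-line convention):

# annotations.txt lines look like: filepath,x1,y1,x2,y2,class_name
run_train(train_path='annotations.txt',
          output_weight_path='model_frcnn.hdf5',
          config_filename='config.pickle',
          parser='simple',
          network='resnet50',
          num_rois=32,
          lr=1e-5,
          iters=100,
          num_epochs=100)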
示例#20
0
def train_model(seed_data, classes_count, class_mapping, con, Earlystopping_patience, config_output_filename):
    sys.setrecursionlimit(40000)
    #utils.reset_keras()
    from keras_frcnn import losses as losses    
    
    if con.network == 'vgg':
        from keras_frcnn import vgg as nn 
    elif con.network == 'resnet50': 
        from keras_frcnn import resnet as nn
    elif con.network == 'xception':
        from keras_frcnn import xception as nn
    elif con.network == 'inception_resnet_v2':
        from keras_frcnn import inception_resnet_v2 as nn
    else:
        print('Not a valid model')
        raise ValueError

    # add the background class ('bg') if missing
    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    con.class_mapping = class_mapping

    inv_map = {v: k for k, v in class_mapping.items()}


    random.shuffle(seed_data)

    num_imgs = len(seed_data)
    #train_imgs = [s for s in all_imgs if s['imageset'] == 'train']
    train_imgs = [s for s in seed_data if s['imageset'] == 'trainval']
    val_imgs = [s for s in seed_data if s['imageset'] == 'val']
    test_imgs = [s for s in seed_data if s['imageset'] == 'test']
    
    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))
    print('Num test samples {}'.format(len(test_imgs)))
   
    # ground-truth anchor data generators
    data_gen_train = data_generators.get_anchor_gt(train_imgs, classes_count, con, nn.get_img_output_length, K.image_dim_ordering(), mode='train')
    data_gen_val = data_generators.get_anchor_gt(val_imgs, classes_count, con, nn.get_img_output_length, K.image_dim_ordering(), mode='val')
    data_gen_test = data_generators.get_anchor_gt(test_imgs, classes_count, con, nn.get_img_output_length, K.image_dim_ordering(), mode='val')

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)

    # input placeholder 
    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))

    # base network / feature extractor (ResNet, VGG, Inception, Inception-ResNet-v2, etc.)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(con.anchor_box_scales) * len(con.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)

    # detection network
    classifier = nn.classifier(shared_layers, roi_input, con.num_rois, nb_classes=len(classes_count), trainable=True)

    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)
    try:
        # load_weights by name
        # some Keras application models do not contain layer names;
        # for those models we need to re-construct the model with named layers
        print('loading weights from {}'.format(con.base_net_weights))
        model_rpn.load_weights(con.base_net_weights, by_name=True)
        model_classifier.load_weights(con.base_net_weights, by_name=True)
    except:
        print('Could not load pretrained model weights. Weights can be found in the keras application folder '
              'https://github.com/fchollet/keras/tree/master/keras/applications')

    optimizer = Adam(lr=1e-5)
    optimizer_classifier = Adam(lr=1e-5)
    model_rpn.compile(optimizer=optimizer, loss=[losses.rpn_loss_cls(num_anchors), losses.rpn_loss_regr(num_anchors)])
    model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_cls, losses.class_loss_regr(len(classes_count)-1)], metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    model_all.compile(optimizer='sgd', loss='mae')

    # Tensorboard log
    log_path = './logs'
    if not os.path.isdir(log_path):
        os.mkdir(log_path)

    # TensorBoard callback
    callback = TensorBoard(log_path)
    callback.set_model(model_all)

    epoch_length = con.num_epochs
    num_epochs = int(con.num_epochs)
    iter_num = 0
    train_step = 0

    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()
    
    # early stopping 
    #keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=0, verbose=0, mode='auto', baseline=None, restore_best_weights=False)
    change = 0
    
    class_mapping_inv = {v: k for k, v in class_mapping.items()}
    print('Starting training')

    # vis = True

    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)   # keras progress bar 
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            # try:
            # mean overlapping bboxes
            if len(rpn_accuracy_rpn_monitor) == epoch_length and con.verbose:
                mean_overlapping_bboxes = float(sum(rpn_accuracy_rpn_monitor))/len(rpn_accuracy_rpn_monitor)
                rpn_accuracy_rpn_monitor = []
                print('Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'.format(mean_overlapping_bboxes, epoch_length))
                if mean_overlapping_bboxes == 0:
                    print('RPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.')

            # pull X, Y and image data from the training generator
            X, Y, img_data = next(data_gen_train)

            loss_rpn = model_rpn.train_on_batch(X, Y)
            write_log(callback, ['rpn_cls_loss', 'rpn_reg_loss'], loss_rpn, train_step)

            P_rpn = model_rpn.predict_on_batch(X)

            R = roi_helpers.rpn_to_roi(P_rpn[0], P_rpn[1], con, K.image_dim_ordering(), use_regr=True, overlap_thresh=0.7, max_boxes=300)
            # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
            X2, Y1, Y2, IouS = roi_helpers.calc_iou(R, img_data, con, class_mapping)

            if X2 is None:
                rpn_accuracy_rpn_monitor.append(0)
                rpn_accuracy_for_epoch.append(0)
                continue

            # sampling positive/negative samples
            neg_samples = np.where(Y1[0, :, -1] == 1)
            pos_samples = np.where(Y1[0, :, -1] == 0)

            if len(neg_samples) > 0:
                neg_samples = neg_samples[0]
            else:
                neg_samples = []

            if len(pos_samples) > 0:
                pos_samples = pos_samples[0]
            else:
                pos_samples = []

            rpn_accuracy_rpn_monitor.append(len(pos_samples))
            rpn_accuracy_for_epoch.append(len(pos_samples))

            if con.num_rois > 1:
                if len(pos_samples) < con.num_rois//2:
                    selected_pos_samples = pos_samples.tolist()
                else:
                    selected_pos_samples = np.random.choice(pos_samples, con.num_rois//2, replace=False).tolist()
                try:
                    selected_neg_samples = np.random.choice(neg_samples, con.num_rois - len(selected_pos_samples), replace=False).tolist()
                except:
                    try:
                        selected_neg_samples = np.random.choice(neg_samples, con.num_rois - len(selected_pos_samples), replace=True).tolist()
                    except:
                        # neg_samples is empty, so there is no negative sample to draw; skip this batch
                        continue
                sel_samples = selected_pos_samples + selected_neg_samples
            else:
                # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                selected_pos_samples = pos_samples.tolist()
                selected_neg_samples = neg_samples.tolist()
                if np.random.randint(0, 2):
                    sel_samples = random.choice(neg_samples)
                else:
                    sel_samples = random.choice(pos_samples)

            loss_class = model_classifier.train_on_batch([X, X2[:, sel_samples, :]], [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])
            write_log(callback, ['detection_cls_loss', 'detection_reg_loss', 'detection_acc'], loss_class, train_step)
            train_step += 1

            losses[iter_num, 0] = loss_rpn[1]
            losses[iter_num, 1] = loss_rpn[2]

            losses[iter_num, 2] = loss_class[1]
            losses[iter_num, 3] = loss_class[2]
            losses[iter_num, 4] = loss_class[3]

            iter_num += 1

            progbar.update(iter_num, [('rpn_cls', np.mean(losses[:iter_num, 0])), ('rpn_regr', np.mean(losses[:iter_num, 1])),
                                    ('detector_cls', np.mean(losses[:iter_num, 2])), ('detector_regr', np.mean(losses[:iter_num, 3]))])

            if iter_num == epoch_length:
                loss_rpn_cls = np.mean(losses[:, 0])
                loss_rpn_regr = np.mean(losses[:, 1])
                loss_class_cls = np.mean(losses[:, 2])
                loss_class_regr = np.mean(losses[:, 3])
                class_acc = np.mean(losses[:, 4])

                mean_overlapping_bboxes = float(sum(rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                rpn_accuracy_for_epoch = []

                if con.verbose:
                    print('Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'.format(mean_overlapping_bboxes))
                    print('Classifier accuracy for bounding boxes from RPN: {}'.format(class_acc))
                    print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                    print('Loss RPN regression: {}'.format(loss_rpn_regr))
                    print('Loss Detector classifier: {}'.format(loss_class_cls))
                    print('Loss Detector regression: {}'.format(loss_class_regr))
                    print('Elapsed time: {}'.format(time.time() - start_time))

                curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                iter_num = 0
                start_time = time.time()

                write_log(callback,
                        ['Elapsed_time', 'mean_overlapping_bboxes', 'mean_rpn_cls_loss', 'mean_rpn_reg_loss',
                        'mean_detection_cls_loss', 'mean_detection_reg_loss', 'mean_detection_acc', 'total_loss'],
                        [time.time() - start_time, mean_overlapping_bboxes, loss_rpn_cls, loss_rpn_regr,
                        loss_class_cls, loss_class_regr, class_acc, curr_loss],
                        epoch_num)

                if curr_loss < con.best_loss:
                    if con.verbose:
                        print('Total loss decreased from {} to {}, saving weights'.format(con.best_loss,curr_loss))
                    con.best_loss = curr_loss
                    con = utils.update_config_file(config_output_filename,con)
                    print("saving weight")
                    model_all.save_weights(con.model_path)
                    print("weight saved")
                    change = 0
                else:
                    change += 1
                break
        if Earlystopping_patience is not None:
            if Earlystopping_patience == change:
                print("training stopped by early stopping")
                break           
                

    print('Training complete, exiting.')
    return con
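train_model logs through a write_log helper that is not defined in this excerpt. A common TF1-era sketch, which writes scalar summaries through the TensorBoard callback's event writer (assumed here, not taken from this codebase):

import tensorflow as tf

def write_log(callback, names, logs, batch_no):
    # write one scalar summary per (name, value) pair, tagged with the
    # current step, through the TensorBoard callback's file writer
    for name, value in zip(names, logs):
        summary = tf.Summary()
        summary_value = summary.value.add()
        summary_value.simple_value = value
        summary_value.tag = name
        callback.writer.add_summary(summary, batch_no)
        callback.writer.flush()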
示例#21
0
def train_kitti():
    # config for data argument
    cfg = config.Config()

    cfg.use_horizontal_flips = True
    cfg.use_vertical_flips = True
    cfg.rot_90 = True
    cfg.num_rois = 32
    cfg.base_net_weights = os.path.join('./model/', nn.get_weight_path())
    # cfg.base_net_weights=r''

    # TODO: this is the only path that needs to change to train on other data
    cfg.model_path = '/media/private/Ci/log/plane/frcnn/vgg-adam'

    now = datetime.datetime.now()
    day = now.strftime('%y-%m-%d')
    for i in range(10000):
        if not os.path.exists('%s-%s-%d' % (cfg.model_path, day, i)):
            cfg.model_path = '%s-%s-%d' % (cfg.model_path, day, i)
            break

    make_dir(cfg.model_path)
    make_dir(cfg.model_path + '/loss')
    make_dir(cfg.model_path + '/loss_rpn_cls')
    make_dir(cfg.model_path + '/loss_rpn_regr')
    make_dir(cfg.model_path + '/loss_class_cls')
    make_dir(cfg.model_path + '/loss_class_regr')

    cfg.simple_label_file = '/media/public/GEOWAY/plane/plane0817.csv'

    all_images, classes_count, class_mapping = get_data(cfg.simple_label_file)

    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    cfg.class_mapping = class_mapping
    cfg.config_save_file = os.path.join(cfg.model_path, 'config.pickle')
    with open(cfg.config_save_file, 'wb') as config_f:
        pickle.dump(cfg, config_f)
        print(
            'Config has been written to {}, and can be loaded when testing to ensure correct results'
            .format(cfg.config_save_file))

    inv_map = {v: k for k, v in class_mapping.items()}

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))
    random.shuffle(all_images)
    num_imgs = len(all_images)
    train_imgs = [s for s in all_images if s['imageset'] == 'trainval']
    val_imgs = [s for s in all_images if s['imageset'] == 'test']

    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))

    data_gen_train = data_generators.get_anchor_gt(train_imgs,
                                                   classes_count,
                                                   cfg,
                                                   nn.get_img_output_length,
                                                   K.image_dim_ordering(),
                                                   mode='train')
    data_gen_val = data_generators.get_anchor_gt(val_imgs,
                                                 classes_count,
                                                 cfg,
                                                 nn.get_img_output_length,
                                                 K.image_dim_ordering(),
                                                 mode='val')
    Q = multiprocessing.Manager().Queue(maxsize=30)

    def fill_Q(n):
        while True:

            if not Q.full():
                Q.put(next(data_gen_train))
                #print(Q.qsize(),'put',n)
            else:
                time.sleep(0.00001)

    threads = []
    for i in range(4):
        thread = multiprocessing.Process(target=fill_Q, args=(i, ))
        threads.append(thread)
        thread.start()

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(shared_layers,
                               roi_input,
                               cfg.num_rois,
                               nb_classes=len(classes_count),
                               trainable=True)

    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)
    # model_all.summary()
    from keras.utils import plot_model
    # os.environ['PATH'] = os.environ['PATH'] + r';C:\Program Files (x86)\Graphviz2.38\bin;'

    plot_model(model_all,
               'model_all.png',
               show_layer_names=True,
               show_shapes=True)
    plot_model(model_classifier,
               'model_classifier.png',
               show_layer_names=True,
               show_shapes=True)
    plot_model(model_rpn,
               'model_rpn.png',
               show_layer_names=True,
               show_shapes=True)
    '''
    try:
        print('loading weights from {}'.format(cfg.base_net_weights))
        model_rpn.load_weights(cfg.model_path, by_name=True)
        model_classifier.load_weights(cfg.model_path, by_name=True)
    except Exception as e:
        print(e)
        print('Could not load pretrained model weights. Weights can be found in the keras application folder '
              'https://github.com/fchollet/keras/tree/master/keras/applications')
    '''

    optimizer = adadelta()
    optimizer_classifier = adadelta()
    model_rpn.compile(optimizer=optimizer,
                      loss=[
                          losses_fn.rpn_loss_cls(num_anchors),
                          losses_fn.rpn_loss_regr(num_anchors)
                      ])
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[
            losses_fn.class_loss_cls,
            losses_fn.class_loss_regr(len(classes_count) - 1)
        ],
        metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    model_all.compile(optimizer='sgd', loss='mae')

    epoch_length = 10
    num_epochs = int(cfg.num_epochs)
    iter_num = 0

    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()

    best_loss = np.Inf
    best_rpn_cls = np.Inf
    best_rpn_regr = np.Inf
    best_class_cls = np.Inf
    best_class_regr = np.Inf

    class_mapping_inv = {v: k for k, v in class_mapping.items()}
    print('Starting training')

    vis = True

    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:

                if len(rpn_accuracy_rpn_monitor
                       ) == epoch_length and cfg.verbose:
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(
                            rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                        .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print(
                            'RPN is not producing bounding boxes that overlap'
                            ' the ground truth boxes. Check RPN settings or keep training.'
                        )

            #    X, Y, img_data = next(data_gen_train)
                while True:

                    if Q.empty():
                        time.sleep(0.00001)
                        continue

                    X, Y, img_data = Q.get()
                    #    print(Q.qsize(),'get')
                    break
            #  print(X.shape,Y.shape)
                loss_rpn = model_rpn.train_on_batch(X, Y)

                P_rpn = model_rpn.predict_on_batch(X)

                result = roi_helpers.rpn_to_roi(P_rpn[0],
                                                P_rpn[1],
                                                cfg,
                                                K.image_dim_ordering(),
                                                use_regr=True,
                                                overlap_thresh=0.7,
                                                max_boxes=300)
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(
                    result, img_data, cfg, class_mapping)

                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                neg_samples = np.where(Y1[0, :, -1] == 1)
                pos_samples = np.where(Y1[0, :, -1] == 0)

                if len(neg_samples) > 0:
                    neg_samples = neg_samples[0]
                else:
                    neg_samples = []

                if len(pos_samples) > 0:
                    pos_samples = pos_samples[0]
                else:
                    pos_samples = []

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append(len(pos_samples))

                if cfg.num_rois > 1:
                    if len(pos_samples) < cfg.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, cfg.num_rois // 2,
                            replace=False).tolist()
                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            cfg.num_rois - len(selected_pos_samples),
                            replace=False).tolist()
                    except:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            cfg.num_rois - len(selected_pos_samples),
                            replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]

                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]
                losses[iter_num, 4] = loss_class[3]

                iter_num += 1

                progbar.update(
                    iter_num,
                    [('rpn_cls', np.mean(losses[:iter_num, 0])),
                     ('rpn_regr', np.mean(losses[:iter_num, 1])),
                     ('detector_cls', np.mean(losses[:iter_num, 2])),
                     ('detector_regr', np.mean(losses[:iter_num, 3]))])

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(
                        rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if cfg.verbose:
                        print(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        print(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        print('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() -
                                                        start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if cfg.verbose:
                            print(
                                'Total loss decreased from {} to {}, saving weights'
                                .format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_all.save_weights(
                            '%s/%s/E-%d-loss-%.4f-rpnc-%.4f-rpnr-%.4f-cls-%.4f-cr-%.4f.hdf5'
                            % (cfg.model_path, 'loss', epoch_num, curr_loss,
                               loss_rpn_cls, loss_rpn_regr, loss_class_cls,
                               loss_class_regr))
                    if loss_rpn_cls < best_rpn_cls:
                        if cfg.verbose:
                            print(
                                'loss_rpn_cls decreased from {} to {}, saving weights'
                                .format(best_rpn_cls, loss_rpn_cls))
                        # update the best value outside the verbose check so it
                        # is tracked even when verbose output is disabled
                        best_rpn_cls = loss_rpn_cls
                        model_all.save_weights(
                            '%s/%s/E-%d-loss-%.4f-rpnc-%.4f-rpnr-%.4f-cls-%.4f-cr-%.4f.hdf5'
                            % (cfg.model_path, 'loss_rpn_cls', epoch_num,
                               curr_loss, loss_rpn_cls, loss_rpn_regr,
                               loss_class_cls, loss_class_regr))
                    if loss_rpn_regr < best_rpn_regr:
                        if cfg.verbose:
                            print(
                                'loss_rpn_regr decreased from {} to {}, saving weights'
                                .format(best_rpn_regr, loss_rpn_regr))
                        best_rpn_regr = loss_rpn_regr
                        model_all.save_weights(
                            '%s/%s/E-%d-loss-%.4f-rpnc-%.4f-rpnr-%.4f-cls-%.4f-cr-%.4f.hdf5'
                            % (cfg.model_path, 'loss_rpn_regr', epoch_num,
                               curr_loss, loss_rpn_cls, loss_rpn_regr,
                               loss_class_cls, loss_class_regr))
                    if loss_class_cls < best_class_cls:
                        if cfg.verbose:
                            print(
                                'loss_class_cls decreased from {} to {}, saving weights'
                                .format(best_class_cls, loss_class_cls))
                        best_class_cls = loss_class_cls
                        model_all.save_weights(
                            '%s/%s/E-%d-loss-%.4f-rpnc-%.4f-rpnr-%.4f-cls-%.4f-cr-%.4f.hdf5'
                            % (cfg.model_path, 'loss_class_cls', epoch_num,
                               curr_loss, loss_rpn_cls, loss_rpn_regr,
                               loss_class_cls, loss_class_regr))
                    if loss_class_regr < best_class_regr:
                        if cfg.verbose:
                            print(
                                'loss_class_regr decreased from {} to {}, saving weights'
                                .format(best_class_regr, loss_class_regr))
                        best_class_regr = loss_class_regr
                        model_all.save_weights(
                            '%s/%s/E-%d-loss-%.4f-rpnc-%.4f-rpnr-%.4f-cls-%.4f-cr-%.4f.hdf5'
                            % (cfg.model_path, 'loss_class_regr', epoch_num,
                               curr_loss, loss_rpn_cls, loss_rpn_regr,
                               loss_class_cls, loss_class_regr))

                    break

            except Exception as e:
                #   print('Exception: {}'.format(e))
                # save model
                #    model_all.save_weights(cfg.model_path)
                continue
    print('Training complete, exiting.')
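train_kitti calls a make_dir helper that is not shown in this excerpt; a minimal sketch consistent with how it is used (assumed, not from the source):

import os

def make_dir(path):
    # create the directory if it does not already exist
    if not os.path.exists(path):
        os.makedirs(path)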
示例#22
0
else:
    input_shape_img = (None, None, 3)

img_input = Input(shape=input_shape_img)
roi_input = Input(shape=(None, 4))  # TODO: figure out exactly what roi_input is
part_roi_input = Input(shape=[None, 4])
bird_rois_input0 = Input(shape=(None, 4))
bird_rois_input1 = Input(shape=(None, 4))
bird_rois_input2 = Input(shape=(None, 4))
bird_rois_input3 = Input(shape=(None, 4))
bird_rois_input4 = Input(shape=(None, 4))
bird_rois_input5 = Input(shape=(None, 4))
bird_rois_input6 = Input(shape=(None, 4))

# define the base network (resnet here, can be VGG, Inception, etc)
shared_layers = nn.nn_base(img_input, trainable=True)  # output of the shared layers; be explicit about its output size

# define the RPN, built on the base layers
num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
rpn = nn.rpn(shared_layers,
             num_anchors)  # this only adds two simple conv layers; anchors are not introduced here, they show up in the loss functions
# returns a list containing the RPN classification and regression outputs

classifier = nn.classifier(shared_layers,
                           roi_input,
                           cfg.num_rois,
                           nb_classes=len(classes_count),
                           trainable=True)  # note: keep nb_classes consistent when adapting the code
# roi_input serves as an extra input here; see below for how it is handled
#bird_classifier_output = nn.fg_classifier(shared_layers,bird_rois_input0,bird_rois_input1,bird_rois_input2,bird_rois_input3,bird_rois_input4,bird_rois_input5,bird_rois_input6,nb_classes=200, trainable=True)
holyclass_out = nn.fine_layer(shared_layers,
示例#23
0
print('Num val samples {}'.format(len(val_)))


data_gen_train = data_generators.get_anchor_gt(train_, count_c, C, nn.get_img_output_length, K.image_dim_ordering(), mode='train')
data_gen_val = data_generators.get_anchor_gt(val_, count_c, C, nn.get_img_output_length,K.image_dim_ordering(), mode='val')

if K.image_dim_ordering() == 'th':
    input_shape_img = (3, None, None)
else:
    input_shape_img = (None, None, 3)

imgi = Input(shape=input_shape_img)
roii = Input(shape=(None, 4))

# define the base network (resnet, VGG, Inception, etc)
shared_layers = nn.nn_base(imgi, trainable=True)

# define the RPN : 
nbr_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
rpn = nn.rpn(shared_layers, nbr_anchors)

classifier = nn.classifier(shared_layers, roii, C.num_rois, nb_classes=len(count_c), trainable=True)

model_rpn = Model(imgi, rpn[:2])
model_classifier = Model([imgi, roii], classifier)

model_all = Model([imgi, roii], rpn[:2] + classifier)

optimizer = Adam(lr=1e-5)
optimizer_clf = Adam(lr=1e-5)
model_rpn.compile(optimizer=optimizer, loss=[losses.rpn_loss_cls(nbr_anchors), losses.rpn_loss_regr(nbr_anchors)])
示例#24
0
                                             classes_count,
                                             C,
                                             nn.get_img_output_length,
                                             K.image_dim_ordering(),
                                             mode='val')

if K.image_dim_ordering() == 'th':
    input_shape_img = (3, None, None)
else:
    input_shape_img = (None, None, 3)

img_input = Input(shape=input_shape_img)
roi_input = Input(shape=(None, 4))

# define the base network (resnet here, can be VGG, Inception, etc)
shared_layers = nn.nn_base(img_input, trainable=True)

# define the RPN, built on the base layers
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
rpn = nn.rpn(shared_layers, num_anchors)

classifier = nn.classifier(shared_layers,
                           roi_input,
                           C.num_rois,
                           nb_classes=len(classes_count),
                           trainable=True)

model_rpn = Model(img_input, rpn[:2])
model_classifier = Model([img_input, roi_input], classifier)

# this is a model that holds both the RPN and the classifier, used to load/save weights for the models
def main_function():
    import os
    parser = OptionParser()

    parser.add_option("-p",
                      "--path",
                      dest="test_path",
                      help="Path to test data.",
                      default='test/')
    parser.add_option(
        "-n",
        "--num_rois",
        type="int",
        dest="num_rois",
        help="Number of ROIs per iteration. Higher means more memory use.",
        default=32)
    parser.add_option(
        "--config_filename",
        dest="config_filename",
        help=
        "Location to read the metadata related to the training (generated when training).",
        default="config.pickle")
    parser.add_option("--network",
                      dest="network",
                      help="Base network to use. Supports vgg or resnet50.",
                      default='resnet50')
    parser.add_option("--write",
                      dest="write",
                      help="to write out the image with detections or not.",
                      action='store_true')
    parser.add_option("--load",
                      dest="load",
                      help="specify model path.",
                      default=None)
    (options, args) = parser.parse_args()
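
    # example invocation (assuming this script is saved as test_frcnn.py):
    #   python test_frcnn.py -p test/ --network resnet50 --write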

    # if not options.test_path:   # if filename is not given
    #     parser.error('Error: path to test data must be specified. Pass --path to command line')

    # hard-coded overrides: force the MobileNetV2 backbone and always write results
    options.network = 'mobilenetv2'
    #options.path='test'
    options.write = 'yes'  # any truthy value works; the flag itself is store_true

    config_output_filename = options.config_filename

    with open(config_output_filename, 'rb') as f_in:
        C = pickle.load(f_in)

    # pick the backbone implementation matching the chosen network
    if options.network == 'vgg':
        C.network = 'vgg16'
        from keras_frcnn import vgg as nn
    elif options.network == 'resnet50':
        from keras_frcnn import resnet as nn
        C.network = 'resnet50'
    elif options.network == 'vgg19':
        from keras_frcnn import vgg19 as nn
        C.network = 'vgg19'
    elif options.network == 'mobilenetv1':
        from keras_frcnn import mobilenetv1 as nn
        C.network = 'mobilenetv1'
    # from keras.applications.mobilenet import preprocess_input
    elif options.network == 'mobilenetv1_05':
        from keras_frcnn import mobilenetv1_05 as nn
        C.network = 'mobilenetv1_05'
    # from keras.applications.mobilenet import preprocess_input
    elif options.network == 'mobilenetv1_25':
        from keras_frcnn import mobilenetv1_25 as nn
        C.network = 'mobilenetv1_25'
    # from keras.applications.mobilenet import preprocess_input
    elif options.network == 'mobilenetv2':
        from keras_frcnn import mobilenetv2 as nn
        C.network = 'mobilenetv2'
    else:
        raise ValueError('Not a valid model: {}'.format(options.network))

    # turn off any data augmentation at test time
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False

    img_path = options.test_path

    def format_img_size(img, C):
        """ formats the image size based on config """
        img_min_side = float(C.im_size)
        (height, width, _) = img.shape

        if width <= height:
            ratio = img_min_side / width
            new_height = int(ratio * height)
            new_width = int(img_min_side)
        else:
            ratio = img_min_side / height
            new_width = int(ratio * width)
            new_height = int(img_min_side)
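        # e.g. with C.im_size = 600 and a 1000(h) x 800(w) image: width <= height,
        # so ratio = 600/800 = 0.75 and the resized image is 600 x 750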
        img = cv2.resize(img, (new_width, new_height),
                         interpolation=cv2.INTER_CUBIC)
        return img, ratio

    def format_img_channels(img, C):
        """ formats the image channels based on config """
        img = img[:, :, (2, 1, 0)]  # BGR (OpenCV) -> RGB
        img = img.astype(np.float32)
        img[:, :, 0] -= C.img_channel_mean[0]
        img[:, :, 1] -= C.img_channel_mean[1]
        img[:, :, 2] -= C.img_channel_mean[2]
        img /= C.img_scaling_factor
        img = np.transpose(img, (2, 0, 1))  # HWC -> CHW
        img = np.expand_dims(img, axis=0)  # add batch dimension
        return img

    def format_img(img, C):
        """ formats an image for model prediction based on config """
        img, ratio = format_img_size(img, C)
        img = format_img_channels(img, C)
        return img, ratio

    # Method to transform the coordinates of the bounding box to its original size
    def get_real_coordinates(ratio, x1, y1, x2, y2):

        # divide by the resize ratio (not floor-divide, which would make the
        # round() a no-op) to map back to original-image coordinates
        real_x1 = int(round(x1 / ratio))
        real_y1 = int(round(y1 / ratio))
        real_x2 = int(round(x2 / ratio))
        real_y2 = int(round(y2 / ratio))
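        # e.g. with ratio = 0.75, a box at (60, 90, 300, 450) on the resized
        # image maps back to (80, 120, 400, 600) on the original image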

        return (real_x1, real_y1, real_x2, real_y2)

    class_mapping = C.class_mapping

    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    class_mapping = {v: k for k, v in class_mapping.items()}
    print(class_mapping)
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }
    C.num_rois = int(options.num_rois)

    if C.network == 'resnet50':
        num_features = 1024
    elif C.network == 'mobilenetv2':
        num_features = 320
    else:
        # may need to fix this up with your backbone..!
        print("backbone is not resnet50. number of features chosen is 512")
        num_features = 512

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (num_features, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)
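
    # at test time the classifier runs on the feature map produced by the RPN
    # model (F below), so it is built on feature_map_input instead of the image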

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(class_mapping))

    model_rpn = Model(img_input, rpn_layers)
    model_classifier = Model([feature_map_input, roi_input], classifier)

    # model loading
    if options.load is None:
        print('Loading weights from {}'.format(C.model_path))
        model_rpn.load_weights(C.model_path, by_name=True)
        model_classifier.load_weights(C.model_path, by_name=True)
    else:
        print('Loading weights from {}'.format(options.load))
        model_rpn.load_weights(options.load, by_name=True)
        model_classifier.load_weights(options.load, by_name=True)

    #model_rpn.compile(optimizer='adam', loss='mse')
    #model_classifier.compile(optimizer='adam', loss='mse')

    all_imgs = []

    classes = {}

    bbox_threshold = 0.8  # minimum class confidence for keeping a detection

    visualise = True  # unused below

    num_rois = C.num_rois

    for idx, img_name in enumerate(sorted(os.listdir(img_path))):
        if not img_name.lower().endswith(
            ('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
            continue
        print(img_name)
        st = time.time()
        filepath = os.path.join(img_path, img_name)

        img = cv2.imread(filepath)

        # preprocess image
        X, ratio = format_img(img, C)
        # debug copy of the preprocessed image (not used below)
        img_scaled = (np.transpose(X[0, :, :, :],
                                   (1, 2, 0)) + 127.5).astype('uint8')
        if K.image_dim_ordering() == 'tf':
            X = np.transpose(X, (0, 2, 3, 1))
        # get the feature maps and output from the RPN
        [Y1, Y2, F] = model_rpn.predict(X)
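        # Y1: per-anchor objectness scores, Y2: per-anchor box regressions,
        # F: the shared backbone feature map reused by the classifier head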

        R = roi_helpers.rpn_to_roi(Y1,
                                   Y2,
                                   C,
                                   K.image_dim_ordering(),
                                   overlap_thresh=0.3)
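        # R: (N, 4) proposals as (x1, y1, x2, y2) in feature-map coordinates,
        # obtained by decoding the RPN outputs against the anchors and
        # applying non-max suppression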
        print(R.shape)

        # convert from (x1,y1,x2,y2) to (x,y,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        # apply the spatial pyramid pooling to the proposed regions
        bboxes = {}
        probs = {}
        for jk in range(R.shape[0] // num_rois + 1):
            ROIs = np.expand_dims(R[num_rois * jk:num_rois * (jk + 1), :],
                                  axis=0)
            if ROIs.shape[1] == 0:
                break

            if jk == R.shape[0] // num_rois:
                #pad R
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], num_rois, curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded
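                # e.g. 70 proposals with num_rois = 32 gives batches of 32,
                # 32 and 6; the last batch is padded back to 32 by repeating
                # its first ROI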

            [P_cls, P_regr] = model_classifier.predict([F, ROIs])
            print(P_cls)

            for ii in range(P_cls.shape[1]):

                # skip low-confidence detections and the 'bg' class (last index)
                if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(
                        P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                    continue

                cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

                if cls_name not in bboxes:
                    bboxes[cls_name] = []
                    probs[cls_name] = []
                (x, y, w, h) = ROIs[0, ii, :]

                # scale from feature-map coordinates back to the resized image;
                # 16 is the feature stride (C.rpn_stride), hard-coded here
                bboxes[cls_name].append(
                    [16 * x, 16 * y, 16 * (x + w), 16 * (y + h)])
                probs[cls_name].append(np.max(P_cls[0, ii, :]))

        all_dets = []

        for key in bboxes:
            print(key)
            print(len(bboxes[key]))
            bbox = np.array(bboxes[key])

            new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
                bbox, np.array(probs[key]), overlap_thresh=0.3)
            for jk in range(new_boxes.shape[0]):
                (x1, y1, x2, y2) = new_boxes[jk, :]
                (real_x1, real_y1, real_x2,
                 real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

                cv2.rectangle(
                    img, (real_x1, real_y1), (real_x2, real_y2),
                    (int(class_to_color[key][0]), int(class_to_color[key][1]),
                     int(class_to_color[key][2])), 2)

                textLabel = '{}: {}'.format(key, int(100 * new_probs[jk]))
                all_dets.append((key, 100 * new_probs[jk]))

                (retval, baseLine) = cv2.getTextSize(textLabel,
                                                     cv2.FONT_HERSHEY_COMPLEX,
                                                     1, 1)
                textOrg = (real_x1, real_y1)

                cv2.rectangle(
                    img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                    (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                    (0, 0, 0), 2)
                cv2.rectangle(
                    img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                    (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                    (255, 255, 255), -1)
                cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX,
                            1, (0, 0, 0), 1)

        print('Elapsed time = {}'.format(time.time() - st))
        print(all_dets)
        print(bboxes)
        # write out the annotated image if requested
        if options.write:
            if not os.path.isdir("results"):
                os.mkdir("results")
            cv2.imwrite('./results/{}.png'.format(idx), img)