Exemplo n.º 1
0
def train(model_name, epochs=60, batch_size=1, lr=0.0001, decay=0.001):
    """Parse the annotations, build the model and train it with ``fit_generator``.

    The annotation file ``ROI_BBOX_FILE`` is parsed with ``get_data``; a
    ``'bg'`` class is appended to the class tables when missing, and the class
    mapping is written to ``ROI_CLASSES_FILE`` as JSON if that file does not
    exist yet. Images are shuffled, then split on their ``'imageset'`` field
    (``'trainval'`` for training, ``'test'`` for validation).

    Args:
        model_name: Stored on the shared config as ``C.model_name``.
        epochs: Number of epochs passed to ``fit_generator``.
        batch_size: Only used to derive the number of steps per epoch.
        lr: Learning rate passed to ``Adam``.
        decay: Learning-rate decay passed to ``Adam``.

    Returns:
        None. The best weights are written by ``ModelCheckpoint`` to
        ``C.get_model_path()``.
    """
    t = time.time()
    all_imgs, classes_count, class_mapping = get_data(ROI_BBOX_FILE)
    # time.time() already reports seconds, so the difference is printed as-is
    # (the previous "/ 1000" under-reported the duration by three orders of
    # magnitude while still labelling it "s").
    print("Parsing annotation files took " + str(time.time() - t) + "s")

    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)

    C.model_name = model_name

    # Make sure a background class exists; it gets the next free class index.
    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    # Persist the class mapping once; an existing file is left untouched.
    if not os.path.isfile(ROI_CLASSES_FILE):
        with open(ROI_CLASSES_FILE, 'w') as class_data_json:
            json.dump(class_mapping, class_data_json)

    print('Num classes (including bg) = {}'.format(len(classes_count)))
    random.shuffle(all_imgs)

    train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval']
    val_imgs = [s for s in all_imgs if s['imageset'] == 'test']

    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))

    model = build_model(classes_count, num_anchors)

    optimizer = Adam(lr=lr, decay=decay)
    # One loss per model output: RPN class, RPN regression, detector class,
    # detector regression.
    model.compile(optimizer=optimizer,
                  loss=[losses.rpn_loss_cls(num_anchors), losses.rpn_loss_regr(num_anchors),
                        losses.class_loss_cls,
                        losses.class_loss_regr(C.num_rois, len(classes_count) - 1)],
                  metrics={'dense_class_{}_loss'.format(len(classes_count)): 'accuracy'})

    data_gen_train = data_generators.get_anchor_gt(train_imgs, class_mapping, classes_count,
                                                   C, K.image_dim_ordering(), mode='train')

    data_gen_val = data_generators.get_anchor_gt(val_imgs, class_mapping, classes_count,
                                                 C, K.image_dim_ordering(), mode='val')

    # NOTE(review): ReduceLROnPlateau watches the training loss while the other
    # two callbacks watch 'val_loss' - confirm that this asymmetry is intended.
    callbacks = [EarlyStopping(monitor='val_loss', patience=20, verbose=0),
                 ModelCheckpoint(C.get_model_path(), monitor='val_loss', save_best_only=True, verbose=0),
                 ReduceLROnPlateau(monitor='loss', factor=0.1, patience=5, min_lr=1e-7, verbose=1),
                 LoggingCallback(C)]

    print('Starting training')
    # The validation pass must be sized by the validation set, not by the
    # training set, otherwise the validation generator is over- or under-drawn.
    model.fit_generator(data_gen_train, steps_per_epoch=ceil(len(train_imgs) / batch_size),
                        epochs=epochs, validation_data=data_gen_val,
                        validation_steps=ceil(len(val_imgs) / batch_size),
                        callbacks=callbacks, max_q_size=1, workers=1)
Exemplo n.º 2
0
    model_classifier_val.load_weights(C.base_net_weights, by_name=True)
except:
    print(
        'Could not load pretrained model weights. Weights can be found in the keras application folder \
		https://github.com/fchollet/keras/tree/master/keras/applications')

# Separate Adam optimizers (same learning rate) for the RPN and the classifier.
rpn_optimizer = Adam(learning_rate=1e-5)
class_optimizer = Adam(learning_rate=1e-5)

# RPN losses
rpn_cls_loss = loss_funcs.rpn_loss_cls(num_anchors)
rpn_regr_loss = loss_funcs.rpn_loss_regr(num_anchors)

# Class losses
# class_loss_regr is built for every class except one (len - 1);
# presumably the excluded class is background - TODO confirm.
class_cls_loss = loss_funcs.class_loss_cls
class_regr_loss = loss_funcs.class_loss_regr(len(classes_count) - 1)

# NOTE(review): 'sgd'/'mae' look like placeholders - presumably model_all is
# compiled only so that it can save/load the combined weights; confirm.
model_all.compile(optimizer='sgd', loss='mae')

# Loop bookkeeping read from the config object C.
epoch_length = C.epoch_length
num_epochs = C.epochs
iter_num = 0

# One row per iteration of an epoch, five values per row.
losses = np.zeros((epoch_length, 5))
rpn_accuracy_for_epoch = []
start_time = time.time()

# NOTE(review): initialised to +infinity although the name suggests a score
# that should grow; a "new_f1 > best_f1" test could never succeed - verify
# against the code that updates it. Also np.Inf was removed in NumPy 2.0.
best_f1 = np.Inf

# Reverse lookup: class index -> class name.
class_mapping_inv = {v: k for k, v in class_mapping.items()}
print('Starting training')
Exemplo n.º 3
0
# Combined model exposing the two RPN outputs followed by the classifier
# outputs on the shared inputs.
model_all = Model([img_input, roi_input], rpn[:2] + classifier)


# Best-effort load of pretrained weights: training continues from the current
# initialisation when loading fails, but the reason is now reported and
# KeyboardInterrupt/SystemExit are no longer swallowed.
try:
    print('loading weights from {}'.format(C.base_net_weights))
    model_rpn.load_weights(C.base_net_weights, by_name=True)
    model_classifier.load_weights(C.base_net_weights, by_name=True)
    print('load from your model')
except Exception as e:
    print('Exception: {}'.format(e))
    # The original literal contained the invalid escape "\h", which printed a
    # stray backslash in front of the URL.
    print('Could not load pretrained model weights. Weights can be found in the keras application folder '
          'https://github.com/fchollet/keras/tree/master/keras/applications')


optimizer = Adam(lr=1e-5)
optimizer_classifier = Adam(lr=1e-5)
model_rpn.compile(optimizer=optimizer, loss=[losses.rpn_loss_cls(num_anchors), losses.rpn_loss_regr(num_anchors)])
model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_cls, losses.class_loss_regr(len(classes_count)-1)], metrics={'classifier': 'accuracy'})
# NOTE(review): 'sgd'/'mae' look like placeholders - presumably model_all is
# compiled only so that it can save/load the combined weights; confirm.
model_all.compile(optimizer='sgd', loss='mae')

epoch_length = 1986
num_epochs = int(num_epochs)
iter_num = 0

# NOTE: from here on the name `losses` refers to this array (one row per
# iteration, five values per row) and no longer to the loss-function module
# used in the compile() calls above.
losses = np.zeros((epoch_length, 5))
rpn_accuracy_rpn_monitor = []
rpn_accuracy_for_epoch = []
start_time = time.time()

# np.inf is the spelling available in every NumPy release (np.Inf was removed
# in NumPy 2.0).
best_loss = np.inf

# Reverse lookup: class index -> class name.
class_mapping_inv = {v: k for k, v in class_mapping.items()}
print('Starting training')
Exemplo n.º 4
0
except:
    print(
        'Could not load pretrained model weights. Weights can be found in the keras application folder \
		https://github.com/fchollet/keras/tree/master/keras/applications')

# Separate Adam optimizers (same learning rate) for the RPN and the classifier.
optimizer = Adam(lr=1e-5)
optimizer_classifier = Adam(lr=1e-5)
# RPN model: one classification loss and one regression loss.
model_rpn.compile(
    optimizer=optimizer,
    loss=[losses.rpn_loss_cls(num_anchors),
          losses.rpn_loss_regr(num_anchors)])
# Classifier model: the accuracy metric is attached to the output whose name
# is built from the class count ('dense_class_<n>').
model_classifier.compile(
    optimizer=optimizer_classifier,
    loss=[
        losses.class_loss_cls,
        losses.class_loss_regr(len(classes_count) - 1)
    ],
    metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
# NOTE(review): 'sgd'/'mae' look like placeholders - presumably model_all is
# compiled only so that it can save/load the combined weights; confirm.
model_all.compile(optimizer='sgd', loss='mae')

# Fixed number of iterations per epoch; epoch count comes from the options.
epoch_length = 100
num_epochs = int(options.num_epochs)
iter_num = 0

# NOTE: this rebinds `losses` from the loss-function module used above to a
# per-iteration array (one row per iteration, five values per row).
losses = np.zeros((epoch_length, 5))
rpn_accuracy_rpn_monitor = []
rpn_accuracy_for_epoch = []
start_time = time.time()

# Lowest total loss seen so far; starts at +infinity.
# NOTE(review): np.Inf was removed in NumPy 2.0 (np.inf is the portable name).
best_loss = np.Inf
def test_kitti():
    """Run an evaluation-style loop over the 'test' split of the KITTI labels.

    Builds the shared base network, the RPN and the classifier, loads weights
    from ``cfg.model_path``, then iterates ``cfg.num_epochs`` times over
    ``len(val_imgs)`` batches drawn from the validation generator. The RPN is
    evaluated with ``test_on_batch``; the classifier is still driven through
    ``train_on_batch`` (see the review note at that call). At the end the
    accumulated ``count`` values returned by ``roi_helpers.calc_iou`` are
    divided by the number of 'Pedestrian' annotations and printed.

    Side effects visible in this function: pickles the config to
    ``cfg.config_save_file`` and writes weights to ``cfg.model_path``.

    Raises:
        KeyError: if the label file contains no 'Pedestrian' class.
    """
    # Configuration object; the flags below enable data augmentation.
    cfg = config.Config()

    cfg.use_horizontal_flips = True
    cfg.use_vertical_flips = True
    cfg.rot_90 = True
    cfg.num_rois = 32
    cfg.base_net_weights = os.path.join('./model/', nn.get_weight_path())

    # TODO: this is the only path that should need changing to train on other data
    cfg.model_path = './model/kitti_frcnn_last.hdf5'

    cfg.simple_label_file = 'kitti_simple_label.txt'
    # Inspect the absolute path
    #t = os.path.abspath('kitti_simple_label.txt')

    all_images, classes_count, class_mapping = get_data(cfg.simple_label_file)
    # Denominator of the final ratio printed at the end of the function.
    pedestrain_num = classes_count['Pedestrian']
    # Make sure a background class exists; it gets the next free class index.
    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    cfg.class_mapping = class_mapping
    with open(cfg.config_save_file, 'wb') as config_f:
        pickle.dump(cfg, config_f)
        print(
            'Config has been written to {}, and can be loaded when testing to ensure correct results'
            .format(cfg.config_save_file))

    # NOTE(review): inv_map is not referenced again in this function.
    inv_map = {v: k for k, v in class_mapping.items()}

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))
    random.shuffle(all_images)
    # NOTE(review): num_imgs is not referenced again in this function.
    num_imgs = len(all_images)
    train_imgs = [s for s in all_images if s['imageset'] == 'trainval']
    val_imgs = [s for s in all_images if s['imageset'] == 'test']

    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))

    # NOTE(review): data_gen_train is created but only data_gen_val is
    # consumed by the loop below.
    data_gen_train = data_generators.get_anchor_gt(train_imgs,
                                                   classes_count,
                                                   cfg,
                                                   nn.get_img_output_length,
                                                   K.image_dim_ordering(),
                                                   mode='train')
    data_gen_val = data_generators.get_anchor_gt(val_imgs,
                                                 classes_count,
                                                 cfg,
                                                 nn.get_img_output_length,
                                                 K.image_dim_ordering(),
                                                 mode='val')

    # Channels-first ('th') versus channels-last input layout.
    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)
    # img_input: the three-channel input image
    img_input = Input(shape=input_shape_img)
    # roi_input: the four bounding-box values of each region of interest
    roi_input = Input(shape=(None, 4))

    # define the base network (resnet here, can be VGG, Inception, etc)
    # shared_layers: the base network (e.g. resnet, vgg) that extracts the
    # feature map from the raw image; those features are then fed to both the
    # RPN and the R-CNN network.
    # 1. Define the network input layers; Faster R-CNN shares these
    #    convolutional layers.
    shared_layers = nn.nn_base(img_input, trainable=True)

    # 2. Define the RPN, built on the base layers
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
    # Original author's note: the RPN generates region proposals - a sigmoid
    # decides whether each anchor is foreground or background, then bounding
    # box regression refines the anchors into RoIs.
    # rpn: RPN head on top of the base network producing classification and
    # regression outputs (the original note mentions 9 anchors; the value
    # actually used is num_anchors). Per the original note it returns
    # [x_class, x_regr, base_layers].
    rpn = nn.rpn(shared_layers, num_anchors)
    # Define the classifier head together with its inputs and outputs
    classifier = nn.classifier(shared_layers,
                               roi_input,
                               cfg.num_rois,
                               nb_classes=len(classes_count),
                               trainable=True)
    # RPN model: image in; per-anchor two-way classification (original note:
    # sigmoid rather than softmax) and box regression out
    model_rpn = Model(img_input, rpn[:2])
    # Classifier model: image and RoIs in, classifier outputs out
    model_classifier = Model([img_input, roi_input], classifier)

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    try:
        print('loading weights from {}'.format(cfg.base_net_weights))
        # TODO: on the first run there was no hdf5 file at model_path, so this
        # was switched to cfg.base_net_weights; it can now be switched back.
        # NOTE(review): the message above prints base_net_weights although the
        # weights are actually read from cfg.model_path.
        model_rpn.load_weights(cfg.model_path, by_name=True)
        model_classifier.load_weights(cfg.model_path, by_name=True)
        # model_rpn.load_weights(cfg.base_net_weights, by_name=True)
        # model_classifier.load_weights(cfg.base_net_weights, by_name=True)
    except Exception as e:
        print(e)
        print(
            'Could not load pretrained model weights. Weights can be found in the keras application folder '
            'https://github.com/fchollet/keras/tree/master/keras/applications')

    optimizer = Adam(lr=1e-5)
    optimizer_classifier = Adam(lr=1e-5)
    model_rpn.compile(optimizer=optimizer,
                      loss=[
                          losses_fn.rpn_loss_cls(num_anchors),
                          losses_fn.rpn_loss_regr(num_anchors)
                      ])
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[
            losses_fn.class_loss_cls,
            losses_fn.class_loss_regr(len(classes_count) - 1)
        ],
        metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    model_all.compile(optimizer='sgd', loss='mae')

    # TODO: add TensorBoard log files
    # NOTE(review): the callback is attached to model_all but is not used to
    # write anything in the rest of this function.
    log_path = './graph'
    callback = TensorBoard(log_path,
                           histogram_freq=0,
                           write_graph=True,
                           write_images=True)
    callback.set_model(model_all)

    # TODO (original): epoch length is the number of training images.
    # The active line below uses the number of validation images instead.
    # epoch_length = len(train_imgs)
    epoch_length = len(val_imgs)
    #epoch_length = 47182
    num_epochs = int(cfg.num_epochs)
    iter_num = 0

    # One row per iteration of an epoch, five values per row (filled below).
    losses = np.zeros((epoch_length, 5))
    # TODO (original, no text)
    # NOTE(review): losses_val and the two *_val lists below are never
    # written to or read in this function.
    losses_val = np.zeros((epoch_length, 5))

    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    rpn_accuracy_rpn_monitor_val = []
    rpn_accuracy_for_epoch_val = []
    start_time = time.time()

    best_loss = np.Inf

    # NOTE(review): class_mapping_inv and vis are not referenced again here.
    class_mapping_inv = {v: k for k, v in class_mapping.items()}
    print('Starting testing')

    vis = True

    # Running sum of the `count` values returned by calc_iou.
    allbbox = 0

    # Was training-only; changed to testing-only
    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        # Runs until epoch_length successful iterations have been recorded;
        # any exception is printed and the loop simply continues.
        while True:
            try:

                # Periodic report once epoch_length samples were monitored.
                if len(rpn_accuracy_rpn_monitor
                       ) == epoch_length and cfg.verbose:
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(
                            rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                        .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print(
                            'RPN is not producing bounding boxes that overlap'
                            ' the ground truth boxes. Check RPN settings or keep training.'
                        )
                # TODO (original): train changed to val
                # X, Y, img_data = next(data_gen_train)
                X, Y, img_data = next(data_gen_val)

                # RPN is only evaluated here (no weight update).
                # loss_rpn = model_rpn.train_on_batch(X, Y)
                loss_rpn = model_rpn.test_on_batch(X, Y)

                P_rpn = model_rpn.predict_on_batch(X)

                result = roi_helpers.rpn_to_roi(P_rpn[0],
                                                P_rpn[1],
                                                cfg,
                                                K.image_dim_ordering(),
                                                use_regr=True,
                                                overlap_thresh=0.7,
                                                max_boxes=300)
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format

                # TODO (original): added `count` to the values returned by calc_iou
                # X2, Y1, Y2, IouS = roi_helpers.calc_iou(result, img_data, cfg, class_mapping)
                X2, Y1, Y2, IouS, count = roi_helpers.calc_iou(
                    result, img_data, cfg, class_mapping)
                allbbox = allbbox + count
                # No usable RoIs for this image: record zero overlap and skip.
                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                # The last column of Y1 is treated as the negative flag:
                # 1 -> negative sample, 0 -> positive sample (presumably the
                # background class column - TODO confirm).
                neg_samples = np.where(Y1[0, :, -1] == 1)
                pos_samples = np.where(Y1[0, :, -1] == 0)

                # np.where returns a tuple; take the index array out of it.
                if len(neg_samples) > 0:
                    neg_samples = neg_samples[0]
                else:
                    neg_samples = []

                if len(pos_samples) > 0:
                    pos_samples = pos_samples[0]
                else:
                    pos_samples = []

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append((len(pos_samples)))

                if cfg.num_rois > 1:
                    # At most half of the RoIs are positives; the rest are
                    # filled with negatives.
                    if len(pos_samples) < cfg.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, cfg.num_rois // 2,
                            replace=False).tolist()
                    # Sample negatives without replacement first; fall back to
                    # sampling with replacement when that raises.
                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            cfg.num_rois - len(selected_pos_samples),
                            replace=False).tolist()
                    except:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            cfg.num_rois - len(selected_pos_samples),
                            replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                # NOTE(review): this is still train_on_batch, so the classifier
                # weights are updated on validation data even though the RPN
                # above is only evaluated - confirm whether test_on_batch was
                # intended in this test function.
                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                # Columns: 0/1 from loss_rpn[1]/[2], 2/3/4 from loss_class[1]/[2]/[3].
                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]

                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]
                losses[iter_num, 4] = loss_class[3]

                iter_num += 1

                progbar.update(
                    iter_num,
                    [('rpn_cls', np.mean(losses[:iter_num, 0])),
                     ('rpn_regr', np.mean(losses[:iter_num, 1])),
                     ('detector_cls', np.mean(losses[:iter_num, 2])),
                     ('detector_regr', np.mean(losses[:iter_num, 3]))])

                # End of epoch: summarise, maybe save weights, leave the loop.
                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(
                        rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if cfg.verbose:
                        print(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        print(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        print('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() -
                                                        start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    # NOTE(review): this overwrites the file the weights were
                    # loaded from (cfg.model_path) - confirm that is wanted
                    # in a test run.
                    if curr_loss < best_loss:
                        if cfg.verbose:
                            print(
                                'Total loss decreased from {} to {}, saving weights'
                                .format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_all.save_weights(cfg.model_path)

                    break

            except Exception as e:
                print('Exception: {}'.format(e))
                # save model
                # NOTE(review): weights are also written after any failure,
                # and the loop retries without a limit.
                model_all.save_weights(cfg.model_path)
                continue
    # The string printed below is Chinese for "detection accuracy:".
    # NOTE(review): raises ZeroDivisionError when pedestrain_num is 0.
    print("检测准确率:")
    print(float(allbbox / pedestrain_num))
    print('testing complete, exiting.')
except:
    print('Could not load pretrained model weights. Weights can be found in the keras application folder \
        https://github.com/fchollet/keras/tree/master/keras/applications')

# ------------------------------------------------------ User multi GPU support
# Wrap the RPN model for the available GPUs; its summary is printed before
# and after wrapping so the two can be compared.
gpus = get_available_gpus()
# NOTE(review): ngpus is not used in the visible part of the script.
ngpus = len(gpus)
print_mgpu_modelsummary(model_rpn)
model_rpn = make_parallel(model_rpn, gpus)
print_mgpu_modelsummary(model_rpn)

# Separate Adam optimizers (same learning rate) for the RPN and the classifier.
optimizer = Adam(lr=1e-5)
optimizer_classifier = Adam(lr=1e-5)
# Only model_rpn was passed through make_parallel above; the classifier and
# the combined model are compiled as they are.
model_rpn.compile(optimizer=optimizer, loss=[losses.rpn_loss_cls(
    num_anchors), losses.rpn_loss_regr(num_anchors)])
model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_cls, losses.class_loss_regr(
    len(classes_count) - 1)], metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
# NOTE(review): 'sgd'/'mae' look like placeholders - presumably model_all is
# compiled only so that it can save/load the combined weights; confirm.
model_all.compile(optimizer='sgd', loss='mae')

# Fixed number of iterations per epoch; epoch count comes from the options.
epoch_length = 1000
num_epochs = int(options.num_epochs)
iter_num = 0

# NOTE: this rebinds `losses` from the loss-function module used above to a
# per-iteration array (one row per iteration, five values per row).
losses = np.zeros((epoch_length, 5))
rpn_accuracy_rpn_monitor = []
rpn_accuracy_for_epoch = []
start_time = time.time()

# Lowest total loss seen so far; starts at +infinity.
# NOTE(review): np.Inf was removed in NumPy 2.0 (np.inf is the portable name).
best_loss = np.Inf

# Reverse lookup: class index -> class name.
class_mapping_inv = {v: k for k, v in class_mapping.items()}
print('Starting training')
def train_model(seed_data, classes_count, class_mapping, con, Earlystopping_patience, config_output_filename):
    """Train the RPN and the detection network and return the config.

    Each iteration trains the RPN on one batch from the training generator,
    converts the RPN predictions to RoIs, samples positive/negative RoIs and
    trains the classifier on them. At the end of every epoch the mean losses
    are logged; when the total loss beats ``con.best_loss`` the config file is
    updated and the combined weights are saved to ``con.model_path``.

    Args:
        seed_data: List of image records; shuffled IN PLACE and split on the
            ``'imageset'`` field ('trainval', 'val', 'test').
        classes_count: Class-name -> count dict; a ``'bg'`` entry is added in
            place when missing.
        class_mapping: Class-name -> index dict; a ``'bg'`` entry is added in
            place when missing.
        con: Config object (network name, anchors, num_rois, num_epochs,
            verbose, best_loss, model_path, base_net_weights, ...).
        Earlystopping_patience: Stop once this many consecutive epochs did not
            improve the best loss; ``None`` disables early stopping.
        config_output_filename: Passed to ``utils.update_config_file`` when
            the best loss improves.

    Returns:
        The config object (the value returned by the last
        ``utils.update_config_file`` call when the loss improved, otherwise
        ``con`` as received, with ``class_mapping`` set).

    Raises:
        ValueError: if ``con.network`` is not one of the supported names.
    """
    sys.setrecursionlimit(40000)
    # utils.reset_keras()
    from keras_frcnn import losses as losses

    if con.network == 'vgg':
        from keras_frcnn import vgg as nn
    elif con.network == 'resnet50':
        from keras_frcnn import resnet as nn
    elif con.network == 'xception':
        from keras_frcnn import xception as nn
    elif con.network == 'inception_resnet_v2':
        from keras_frcnn import inception_resnet_v2 as nn
    else:
        print('Not a valid model')
        raise ValueError('Not a valid model: {}'.format(con.network))

    # Make sure a background class exists; it gets the next free class index.
    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    con.class_mapping = class_mapping

    random.shuffle(seed_data)

    train_imgs = [s for s in seed_data if s['imageset'] == 'trainval']
    val_imgs = [s for s in seed_data if s['imageset'] == 'val']
    test_imgs = [s for s in seed_data if s['imageset'] == 'test']

    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))
    print('Num test samples {}'.format(len(test_imgs)))

    # Ground-truth generators. Only the training generator is consumed below;
    # the other two are still created exactly as before.
    data_gen_train = data_generators.get_anchor_gt(train_imgs, classes_count, con, nn.get_img_output_length, K.image_dim_ordering(), mode='train')
    data_gen_val = data_generators.get_anchor_gt(val_imgs, classes_count, con, nn.get_img_output_length, K.image_dim_ordering(), mode='val')
    data_gen_test = data_generators.get_anchor_gt(test_imgs, classes_count, con, nn.get_img_output_length, K.image_dim_ordering(), mode='val')

    # Channels-first ('th') versus channels-last input layout.
    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)

    # Input placeholders: the image and the RoIs (four values per RoI).
    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))

    # Base network / feature extractor (resnet, VGG, Inception, ...).
    shared_layers = nn.nn_base(img_input, trainable=True)

    # RPN built on the base layers.
    num_anchors = len(con.anchor_box_scales) * len(con.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)

    # Detection network.
    classifier = nn.classifier(shared_layers, roi_input, con.num_rois, nb_classes=len(classes_count), trainable=True)

    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)
    try:
        # load_weights by name
        # some keras application model does not containing name
        # for this kinds of model, we need to re-construct model with naming
        print('loading weights from {}'.format(con.base_net_weights))
        model_rpn.load_weights(con.base_net_weights, by_name=True)
        model_classifier.load_weights(con.base_net_weights, by_name=True)
    except Exception as e:
        # Best effort: keep training from the current initialisation, but
        # report why loading failed and let KeyboardInterrupt through.
        print('Exception: {}'.format(e))
        print('Could not load pretrained model weights. Weights can be found in the keras application folder \
            https://github.com/fchollet/keras/tree/master/keras/applications')

    optimizer = Adam(lr=1e-5)
    optimizer_classifier = Adam(lr=1e-5)
    model_rpn.compile(optimizer=optimizer, loss=[losses.rpn_loss_cls(num_anchors), losses.rpn_loss_regr(num_anchors)])
    model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_cls, losses.class_loss_regr(len(classes_count)-1)], metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    model_all.compile(optimizer='sgd', loss='mae')

    # Tensorboard log
    log_path = './logs'
    if not os.path.isdir(log_path):
        os.mkdir(log_path)

    callback = TensorBoard(log_path)
    callback.set_model(model_all)

    num_epochs = int(con.num_epochs)
    # NOTE(review): as in the original, the number of iterations per epoch is
    # taken from con.num_epochs; confirm whether a dedicated epoch-length
    # setting was intended. It is coerced to int like num_epochs because it
    # sizes the loss buffer.
    epoch_length = num_epochs
    iter_num = 0
    train_step = 0

    # Per-iteration loss buffer (kept under its own name so it does not shadow
    # the `losses` module imported above).
    loss_history = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()

    # Number of consecutive epochs without improvement (early stopping).
    change = 0

    print('Starting training')

    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)   # keras progress bar
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            # Periodic report once epoch_length samples were monitored.
            if len(rpn_accuracy_rpn_monitor) == epoch_length and con.verbose:
                mean_overlapping_bboxes = float(sum(rpn_accuracy_rpn_monitor))/len(rpn_accuracy_rpn_monitor)
                rpn_accuracy_rpn_monitor = []
                print('Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'.format(mean_overlapping_bboxes, epoch_length))
                if mean_overlapping_bboxes == 0:
                    print('RPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.')

            # Pull X, Y and the image record from the data generator.
            X, Y, img_data = next(data_gen_train)

            loss_rpn = model_rpn.train_on_batch(X, Y)
            write_log(callback, ['rpn_cls_loss', 'rpn_reg_loss'], loss_rpn, train_step)

            P_rpn = model_rpn.predict_on_batch(X)

            R = roi_helpers.rpn_to_roi(P_rpn[0], P_rpn[1], con, K.image_dim_ordering(), use_regr=True, overlap_thresh=0.7, max_boxes=300)
            # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
            X2, Y1, Y2, IouS = roi_helpers.calc_iou(R, img_data, con, class_mapping)

            # No usable RoIs for this image: record zero overlap and skip.
            if X2 is None:
                rpn_accuracy_rpn_monitor.append(0)
                rpn_accuracy_for_epoch.append(0)
                continue

            # Last column of Y1: 1 -> negative sample, 0 -> positive sample.
            # np.where on this 1-D slice always yields a 1-tuple, so the index
            # array can be taken directly.
            neg_samples = np.where(Y1[0, :, -1] == 1)[0]
            pos_samples = np.where(Y1[0, :, -1] == 0)[0]

            rpn_accuracy_rpn_monitor.append(len(pos_samples))
            rpn_accuracy_for_epoch.append(len(pos_samples))

            if con.num_rois > 1:
                # At most half of the RoIs are positives.
                if len(pos_samples) < con.num_rois // 2:
                    selected_pos_samples = pos_samples.tolist()
                else:
                    selected_pos_samples = np.random.choice(pos_samples, con.num_rois // 2, replace=False).tolist()
                num_neg = con.num_rois - len(selected_pos_samples)
                # np.random.choice raises ValueError when the population is
                # too small (replace=False) or empty (replace=True).
                try:
                    selected_neg_samples = np.random.choice(neg_samples, num_neg, replace=False).tolist()
                except ValueError:
                    try:
                        selected_neg_samples = np.random.choice(neg_samples, num_neg, replace=True).tolist()
                    except ValueError:
                        # There is no negative sample at all: skip this image.
                        continue
                sel_samples = selected_pos_samples + selected_neg_samples
            else:
                # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                # NOTE(review): a scalar index drops the RoI axis in the
                # slices below - confirm the classifier accepts that shape.
                if np.random.randint(0, 2):
                    sel_samples = random.choice(neg_samples)
                else:
                    sel_samples = random.choice(pos_samples)

            loss_class = model_classifier.train_on_batch([X, X2[:, sel_samples, :]], [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])
            write_log(callback, ['detection_cls_loss', 'detection_reg_loss', 'detection_acc'], loss_class, train_step)
            train_step += 1

            loss_history[iter_num, 0] = loss_rpn[1]
            loss_history[iter_num, 1] = loss_rpn[2]

            loss_history[iter_num, 2] = loss_class[1]
            loss_history[iter_num, 3] = loss_class[2]
            loss_history[iter_num, 4] = loss_class[3]

            iter_num += 1

            progbar.update(iter_num, [('rpn_cls', np.mean(loss_history[:iter_num, 0])), ('rpn_regr', np.mean(loss_history[:iter_num, 1])),
                                      ('detector_cls', np.mean(loss_history[:iter_num, 2])), ('detector_regr', np.mean(loss_history[:iter_num, 3]))])

            if iter_num == epoch_length:
                loss_rpn_cls = np.mean(loss_history[:, 0])
                loss_rpn_regr = np.mean(loss_history[:, 1])
                loss_class_cls = np.mean(loss_history[:, 2])
                loss_class_regr = np.mean(loss_history[:, 3])
                class_acc = np.mean(loss_history[:, 4])

                mean_overlapping_bboxes = float(sum(rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                rpn_accuracy_for_epoch = []

                # Measure the epoch duration once, BEFORE the timer is reset,
                # so the printed and the logged value agree (the logged value
                # used to be taken after the reset and was always ~0).
                elapsed_time = time.time() - start_time

                if con.verbose:
                    print('Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'.format(mean_overlapping_bboxes))
                    print('Classifier accuracy for bounding boxes from RPN: {}'.format(class_acc))
                    print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                    print('Loss RPN regression: {}'.format(loss_rpn_regr))
                    print('Loss Detector classifier: {}'.format(loss_class_cls))
                    print('Loss Detector regression: {}'.format(loss_class_regr))
                    print('Elapsed time: {}'.format(elapsed_time))

                curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                iter_num = 0
                start_time = time.time()

                write_log(callback,
                          ['Elapsed_time', 'mean_overlapping_bboxes', 'mean_rpn_cls_loss', 'mean_rpn_reg_loss',
                           'mean_detection_cls_loss', 'mean_detection_reg_loss', 'mean_detection_acc', 'total_loss'],
                          [elapsed_time, mean_overlapping_bboxes, loss_rpn_cls, loss_rpn_regr,
                           loss_class_cls, loss_class_regr, class_acc, curr_loss],
                          epoch_num)

                if curr_loss < con.best_loss:
                    if con.verbose:
                        print('Total loss decreased from {} to {}, saving weights'.format(con.best_loss, curr_loss))
                    con.best_loss = curr_loss
                    con = utils.update_config_file(config_output_filename, con)
                    print("saving weight")
                    model_all.save_weights(con.model_path)
                    print("weight saved")
                    change = 0
                else:
                    change += 1
                break

        # Early stopping: leave the epoch loop after `Earlystopping_patience`
        # consecutive epochs without improvement.
        if Earlystopping_patience is not None and Earlystopping_patience == change:
            print("training stopped by early stopping")
            break

    print('Training complete, exiting.')
    return con
Exemplo n.º 8
0
def work(num_ep, textEdit, traval_path, weights, dataAug_hf, dataAug_vf,
         dataAug_rot, base_network, curvelist):
    """Train a Faster R-CNN model and mirror progress into GUI widgets.

    The arguments only provide *defaults* for an ``OptionParser``; the parser
    is still evaluated with ``parse_args()``, so values present on the
    process command line take precedence.

    Args:
        num_ep: Default for ``--num_epochs``.
        textEdit: Object exposing ``append(str)``; receives the same status
            lines that are printed to stdout.
        traval_path: Default for ``--path`` (training data location). It is
            also the prefix of the default ``--input_weight_path``.
        weights: Suffix concatenated to ``traval_path`` to form the default
            ``--input_weight_path``.
        dataAug_hf: Default for ``--hf`` (horizontal flip augmentation).
        dataAug_vf: Default for ``--vf`` (vertical flip augmentation).
        dataAug_rot: Default for ``--rot`` / ``--rot_90``.
        base_network: Default for ``--network``; ``'vgg'`` or ``'resnet50'``.
        curvelist: Indexable collection of at least five objects exposing
            ``setData(list)``. Index 0 receives the total-loss history and
            indices 1-4 the RPN classifier, RPN regression, detector
            classifier and detector regression loss histories.

    Raises:
        ValueError: If the selected parser or network name is not supported.

    Side effects:
        Raises the interpreter recursion limit, pickles the config object to
        ``--config_filename``, saves weights to ``--output_weight_path``
        whenever the total loss improves, and writes an extra snapshot when
        the epoch index reaches the next multiple of 30.
    """
    from keras_frcnn import losses
    # Example invocation: python train_frcnn.py -p ./ --input_weight_path ./model_frcnn.hdf5

    sys.setrecursionlimit(40000)

    num_roi = 128
    num_epoch = num_ep
    model_save = "./model_frcnn.hdf5"

    parser = OptionParser()
    parser.add_option("-p",
                      "--path",
                      dest="train_path",
                      help="Path to training data.",
                      default=traval_path)
    parser.add_option("-o",
                      "--parser",
                      dest="parser",
                      help="Parser to use. One of simple or pascal_voc",
                      default="pascal_voc")
    parser.add_option("-n",
                      "--num_rois",
                      type="int",
                      dest="num_rois",
                      help="Number of RoIs to process at once.",
                      default=num_roi)
    parser.add_option("--network",
                      dest="network",
                      help="Base network to use. Supports vgg or resnet50.",
                      default=base_network)

    parser.add_option(
        "--hf",
        dest="horizontal_flips",
        help="Augment with horizontal flips in training. (Default=false).",
        action="store_true",
        default=dataAug_hf)
    parser.add_option(
        "--vf",
        dest="vertical_flips",
        help="Augment with vertical flips in training. (Default=false).",
        action="store_true",
        default=dataAug_vf)
    parser.add_option(
        "--rot",
        "--rot_90",
        dest="rot_90",
        help="Augment with 90 degree rotations in training. (Default=false).",
        action="store_true",
        default=dataAug_rot)

    parser.add_option("--num_epochs",
                      type="int",
                      dest="num_epochs",
                      help="Number of epochs.",
                      default=num_epoch)
    parser.add_option(
        "--config_filename",
        dest="config_filename",
        help=
        "Location to store all the metadata related to the training (to be used when testing).",
        default="config.pickle")

    parser.add_option("--output_weight_path",
                      dest="output_weight_path",
                      help="Output path for weights.",
                      default=model_save)

    parser.add_option(
        "--input_weight_path",
        dest="input_weight_path",
        help=
        "Input path for weights. If not specified, will try to load default weights provided by keras.",
        default=traval_path + weights)

    # NOTE: parse_args() without arguments reads the process command line.
    (options, args) = parser.parse_args()

    if not options.train_path:  # if filename is not given
        parser.error(
            'Error: path to training data must be specified. Pass --path to command line'
        )

    if options.parser == 'pascal_voc':
        from keras_frcnn.pascal_voc_parser import get_data
    elif options.parser == 'simple':
        from keras_frcnn.simple_parser import get_data
    else:
        raise ValueError(
            "Command line option parser must be one of 'pascal_voc' or 'simple'"
        )

    # pass the settings from the command line, and persist them in the config object
    C = config.Config()

    C.use_horizontal_flips = bool(options.horizontal_flips)
    C.use_vertical_flips = bool(options.vertical_flips)
    C.rot_90 = bool(options.rot_90)

    C.model_path = options.output_weight_path
    C.num_rois = int(options.num_rois)

    if options.network == 'vgg':
        C.network = 'vgg'
        from keras_frcnn import vgg as nn
    elif options.network == 'resnet50':
        from keras_frcnn import resnet as nn
        C.network = 'resnet50'
    else:
        print('Not a valid model')
        raise ValueError

    # check if weight path was passed via command line
    if options.input_weight_path:
        C.base_net_weights = options.input_weight_path
    else:
        # set the path to weights based on backend and model
        C.base_net_weights = nn.get_weight_path()

    all_imgs, classes_count, class_mapping = get_data(options.train_path)

    # Make sure a background class exists; it gets the next free index.
    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    C.class_mapping = class_mapping

    print('Training images per class:')
    textEdit.append('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))
    textEdit.append('Num classes (including bg) = {}'.format(
        len(classes_count)))
    config_output_filename = options.config_filename

    with open(config_output_filename, 'wb') as config_f:
        pickle.dump(C, config_f)
        print(
            'Config has been written to {}, and can be loaded when testing to ensure correct results'
            .format(config_output_filename))
        textEdit.append(
            'Config has been written to {}, and can be loaded when testing to ensure correct results'
            .format(config_output_filename))
    random.shuffle(all_imgs)  # randomise the sample order

    train_imgs = [s for s in all_imgs if s['imageset'] == 'train']
    val_imgs = [s for s in all_imgs if s['imageset'] == 'val']

    print('Num train samples {}'.format(len(train_imgs)))
    textEdit.append('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))
    textEdit.append('Num val samples {}'.format(len(val_imgs)))

    # Ground-truth anchor generators for the RPN.
    data_gen_train = data_generators.get_anchor_gt(train_imgs,
                                                   classes_count,
                                                   C,
                                                   nn.get_img_output_length,
                                                   K.image_dim_ordering(),
                                                   mode='train')
    # Created for parity with the training generator; not consumed below.
    data_gen_val = data_generators.get_anchor_gt(val_imgs,
                                                 classes_count,
                                                 C,
                                                 nn.get_img_output_length,
                                                 K.image_dim_ordering(),
                                                 mode='val')

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(shared_layers,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(classes_count),
                               trainable=True)

    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    try:
        print('loading weights from {}'.format(C.base_net_weights))
        textEdit.append('loading weights from {}'.format(C.base_net_weights))
        model_rpn.load_weights(C.base_net_weights, by_name=True)
        model_classifier.load_weights(C.base_net_weights, by_name=True)
    except Exception as e:
        # Best effort: training proceeds without pretrained weights, but the
        # reason is surfaced on both stdout and the GUI log.
        load_warning = (
            'Could not load pretrained model weights. Weights can be found in the keras application folder '
            'https://github.com/fchollet/keras/tree/master/keras/applications')
        print(load_warning)
        print('Exception: {}'.format(e))
        textEdit.append(load_warning)
        textEdit.append('Exception: {}'.format(e))

    optimizer = Adam(lr=1e-5)
    optimizer_classifier = Adam(lr=1e-5)
    model_rpn.compile(optimizer=optimizer,
                      loss=[
                          losses.rpn_loss_cls(num_anchors),
                          losses.rpn_loss_regr(num_anchors)
                      ])
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[
            losses.class_loss_cls,
            losses.class_loss_regr(len(classes_count) - 1)
        ],
        metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    # model_all is only used to save weights; its compile settings are placeholders.
    model_all.compile(optimizer='sgd', loss='mae')
    print(' training')
    epoch_length = 1
    num_epochs = int(options.num_epochs)
    iter_num = 0

    # One row per iteration of the current epoch. Columns: rpn_cls, rpn_regr,
    # detector_cls, detector_regr, detector accuracy.
    loss_history = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()

    best_loss = float('inf')

    print('Starting training')
    textEdit.append('Starting training')

    save_num = 1

    # Per-epoch loss histories pushed to the plot curves.
    loss_rpn_cls_list = []
    loss_rpn_regr_list = []
    loss_class_cls_list = []
    loss_class_regr_list = []
    curr_loss_list = []

    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))
        textEdit.append('Epoch {}/{}'.format(epoch_num + 1, num_epochs))
        while True:
            try:

                if len(rpn_accuracy_rpn_monitor) == epoch_length and C.verbose:
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(
                            rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                        .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print(
                            'RPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.'
                        )

                X, Y, img_data = next(data_gen_train)

                loss_rpn = model_rpn.train_on_batch(X, Y)

                P_rpn = model_rpn.predict_on_batch(X)

                R = roi_helpers.rpn_to_roi(P_rpn[0],
                                           P_rpn[1],
                                           C,
                                           K.image_dim_ordering(),
                                           use_regr=True,
                                           overlap_thresh=0.7,
                                           max_boxes=300)
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(
                    R, img_data, C, class_mapping)

                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                # The last class column flags background: 1 -> negative ROI,
                # 0 -> positive ROI. np.where yields a 1-tuple for this 1-D
                # slice, so the index array is taken directly.
                neg_samples = np.where(Y1[0, :, -1] == 1)[0]
                pos_samples = np.where(Y1[0, :, -1] == 0)[0]

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append((len(pos_samples)))

                if C.num_rois > 1:
                    # Use at most half of the ROI budget for positives.
                    if len(pos_samples) < C.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, C.num_rois // 2,
                            replace=False).tolist()
                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            C.num_rois - len(selected_pos_samples),
                            replace=False).tolist()
                    except ValueError:
                        # Not enough negatives to fill the budget without
                        # repetition: sample with replacement instead.
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            C.num_rois - len(selected_pos_samples),
                            replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    # NOTE(review): sel_samples is a scalar here, so the
                    # indexing below drops the ROI axis - confirm the
                    # classifier accepts that shape.
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                loss_history[iter_num, 0] = loss_rpn[1]
                loss_history[iter_num, 1] = loss_rpn[2]

                loss_history[iter_num, 2] = loss_class[1]
                loss_history[iter_num, 3] = loss_class[2]
                loss_history[iter_num, 4] = loss_class[3]

                iter_num += 1

                # Progress bar shows the running means for this epoch.
                progbar.update(
                    iter_num,
                    [('rpn_cls', np.mean(loss_history[:iter_num, 0])),
                     ('rpn_regr', np.mean(loss_history[:iter_num, 1])),
                     ('detector_cls', np.mean(loss_history[:iter_num, 2])),
                     ('detector_regr', np.mean(loss_history[:iter_num, 3]))])

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(loss_history[:, 0])
                    loss_rpn_cls_list.append(loss_rpn_cls)
                    curvelist[1].setData(loss_rpn_cls_list)
                    loss_rpn_regr = np.mean(loss_history[:, 1])
                    loss_rpn_regr_list.append(loss_rpn_regr)
                    curvelist[2].setData(loss_rpn_regr_list)
                    loss_class_cls = np.mean(loss_history[:, 2])
                    loss_class_cls_list.append(loss_class_cls)
                    curvelist[3].setData(loss_class_cls_list)
                    loss_class_regr = np.mean(loss_history[:, 3])
                    loss_class_regr_list.append(loss_class_regr)
                    curvelist[4].setData(loss_class_regr_list)
                    class_acc = np.mean(loss_history[:, 4])

                    mean_overlapping_bboxes = float(sum(
                        rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if C.verbose:
                        print(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        textEdit.append(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        print(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))
                        textEdit.append(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        textEdit.append(
                            'Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        textEdit.append(
                            'Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        textEdit.append('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        print('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        textEdit.append('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() -
                                                        start_time))
                        textEdit.append('Elapsed time: {}'.format(time.time() -
                                                                  start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    curr_loss_list.append(curr_loss)
                    curvelist[0].setData(curr_loss_list)
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if C.verbose:
                            print(
                                'Total loss decreased from {} to {}, saving weights'
                                .format(best_loss, curr_loss))
                            textEdit.append(
                                'Total loss decreased from {} to {}, saving weights'
                                .format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_all.save_weights(C.model_path)
                    break

            except Exception as e:
                # Report and retry with the next batch; the epoch is only
                # left through the `break` above.
                print('Exception: {}'.format(e))
                textEdit.append('Exception: {}'.format(e))
                continue
        # Extra snapshot when the epoch index reaches the next multiple of 30.
        if (epoch_num == (save_num * 30)):
            model_all.save_weights(C.model_path + str(save_num * 30) + ".hdf5")
            save_num = save_num + 1
    print('Training complete, exiting.')
    textEdit.append('Training complete, exiting.')
Exemplo n.º 9
0
def train_kitti():
    """Train Faster R-CNN on the hard-coded plane dataset with queued loading.

    All settings are fixed inside the function: augmentation flags, 32 ROIs
    per batch, the label CSV path and the output directory. The output
    directory is ``<base>-<yy-mm-dd>-<i>`` for the first ``i`` in
    ``range(10000)`` that does not exist yet.

    Training batches are produced by four background processes that push
    the output of the training generator into a shared queue (max 30
    items); the training loop consumes from that queue.

    Side effects:
        Creates the output directory tree, pickles the config object into
        it, writes three model diagrams (``model_*.png``) to the current
        directory, and saves a weights file into the matching sub-directory
        every time the total loss or one of the four component losses
        reaches a new minimum.

    Note:
        Pretrained weights are *not* loaded here: ``cfg.base_net_weights``
        is set but the network starts from its initial weights.
    """
    # config for data argument
    cfg = config.Config()

    cfg.use_horizontal_flips = True
    cfg.use_vertical_flips = True
    cfg.rot_90 = True
    cfg.num_rois = 32
    cfg.base_net_weights = os.path.join('./model/', nn.get_weight_path())

    # TODO: this is the only path that should need changing to train on other data
    cfg.model_path = '/media/private/Ci/log/plane/frcnn/vgg-adam'

    # Pick the first unused "<base>-<date>-<index>" directory name.
    now = datetime.datetime.now()
    day = now.strftime('%y-%m-%d')
    for i in range(10000):
        candidate = '%s-%s-%d' % (cfg.model_path, day, i)
        if not os.path.exists(candidate):
            cfg.model_path = candidate
            break

    # One sub-directory per tracked loss; each receives its own "best" weights.
    snapshot_dirs = ('loss', 'loss_rpn_cls', 'loss_rpn_regr',
                     'loss_class_cls', 'loss_class_regr')
    make_dir(cfg.model_path)
    for name in snapshot_dirs:
        make_dir(cfg.model_path + '/' + name)

    cfg.simple_label_file = '/media/public/GEOWAY/plane/plane0817.csv'

    all_images, classes_count, class_mapping = get_data(cfg.simple_label_file)

    # Make sure a background class exists; it gets the next free index.
    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    cfg.class_mapping = class_mapping
    cfg.config_save_file = os.path.join(cfg.model_path, 'config.pickle')
    with open(cfg.config_save_file, 'wb') as config_f:
        pickle.dump(cfg, config_f)
        print(
            'Config has been written to {}, and can be loaded when testing to ensure correct results'
            .format(cfg.config_save_file))

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))
    random.shuffle(all_images)
    train_imgs = [s for s in all_images if s['imageset'] == 'trainval']
    val_imgs = [s for s in all_images if s['imageset'] == 'test']

    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))

    data_gen_train = data_generators.get_anchor_gt(train_imgs,
                                                   classes_count,
                                                   cfg,
                                                   nn.get_img_output_length,
                                                   K.image_dim_ordering(),
                                                   mode='train')
    # Created for parity with the training generator; not consumed below.
    data_gen_val = data_generators.get_anchor_gt(val_imgs,
                                                 classes_count,
                                                 cfg,
                                                 nn.get_img_output_length,
                                                 K.image_dim_ordering(),
                                                 mode='val')
    Q = multiprocessing.Manager().Queue(maxsize=30)

    def fill_Q(n):
        """Producer loop: push training batches into Q forever.

        ``n`` is the worker index; it is not used by the loop itself.
        """
        # NOTE(review): every worker process operates on its own copy of
        # data_gen_train; whether the workers yield distinct samples depends
        # on how the generator seeds its randomness - confirm.
        while True:
            # put() blocks while the queue is full, so no polling is needed.
            Q.put(next(data_gen_train))

    workers = []
    for i in range(4):
        worker = multiprocessing.Process(target=fill_Q, args=(i, ))
        # Daemonic so an aborted run does not leave producers alive.
        worker.daemon = True
        workers.append(worker)
        worker.start()

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(shared_layers,
                               roi_input,
                               cfg.num_rois,
                               nb_classes=len(classes_count),
                               trainable=True)

    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)
    from keras.utils import plot_model

    plot_model(model_all,
               'model_all.png',
               show_layer_names=True,
               show_shapes=True)
    plot_model(model_classifier,
               'model_classifier.png',
               show_layer_names=True,
               show_shapes=True)
    plot_model(model_rpn,
               'model_rpn.png',
               show_layer_names=True,
               show_shapes=True)

    # Loading of pretrained weights is intentionally disabled in this
    # function; cfg.base_net_weights is recorded in the config only.

    optimizer = adadelta()
    optimizer_classifier = adadelta()
    model_rpn.compile(optimizer=optimizer,
                      loss=[
                          losses_fn.rpn_loss_cls(num_anchors),
                          losses_fn.rpn_loss_regr(num_anchors)
                      ])
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[
            losses_fn.class_loss_cls,
            losses_fn.class_loss_regr(len(classes_count) - 1)
        ],
        metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    # model_all is only used to save weights; its compile settings are placeholders.
    model_all.compile(optimizer='sgd', loss='mae')

    epoch_length = 10
    num_epochs = int(cfg.num_epochs)
    iter_num = 0

    # One row per iteration of the current epoch. Columns: rpn_cls, rpn_regr,
    # detector_cls, detector_regr, detector accuracy.
    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()

    # Lowest value seen so far for each tracked loss, keyed by its
    # snapshot sub-directory name.
    best = {name: float('inf') for name in snapshot_dirs}
    weights_name = '%s/%s/E-%d-loss-%.4f-rpnc-%.4f-rpnr-%.4f-cls-%.4f-cr-%.4f.hdf5'

    print('Starting training')

    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:

                if len(rpn_accuracy_rpn_monitor
                       ) == epoch_length and cfg.verbose:
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(
                            rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                        .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print(
                            'RPN is not producing bounding boxes that overlap'
                            ' the ground truth boxes. Check RPN settings or keep training.'
                        )

                # get() blocks until a producer has delivered a batch.
                X, Y, img_data = Q.get()

                loss_rpn = model_rpn.train_on_batch(X, Y)

                P_rpn = model_rpn.predict_on_batch(X)

                result = roi_helpers.rpn_to_roi(P_rpn[0],
                                                P_rpn[1],
                                                cfg,
                                                K.image_dim_ordering(),
                                                use_regr=True,
                                                overlap_thresh=0.7,
                                                max_boxes=300)
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(
                    result, img_data, cfg, class_mapping)

                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                # The last class column flags background: 1 -> negative ROI,
                # 0 -> positive ROI. np.where yields a 1-tuple for this 1-D
                # slice, so the index array is taken directly.
                neg_samples = np.where(Y1[0, :, -1] == 1)[0]
                pos_samples = np.where(Y1[0, :, -1] == 0)[0]

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append((len(pos_samples)))

                if cfg.num_rois > 1:
                    # Use at most half of the ROI budget for positives.
                    if len(pos_samples) < cfg.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, cfg.num_rois // 2,
                            replace=False).tolist()
                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            cfg.num_rois - len(selected_pos_samples),
                            replace=False).tolist()
                    except ValueError:
                        # Not enough negatives to fill the budget without
                        # repetition: sample with replacement instead.
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            cfg.num_rois - len(selected_pos_samples),
                            replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    # NOTE(review): sel_samples is a scalar here, so the
                    # indexing below drops the ROI axis - confirm the
                    # classifier accepts that shape.
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]

                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]
                losses[iter_num, 4] = loss_class[3]

                iter_num += 1

                progbar.update(
                    iter_num,
                    [('rpn_cls', np.mean(losses[:iter_num, 0])),
                     ('rpn_regr', np.mean(losses[:iter_num, 1])),
                     ('detector_cls', np.mean(losses[:iter_num, 2])),
                     ('detector_regr', np.mean(losses[:iter_num, 3]))])

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(
                        rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if cfg.verbose:
                        print(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        print(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        print('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() -
                                                        start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    # (sub-directory, label used in the log line, value)
                    tracked = (
                        ('loss', 'Total loss', curr_loss),
                        ('loss_rpn_cls', 'loss_rpn_cls', loss_rpn_cls),
                        ('loss_rpn_regr', 'loss_rpn_regr', loss_rpn_regr),
                        ('loss_class_cls', 'loss_class_cls', loss_class_cls),
                        ('loss_class_regr', 'loss_class_regr',
                         loss_class_regr),
                    )
                    for name, label, value in tracked:
                        if value < best[name]:
                            if cfg.verbose:
                                print(
                                    '{} decreased from {} to {}, saving weights'
                                    .format(label, best[name], value))
                            # The best value is updated regardless of
                            # verbosity, so a snapshot is written only on a
                            # real improvement.
                            best[name] = value
                            model_all.save_weights(
                                weights_name %
                                (cfg.model_path, name, epoch_num, curr_loss,
                                 loss_rpn_cls, loss_rpn_regr, loss_class_cls,
                                 loss_class_regr))

                    break

            except Exception as e:
                # Report and retry with the next batch instead of failing
                # silently; the epoch is only left through the `break` above.
                print('Exception: {}'.format(e))
                continue

    # Stop the producers so the process can exit once training is done.
    for worker in workers:
        worker.terminate()
    print('Training complete, exiting.')
Exemplo n.º 10
0
def run_train(train_path,
              output_weight_path,
              config_filename,
              parser='simple',
              input_weight_path=None,
              network='resnet50',
              num_rois=32,
              lr=1e-5,
              iters=100,
              num_epochs=100,
              overlap_th=0.7):
    """Train the RPN and the detector head of a Faster R-CNN model.

    Every iteration trains the RPN on one batch from the training generator,
    turns the RPN predictions into ROIs, samples ``num_rois`` of them and
    trains the classifier on that sample. After each block of ``iters``
    iterations the mean losses are computed and the combined model weights
    are written to ``output_weight_path`` if the summed loss improved.

    Args:
        train_path: Annotation source handed to the selected ``get_data``.
        output_weight_path: Destination of the best weights (``C.model_path``).
        config_filename: File that receives the pickled ``Config`` object.
        parser: ``'simple'`` or ``'pascal_voc'``; selects the ``get_data``
            implementation.
        input_weight_path: Initial weights file; when ``None`` the path
            returned by ``nn.get_weight_path()`` is used.
        network: ``'resnet50'`` or ``'vgg'``; selects the backbone module.
        num_rois: Number of ROIs fed to the classifier per iteration.
        lr: Learning rate of both Adam optimizers.
        iters: Number of successful iterations that make up one epoch.
        num_epochs: Number of epochs to run.
        overlap_th: ``overlap_thresh`` forwarded to ``roi_helpers.rpn_to_roi``.

    Raises:
        ValueError: If ``network`` or ``parser`` is not a supported value.
    """
    C = config.Config()

    C.model_path = output_weight_path
    C.num_rois = int(num_rois)

    if network == 'vgg':
        C.network = 'vgg'
        from keras_frcnn import vgg as nn
    elif network == 'resnet50':
        from keras_frcnn import resnet as nn
        C.network = 'resnet50'
    else:
        print('Not a valid model')
        raise ValueError('Not a valid model: {!r}'.format(network))

    if parser == 'pascal_voc':
        from keras_frcnn.pascal_voc_parser import get_data
    elif parser == 'simple':
        from keras_frcnn.simple_parser import get_data
    else:
        print('Wrong parser method')
        raise ValueError('Wrong parser method: {!r}'.format(parser))

    if input_weight_path is not None:
        C.base_net_weights = input_weight_path
    else:
        C.base_net_weights = nn.get_weight_path()

    all_imgs, classes_count, class_mapping = get_data(train_path)

    # Make sure a background class exists; it takes the next free index.
    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    C.class_mapping = class_mapping

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))

    # Persist the configuration so that it can be reloaded at test time.
    with open(config_filename, 'wb') as config_f:
        pickle.dump(C, config_f)
        print(
            'Config has been written to {}, and can be loaded when testing to ensure correct results'
            .format(config_filename))

    random.shuffle(all_imgs)

    train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval']
    val_imgs = [s for s in all_imgs if s['imageset'] == 'test']

    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))

    data_gen_train = data_generators.get_anchor_gt(train_imgs,
                                                   classes_count,
                                                   C,
                                                   nn.get_img_output_length,
                                                   K.image_dim_ordering(),
                                                   mode='train')
    # NOTE(review): the validation generator is created but never consumed in
    # this function; it is kept so the call sequence stays unchanged.
    data_gen_val = data_generators.get_anchor_gt(val_imgs,
                                                 classes_count,
                                                 C,
                                                 nn.get_img_output_length,
                                                 K.image_dim_ordering(),
                                                 mode='val')

    # 'th' ordering puts the channel axis first, everything else puts it last.
    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(shared_layers,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(classes_count),
                               trainable=True)

    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    # Loading the initial weights is best effort: training continues without
    # them. Only ordinary errors are swallowed so Ctrl-C still interrupts.
    try:
        print('loading weights from {}'.format(C.base_net_weights))
        model_rpn.load_weights(C.base_net_weights, by_name=True)
        model_classifier.load_weights(C.base_net_weights, by_name=True)
    except Exception as e:
        print('Could not load pretrained model weights...')
        print('Exception: {}'.format(e))

    optimizer = Adam(lr=lr)
    optimizer_classifier = Adam(lr=lr)
    from keras_frcnn import losses
    model_rpn.compile(optimizer=optimizer,
                      loss=[
                          losses.rpn_loss_cls(num_anchors),
                          losses.rpn_loss_regr(num_anchors)
                      ])
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[
            losses.class_loss_cls,
            losses.class_loss_regr(len(classes_count) - 1)
        ],
        metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    # model_all is only used to save weights; it is compiled with placeholders.
    model_all.compile(optimizer='sgd', loss='mae')

    epoch_length = int(iters)
    num_epochs = int(num_epochs)
    iter_num = 0

    # Per-iteration values: rpn cls, rpn regr, detector cls, detector regr,
    # detector accuracy. Named so it does not rebind the `losses` module.
    epoch_losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()

    best_loss = np.Inf

    print('Starting training')

    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        # Runs until `epoch_length` iterations completed without an exception.
        while True:
            try:
                if len(rpn_accuracy_rpn_monitor) == epoch_length and C.verbose:
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(
                            rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                        .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print(
                            'RPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.'
                        )

                X, Y, img_data = next(data_gen_train)

                loss_rpn = model_rpn.train_on_batch(X, Y)

                P_rpn = model_rpn.predict_on_batch(X)

                R = roi_helpers.rpn_to_roi(P_rpn[0],
                                           P_rpn[1],
                                           C,
                                           K.image_dim_ordering(),
                                           use_regr=True,
                                           overlap_thresh=overlap_th,
                                           max_boxes=300)
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(
                    R, img_data, C, class_mapping)

                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                # The last column of Y1 is treated as the background flag:
                # 1 marks a negative ROI, 0 a positive one. np.where returns a
                # 1-tuple here, so its first element is the index array.
                neg_samples = np.where(Y1[0, :, -1] == 1)[0]
                pos_samples = np.where(Y1[0, :, -1] == 0)[0]

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append((len(pos_samples)))

                if C.num_rois > 1:
                    # At most half of the ROIs are positives ...
                    if len(pos_samples) < C.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, C.num_rois // 2,
                            replace=False).tolist()
                    # ... and the rest are negatives, drawn with replacement
                    # only when there are too few distinct negatives.
                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            C.num_rois - len(selected_pos_samples),
                            replace=False).tolist()
                    except ValueError:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            C.num_rois - len(selected_pos_samples),
                            replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                # Index 0 of each result is skipped; the remaining entries are
                # recorded per output (and the accuracy metric for the
                # classifier).
                epoch_losses[iter_num, 0] = loss_rpn[1]
                epoch_losses[iter_num, 1] = loss_rpn[2]

                epoch_losses[iter_num, 2] = loss_class[1]
                epoch_losses[iter_num, 3] = loss_class[2]
                epoch_losses[iter_num, 4] = loss_class[3]

                iter_num += 1

                progbar.update(
                    iter_num,
                    [('rpn_cls', np.mean(epoch_losses[:iter_num, 0])),
                     ('rpn_regr', np.mean(epoch_losses[:iter_num, 1])),
                     ('detector_cls', np.mean(epoch_losses[:iter_num, 2])),
                     ('detector_regr', np.mean(epoch_losses[:iter_num, 3]))])

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(epoch_losses[:, 0])
                    loss_rpn_regr = np.mean(epoch_losses[:, 1])
                    loss_class_cls = np.mean(epoch_losses[:, 2])
                    loss_class_regr = np.mean(epoch_losses[:, 3])
                    class_acc = np.mean(epoch_losses[:, 4])

                    mean_overlapping_bboxes = float(sum(
                        rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if C.verbose:
                        print(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        print(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        print('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() -
                                                        start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    # Only the best epoch (lowest summed loss) is checkpointed.
                    if curr_loss < best_loss:
                        if C.verbose:
                            print(
                                'Total loss decreased from {} to {}, saving weights'
                                .format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_all.save_weights(C.model_path)

                    break

            except Exception as e:
                # Best effort: a failing batch is reported and skipped.
                print('Exception: {}'.format(e))
                continue

    print('Training complete, exiting.')
    def start_training(self, num_images=None):
        """
        Build and compile the Keras-FRCNN models and reset the loss bookkeeping.

        Creates a fresh config, constructs the RPN, classifier and combined
        models on a shared base network, optionally loads initial weights,
        compiles all three models and initialises the best-loss trackers.

        :param num_images: not read by the code in this method
        :return: None
        """
        self._config = frcnn_config.Config()
        cfg = self._config
        cfg.num_rois = int(self._num_rois)
        cfg.model_path = generate_filename(self._weights_folder)
        cfg.class_mapping = self._class_mapping

        initial_weights = self._initial_weights_path
        if initial_weights is not None:
            cfg.base_net_weights = initial_weights

        # Network inputs: the 'th' ordering puts the channel axis first.
        channels_first = keras_backend.image_dim_ordering() == 'th'
        input_shape_img = (3, None, None) if channels_first else (None, None, 3)

        img_input = Input(shape=input_shape_img)
        roi_input = Input(shape=(cfg.num_rois, 4))

        # Base network shared by the RPN and the classifier
        # (resnet here, can be VGG, Inception, etc).
        shared_layers = nn.nn_base(img_input, trainable=True)

        # RPN on top of the base layers.
        # TODO: These parameters are never set, uses defaults from keras_frcnn.config
        num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
        rpn = nn.rpn(shared_layers, num_anchors)

        nb_classes = len(self._class_mapping)
        classifier = nn.classifier(shared_layers,
                                   roi_input,
                                   cfg.num_rois,
                                   nb_classes=nb_classes,
                                   trainable=True)

        self._model_rpn = Model(img_input, rpn[:2])
        self._model_classifier = Model([img_input, roi_input], classifier)

        # Combined model holding both the RPN and the classifier; it exists so
        # the weights of both can be loaded/saved together.
        self._model_all = Model([img_input, roi_input], rpn[:2] + classifier)

        # Optionally start from pre-trained weights. Weights can be found at:
        # https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_th_dim_ordering_th_kernels_notop.h5
        # https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
        if initial_weights is not None:
            self._model_rpn.load_weights(initial_weights, by_name=True)
            self._model_classifier.load_weights(initial_weights, by_name=True)

        rpn_optimizer = Adam(lr=1e-4)
        classifier_optimizer = Adam(lr=1e-4)

        rpn_losses = [
            frcnn_losses.rpn_loss_cls(num_anchors),
            frcnn_losses.rpn_loss_regr(num_anchors),
        ]
        self._model_rpn.compile(optimizer=rpn_optimizer, loss=rpn_losses)

        classifier_losses = [
            frcnn_losses.class_loss_cls,
            frcnn_losses.class_loss_regr(nb_classes - 1),
        ]
        accuracy_metric = {'dense_class_{}'.format(nb_classes): 'accuracy'}
        self._model_classifier.compile(optimizer=classifier_optimizer,
                                       loss=classifier_losses,
                                       metrics=accuracy_metric)

        self._model_all.compile(optimizer='sgd', loss='mae')

        self._best_validation_loss = np.Inf
        if self._self_assessment_interval > 0:
            # Training loss is being measured: allocate one row of four loss
            # values per image in the interval and track the best value seen.
            buffer_shape = (self._self_assessment_interval, 4)
            self._training_losses = np.zeros(buffer_shape)
            self._best_training_loss = np.Inf
            self._sa_image_count = 0
def train_kitti():
    """Train a Faster R-CNN model on the dataset named in the config.

    Builds the configuration (augmentation flags, anchors, file paths),
    pickles it, constructs the RPN / classifier / combined models, tries to
    resume from ``cfg.model_path`` and then alternates RPN and classifier
    training. After every 100 successful iterations the mean losses are
    computed and the weights are saved when the summed loss improved. If an
    iteration raises, the current weights are saved and the loop continues.
    """
    # config for data augmentation
    cfg = config.Config()
    cfg.balanced_classes = True
    cfg.use_horizontal_flips = True
    cfg.use_vertical_flips = True
    cfg.rot_90 = True
    # Original author's note: for the Xingtu Cup optical remote-sensing
    # aircraft detection task this should be 50 or more.
    cfg.num_rois = 50
    cfg.anchor_box_scales = [41, 70, 120, 20, 90]
    cfg.anchor_box_ratios = [[1, 1.4], [1, 0.84], [1, 1.17], [1, 0.64], [1, 1]]

    cfg.base_net_weights = os.path.join('./model/', nn.get_weight_path())

    # TODO: these paths are the only settings that need to change to train on
    # other data.
    cfg.model_path = './model/kitti_frcnn_last.hdf5'
    # Alternative label-file locations used by the original author:
    #   /media/liuhuaqing/Elements/Xingtubei/official_datas/OpticalAircraft/Chreoc_OpticalAircraft_bboxes.txt
    #   F:/Xingtubei/official_datas/OpticalAircraft/Chreoc_OpticalAircraft_bboxes.txt
    #   kitti_simple_label.txt
    cfg.simple_label_file = 'E:/Xingtubei/official_datas/OpticalAircraft/laptop_Chreoc_OpticalAircraft_bboxes.txt'

    # Load the dataset. Original author's note: cv2.imread() requires that the
    # image paths contain no Chinese characters.
    all_images, classes_count, class_mapping = get_data(
        cfg.simple_label_file)

    # Make sure a background class exists; a count of 0 means the training
    # data contains no explicit background samples.
    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    cfg.class_mapping = class_mapping
    with open(cfg.config_save_file, 'wb') as config_f:
        pickle.dump(cfg, config_f)
        print(
            'Config has been written to {}, and can be loaded when testing to ensure correct results'
            .format(cfg.config_save_file))

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))
    random.shuffle(all_images)
    # Training / validation splits: lists of per-image dicts.
    train_imgs = [s for s in all_images if s['imageset'] == 'trainval']
    val_imgs = [s for s in all_images if s['imageset'] == 'test']

    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))

    # Generators yielding the training / validation inputs and RPN targets.
    # Original author's note: they also apply the data augmentation.
    data_gen_train = data_generators.get_anchor_gt(
        train_imgs,
        classes_count,
        cfg,
        nn.get_img_output_length,
        K.image_dim_ordering(),
        mode='train')
    # NOTE(review): the validation generator is created but never consumed in
    # this function; it is kept so the call sequence stays unchanged.
    data_gen_val = data_generators.get_anchor_gt(
        val_imgs,
        classes_count,
        cfg,
        nn.get_img_output_length,
        K.image_dim_ordering(),
        mode='val')

    # The input shape depends on the dimension ordering of the Keras backend:
    # 'th' puts the channel axis first, everything else puts it last.
    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img)  # input image
    roi_input = Input(shape=(None, 4))  # ROI boxes, 4 coordinates each

    # define the base network (resnet here, can be VGG, Inception, etc);
    # these are the layers shared by the RPN and the classifier
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(shared_layers,
                               roi_input,
                               cfg.num_rois,
                               nb_classes=len(classes_count),
                               trainable=True)

    # Original author's note: the first two RPN outputs are its
    # classification and regression outputs.
    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    # (rpn[:2] + classifier concatenates the two output lists)
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    # Try to resume from the previously saved weights. This is best effort:
    # training continues even if the file cannot be loaded.
    # NOTE(review): cfg.base_net_weights is set above but never loaded here;
    # confirm whether loading cfg.model_path instead is intentional.
    try:
        print('loading weights from {}'.format(cfg.model_path))
        model_rpn.load_weights(cfg.model_path, by_name=True)
        model_classifier.load_weights(cfg.model_path, by_name=True)
    except Exception as e:
        print(e)
        print(
            'Could not load pretrained model weights. Weights can be found in the keras application folder '
            'https://github.com/fchollet/keras/tree/master/keras/applications')

    # One Adam optimizer per model, both with the same learning rate.
    optimizer = Adam(lr=1e-5)
    optimizer_classifier = Adam(lr=1e-5)
    # num_anchors is 25 here (5 scales x 5 ratios).
    model_rpn.compile(optimizer=optimizer,
                      loss=[
                          losses_fn.rpn_loss_cls(num_anchors),
                          losses_fn.rpn_loss_regr(num_anchors)
                      ])
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[
            losses_fn.class_loss_cls,
            losses_fn.class_loss_regr(len(classes_count) - 1)
        ],
        metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    # model_all is only used to save weights; it is compiled with placeholders.
    model_all.compile(optimizer='sgd', loss='mae')

    # Every epoch_length iterations the losses are evaluated, the weights are
    # saved if they improved, and iter_num is reset to 0.
    epoch_length = 100
    num_epochs = int(cfg.num_epochs)
    iter_num = 0

    # Per-iteration values: rpn cls, rpn regr, detector cls, detector regr,
    # detector accuracy.
    losses = np.zeros((epoch_length, 5))
    # Number of positive ROIs per iteration, for the periodic report and for
    # the per-epoch mean respectively.
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()

    # Lowest summed loss seen so far.
    best_loss = np.Inf

    print('Starting training')

    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        # Left through the `break` below, once epoch_length iterations have
        # completed without an exception.
        while True:
            try:

                # Report the mean number of positive ROIs once the monitor
                # holds epoch_length entries (verbose mode only).
                if len(
                        rpn_accuracy_rpn_monitor
                ) == epoch_length and cfg.verbose:
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(
                            rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                        .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print(
                            'RPN is not producing bounding boxes that overlap'
                            ' the ground truth boxes. Check RPN settings or keep training.'
                        )

                # Original author's notes: X is the image batch (e.g.
                # (1, 600, 1987, 3) for KITTI), Y the RPN targets, img_data a
                # dict with file name, size, annotated boxes and classes.
                X, Y, img_data = next(data_gen_train)

                # loss_rpn is indexed up to [2] below.
                loss_rpn = model_rpn.train_on_batch(X, Y)

                # Original author's note: e.g. shapes (1, 124, 38, 9) and
                # (1, 124, 38, 36).
                P_rpn = model_rpn.predict_on_batch(X)

                # Original author's note: result has shape (300, 4).
                result = roi_helpers.rpn_to_roi(
                    P_rpn[0],
                    P_rpn[1],
                    cfg,
                    K.image_dim_ordering(),
                    use_regr=True,
                    overlap_thresh=0.7,
                    max_boxes=300)
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                # Original author's notes on shapes: X2 holds the ROI boxes,
                # Y1 the class targets, Y2 the regression targets (coordinates
                # followed by per-class labels), IouS one value per ROI.
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(
                    result, img_data, cfg, class_mapping)

                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                # The last column of Y1 is the background flag: 1 marks a
                # negative ROI, 0 a positive one. np.where returns a 1-tuple
                # here, so its first element is the index array.
                neg_samples = np.where(Y1[0, :, -1] == 1)[0]
                pos_samples = np.where(Y1[0, :, -1] == 0)[0]

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append((len(pos_samples)))

                if cfg.num_rois > 1:
                    # At most half of the ROIs are positives ...
                    if len(pos_samples) < cfg.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, cfg.num_rois // 2,
                            replace=False).tolist()
                    # ... and the rest are negatives, drawn with replacement
                    # only when there are too few distinct negatives.
                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            cfg.num_rois - len(selected_pos_samples),
                            replace=False).tolist()
                    except ValueError:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            cfg.num_rois - len(selected_pos_samples),
                            replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                # Feed the sampled RPN ROIs to the classifier.
                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]

                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]
                losses[iter_num, 4] = loss_class[3]

                iter_num += 1

                progbar.update(
                    iter_num,
                    [('rpn_cls', np.mean(losses[:iter_num, 0])),
                     ('rpn_regr', np.mean(losses[:iter_num, 1])),
                     ('detector_cls', np.mean(losses[:iter_num, 2])),
                     ('detector_regr', np.mean(losses[:iter_num, 3]))])

                # End of epoch: evaluate the mean losses, save the weights if
                # they improved and reset the iteration counter.
                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(
                        rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if cfg.verbose:
                        print(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        print(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        print('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() -
                                                        start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if cfg.verbose:
                            print(
                                'Total loss decreased from {} to {}, saving weights'
                                .format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_all.save_weights(cfg.model_path)

                    break

            except Exception as e:
                print('Exception: {}'.format(e))
                # save model
                # NOTE(review): this overwrites the best-loss checkpoint with
                # the current weights whenever an iteration fails.
                model_all.save_weights(cfg.model_path)
                continue
    print('Training complete, exiting.')
Exemplo n.º 13
0
def build_models(weight_path, init_models=False, train_view_only=False, create_siam=False):
	"""Build and compile the RPN, classifier and combined models.

	Relies on names defined outside this function (``img_input``, ``roi_input``,
	``C``, ``classes_count``, ``optimizer``, ``optimizer_classifier`` and, for
	the siamese branch, ``img_input_a``/``img_input_b``, ``roi_input_a``/
	``roi_input_b``, ``labels_input``, ``class_mapping``, ``rms``,
	``optimizer_view_only`` and the lambda helpers).

	Args:
		weight_path: Weights file loaded (by layer name) into the RPN and the
			classifier after the optional base-weight initialisation.
		init_models: When true, first try to load ``C.base_net_weights``;
			a failure is reported and otherwise ignored.
		train_view_only: When true, the base network and the class branch are
			built with ``trainable=False``; the view branch is always trainable.
		create_siam: When true, additionally build the view-only, siamese and
			flip models.

	Returns:
		``(model_rpn, model_classifier, model_all)``, or, when ``create_siam``
		is true, ``(model_rpn, model_classifier, model_all, model_view_only,
		model_siam, model_flip)``.
	"""
	trainable_cls = not train_view_only
	trainable_view = True

	# define the base network (resnet here, can be VGG, Inception, etc)
	shared_layers = nn.nn_base(img_input, trainable=trainable_cls)

	# define the RPN, built on the base layers
	num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
	rpn = nn.rpn(shared_layers, num_anchors)

	classifier = nn.classifier(shared_layers, roi_input, C.num_rois, nb_classes=len(classes_count), trainable_cls=trainable_cls, trainable_view=trainable_view)

	model_rpn = Model(img_input, rpn[:2])

	model_classifier = Model([img_input, roi_input], classifier)
	# this is a model that holds both the RPN and the classifier, used to load/save weights for the models
	model_all = Model([img_input, roi_input], rpn[:2] + classifier)

	if init_models:
		# Best effort: only ordinary errors are swallowed, so Ctrl-C still
		# interrupts, and the cause is reported instead of being hidden.
		try:
			print('loading weights from {}'.format(C.base_net_weights))
			model_rpn.load_weights(C.base_net_weights, by_name=True)
			model_classifier.load_weights(C.base_net_weights, by_name=True)
		except Exception as e:
			print('Could not load pretrained model weights. Weights can be found at {} and {}'.format(
				'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_th_dim_ordering_th_kernels_notop.h5',
				'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
			))
			print('Exception: {}'.format(e))

	## load pre-trained net (not guarded: a failure here propagates)
	model_rpn.load_weights(weight_path, by_name=True)
	model_classifier.load_weights(weight_path, by_name=True)

	model_rpn.compile(optimizer=optimizer, loss=[losses.rpn_loss_cls(num_anchors), losses.rpn_loss_regr(num_anchors)])
	model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_cls, losses.class_loss_regr(len(classes_count)-1), losses.class_loss_view(len(classes_count), roi_num=C.num_rois)], metrics=['accuracy'])
	# model_all is only used to load/save weights; compiled with placeholders.
	model_all.compile(optimizer='sgd', loss='mae')

	if not create_siam:
		return model_rpn, model_classifier, model_all

	# The third classifier output is the view branch.
	model_view_only = Model([img_input, roi_input], classifier[2])

	## use the feature map after rpn,train only the view module
	view_a = model_view_only([img_input_a, roi_input_a])
	view_b = model_view_only([img_input_b, roi_input_b])

	# use the relevant part of the view vec
	view_a1 = Lambda(mat_lambda, output_shape=mat_lambda_output_shape)(view_a)
	view_b1 = Lambda(mat_lambda, output_shape=mat_lambda_output_shape)(view_b)

	view_a1 = Activation('softmax')(view_a1)
	view_b1 = Activation('softmax')(view_b1)

	flat_a = Flatten()(view_a1)
	flat_b = Flatten()(view_b1)

	# Distance between the two view predictions (siamese output).
	distance_siam = Lambda(euclidean_distance,
			output_shape=eucl_dist_output_shape)([flat_a, flat_b])

	# Flip branch: slice both view outputs with the labels input, transform
	# the second one with mat_flip_lambda, then compare the two.
	view_non_flip = SliceTensor(len(class_mapping), C.num_rois)([view_a, labels_input])
	view_flip = SliceTensor(len(class_mapping), C.num_rois)([view_b, labels_input])
	view_flip = Lambda(mat_flip_lambda, output_shape=mat_lambda_flip_output_shape)(view_flip)

	view_flip = Activation('softmax')(view_flip)
	view_non_flip = Activation('softmax')(view_non_flip)

	flat_flip = Flatten()(view_flip)
	flat_non_flip = Flatten()(view_non_flip)

	distance_flip = Lambda(euclidean_distance,
			output_shape=eucl_dist_output_shape)([flat_non_flip, flat_flip])

	model_flip = Model([img_input_a, roi_input_a, img_input_b, roi_input_b, labels_input], distance_flip)
	model_flip.compile(loss=contrastive_loss, optimizer=rms)

	model_siam = Model([img_input_a, roi_input_a, img_input_b, roi_input_b], distance_siam)
	model_view_only.compile(optimizer=optimizer_view_only, loss=losses.class_loss_view(len(classes_count), roi_num=C.num_rois), metrics=['accuracy'])
	model_siam.compile(loss=contrastive_loss, optimizer=rms)
	return model_rpn, model_classifier, model_all, model_view_only, model_siam, model_flip
Exemplo n.º 14
0
	model_rpn.load_weights(C.base_net_weights, by_name=True)   #TODO: load RPN weights
# 	# model_classifier.load_weights(C.base_net_weights, by_name=True)
	# print('loading weights from {}'.format(C.model_path))
	# model_all.load_weights(C.model_path, by_name = True)
except:
	print('Could not load pretrained model weights. Weights can be found at {} and {}'.format(
		'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_th_dim_ordering_th_kernels_notop.h5',
		'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
	))

# Compile the three models. The RPN and the classifier each get their own
# Nadam optimizer with a different learning rate.
optimizer_rpn = Nadam(lr=1e-7)
optimizer_classifier = Nadam(lr=1e-6)
model_rpn.compile(optimizer=optimizer_rpn, loss=[losses.rpn_loss_cls(num_anchors), losses.rpn_loss_regr(num_anchors)])


# Active classifier objective: face loss and regression loss, weighted by the
# corresponding lambda values from the config.
classifier_loss = [losses.class_loss_face(), losses.class_loss_regr()]
classifier_loss_weight = [C.lambda_face, C.lambda_regr]

# Disabled alternative: a five-output objective (face, pose, gender, viz, landmark).
#classifier_loss = [losses.class_loss_face(),losses.class_loss_pose(),losses.class_loss_gender(), losses.class_loss_viz(),losses.class_loss_landmark()]
#classifier_loss_weight = [C.lambda_face,C.lambda_pose,C.lambda_gender,C.lambda_viz,C.lambda_landmark]
model_classifier.compile(optimizer=optimizer_classifier, loss=classifier_loss , loss_weights= classifier_loss_weight )



# Disabled alternative: a single combined loss.
# model_classifier.compile(optimizer=optimizer_classifier, loss=losses.class_loss_overall(C))
# model_all is compiled with a generic optimizer/loss pair.
# NOTE(review): presumably it is only used to load/save weights - confirm.
model_all.compile(optimizer='sgd', loss='mae')

# Training-loop bookkeeping.
epoch_length = 100
# NOTE(review): the command-line value is parsed and then immediately
# overwritten by the hard-coded 210 * 10 below - confirm this is intended.
num_epochs = int(options.num_epochs)
num_epochs = 210 * 10
iter_num = 0
Exemplo n.º 15
0
def _select_roi_samples(Y1, num_rois):
    """Choose the ROI indices handed to the classifier for one image.

    Rows of ``Y1[0]`` whose last column equals 1 are treated as negative
    samples and rows whose last column equals 0 as positive samples.

    For ``num_rois > 1`` at most ``num_rois // 2`` positives are drawn without
    replacement and the remainder is filled with negatives (with replacement
    only when there are fewer negatives than requested). For ``num_rois == 1``
    a single index is drawn from the positives or the negatives (coin flip),
    falling back to the other pool when the chosen one is empty.

    Returns:
        A list of integer indices. It is always a list, so indexing an array
        with it keeps the ROI axis.

    Raises:
        ValueError: when negatives are required but none exist.
        IndexError: when ``num_rois == 1`` and both pools are empty.
    """
    last_column = Y1[0, :, -1]
    neg_samples = np.where(last_column == 1)[0]
    pos_samples = np.where(last_column == 0)[0]

    if num_rois > 1:
        half = num_rois // 2
        if len(pos_samples) < half:
            selected_pos = pos_samples.tolist()
        else:
            selected_pos = np.random.choice(
                pos_samples, half, replace=False).tolist()

        num_neg = num_rois - len(selected_pos)
        try:
            selected_neg = np.random.choice(
                neg_samples, num_neg, replace=False).tolist()
        except ValueError:
            # fewer negatives than requested: allow repeats
            selected_neg = np.random.choice(
                neg_samples, num_neg, replace=True).tolist()
        return selected_pos + selected_neg

    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
    if np.random.randint(0, 2):
        preferred, fallback = neg_samples, pos_samples
    else:
        preferred, fallback = pos_samples, neg_samples
    pool = preferred if len(preferred) > 0 else fallback
    # wrapped in a list so that X2[:, sel, :] keeps its ROI dimension
    return [random.choice(pool.tolist())]


def _store_batch_losses(table, row, loss_rpn, loss_class):
    """Write one batch's losses into ``table[row]``.

    Columns are: rpn_cls, rpn_regr, class_cls, class_regr, class accuracy.
    ``loss_rpn`` / ``loss_class`` are the lists returned by
    ``train_on_batch`` / ``test_on_batch`` (index 0, the total, is ignored),
    or ``None`` when the corresponding model was not evaluated, in which case
    zeros are stored.
    """
    if loss_rpn is None:
        table[row, 0] = 0
        table[row, 1] = 0
    else:
        table[row, 0] = loss_rpn[1]
        table[row, 1] = loss_rpn[2]

    if loss_class is None:
        table[row, 2] = 0
        table[row, 3] = 0
        table[row, 4] = 0
    else:
        table[row, 2] = loss_class[1]
        table[row, 3] = loss_class[2]
        table[row, 4] = loss_class[3]  # accuracy


def _mean_epoch_losses(table, train_rpn, train_final_classifier):
    """Return per-epoch means ``(rpn_cls, rpn_regr, cls_cls, cls_regr, acc)``.

    Entries belonging to a model that is not being trained are reported as 0.
    """
    if train_rpn:
        rpn_cls = np.mean(table[:, 0])
        rpn_regr = np.mean(table[:, 1])
    else:
        rpn_cls = 0
        rpn_regr = 0

    if train_final_classifier:
        cls_cls = np.mean(table[:, 2])
        cls_regr = np.mean(table[:, 3])
        acc = np.mean(table[:, 4])
    else:
        cls_cls = 0
        cls_regr = 0
        acc = 0
    return rpn_cls, rpn_regr, cls_cls, cls_regr, acc


def Train_frcnn(
        train_path='./data/flickr_logos_27_dataset_training_set_annotation.txt',  # path to the text file containing the data
        network_arch='vgg',  # the type of the base faster rcnn network architecture
        num_epochs=50,  # num of epochs
        output_weight_path='./models/model_frcnn.hdf5',  # path to save the model_all.weights as hdf5
        preprocessing_function=None,
        config_filename="config.pickle",
        input_weights_path='./models/vgg16_weights_tf_dim_ordering_tf_kernels.h5',
        train_rpn=True,
        train_final_classifier=True,
        train_base_nn=True,
        losses_to_watch=None,
        tb_log_dir="log",
        num_rois=32,
        horizontal_flips=False,
        vertical_flips=False,
        rot_90=False,
        anchor_box_scales=None,
        anchor_box_ratios=None,
        im_size=600,
        rpn_stride=16,  # depends on network architecture
        visualize_model=None,
        verify_trainable=True,
        optimizer_rpn=None,
        optimizer_classifier=None,
        validation_interval=3,
        rpn_min_overlap=0.3,
        rpn_max_overlap=0.7,
        classifier_min_overlap=0.1,
        classifier_max_overlap=0.5,
        rpn_nms_threshold=0.7,  # original implementation
        seed=5000):
    """
    Trains a Faster RCNN for object detection in keras

    NOTE: This trains 2 models namely model_rpn and model_classifier with the same shared base_nn (fixed feature extractor)

    Keyword Arguments
    train_path -- str: path to the annotation text file passed to get_data
    network_arch -- str: 'vgg' or 'resnet50'; selects the keras_frcnn architecture module (Default 'vgg')
    num_epochs -- int: number of epochs to train (Default 50)
    output_weight_path --str: path to save the frcnn weights
    preprocessing_function --function: Optional preprocessing function (must be defined like given in keras docs) (Default None)
    config_filename --str: Path to save the config file. Used when testing (Default "config.pickle")
    input_weights_path --str: Path to hdf5 file containing weights for the model.
                             You can pass a path to either classification or detection checkpoints as long as the layer names don't change
    train_rpn --bool: whether to train the rpn layer (Default True)
    train_final_classifier --bool: whether to train the final_classifier (Fast Rcnn layer) (Default True)
    train_base_nn --bool: whether to train the base_nn/fixed_feature_extractor (Default True)
    losses_to_watch --list: A list of losses to watch (Default None, meaning ['rpn_cls','rpn_reg','final_cls','final_reg']).
                            The losses in this list are added and then weights are saved wrt to that.
                            The list can contain any combination of the above 4 only.
    tb_log_dir --str: path to log dir for tensorboard logging (Default 'log')
    num_rois --int: The number of rois to use at once (Default = 32)
    horizontal_flips --bool: augment training data by horizontal flips (Default False)
    vertical_flips --bool: augment training data by vertical flips (Default False)
    rot_90 --bool: augment training data by 90 deg rotations (Default False)
    anchor_box_scales --list: The list of anchor box scales to use (Default None, meaning [128,256,512])
    anchor_box_ratios --list of list: The list of anchor box aspect ratios to use
                            (Default None, meaning [[1, 1], [1./math.sqrt(2), 2./math.sqrt(2)], [2./math.sqrt(2), 1./math.sqrt(2)]])
    im_size --int: The size to resize the image (Default 600). This is the smallest side of Pascal VOC format
    rpn_stride --int: The stride for rpn (Default = 16)
    visualize_model --str: Path to save the model as .png file
    verify_trainable --bool: print layer wise names and prints if it is trainable or not (Default True)
    optimizer_rpn --keras.optimizer: The optimizer for rpn (Default None, meaning a fresh Adam(lr=1e-5) per call)
    optimizer_classifier --keras.optimizer: The optimizer for classifier (Default None, meaning a fresh Adam(lr=1e-5) per call)
    validation_interval --int: The frequency (in epochs) to do validation. supply 0 if no validation
    rpn_min_overlap --float: (0,1) The Min IOU in rpn layer (Default 0.3) (original implementation)
    rpn_max_overlap --float: (0,1) The max IOU in rpn layer (Default 0.7) (original implementation)
    classifier_min_overlap --float: (0,1) same as above but in final classifier (Default 0.1) (original implementation)
    classifier_max_overlap --float: (0,1) same as above (Default 0.5) (original implementation)
    rpn_nms_threshold --float :(0,1) The threshold above which to suppress the bbox using Non max suppression in rpn (Default 0.7)(from original implementation)
    seed --int: To seed the random shuffling of training data (Default = 5000)

    Performing alternating training:
    - Use the train_rpn,train_final_classifier and train_base_nn boolean arguments to accomplish
    alternating training.
    - While using the above arguments change the members of losses_to_watch = ['rpn_cls','rpn_reg','final_cls','final_reg']
      accordingly else it will throw error
    - for eg if you are training only the base_nn and the rpn set:
         train_rpn = True
         train_base_nn = True
         train_final_classifier = False
         losses_to_watch = ['rpn_cls','rpn_reg'] (do not include 'final_cls', 'final_reg')

    RAISES:
    ValueError -- for an unsupported entry in losses_to_watch, for watching a loss
                  of a model that is not trained, for an unknown network_arch, or
                  when the annotation file yields no image with imageset == 'train'

    OUTPUT:
    prints the training log. Does not return anything

    Save details:
    1.saves the weights of the full FRCNN model as .h5
    2.saves a tensorboard file
    3.saves the history of weights saved in ./saving_log.txt so that it can be known at which epoch the model is saved
      (only written when the config object's verbose flag is set)
    4.saves the model configuration as a .pickle file
    5.optionally saves the full FRCNN architecture as .png

    NOTE:
    as of now the batch size = 1
    Prints loss = 0 for losses from model which is not being trained
    Validation is skipped (with a message) when there are no images with imageset == 'test'

    TODO: The training is a bit slow because of the data generation step. Generate_data in multiple threads and queue them for faster training

    """
    check_list = ['rpn_cls', 'rpn_reg', 'final_cls', 'final_reg']
    if losses_to_watch is None:
        # built per call so callers never share one mutable default list
        losses_to_watch = list(check_list)
    for n in losses_to_watch:
        if n not in check_list:
            raise ValueError(
                "unsupported loss the supported losses are: {}".format(
                    check_list))

    if not train_rpn:
        if "rpn_cls" in losses_to_watch or "rpn_reg" in losses_to_watch:
            raise ValueError(
                "Cannot watch rpn_cls and rpn_reg when train_rpn == False")
    if not train_final_classifier:
        if "final_cls" in losses_to_watch or "final_reg" in losses_to_watch:
            raise ValueError(
                "cannot watch final_cls and final_reg when train_final_classifier == False"
            )

    if network_arch == 'vgg':
        from keras_frcnn import nn_arch_vgg16 as nn
    elif network_arch == 'resnet50':
        from keras_frcnn import nn_arch_resnet50 as nn
    else:
        raise ValueError(
            "Not a valid model: {!r} (expected 'vgg' or 'resnet50')".format(
                network_arch))

    if anchor_box_scales is None:
        anchor_box_scales = [128, 256, 512]
    if anchor_box_ratios is None:
        anchor_box_ratios = [[1, 1],
                             [1. / math.sqrt(2), 2. / math.sqrt(2)],
                             [2. / math.sqrt(2), 1. / math.sqrt(2)]]

    random.seed(seed)
    np.random.seed(seed)

    # pass the settings from the function call, and persist them in the config object
    C = config.Config()
    C.rpn_max_overlap = rpn_max_overlap
    C.rpn_min_overlap = rpn_min_overlap
    C.classifier_min_overlap = classifier_min_overlap
    C.classifier_max_overlap = classifier_max_overlap
    C.anchor_box_scales = anchor_box_scales
    C.anchor_box_ratios = anchor_box_ratios
    C.im_size = im_size
    C.use_horizontal_flips = bool(horizontal_flips)
    C.use_vertical_flips = bool(vertical_flips)
    C.rot_90 = bool(rot_90)
    C.rpn_stride = rpn_stride
    C.rpn_nms_threshold = rpn_nms_threshold
    C.weights_all_path = output_weight_path
    C.num_rois = int(num_rois)

    # only override the config's initial weights when a path was supplied
    if input_weights_path:
        C.initial_weights = input_weights_path

    all_imgs, classes_count, class_mapping = get_data(train_path)

    print("The class mapping is:")
    print(class_mapping)

    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    C.class_mapping = class_mapping

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))

    with open(config_filename, 'wb') as config_f:
        pickle.dump(C, config_f)
        print(
            'Config has been written to {}, and can be loaded when testing to ensure correct results'
            .format(config_filename))

    np.random.shuffle(all_imgs)

    train_imgs = [s for s in all_imgs if s['imageset'] == 'train']
    val_imgs = [s for s in all_imgs if s['imageset'] == 'test']

    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))

    if not train_imgs:
        # With zero images the epoch loop below could never finish.
        raise ValueError(
            "No training images (imageset == 'train') found in {}".format(
                train_path))

    input_shape_img = (None, None, 3)
    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))
    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=train_base_nn)
    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors, trainable=train_rpn)
    # define the classifier, built on base layers
    classifier = nn.classifier(shared_layers,
                               roi_input,
                               C.num_rois,
                               len(classes_count),
                               trainable=train_final_classifier)
    # create models
    model_base = Model(img_input,
                       shared_layers)  # for computing the output shape
    model_rpn = Model(img_input, rpn[:2])  # used for training
    model_classifier = Model([img_input, roi_input],
                             classifier)  # used for training
    # this is a model that holds both the RPN and the classifier, used to load/save and freeze/unfreeze weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)
    # tensorboard
    tbCallBack = TensorBoard(log_dir=tb_log_dir,
                             histogram_freq=1,
                             write_graph=False,
                             write_images=False)
    tbCallBack.set_model(model_all)

    #NOTE: both model_rpn and model_classifier contain the base_nn

    # Best effort: training proceeds from the current initialisation when the
    # weights cannot be loaded, but the reason is reported.
    try:
        print('loading weights from {}'.format(C.initial_weights))
        model_all.load_weights(C.initial_weights, by_name=True)
    except Exception as e:
        print('Could not load pretrained model weights')
        print('Exception: {}'.format(e))

    # number of trainable parameters
    trainable_count = int(
        np.sum([K.count_params(p) for p in set(model_all.trainable_weights)]))
    non_trainable_count = int(
        np.sum(
            [K.count_params(p) for p in set(model_all.non_trainable_weights)]))

    print('Total params: {:,}'.format(trainable_count + non_trainable_count))
    print('Trainable params: {:,}'.format(trainable_count))
    print('Non-trainable params: {:,}'.format(non_trainable_count))

    if verify_trainable:
        for layer in model_all.layers:
            print(layer.name, layer.trainable)

    # Optimizers are created per call: an instance created once in the
    # signature would be shared (with its state) by every call.
    if optimizer_rpn is None:
        optimizer_rpn = Adam(lr=1e-5)
    if optimizer_classifier is None:
        optimizer_classifier = Adam(lr=1e-5)

    model_rpn.compile(optimizer=optimizer_rpn,
                      loss=[
                          Losses.rpn_loss_cls(num_anchors),
                          Losses.rpn_loss_regr(num_anchors)
                      ])
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[
            Losses.class_loss_cls,
            Losses.class_loss_regr(len(classes_count) - 1)
        ],
        metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    model_all.compile(optimizer='sgd', loss='mse')
    # save model_all as png for visualization
    if visualize_model is not None:
        plot_model(model=model_all,
                   to_file=visualize_model,
                   show_shapes=True,
                   show_layer_names=True)

    epoch_length = len(train_imgs)
    validation_epoch_length = len(val_imgs)
    num_epochs = int(num_epochs)
    iter_num = 0

    run_validation = validation_interval > 0
    if run_validation and validation_epoch_length == 0:
        # With zero images the validation loop below could never finish.
        print('No validation images found, validation will be skipped')
        run_validation = False

    # train and valid data generator
    data_gen_train = data_generators.get_anchor_gt(train_imgs,
                                                   classes_count,
                                                   C,
                                                   model_base,
                                                   K.image_dim_ordering(),
                                                   preprocessing_function,
                                                   mode='train')
    data_gen_val = data_generators.get_anchor_gt(val_imgs,
                                                 classes_count,
                                                 C,
                                                 model_base,
                                                 K.image_dim_ordering(),
                                                 preprocessing_function,
                                                 mode='val')

    # one row per iteration: rpn_cls, rpn_regr, class_cls, class_regr, accuracy
    losses_val = np.zeros((validation_epoch_length, 5))
    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()

    best_loss = np.inf
    val_best_loss = np.inf
    val_best_loss_epoch = 0

    print('Starting training')

    def write_log(callback, names, logs, batch_no):
        """Write each (name, value) pair as a scalar summary via the callback's writer."""
        for name, value in zip(names, logs):
            summary = tf.Summary()
            summary_value = summary.value.add()
            summary_value.simple_value = value
            summary_value.tag = name
            callback.writer.add_summary(summary, batch_no)
            callback.writer.flush()

    train_names = [
        'train_loss_rpn_cls', 'train_loss_rpn_reg', 'train_loss_class_cls',
        'train_loss_class_reg', 'train_total_loss', 'train_acc'
    ]
    val_names = [
        'val_loss_rpn_cls', 'val_loss_rpn_reg', 'val_loss_class_cls',
        'val_loss_class_reg', 'val_total_loss', 'val_acc'
    ]

    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:

                if len(rpn_accuracy_rpn_monitor) == epoch_length and C.verbose:
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(
                            rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                        .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print(
                            'RPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.'
                        )

                X, Y, img_data = next(data_gen_train)

                loss_rpn = None
                if train_rpn:
                    loss_rpn = model_rpn.train_on_batch(X, Y)

                P_rpn = model_rpn.predict_on_batch(X)

                R = roi_helpers.rpn_to_roi(P_rpn[0],
                                           P_rpn[1],
                                           C,
                                           K.image_dim_ordering(),
                                           use_regr=True,
                                           overlap_thresh=C.rpn_nms_threshold,
                                           flag="train")
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                X2, Y1, Y2, _ = roi_helpers.calc_iou(
                    R, img_data, C, class_mapping)

                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                num_pos = int(np.count_nonzero(Y1[0, :, -1] == 0))
                rpn_accuracy_rpn_monitor.append(num_pos)
                rpn_accuracy_for_epoch.append(num_pos)

                sel_samples = _select_roi_samples(Y1, C.num_rois)

                loss_class = None
                if train_final_classifier:
                    loss_class = model_classifier.train_on_batch(
                        [X, X2[:, sel_samples, :]],
                        [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                _store_batch_losses(losses, iter_num, loss_rpn, loss_class)
                iter_num += 1

                progbar.update(
                    iter_num,
                    [('rpn_cls', np.mean(losses[:iter_num, 0])),
                     ('rpn_regr', np.mean(losses[:iter_num, 1])),
                     ('detector_cls', np.mean(losses[:iter_num, 2])),
                     ('detector_regr', np.mean(losses[:iter_num, 3]))])

                if iter_num == epoch_length:
                    (loss_rpn_cls, loss_rpn_regr, loss_class_cls,
                     loss_class_regr, class_acc) = _mean_epoch_losses(
                         losses, train_rpn, train_final_classifier)

                    mean_overlapping_bboxes = float(sum(
                        rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if C.verbose:
                        print(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        print(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        print('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() -
                                                        start_time))

                    loss_dict_train = {
                        "rpn_cls": loss_rpn_cls,
                        "rpn_reg": loss_rpn_regr,
                        "final_cls": loss_class_cls,
                        "final_reg": loss_class_regr
                    }

                    # the watched losses are summed into the value that
                    # decides whether the weights are saved
                    curr_loss = 0
                    for l in losses_to_watch:
                        curr_loss += loss_dict_train[l]

                    iter_num = 0
                    start_time = time.time()
                    write_log(tbCallBack, train_names, [
                        loss_rpn_cls, loss_rpn_regr, loss_class_cls,
                        loss_class_regr, curr_loss, class_acc
                    ], epoch_num)

                    if curr_loss < best_loss:
                        if C.verbose:
                            print(
                                'Total loss decreased from {} to {} in training, saving weights'
                                .format(best_loss, curr_loss))
                            save_log_data = '\nTotal loss decreased from {} to {} in epoch {}/{} in training, saving weights'.format(
                                best_loss, curr_loss, epoch_num + 1,
                                num_epochs)
                            with open("./saving_log.txt", "a") as f:
                                f.write(save_log_data)

                        best_loss = curr_loss
                        model_all.save_weights(C.weights_all_path)

                    break

            except Exception as e:
                # best effort: report the failing image and move to the next
                print('Exception: {}'.format(e))
                continue

        if run_validation and (epoch_num + 1) % validation_interval == 0:
            # validation
            progbar = generic_utils.Progbar(validation_epoch_length)
            print("Validation... \n")
            # Kept separate from the training monitors so that validation
            # batches do not disturb the per-epoch RPN monitor above.
            val_rpn_accuracy = []
            while True:
                try:
                    X, Y, img_data = next(data_gen_val)

                    val_loss_rpn = None
                    if train_rpn:
                        val_loss_rpn = model_rpn.test_on_batch(X, Y)

                    P_rpn = model_rpn.predict_on_batch(X)
                    R = roi_helpers.rpn_to_roi(
                        P_rpn[0],
                        P_rpn[1],
                        C,
                        K.image_dim_ordering(),
                        use_regr=True,
                        overlap_thresh=C.rpn_nms_threshold,
                        flag="train")
                    # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                    X2, Y1, Y2, _ = roi_helpers.calc_iou(
                        R, img_data, C, class_mapping)

                    if X2 is None:
                        # no usable ROI for this image, same handling as in training
                        val_rpn_accuracy.append(0)
                        continue

                    val_rpn_accuracy.append(
                        int(np.count_nonzero(Y1[0, :, -1] == 0)))

                    sel_samples = _select_roi_samples(Y1, C.num_rois)

                    val_loss_class = None
                    if train_final_classifier:
                        val_loss_class = model_classifier.test_on_batch(
                            [X, X2[:, sel_samples, :]],
                            [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                    _store_batch_losses(losses_val, iter_num, val_loss_rpn,
                                        val_loss_class)
                    iter_num += 1

                    progbar.update(
                        iter_num,
                        [('rpn_cls', np.mean(losses_val[:iter_num, 0])),
                         ('rpn_regr', np.mean(losses_val[:iter_num, 1])),
                         ('detector_cls', np.mean(losses_val[:iter_num, 2])),
                         ('detector_regr', np.mean(losses_val[:iter_num, 3]))])

                    if iter_num == validation_epoch_length:
                        (val_loss_rpn_cls, val_loss_rpn_regr,
                         val_loss_class_cls, val_loss_class_regr,
                         val_class_acc) = _mean_epoch_losses(
                             losses_val, train_rpn, train_final_classifier)

                        mean_overlapping_bboxes = float(
                            sum(val_rpn_accuracy)) / len(val_rpn_accuracy)

                        loss_dict_valid = {
                            "rpn_cls": val_loss_rpn_cls,
                            "rpn_reg": val_loss_rpn_regr,
                            "final_cls": val_loss_class_cls,
                            "final_reg": val_loss_class_regr
                        }

                        val_curr_loss = 0
                        for l in losses_to_watch:
                            val_curr_loss += loss_dict_valid[l]

                        write_log(tbCallBack, val_names, [
                            val_loss_rpn_cls, val_loss_rpn_regr,
                            val_loss_class_cls, val_loss_class_regr,
                            val_curr_loss, val_class_acc
                        ], epoch_num)

                        if C.verbose:
                            print('[INFO VALIDATION]')
                            print(
                                'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                                .format(mean_overlapping_bboxes))
                            print(
                                'Classifier accuracy for bounding boxes from RPN: {}'
                                .format(val_class_acc))
                            print('Loss RPN classifier: {}'.format(
                                val_loss_rpn_cls))
                            print('Loss RPN regression: {}'.format(
                                val_loss_rpn_regr))
                            print('Loss Detector classifier: {}'.format(
                                val_loss_class_cls))
                            print('Loss Detector regression: {}'.format(
                                val_loss_class_regr))
                            print(
                                "current loss: %.2f, best loss: %.2f at epoch: %d"
                                % (val_curr_loss, val_best_loss,
                                   val_best_loss_epoch))
                            print('Elapsed time: {}'.format(time.time() -
                                                            start_time))

                        if val_curr_loss < val_best_loss:
                            if C.verbose:
                                print(
                                    'Total loss decreased from {} to {}, saving weights'
                                    .format(val_best_loss, val_curr_loss))
                                save_log_data = '\nTotal loss decreased from {} to {} in epoch {}/{} in validation, saving weights'.format(
                                    val_best_loss, val_curr_loss,
                                    epoch_num + 1, num_epochs)
                                with open("./saving_log.txt", "a") as f:
                                    f.write(save_log_data)
                            val_best_loss = val_curr_loss
                            val_best_loss_epoch = epoch_num
                            # same file as the training-loss checkpoint
                            model_all.save_weights(C.weights_all_path)
                        start_time = time.time()
                        iter_num = 0
                        break
                except Exception as e:
                    # Report instead of silently swallowing; KeyboardInterrupt
                    # is no longer caught, so validation can be interrupted.
                    print('Exception: {}'.format(e))
                    continue

    print('Training complete, exiting.')
Exemplo n.º 16
0
# define the full model: one Model over the combined rpn and classifier outputs
model = Model([img_input, roi_input], rpn + classifier)

# Best effort: when the pretrained weights cannot be loaded, a hint with the
# download locations is printed and the script carries on.
try:
    # print() call form keeps this fragment valid on both Python 2 and 3
    print('loading weights from {}'.format(C.base_net_weights))
    model.load_weights(C.base_net_weights, by_name=True)
except Exception:
    print('Could not load pretrained model weights. Weights can be found at {} and {}'.format(
        'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_th_dim_ordering_th_kernels_notop.h5',
        'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
    ))

optimizer = Adam(1e-5, decay=0.0)
# One loss per model output, in the order RPN cls, RPN regr, class cls, class regr.
model.compile(optimizer=optimizer,
              loss=[losses.rpn_loss_cls(num_anchors), losses.rpn_loss_regr(num_anchors), losses.class_loss_cls,
                    losses.class_loss_regr(C.num_rois, len(classes_count) - 1)],
              metrics={'dense_class_{}_loss'.format(len(classes_count)): 'accuracy'})

nb_epochs = 100

# Stop early / checkpoint on val_loss; reduce the learning rate when the
# training loss plateaus.
callbacks = [EarlyStopping(monitor='val_loss', patience=20, verbose=0),
             ModelCheckpoint(C.model_path, monitor='val_loss', save_best_only=True, verbose=0),
             ReduceLROnPlateau(monitor='loss', factor=0.1, patience=5, min_lr=1e-7, verbose=1)]
train_samples_per_epoch = 500  # len(train_imgs)
nb_val_samples = 100  # len(val_imgs),

print('Starting training')
import os
import subprocess

# Launches nvidia-smi through the shell with its stdout captured in a pipe.
proc = subprocess.Popen(["nvidia-smi"], stdout=subprocess.PIPE, shell=True)
Exemplo n.º 17
0
# Original author's open question: there appear to be three separate models
# here (model_rpn, model_classifier, model_all) -- how do they share weights?
# NOTE(review): presumably they are built from the same layer objects, so the
# weights are shared; the model construction is outside this fragment -- confirm.

# Loading pretrained weights is best-effort: on failure only a hint is printed.
# `except Exception` (instead of a bare `except`) lets Ctrl-C / SystemExit through.
try:
    print('loading weights from {}'.format(cfg.base_net_weights))
    model_rpn.load_weights(cfg.base_net_weights, by_name=True)
    model_classifier.load_weights(cfg.base_net_weights, by_name=True)
except Exception:
    # Adjacent literals are used instead of a backslash continuation inside the
    # string, which used to embed the next line's leading tabs in the message.
    print('Could not load pretrained model weights. Weights can be found in the keras application folder '
          'https://github.com/fchollet/keras/tree/master/keras/applications')

# Each trainable model gets its own Adam instance.
optimizer = Adam(lr=1e-5)
optimizer_classifier = Adam(lr=1e-5)
model_rpn.compile(optimizer=optimizer, loss=[losses.rpn_loss_cls(num_anchors), losses.rpn_loss_regr(num_anchors)])

model_classifier.compile(optimizer=optimizer_classifier,
                         loss=[losses.class_loss_cls, losses.class_loss_regr(len(classes_count) - 1)],
                         metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})

# NOTE(review): model_all is compiled with placeholder optimizer/loss values;
# presumably it is only used to load/save the combined weights -- confirm.
model_all.compile(optimizer='sgd', loss='mae')

epoch_length = 1000
num_epochs = int(cfg.num_epochs)
iter_num = 0

# WARNING: from here on `losses` is the per-iteration loss table (5 columns),
# no longer the loss-function module used in the compile() calls above.  The
# name is kept because code following this fragment may index this array.
losses = np.zeros((epoch_length, 5))
rpn_accuracy_rpn_monitor = []
rpn_accuracy_for_epoch = []
start_time = time.time()

best_loss = np.Inf
Exemplo n.º 18
0
def train_mscoco():
    """Train a Faster R-CNN (RPN + ROI classifier) on the 'train' image set.

    Steps performed:

    1. Configure a fresh ``config.Config`` (augmentation flags, ``num_rois``,
       weight paths), load the training images via ``get_data('train')`` and
       pickle the config to ``cfg.config_save_file``.
    2. Build the RPN model, the classifier model and a combined ``model_all``
       on top of one shared base network, then try to load the base weights
       (a failure is printed and otherwise ignored).
    3. For ``cfg.num_epochs`` epochs of ``len(train_imgs)`` successful
       iterations each: run one RPN update and one classifier update per
       generator batch, and at the end of the epoch save ``model_all`` to
       ``cfg.model_path`` if the summed mean loss improved.

    Any exception raised inside a training iteration is printed, the current
    weights are written to ``cfg.model_path`` and the loop moves on to the
    next batch.

    Returns:
        None.
    """
    # ===================== configuration and data loading =====================
    # Data-augmentation settings.
    cfg = config.Config()
    cfg.use_horizontal_flips = True
    cfg.use_vertical_flips = True
    cfg.rot_90 = True
    cfg.num_rois = 32
    # Pretrained base-network weights (original note: the first four ResNet
    # convolution stages).
    cfg.base_net_weights = nn.get_weight_path()
    # Destination for the trained model weights.
    cfg.model_path = './model/mscoco_frcnn.hdf5'
    # Load the training images.
    train_imgs, class_mapping = get_data('train')

    cfg.class_mapping = class_mapping
    print('Num classes (including bg) = {}'.format(len(class_mapping)))
    # Persist the complete configuration so that testing can reuse it.
    with open(cfg.config_save_file, 'wb') as config_f:
        pickle.dump(cfg, config_f)
        print(
            'Config has been written to {}, and can be loaded when testing to ensure correct results'
            .format(cfg.config_save_file))
    # Shuffle the images in place.
    random.shuffle(train_imgs)
    print('Num train samples {}'.format(len(train_imgs)))
    data_gen_train = data_generators.get_anchor_gt(train_imgs,
                                                   class_mapping,
                                                   cfg,
                                                   nn.get_img_output_length,
                                                   K.image_dim_ordering(),
                                                   mode='train')

    # ============================ model definition ============================
    # Channels-last input shape (original note: the Keras backend is TensorFlow).
    input_shape_img = (None, None, 3)
    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))
    # define the base resnet50 network
    shared_layers = nn.nn_base(img_input, trainable=False)
    # define the RPN, built on the base layers
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)
    classifier = nn.classifier(shared_layers,
                               roi_input,
                               cfg.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)
    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)
    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    # ==================== load base-network weights (best effort) ====================
    try:
        print('loading base model weights from {}'.format(
            cfg.base_net_weights))
        model_rpn.load_weights(cfg.base_net_weights, by_name=True)
        model_classifier.load_weights(cfg.base_net_weights, by_name=True)
    except Exception as e:
        print('基本模型加载ImageNet权值: ', e)
        print('Could not load pretrained model weights on ImageNet.')

    # ============================== optimizers ==============================
    # Optimizer objects are created first and then handed to compile().
    optimizer = Adam(lr=1e-5)
    optimizer_classifier = Adam(lr=1e-5)
    model_rpn.compile(optimizer=optimizer,
                      loss=[
                          losses_fn.rpn_loss_cls(num_anchors),
                          losses_fn.rpn_loss_regr(num_anchors)
                      ])
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[
            losses_fn.class_loss_cls,
            losses_fn.class_loss_regr(len(class_mapping) - 1)
        ],
        metrics={'dense_class_{}'.format(len(class_mapping)): 'accuracy'})
    # NOTE(review): placeholder optimizer/loss; model_all is only used below to
    # save weights.
    model_all.compile(optimizer='sgd', loss='mae')

    # ======================= training / reporting state =======================
    epoch_length = len(train_imgs)
    num_epochs = int(cfg.num_epochs)
    iter_num = 0
    # Columns: rpn_cls, rpn_regr, detector_cls, detector_regr, detector accuracy.
    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()
    best_loss = np.Inf

    logger = Logger(os.path.join('.', 'log.txt'))

    print('Starting training')
    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)
        logger.write('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:
                if len(rpn_accuracy_rpn_monitor) == epoch_length and cfg.verbose:
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(
                            rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                        .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print(
                            'RPN is not producing bounding boxes that overlap'
                            ' the ground truth boxes. Check RPN settings or keep training.'
                        )

                # Image batch, RPN targets (cls / regr) and the box metadata.
                X, Y, img_data = next(data_gen_train)

                # Train the RPN on this batch ...
                loss_rpn = model_rpn.train_on_batch(X, Y)

                # ... then use its fresh predictions as region proposals.
                P_rpn = model_rpn.predict_on_batch(X)

                result = roi_helpers.rpn_to_roi(P_rpn[0],
                                                P_rpn[1],
                                                cfg,
                                                K.image_dim_ordering(),
                                                use_regr=True,
                                                overlap_thresh=0.7,
                                                max_boxes=300)
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                # Returns the regions, class targets, regression targets and IoUs
                # (the IoUs are not used here).
                X2, Y1, Y2, _ = roi_helpers.calc_iou(
                    result, img_data, cfg, class_mapping)

                if X2 is None:
                    # No usable proposals: count it and fetch the next batch.
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                # The last class column is treated as background: 1 -> negative
                # ROI, 0 -> positive ROI.  np.where on the 1-D column returns a
                # 1-tuple, so [0] is always the index array (possibly empty).
                neg_samples = np.where(Y1[0, :, -1] == 1)[0]
                pos_samples = np.where(Y1[0, :, -1] == 0)[0]

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append(len(pos_samples))

                if cfg.num_rois > 1:
                    # Up to half of the ROIs are positives; negatives fill the rest.
                    if len(pos_samples) < cfg.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, cfg.num_rois // 2,
                            replace=False).tolist()
                    num_neg_wanted = cfg.num_rois - len(selected_pos_samples)
                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples, num_neg_wanted,
                            replace=False).tolist()
                    except ValueError:
                        # Fewer negatives than requested: sample with
                        # replacement.  If there are no negatives at all this
                        # raises again and the outer handler skips the batch.
                        selected_neg_samples = np.random.choice(
                            neg_samples, num_neg_wanted,
                            replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    if np.random.randint(0, 2):
                        primary, fallback = neg_samples, pos_samples
                    else:
                        primary, fallback = pos_samples, neg_samples
                    # Fall back to the other pool when the drawn one is empty.
                    pool = primary if len(primary) > 0 else fallback
                    # Keep the index in a list: a scalar index would drop the
                    # ROI axis from X2 / Y1 / Y2 below.
                    sel_samples = [int(random.choice(pool))]

                # Train the classifier on the selected ROIs.
                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]

                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]
                losses[iter_num, 4] = loss_class[3]

                iter_num += 1

                progbar.update(
                    iter_num,
                    [('rpn_cls', np.mean(losses[:iter_num, 0])),
                     ('rpn_regr', np.mean(losses[:iter_num, 1])),
                     ('detector_cls', np.mean(losses[:iter_num, 2])),
                     ('detector_regr', np.mean(losses[:iter_num, 3]))])

                if iter_num == epoch_length:
                    # End of epoch: summarise, maybe checkpoint, leave the while loop.
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(
                        rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if cfg.verbose:
                        logger.write(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        logger.write(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))
                        logger.write(
                            'Loss RPN classifier: {}'.format(loss_rpn_cls))
                        logger.write(
                            'Loss RPN regression: {}'.format(loss_rpn_regr))
                        logger.write('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        logger.write('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        logger.write('Elapsed time: {}'.format(time.time() -
                                                               start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if cfg.verbose:
                            logger.write(
                                'Total loss decreased from {} to {}, saving weights'
                                .format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_all.save_weights(cfg.model_path)
                    break

            except Exception as e:
                # Deliberate best-effort: report, checkpoint, try the next batch.
                # NOTE(review): this writes to the same path as the "best loss"
                # checkpoint above and can therefore overwrite it.
                print('Exception: {}'.format(e))
                # save model
                model_all.save_weights(cfg.model_path)
                continue
    print('Training complete, exiting.')
Exemplo n.º 19
0
        ))

# The RPN and the classifier each get their own Nadam instance, both with the
# same learning rate.
optimizer_rpn = Nadam(lr=1e-5)
optimizer_classifier = Nadam(lr=1e-5)

# RPN: one loss for the objectness output, one for the box regression output.
model_rpn.compile(optimizer=optimizer_rpn,
                  loss=[losses.rpn_loss_cls(num_anchors), losses.rpn_loss_regr(num_anchors)])

# Classifier heads, kept as (loss, weight) pairs so that each loss visibly
# stays next to the weight it is combined with.
_classifier_heads = (
    (losses.class_loss_face(), C.lambda_face),
    (losses.class_loss_pose(), C.lambda_pose),
    (losses.class_loss_gender(), C.lambda_gender),
    (losses.class_loss_viz(), C.lambda_viz),
    (losses.class_loss_landmark(), C.lambda_landmark),
    (losses.class_loss_regr(), C.lambda_regr),
)
# Split the pairs back into the two parallel lists that compile() expects.
classifier_loss, classifier_loss_weight = (list(column) for column in zip(*_classifier_heads))

model_classifier.compile(loss=classifier_loss,
                         loss_weights=classifier_loss_weight,
                         optimizer=optimizer_classifier)

# An earlier, disabled variant passed the losses and weights as dicts keyed by
# output name ('face_out', 'pose_out', 'gender_out', 'viz_out', 'landmark_out')
# instead of positional lists.

# Debugger breakpoint left in by the original author.
bp()
Exemplo n.º 20
0
def train_net():
    """Train a Faster R-CNN (RPN + ROI classifier) from a simple label file.

    Steps performed:

    1. Configure a fresh ``config.Config`` (augmentation disabled,
       ``num_rois = 32``), parse ``annotations_train.txt`` with ``get_data``,
       make sure a ``'bg'`` class exists, and pickle the config to
       ``cfg.config_save_file``.
    2. Build the RPN model, the classifier model and a combined ``model_all``
       on one shared base network, then try to load weights from
       ``cfg.model_path`` (a failure is printed and otherwise ignored).
    3. For ``cfg.num_epochs`` epochs of 10 successful iterations each: run one
       RPN update and one classifier update per generator batch, and at the
       end of the epoch save ``model_all`` to ``cfg.model_path`` if the summed
       mean loss improved.

    Any exception raised inside a training iteration is printed, the current
    weights are written to ``cfg.model_path`` and the loop moves on to the
    next batch.

    Returns:
        None.
    """
    # config for data argument
    cfg = config.Config()

    cfg.use_horizontal_flips = False
    cfg.use_vertical_flips = False
    cfg.rot_90 = False
    cfg.num_rois = 32  # original note: the value set in config is 4
    cfg.base_net_weights = os.path.join('./model/', nn.get_weight_path())

    # TODO: the only file should to be change for other data to train
    cfg.model_path = 'samples.hdf5'

    cfg.simple_label_file = 'annotations_train.txt'  # labels generated for the training set

    all_images, classes_count, class_mapping = get_data(cfg.simple_label_file)

    # Make sure the background class exists and gets the last class index.
    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    cfg.class_mapping = class_mapping
    with open(cfg.config_save_file, 'wb') as config_f:
        pickle.dump(cfg, config_f)
        print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(
            cfg.config_save_file))

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))
    random.shuffle(all_images)
    train_imgs = [s for s in all_images if s['imageset'] == 'trainval']
    val_imgs = [s for s in all_images if s['imageset'] == 'test']

    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))

    # Ground-truth generators for the two image sets.
    data_gen_train = data_generators.get_anchor_gt(train_imgs, classes_count, cfg, nn.get_img_output_length,
                                                   K.image_dim_ordering(), mode='train')

    # NOTE(review): the validation generator is created but never consumed in
    # this function; the call is kept in case it has side effects -- confirm.
    data_gen_val = data_generators.get_anchor_gt(val_imgs, classes_count, cfg, nn.get_img_output_length,
                                                 K.image_dim_ordering(), mode='val')

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)
    # Original author's notes on the classifier arguments (not verifiable here):
    #   - classes_count maps each class to its count, e.g. {'cow': 4, 'dog': 10, ...}
    #   - cfg.num_rois is the number of regions of interest taken per step
    #   - roi_input carries the boxes
    #   - the classifier yields the two detector heads [out_class, out_reg]
    #   - shared_layers is the base network's output feature map
    classifier = nn.classifier(shared_layers, roi_input, cfg.num_rois, nb_classes=len(classes_count), trainable=True)
    # Define the RPN-only and classifier-only models.
    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    try:
        # Report the file that is actually loaded.
        # NOTE(review): this resumes from cfg.model_path (the training output
        # file); cfg.base_net_weights is configured above but never loaded --
        # confirm that is intended.
        print('loading weights from {}'.format(cfg.model_path))
        model_rpn.load_weights(cfg.model_path, by_name=True)
        model_classifier.load_weights(cfg.model_path, by_name=True)
    except Exception as e:
        print(e)
        print('Could not load pretrained model weights. Weights can be found in the keras application folder '
              'https://github.com/fchollet/keras/tree/master/keras/applications')

    optimizer = Adam(lr=1e-5)
    optimizer_classifier = Adam(lr=1e-5)
    model_rpn.compile(optimizer=optimizer,
                      loss=[losses_fn.rpn_loss_cls(num_anchors), losses_fn.rpn_loss_regr(num_anchors)])
    model_classifier.compile(optimizer=optimizer_classifier,
                             loss=[losses_fn.class_loss_cls, losses_fn.class_loss_regr(len(classes_count) - 1)],
                             metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    # NOTE(review): placeholder optimizer/loss; model_all is only used below to
    # save weights.
    model_all.compile(optimizer='sgd', loss='mae')

    epoch_length = 10
    num_epochs = int(cfg.num_epochs)
    iter_num = 0

    # Columns: rpn_cls, rpn_regr, detector_cls, detector_regr, detector accuracy.
    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()

    best_loss = np.Inf

    print('Starting training')

    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:
                # Monitor the average number of positive boxes over the last
                # epoch_length iterations.
                if len(rpn_accuracy_rpn_monitor) == epoch_length and cfg.verbose:
                    mean_overlapping_bboxes = float(sum(rpn_accuracy_rpn_monitor)) / len(rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'.format(
                            mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        # Never hitting a positive sample points at an RPN problem.
                        print('RPN is not producing bounding boxes that overlap'
                              ' the ground truth boxes. Check RPN settings or keep training.')

                # Fetch the next batch: X is the image, Y the RPN targets
                # (classification and regression), img_data the (possibly
                # augmented) annotation record.
                X, Y, img_data = next(data_gen_train)

                # The RPN and the classifier are trained alternately: first one
                # RPN update, then proposals from the updated RPN.
                loss_rpn = model_rpn.train_on_batch(X, Y)
                P_rpn = model_rpn.predict_on_batch(X)

                # Turn the RPN outputs into candidate regions.
                result = roi_helpers.rpn_to_roi(P_rpn[0], P_rpn[1], cfg, K.image_dim_ordering(), use_regr=True,
                                                overlap_thresh=0.7,
                                                max_boxes=300)

                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                # Per the original author's notes: X2 holds the kept boxes
                # (background and matched), Y1 the one-hot class targets, Y2 the
                # per-class regression targets; the fourth value (IoUs) is for
                # debugging only and is not used here.
                X2, Y1, Y2, _ = roi_helpers.calc_iou(result, img_data, cfg, class_mapping)

                if X2 is None:
                    # No valid proposals: record it and move on to the next batch.
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                # The last class column is treated as background: 1 -> negative
                # ROI, 0 -> positive ROI.  np.where on the 1-D column returns a
                # 1-tuple, so [0] is always the index array (possibly empty).
                neg_samples = np.where(Y1[0, :, -1] == 1)[0]
                pos_samples = np.where(Y1[0, :, -1] == 0)[0]

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append(len(pos_samples))

                if cfg.num_rois > 1:
                    # Pick num_rois boxes for the classifier step: up to half of
                    # them positives, negatives fill the rest.
                    if len(pos_samples) < cfg.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(pos_samples, cfg.num_rois // 2, replace=False).tolist()
                    num_neg_wanted = cfg.num_rois - len(selected_pos_samples)
                    try:
                        selected_neg_samples = np.random.choice(neg_samples, num_neg_wanted,
                                                                replace=False).tolist()
                    except ValueError:
                        # Fewer negatives than requested: sample with
                        # replacement.  If there are no negatives at all this
                        # raises again and the outer handler skips the batch.
                        selected_neg_samples = np.random.choice(neg_samples, num_neg_wanted,
                                                                replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    if np.random.randint(0, 2):
                        primary, fallback = neg_samples, pos_samples
                    else:
                        primary, fallback = pos_samples, neg_samples
                    # Fall back to the other pool when the drawn one is empty.
                    pool = primary if len(primary) > 0 else fallback
                    # Keep the index in a list: a scalar index would drop the
                    # ROI axis from X2 / Y1 / Y2 below.
                    sel_samples = [int(random.choice(pool))]

                # Train the classifier on the selected ROI positions.
                loss_class = model_classifier.train_on_batch([X, X2[:, sel_samples, :]],
                                                             [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                losses[iter_num, 0] = loss_rpn[1]  # rpn_cls
                losses[iter_num, 1] = loss_rpn[2]  # rpn_regr

                losses[iter_num, 2] = loss_class[1]  # detector_cls
                losses[iter_num, 3] = loss_class[2]  # detector_regr
                losses[iter_num, 4] = loss_class[3]  # detector accuracy

                iter_num += 1

                # Progress bar shows the running means for the current epoch.
                progbar.update(iter_num,
                               [('rpn_cls', np.mean(losses[:iter_num, 0])), ('rpn_regr', np.mean(losses[:iter_num, 1])),
                                ('detector_cls', np.mean(losses[:iter_num, 2])),
                                ('detector_regr', np.mean(losses[:iter_num, 3]))])

                if iter_num == epoch_length:
                    # End of epoch: `losses` holds one row per iteration.
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if cfg.verbose:
                        # Print the epoch means.
                        print('Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'.format(
                            mean_overlapping_bboxes))
                        print('Classifier accuracy for bounding boxes from RPN: {}'.format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(loss_class_cls))
                        print('Loss Detector regression: {}'.format(loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() - start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        # Weights are stored only when this epoch's loss beats
                        # the best one so far.
                        if cfg.verbose:
                            print('Total loss decreased from {} to {}, saving weights'.format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_all.save_weights(cfg.model_path)

                    # Leave the while loop and start the next epoch.
                    break

            except Exception as e:
                # Deliberate best-effort: report, checkpoint, try the next batch.
                # NOTE(review): this writes to the same path as the "best loss"
                # checkpoint above and can therefore overwrite it.
                print('Exception: {}'.format(e))
                # save model
                model_all.save_weights(cfg.model_path)
                continue
    print('Training complete, exiting.')
Exemplo n.º 21
0
# Define the full model: one graph that outputs the RPN heads followed by the
# classifier heads.
model = Model([img_input, roi_input], rpn + classifier)

# Loading pretrained weights is best-effort: on failure a message is printed
# and the script carries on with the current initialisation.
# `except Exception` (instead of a bare `except`) lets Ctrl-C / SystemExit through.
try:
    # print() with a single pre-formatted argument works on Python 2 and 3.
    print('loading weights from {}'.format(C.base_net_weights))
    model.load_weights(C.base_net_weights, by_name=True)
except Exception:
    print('Could not load pretrained model weights')

optimizer = Adam(1e-6)
# One loss per model output, in output order: RPN classification, RPN
# regression, detector classification, detector regression.
model.compile(optimizer=optimizer,
              loss=[
                  losses.rpn_loss_cls(num_anchors),
                  losses.rpn_loss_regr(num_anchors), losses.class_loss_cls,
                  losses.class_loss_regr(C.num_rois)
              ])

nb_epochs = 50

# Stop after 2 epochs without val_loss improvement and keep only the best
# checkpoint at C.model_path.
callbacks = [
    EarlyStopping(monitor='val_loss', patience=2, verbose=0),
    ModelCheckpoint(C.model_path,
                    monitor='val_loss',
                    save_best_only=True,
                    verbose=0)
]
train_samples_per_epoch = 2000  #len(train_imgs)
nb_val_samples = 500  # len(val_imgs),

print('Starting training')
Exemplo n.º 22
0
def build_models(weight_path,
                 init_models=False,
                 train_view_only=False,
                 create_siam=False):
    """Build, load and compile the detection models, optionally the siamese ones.

    The graph is assembled from module-level objects that must already exist
    when this is called: the input tensors (``img_input``, ``roi_input`` and,
    for the siamese variant, the ``*_ref`` / ``*_dp`` / ``*_dm`` inputs and
    ``labels_input``), the config ``C``, ``class_mapping`` and the optimizers
    (``optimizer``, ``optimizer_classifier``, ``optimizer_trip``).

    Args:
        weight_path: Weights file loaded by layer name into the RPN and the
            classifier models. This load is unconditional and not guarded, so
            a failure propagates to the caller.
        init_models: When true, first try to load ``C.base_net_weights`` into
            both models; a failure there is only reported on stdout.
        train_view_only: Accepted for interface compatibility. It currently
            has no effect: the original code assigned ``False`` to both
            trainable flags in either branch.
        create_siam: When true, additionally build the triplet ("siamese")
            models on top of the classifier's third output and return those
            instead of the plain detection models.

    Returns:
        ``(model_view_only, model_inner, model_trip)`` when ``create_siam`` is
        true, otherwise ``(model_rpn, model_classifier, model_all)``.
    """
    # NOTE(review): both branches of the former `if train_view_only` set the
    # same values, so the flag is a no-op. Restore distinct values here if the
    # view-only mode is meant to unfreeze part of the network.
    trainable_cls = False
    trainable_view = False

    # Base network (per the original comment: ResNet here, could be VGG, ...).
    shared_layers = nn.nn_base(img_input, trainable=trainable_cls)

    # Region proposal network built on top of the shared layers.
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)

    # Detector head; it also returns an inner feature layer that the siamese
    # variant wraps into its own model below.
    classifier, inner_layer = nn.classifier(shared_layers,
                                            roi_input,
                                            C.num_rois,
                                            nb_classes=C.num_classes,
                                            trainable_cls=trainable_cls,
                                            trainable_view=trainable_view)

    # L2-normalise the inner layer along axis 2.
    inner_layer = Lambda(lambda x: tf.nn.l2_normalize(x, dim=2))(inner_layer)

    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)
    # Holds both the RPN and the classifier; used to load/save weights for
    # the two models together.
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    if init_models:
        try:
            print('loading weights from {}'.format(C.base_net_weights))
            model_rpn.load_weights(C.base_net_weights, by_name=True)
            model_classifier.load_weights(C.base_net_weights, by_name=True)
        except Exception:
            # Best effort only. `except Exception` (not a bare `except`) lets
            # KeyboardInterrupt / SystemExit propagate.
            print(
                'Could not load pretrained model weights. Weights can be found at {} and {}'
                .format(
                    'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_th_dim_ordering_th_kernels_notop.h5',
                    'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
                ))

    # Load the pre-trained detector weights (always, after the optional
    # base-network initialisation above).
    model_rpn.load_weights(weight_path, by_name=True)
    model_classifier.load_weights(weight_path, by_name=True)

    model_rpn.compile(optimizer=optimizer,
                      loss=[
                          losses.rpn_loss_cls(num_anchors),
                          losses.rpn_loss_regr(num_anchors)
                      ])
    # Classifier compiled with three losses: class, box regression and view.
    model_classifier.compile(optimizer=optimizer_classifier,
                             loss=[
                                 losses.class_loss_cls,
                                 losses.class_loss_regr(C.num_classes - 1),
                                 losses.class_loss_view_weight(
                                     C.num_classes, roi_num=C.num_rois)
                             ],
                             metrics=['accuracy'])

    # NOTE(review): presumably placeholder settings, since model_all is only
    # described as a container for loading/saving weights - confirm.
    model_all.compile(optimizer='sgd', loss='mae')

    if not create_siam:
        return model_rpn, model_classifier, model_all

    # ---- siamese / triplet variant -------------------------------------
    model_view_only = Model([img_input, roi_input], classifier[2])
    model_inner = Model([img_input, roi_input], inner_layer)

    # Apply the shared sub-models to the three triplet inputs (ref / dp / dm).
    # The inner_* tensors are not consumed by the active L2 formulation below;
    # the calls are kept so the graph is built exactly as before.
    inner_ref = model_inner([img_input_ref, roi_input_ref])
    inner_dp = model_inner([img_input_dp, roi_input_dp])
    inner_dm = model_inner([img_input_dm, roi_input_dm])
    view_ref_base = model_view_only([img_input_ref, roi_input_ref])
    view_dp_base = model_view_only([img_input_dp, roi_input_dp])
    view_dm_base = model_view_only([img_input_dm, roi_input_dm])

    # Active formulation: L2 distance between label-selected view slices.
    view_ref = SliceTensor(len(class_mapping),
                           C.num_rois)([view_ref_base, labels_input])
    view_dp = SliceTensor(len(class_mapping),
                          C.num_rois)([view_dp_base, labels_input])
    view_dm = SliceTensor(len(class_mapping),
                          C.num_rois)([view_dm_base, labels_input])

    distance_dp = Lambda(euclidean_distance,
                         output_shape=eucl_dist_output_shape)(
                             [view_dp, view_ref])
    distance_dm = Lambda(euclidean_distance,
                         output_shape=eucl_dist_output_shape)(
                             [view_dm, view_ref])
    distance_dp = Lambda(l2_layer,
                         output_shape=l2_layer_output_shape,
                         name='dp_l2_layer')(distance_dp)
    distance_dm = Lambda(l2_layer,
                         output_shape=l2_layer_output_shape,
                         name='dm_l2_layer')(distance_dm)

    # Triplet output: ratio of the dp distance to the dm distance; epsilon
    # guards against division by zero.
    trip = Lambda(lambda x: x[0] / (x[1] + K.epsilon()))(
        [distance_dp, distance_dm])

    # Two earlier, now disabled, formulations used cosine distance instead:
    # one on the L2-normalised inner features, one on the L2-normalised view
    # slices. Both concatenated the dm/dp distances, scaled them by a
    # variable initialised to 8 and applied a softmax trained with
    # 'categorical_crossentropy'.

    model_trip = Model([
        img_input_ref, roi_input_ref, img_input_dp, roi_input_dp,
        img_input_dm, roi_input_dm, labels_input
    ], [view_ref_base, trip])

    # Losses for the L2 formulation: view loss on the reference output and
    # mean squared error on the distance ratio.
    model_trip.compile(optimizer=optimizer_trip,
                       loss=[
                           losses.class_loss_view_weight(
                               C.num_classes, roi_num=C.num_rois), 'mse'
                       ])

    return model_view_only, model_inner, model_trip
    model_classifier.load_weights(C.base_net_weights, by_name=True)
except Exception:
    print(
        'Could not load pretrained model weights. Weights can be found in the keras application folder \
		https://github.com/fchollet/keras/tree/master/keras/applications')

# Separate Adam optimizers (same learning rate) for the RPN and the detector.
optimizer = Adam(lr=1e-5)
optimizer_classifier = Adam(lr=1e-5)
# RPN: one classification loss and one box-regression loss.
model_rpn.compile(
    optimizer=optimizer,
    loss=[losses.rpn_loss_cls(num_anchors),
          losses.rpn_loss_regr(num_anchors)])
# Detector: class loss and box-regression loss; accuracy is tracked on the
# output named 'dense_class_<number of classes>'.
model_classifier.compile(
    optimizer=optimizer_classifier,
    loss=[losses.class_loss_cls,
          losses.class_loss_regr(num_classes=1)],  # number of classes, excluding background
    metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
# NOTE(review): presumably model_all is only a container for saving/loading
# the combined weights, so its optimizer and loss are placeholders - confirm.
model_all.compile(optimizer='sgd', loss='mae')

# Number of training iterations that make up one "epoch" in this script.
epoch_length = 5
num_epochs = int(options.num_epochs)
iter_num = 0

# NOTE: from here on `losses` is the per-iteration loss table (one row per
# iteration, 5 columns) and no longer the `losses` module used above.
losses = np.zeros((epoch_length, 5))
rpn_accuracy_rpn_monitor = []
rpn_accuracy_for_epoch = []
start_time = time.time()

# Start from +inf so the first completed epoch always counts as an improvement.
best_loss = np.Inf

# Inverse lookup: class index -> class name.
class_mapping_inv = {v: k for k, v in class_mapping.items()}
Exemplo n.º 24
0
# This model holds both the RPN and the classifier; it is used to load/save
# weights for the two models together.
model_all = Model([img_input, roi_input], rpn[:2] + classifier)

# Best-effort initialisation from the base-network weights: a failure is only
# reported and the script carries on.
try:
    print('loading weights from {}'.format(C.base_net_weights))
    model_rpn.load_weights(C.base_net_weights, by_name=True)
    model_classifier.load_weights(C.base_net_weights, by_name=True)
except Exception:
    # `except Exception` instead of a bare `except` so that Ctrl-C and
    # SystemExit are not swallowed. The message text (including the run of
    # spaces before the URL) is unchanged.
    print('Could not load pretrained model weights. Weights can be found in the keras application folder '
          '        https://github.com/fchollet/keras/tree/master/keras/applications')

# Separate Adam optimizers (same learning rate) for the RPN and the detector.
optimizer = Adam(lr=1e-5)
optimizer_classifier = Adam(lr=1e-5)
model_rpn.compile(optimizer=optimizer,
                  loss=[losses.rpn_loss_cls(num_anchors),
                        losses.rpn_loss_regr(num_anchors)])
# Accuracy is tracked on the output named 'dense_class_<number of classes>'.
model_classifier.compile(optimizer=optimizer_classifier,
                         loss=[losses.class_loss_cls,
                               losses.class_loss_regr(len(classes_count) - 1)],
                         metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
# NOTE(review): presumably placeholder settings, since model_all only serves
# to load/save the combined weights - confirm.
model_all.compile(optimizer='sgd', loss='mae')

# Number of training iterations that make up one "epoch" in this script.
epoch_length = 1000
num_epochs = int(options.num_epochs)
iter_num = 0

# NOTE: from here on `losses` is the per-iteration loss table (one row per
# iteration, 5 columns) and no longer the `losses` module used above.
losses = np.zeros((epoch_length, 5))
rpn_accuracy_for_epoch = []
start_time = time.time()

# NOTE(review): hard-coded starting threshold, presumably the best loss of an
# earlier run being resumed - confirm before reusing this script.
best_loss = 1.8663825974439379

# Inverse lookup: class index -> class name.
class_mapping_inv = {v: k for k, v in class_mapping.items()}
print('Starting training')
# This model holds both the RPN and the classifier; it is used to load/save
# weights for the two models together.
model_all = Model([img_input, roi_input], rpn[:2] + classifier)

# Best-effort initialisation from the base-network weights: a failure is only
# reported and the script carries on.
try:
    print('loading weights from {}'.format(C.base_net_weights))
    model_rpn.load_weights(C.base_net_weights, by_name=True)
    model_classifier.load_weights(C.base_net_weights, by_name=True)
except Exception:
    # `except Exception` instead of a bare `except` so that Ctrl-C and
    # SystemExit are not swallowed. The two tabs that the original embedded in
    # the message through a line continuation are written as explicit escapes,
    # so the printed text is unchanged.
    print('Could not load pretrained model weights. Weights can be found in the keras application folder '
          '\t\thttps://github.com/fchollet/keras/tree/master/keras/applications')

# Separate Adam optimizers (same learning rate) for the RPN and the detector.
optimizer = Adam(lr=1e-5)
optimizer_classifier = Adam(lr=1e-5)
model_rpn.compile(optimizer=optimizer,
                  loss=[losses.rpn_loss_cls(num_anchors),
                        losses.rpn_loss_regr(num_anchors)])
# Accuracy is tracked on the output named 'dense_class_<number of classes>'.
model_classifier.compile(optimizer=optimizer_classifier,
                         loss=[losses.class_loss_cls,
                               losses.class_loss_regr(len(classes_count) - 1)],
                         metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
# NOTE(review): presumably placeholder settings, since model_all only serves
# to load/save the combined weights - confirm.
model_all.compile(optimizer='sgd', loss='mae')

# Number of training iterations that make up one "epoch" in this script.
epoch_length = 50
num_epochs = int(options.num_epochs)
iter_num = 0

# NOTE: from here on `losses` is the per-iteration loss table (one row per
# iteration, 5 columns) and no longer the `losses` module used above.
losses = np.zeros((epoch_length, 5))
rpn_accuracy_rpn_monitor = []
rpn_accuracy_for_epoch = []
start_time = time.time()

# Start from +inf so the first completed epoch always counts as an improvement.
best_loss = np.Inf

# Inverse lookup: class index -> class name.
class_mapping_inv = {v: k for k, v in class_mapping.items()}
print('Starting training')
Exemplo n.º 26
0
def build_and_train(hype_space, save_best_weights=False):
    """Build and train a Faster R-CNN for one hyper-parameter setting.

    Data is read from a fixed annotation file, the config is pickled to a
    fixed path, and the RPN and detector are trained alternately for a fixed
    number of epochs. Whenever the summed epoch loss improves, the result
    record is refreshed and, optionally, weights are saved and evaluated.

    Args:
        hype_space: Mapping with the keys ``'num_rois'``, ``'kernel_size'``,
            ``'optimizer_lr'``, ``'optimizer_decay'``, ``'sgd_lr'`` and
            ``'sgd_decay'``.
        save_best_weights: When true, every improvement writes the combined
            weights to ``<prefix><epoch>.hdf5``, runs
            ``measure_map.measure_map`` on them and appends to the diagnostics
            file. When false, the AP fields of the result are the string
            ``'not applicable'``.

    Returns:
        ``(model_name, result)`` on success, where ``result`` is the record of
        the best epoch (losses, APs, model name, search space, status).
        If the final session clean-up raises, a dict with ``'status'`` set to
        ``STATUS_FAIL`` plus ``'err'`` and ``'traceback'`` is returned instead.
    """
    train_path = '/home/comp/e4252392/retraindata4frcnn.txt'
    config_output_filename = '/home/comp/e4252392/hyperopt/hyperopt_config.pickle'
    num_epochs = 20
    # The next two paths are only used when save_best_weights is set.
    diagnose_path = '/home/comp/e4252392/hyperopt/models/hyperopt_loss_ap_plt.npy'
    # Prefix only: the epoch number and '.hdf5' are appended per checkpoint.
    real_model_path = '/home/comp/e4252392/hyperopt/models/hyperopt_model_plt_'

    print("Hyperspace:")
    print(hype_space)
    C = config.Config()
    # Cast because num_rois is later used for integer arithmetic and sample
    # counts; presumably the search space yields floats - confirm.
    C.num_rois = int(hype_space['num_rois'])
    C.base_net_weights = 'model_frcnn.hdf5'

    # ---- data ---------------------------------------------------------
    all_imgs, classes_count, class_mapping = get_data(train_path)
    if 'bg' not in classes_count:
        # Make sure a background class exists, mapped to the next free index.
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)
    C.class_mapping = class_mapping

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))

    with open(config_output_filename, 'wb') as config_f:
        pickle.dump(C, config_f)
        print(
            'Config has been written to {}, and can be loaded when testing to ensure correct results'
            .format(config_output_filename))

    random.shuffle(all_imgs)
    # Every image is used for training; there is no validation split here.
    train_imgs = list(all_imgs)
    print('Num train samples {}'.format(len(train_imgs)))

    data_gen_train = data_generators.get_anchor_gt(train_imgs,
                                                   classes_count,
                                                   C,
                                                   nn.get_img_output_length,
                                                   K.image_dim_ordering(),
                                                   mode='train')

    # ---- model --------------------------------------------------------
    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))
    kernel_size = int(hype_space['kernel_size'])
    shared_layers = nn.nn_base(kernel_size, img_input, trainable=True)

    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn = nn.rpn(kernel_size, shared_layers, num_anchors)

    classifier = nn.classifier(kernel_size,
                               shared_layers,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(classes_count),
                               trainable=True)

    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)
    # Combined model, used below only to save the weights of both parts.
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    try:
        print('loading weights from {}'.format(C.base_net_weights))
        model_rpn.load_weights(C.base_net_weights, by_name=True)
        model_classifier.load_weights(C.base_net_weights, by_name=True)
    except Exception:
        # Best effort only. The three tabs the original embedded through a
        # line continuation are written as escapes; the text is unchanged.
        print(
            'Could not load pretrained model weights. Weights can be found in the keras application folder '
            '\t\t\thttps://github.com/fchollet/keras/tree/master/keras/applications')

    optimizer = Adam(lr=hype_space['optimizer_lr'],
                     decay=hype_space['optimizer_decay'])
    optimizer_classifier = Adam(lr=hype_space['optimizer_lr'],
                                decay=hype_space['optimizer_decay'])
    model_rpn.compile(optimizer=optimizer,
                      loss=[
                          thelosses.rpn_loss_cls(num_anchors),
                          thelosses.rpn_loss_regr(num_anchors)
                      ])
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[
            thelosses.class_loss_cls,
            thelosses.class_loss_regr(len(classes_count) - 1)
        ],
        metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    sgd = SGD(lr=hype_space['sgd_lr'], decay=hype_space['sgd_decay'])
    model_all.compile(optimizer=sgd, loss='mae')

    # ---- training -----------------------------------------------------
    epoch_length = 10
    iter_num = 0
    # One row per iteration: rpn_cls, rpn_regr, det_cls, det_regr, det_acc.
    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()
    best_loss = np.Inf
    print('Starting training')

    loss_array = []
    ap_array = []
    epoch_array = [0]

    result = {}
    model_name = ''

    for epoch_num in range(num_epochs):
        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:
                if len(rpn_accuracy_rpn_monitor) == epoch_length and C.verbose:
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(
                            rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                        .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print(
                            'RPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.'
                        )

                # One RPN step, then turn its predictions into ROIs.
                X, Y, img_data = next(data_gen_train)
                loss_rpn = model_rpn.train_on_batch(X, Y)
                P_rpn = model_rpn.predict_on_batch(X)

                R = roi_helpers.rpn_to_roi(P_rpn[0],
                                           P_rpn[1],
                                           C,
                                           K.image_dim_ordering(),
                                           use_regr=True,
                                           overlap_thresh=0.7,
                                           max_boxes=300)
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(
                    R, img_data, C, class_mapping)
                if X2 is None:
                    # No usable ROI for this image: record it and move on.
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                # ROIs whose last class column is 1 are treated as negatives
                # (presumably the 'bg' column - confirm), the rest as
                # positives. np.where on this slice returns a non-empty tuple,
                # so its first element is the index array.
                neg_samples = np.where(Y1[0, :, -1] == 1)[0]
                pos_samples = np.where(Y1[0, :, -1] == 0)[0]
                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append(len(pos_samples))

                if C.num_rois > 1:
                    # At most half of the ROIs are positives ...
                    if len(pos_samples) < C.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, C.num_rois // 2,
                            replace=False).tolist()
                    # ... and the remainder is filled with negatives, sampled
                    # with replacement only if there are too few of them
                    # (np.random.choice raises ValueError in that case).
                    num_neg = C.num_rois - len(selected_pos_samples)
                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples, num_neg, replace=False).tolist()
                    except ValueError:
                        selected_neg_samples = np.random.choice(
                            neg_samples, num_neg, replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # Single-ROI mode: pick a negative or a positive at random.
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]
                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]
                losses[iter_num, 4] = loss_class[3]

                iter_num += 1
                progbar.update(
                    iter_num,
                    [('rpn_cls', np.mean(losses[:iter_num, 0])),
                     ('rpn_regr', np.mean(losses[:iter_num, 1])),
                     ('detector_cls', np.mean(losses[:iter_num, 2])),
                     ('detector_regr', np.mean(losses[:iter_num, 3]))])

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(
                        rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if C.verbose:
                        print(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        print(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        print('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() -
                                                        start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if C.verbose:
                            print(
                                'Total loss decreased from {} to {}, saving weights'
                                .format(best_loss, curr_loss))
                        best_loss = curr_loss

                        if save_best_weights:
                            # Build the checkpoint path from the unchanged
                            # prefix. The original re-assigned the prefix
                            # itself, so each later checkpoint name carried
                            # the suffixes of all the earlier ones.
                            weights_path = '{}{}.hdf5'.format(
                                real_model_path, epoch_num + 1)
                            model_all.save_weights(weights_path,
                                                   overwrite=True)
                            print("Best weights so far saved to " +
                                  weights_path + ". best_loss = " +
                                  str(best_loss))
                            epoch_array.append(epoch_num + 1)
                            loss_array.append([
                                loss_rpn_cls, loss_rpn_regr, loss_class_cls,
                                loss_class_regr, best_loss
                            ])
                            album_ap, logo_ap, mAP = measure_map.measure_map(
                                config_output_filename, weights_path)
                            ap_array.append([album_ap, logo_ap, mAP])
                            np.save(diagnose_path,
                                    [epoch_array, loss_array, ap_array])
                        else:
                            album_ap = 'not applicable'
                            logo_ap = 'not applicable'
                            mAP = 'not applicable'
                        model_name = "model_{}_{}".format(
                            str(best_loss),
                            str(uuid.uuid4())[:5])
                        result = {
                            'loss': best_loss,
                            'loss_rpn_cls': loss_rpn_cls,
                            'loss_rpn_regr': loss_rpn_regr,
                            'loss_class_cls': loss_class_cls,
                            'loss_class_regr': loss_class_regr,
                            'album_ap': album_ap,
                            'logo_ap': logo_ap,
                            'mAP': mAP,
                            'model_name': model_name,
                            'space': hype_space,
                            'status': STATUS_OK
                        }
                        print("RESULT UPDATED.")
                        print("Model name: {}".format(model_name))
                    # Epoch finished: leave the iteration loop.
                    break

            except Exception as e:
                # Any failure in one iteration is reported and the iteration
                # is retried with the next batch.
                # NOTE(review): an error that recurs on every batch keeps this
                # loop spinning forever - consider a retry limit.
                print('Exception: {}'.format(e))
                continue

    print('Training complete, exiting.')
    print("BEST MODEL: {}".format(model_name))
    print("FINAL RESULT:")
    print_json(result)
    save_json_result(model_name, result)
    try:
        K.clear_session()
        del model_all, model_rpn, model_classifier
    except Exception as err:
        try:
            K.clear_session()
        except Exception:
            # Second clean-up attempt is best effort; the first error is what
            # gets reported below.
            pass
        err_str = str(err)
        print(err_str)
        traceback_str = str(traceback.format_exc())
        print(traceback_str)
        return {
            'status': STATUS_FAIL,
            'err': err_str,
            'traceback': traceback_str
        }
    print("\n\n")
    return model_name, result
Exemplo n.º 27
0
def train_kitti():
    """Train the RPN and detector models on the KITTI simple-label data.

    Builds a ``config.Config`` with flip/rotation augmentation enabled and 32
    ROIs per step, reads the annotations named by ``cfg.simple_label_file``
    through ``get_data``, pickles the config to ``cfg.config_save_file``, and
    then alternates ``train_on_batch`` calls on the RPN and on the classifier
    for ``cfg.num_epochs`` epochs of 1000 iterations each.

    Side effects:
        * writes the pickled config to ``cfg.config_save_file``;
        * writes weights to ``cfg.model_path`` whenever the summed epoch loss
          improves, and also after any exception raised inside an iteration.

    Returns:
        None.
    """
    # Config for data augmentation.
    cfg = config.Config()

    cfg.use_horizontal_flips = True
    cfg.use_vertical_flips = True
    cfg.rot_90 = True
    cfg.num_rois = 32
    cfg.base_net_weights = os.path.join('./model/', nn.get_weight_path())

    # TODO: these paths should be the only thing to change to train on other data.
    cfg.model_path = './model/kitti_frcnn_last.hdf5'
    cfg.simple_label_file = 'kitti_simple_label.txt'

    all_images, classes_count, class_mapping = get_data(cfg.simple_label_file)

    # Make sure a background class exists; it takes the next free index.
    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    cfg.class_mapping = class_mapping
    with open(cfg.config_save_file, 'wb') as config_f:
        pickle.dump(cfg, config_f)
        print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(
            cfg.config_save_file))

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))
    random.shuffle(all_images)
    train_imgs = [s for s in all_images if s['imageset'] == 'trainval']
    val_imgs = [s for s in all_images if s['imageset'] == 'test']

    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))

    data_gen_train = data_generators.get_anchor_gt(train_imgs, classes_count, cfg, nn.get_img_output_length,
                                                   K.image_dim_ordering(), mode='train')
    # NOTE(review): the validation generator is created but never consumed
    # below; kept so the call sequence into data_generators is unchanged.
    data_gen_val = data_generators.get_anchor_gt(val_imgs, classes_count, cfg, nn.get_img_output_length,
                                                 K.image_dim_ordering(), mode='val')

    # Channel axis comes first for 'th' ordering, last otherwise.
    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(shared_layers, roi_input, cfg.num_rois, nb_classes=len(classes_count), trainable=True)

    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    try:
        # Report the file that is actually loaded (cfg.model_path); the
        # message previously named cfg.base_net_weights, which is not read here.
        print('loading weights from {}'.format(cfg.model_path))
        model_rpn.load_weights(cfg.model_path, by_name=True)
        model_classifier.load_weights(cfg.model_path, by_name=True)
    except Exception as e:
        # Best effort: training proceeds from the current initialisation.
        print(e)
        print('Could not load pretrained model weights. Weights can be found in the keras application folder '
              'https://github.com/fchollet/keras/tree/master/keras/applications')

    optimizer = Adam(lr=1e-5)
    optimizer_classifier = Adam(lr=1e-5)
    model_rpn.compile(optimizer=optimizer,
                      loss=[losses_fn.rpn_loss_cls(num_anchors), losses_fn.rpn_loss_regr(num_anchors)])
    model_classifier.compile(optimizer=optimizer_classifier,
                             loss=[losses_fn.class_loss_cls, losses_fn.class_loss_regr(len(classes_count) - 1)],
                             metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    # model_all is only used for save_weights below; it is never trained directly.
    model_all.compile(optimizer='sgd', loss='mae')

    epoch_length = 1000
    num_epochs = int(cfg.num_epochs)
    iter_num = 0

    # Per-iteration values; columns: rpn_cls, rpn_regr, detector_cls,
    # detector_regr, detector accuracy.
    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()

    # np.inf rather than np.Inf: the capitalised alias was removed in NumPy 2.0.
    best_loss = np.inf

    print('Starting training')

    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:

                # Reset the monitor every epoch_length iterations regardless of
                # verbosity so the list cannot grow without bound.
                if len(rpn_accuracy_rpn_monitor) == epoch_length:
                    mean_overlapping_bboxes = float(sum(rpn_accuracy_rpn_monitor)) / len(rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    if cfg.verbose:
                        print(
                            'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'.format(
                                mean_overlapping_bboxes, epoch_length))
                        if mean_overlapping_bboxes == 0:
                            print('RPN is not producing bounding boxes that overlap'
                                  ' the ground truth boxes. Check RPN settings or keep training.')

                X, Y, img_data = next(data_gen_train)

                loss_rpn = model_rpn.train_on_batch(X, Y)

                P_rpn = model_rpn.predict_on_batch(X)

                result = roi_helpers.rpn_to_roi(P_rpn[0], P_rpn[1], cfg, K.image_dim_ordering(), use_regr=True,
                                                overlap_thresh=0.7,
                                                max_boxes=300)
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                X2, Y1, Y2, _ = roi_helpers.calc_iou(result, img_data, cfg, class_mapping)

                if X2 is None:
                    # No usable ROIs for this image: record zero overlap and move on.
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                # The last class column flags negatives (1) versus positives (0).
                # np.where on a 1-D array yields a 1-tuple, so [0] is always valid.
                neg_samples = np.where(Y1[0, :, -1] == 1)[0]
                pos_samples = np.where(Y1[0, :, -1] == 0)[0]

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append(len(pos_samples))

                if cfg.num_rois > 1:
                    # Use at most num_rois // 2 positives, fill the rest with negatives.
                    if len(pos_samples) < cfg.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(pos_samples, cfg.num_rois // 2, replace=False).tolist()
                    num_neg = cfg.num_rois - len(selected_pos_samples)
                    try:
                        selected_neg_samples = np.random.choice(neg_samples, num_neg, replace=False).tolist()
                    except ValueError:
                        # Fewer negatives than requested: sample with replacement.
                        selected_neg_samples = np.random.choice(neg_samples, num_neg, replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample.
                    # Wrapped in a list so the fancy index below keeps the ROI axis
                    # that the 3-D roi_input expects.
                    if np.random.randint(0, 2):
                        sel_samples = [random.choice(neg_samples)]
                    else:
                        sel_samples = [random.choice(pos_samples)]

                loss_class = model_classifier.train_on_batch([X, X2[:, sel_samples, :]],
                                                             [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                # Index 0 of each train_on_batch result is skipped here
                # (presumably the summed loss -- confirm against Keras version).
                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]

                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]
                losses[iter_num, 4] = loss_class[3]

                iter_num += 1

                progbar.update(iter_num,
                               [('rpn_cls', np.mean(losses[:iter_num, 0])), ('rpn_regr', np.mean(losses[:iter_num, 1])),
                                ('detector_cls', np.mean(losses[:iter_num, 2])),
                                ('detector_regr', np.mean(losses[:iter_num, 3]))])

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if cfg.verbose:
                        print('Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'.format(
                            mean_overlapping_bboxes))
                        print('Classifier accuracy for bounding boxes from RPN: {}'.format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(loss_class_cls))
                        print('Loss Detector regression: {}'.format(loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() - start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if cfg.verbose:
                            print('Total loss decreased from {} to {}, saving weights'.format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_all.save_weights(cfg.model_path)

                    # Epoch finished: leave the inner loop.
                    break

            except Exception as e:
                print('Exception: {}'.format(e))
                # save model
                # NOTE(review): this overwrites the best-loss checkpoint at
                # cfg.model_path with the current weights -- confirm intended.
                model_all.save_weights(cfg.model_path)
                continue
    print('Training complete, exiting.')
Exemplo n.º 28
0
# Number of ROIs handed to the classifier head per step.
num_rois = 32
# Class count used for the classifier head, its regression loss and the
# accuracy metric key (presumably 20 object classes + background -- confirm
# against the dataset in use).
num_classes = 21

img_input = Input(shape=input_shape_img)
roi_input = Input(shape=(None, 4))

# define the base network (resnet here, can be VGG, Inception, etc)
shared_layers = nn.nn_base(img_input, trainable=True)

# define the RPN, built on the base layers
num_anchors = len(anchor_box_scales) * len(anchor_box_ratios)
rpn = nn.rpn(shared_layers, num_anchors)

classifier = nn.classifier(shared_layers, roi_input, num_rois, nb_classes=num_classes, trainable=True)

model_rpn = Model(img_input, rpn[:2])
model_classifier = Model([img_input, roi_input], classifier)

# this is a model that holds both the RPN and the classifier, used to load/save weights for the models
model_all = Model([img_input, roi_input], rpn[:2] + classifier)


optimizer = Adam(lr=1e-5)
optimizer_classifier = Adam(lr=1e-5)
model_rpn.compile(optimizer=optimizer, loss=[losses.rpn_loss_cls(num_anchors), losses.rpn_loss_regr(num_anchors)])
# The regression loss takes the class count without the background class.
model_classifier.compile(optimizer=optimizer_classifier,
                         loss=[losses.class_loss_cls, losses.class_loss_regr(num_classes - 1)],
                         metrics={'dense_class_{}'.format(num_classes): 'accuracy'})
# Compiled only so the combined model can be used; it is not trained here.
model_all.compile(optimizer='sgd', loss='mae')

# summary() prints the table itself and returns None, so it is called directly
# instead of through the Python 2 print statement (which also printed "None").
model_all.summary()