示例#1
0
    def train(self, sess, data_gen, total_batches=1434):
        """Train the network for one epoch and report the running averages.

        Batches are pulled from ``data_gen`` until ``has_data()`` turns false.

        Args:
            sess: Session on which the training op and the loss/prediction
                tensors are run.
            data_gen: Batch provider with ``has_data()`` and
                ``get_batch(batch_size)``; every batch unpacks into
                ``(x_batch, bbox_batch, y_batch)``.
            total_batches: Length of the progress bar only; the default keeps
                the value that used to be hard-coded.

        Returns:
            A ``(classification_loss, segmentation_loss, accuracy)`` tuple
            averaged over the batches that were run. ``(-1, -1, -1)`` is
            returned as soon as the first activation of a batch is NaN, and
            ``(0.0, 0.0, 0.0)`` when ``data_gen`` produced no batch at all.
        """
        start_time = time.time()
        losses, batch, acc, s_losses = 0, 0, 0, 0

        pbar = tqdm(total=total_batches)
        try:
            # continues until no more training data is generated
            while data_gen.has_data():
                x_batch, bbox_batch, y_batch = data_gen.get_batch(
                    config.batch_size)

                # runs network on batch
                _, loss, s_loss, preds = sess.run(
                    [
                        self.train_op, self.class_loss,
                        self.segmentation_loss, self.digit_preds
                    ],
                    feed_dict={
                        self.x_input: x_batch,
                        self.y_input: y_batch,
                        self.m: self.cur_m,
                        self.is_train: True,
                        self.y_bbox: bbox_batch
                    })
                pbar.update()

                # gives up on the epoch as soon as an activation is NaN
                if np.isnan(preds[0][0]):
                    print(preds[0][:10])
                    print('NAN encountered.')
                    config.write_output('NAN encountered.\n')
                    return -1, -1, -1

                # accumulates losses and accuracies; the accuracy is taken
                # over the samples actually in this batch so that a short
                # final batch is not under-counted
                n_hits = np.count_nonzero(
                    np.argmax(preds, axis=1) == np.array(y_batch))
                acc += n_hits / max(len(y_batch), 1)

                losses += loss
                s_losses += s_loss
                batch += 1

                # prints the loss and accuracy statistics after a certain number of batches
                if batch % config.batches_until_print == 0:
                    print(
                        preds[0][:10]
                    )  # prints activations just in case of numerical instability
                    print(
                        'Finished %d batches. %d(s) since start. Avg Classification Loss is %.4f. '
                        'Avg Segmentation Loss is %.4f. Accuracy is %.4f.' %
                        (batch, time.time() - start_time, losses / batch,
                         s_losses / batch, acc / batch))
        finally:
            # also runs on the NaN early return, so the bar is never left open
            pbar.close()

        if batch == 0:
            # nothing to average; avoids dividing by zero below
            print('No training batches were produced; nothing to average.')
            return 0.0, 0.0, 0.0

        print(
            'Epoch finished in %d(s). Avg Classification loss is %.4f. Avg Segmentation Loss is %.4f. '
            'Accuracy is %.4f.' % (time.time() - start_time, losses / batch,
                                   s_losses / batch, acc / batch))

        return losses / batch, s_losses / batch, acc / batch
示例#2
0
def train_network(gpu_config):
    """Train a ``Caps3d`` network on UCF101, then compute the final metrics.

    For every epoch a fresh ``TrainDataGen`` is consumed by
    ``capsnet.train``. The margin ``capsnet.cur_m`` is raised by
    ``config.m_delta`` every ``config.n_eps_for_m`` epochs and capped at 0.9.
    Validation runs on the last epoch, or periodically once the training
    accuracy reaches ``config.acc_for_eval``; the network is saved whenever
    the summed validation loss improves. ``iou()`` is called after the last
    epoch.

    Args:
        gpu_config: Session configuration handed to ``tf.Session``.
    """
    capsnet = Caps3d()

    with tf.Session(graph=capsnet.graph, config=gpu_config) as sess:
        tf.global_variables_initializer().run()

        get_num_params()
        config.clear_output()

        n_eps_after_acc, best_loss = -1, 100000
        print('Training on UCF101')
        for ep in range(1, config.n_epochs + 1):
            print(20 * '*', 'epoch', ep, 20 * '*')

            # trains network for one epoch
            data_gen = TrainDataGen(config.wait_for_data,
                                    frame_skip=config.frame_skip)
            margin_loss, seg_loss, acc = capsnet.train(sess, data_gen)
            config.write_output('CL: %.4f. SL: %.4f. Acc: %.4f\n' %
                                (margin_loss, seg_loss, acc))

            # increments the margin
            if ep % config.n_eps_for_m == 0:
                capsnet.cur_m += config.m_delta
                capsnet.cur_m = min(capsnet.cur_m, 0.9)

            # only validates after a certain number of epochs and when the training accuracy is greater than a threshold
            # this is mainly used to save time, since validation takes about 10 minutes
            if (acc >= config.acc_for_eval
                    or n_eps_after_acc >= 0) and ep >= config.n_eps_until_eval:
                n_eps_after_acc += 1

            # validates the network
            if (acc >= config.acc_for_eval and n_eps_after_acc %
                    config.n_eps_for_eval == 0) or ep == config.n_epochs:
                data_gen = TestDataGen(config.wait_for_data, frame_skip=1)
                margin_loss, seg_loss, accuracy, _ = capsnet.eval(
                    sess, data_gen, validation=True)

                config.write_output(
                    'Validation\tCL: %.4f. SL: %.4f. Acc: %.4f.\n' %
                    (margin_loss, seg_loss, accuracy))

                # saves the network when validation loss is minimized
                t_loss = margin_loss + seg_loss
                if t_loss < best_loss:
                    try:
                        capsnet.save(sess, config.save_file_name)
                    except Exception as err:
                        # Exception (not a bare except) so Ctrl-C still stops
                        # the run; a failed save stays non-fatal
                        print('Failed to save network!!!')
                        print(err)
                    else:
                        # the best loss is only recorded once the checkpoint
                        # is really on disk, so a failed save cannot block a
                        # later, slightly worse but still unsaved model
                        best_loss = t_loss
                        config.write_output('Saved Network\n')

        # calculate final test accuracy, f-mAP, and v-mAP
        iou()
示例#3
0
def train_one_epoch(sess, capsnet, data_gen, epoch):
    """Train ``capsnet`` for one epoch on the batches produced by ``data_gen``.

    Args:
        sess: Session on which the training op and metrics are run.
        capsnet: Network object exposing the placeholders and tensors used
            below (``train_op``, ``segmentation_loss``, ``pred_caps``,
            ``seg_acc``, ``regression_loss``, ...).
        data_gen: Batch provider with ``has_data()`` and
            ``get_batch(batch_size)``; each batch unpacks into
            ``(x_batch, seg_batch, crop1_batch, crop2_batch)``.
        epoch: Current epoch number, compared against
            ``config.n_epochs_for_gt_seg`` / ``config.n_epochs_for_gt_crop``.

    Returns:
        ``(segmentation_loss, regression_loss, segmentation_accuracy)``
        averaged over the processed batches. ``(-1, -1, -1)`` is returned
        when the first predicted capsule value of a batch is NaN, and
        ``(0.0, 0.0, 0.0)`` when no batch was processed at all.
    """
    start_time = time.time()
    batch, s_losses, seg_acc, reg_losses = 0.0, 0, 0, 0

    # values fed to the network's use_gt_seg / use_gt_crop inputs; they depend
    # on the epoch only, so they are computed once instead of per batch
    use_gt_seg = epoch <= config.n_epochs_for_gt_seg
    use_gt_crop = epoch <= config.n_epochs_for_gt_crop

    # continues until no more training data is generated
    while data_gen.has_data():
        x_batch, seg_batch, crop1_batch, crop2_batch = data_gen.get_batch(
            config.batch_size)

        if config.multi_gpu and len(x_batch) == 1:
            print('Batch size of one, not running')
            continue

        n_samples = len(x_batch)

        # zero-filled conditioning inputs sized for the current batch
        hr_lstm_input = np.zeros(
            (n_samples, config.hr_lstm_size[0], config.hr_lstm_size[1],
             config.hr_lstm_feats))
        lr_lstm_input = np.zeros(
            (n_samples, config.lr_lstm_size[0], config.lr_lstm_size[1],
             config.lr_lstm_feats))

        _, s_loss, cap_vals, s_acc, reg_loss = sess.run(
            [
                capsnet.train_op, capsnet.segmentation_loss, capsnet.pred_caps,
                capsnet.seg_acc, capsnet.regression_loss
            ],
            feed_dict={
                capsnet.x_input_video: x_batch,
                capsnet.y_segmentation: seg_batch,
                capsnet.hr_cond_input: hr_lstm_input,
                capsnet.lr_cond_input: lr_lstm_input,
                capsnet.use_gt_seg: use_gt_seg,
                capsnet.use_gt_crop: use_gt_crop,
                capsnet.gt_crops1: crop1_batch,
                capsnet.gt_crops2: crop2_batch
            })

        # gives up on the epoch as soon as a NaN shows up
        if np.isnan(cap_vals[0][0]):
            print(cap_vals[0][:10])
            print('NAN encountered.')
            config.write_output('NAN encountered.\n')
            return -1, -1, -1

        s_losses += s_loss
        seg_acc += s_acc
        reg_losses += reg_loss
        batch += 1

        if batch % config.batches_until_print == 0:
            print(
                'Finished %d batches. %d(s) since start. Avg Segmentation Loss is %.4f. Avg Regression Loss is %.4f. '
                'Seg Acc is %.4f.' %
                (batch, time.time() - start_time, s_losses / batch,
                 reg_losses / batch, seg_acc / batch))
            sys.stdout.flush()

    if batch == 0:
        # happens when the generator is empty or every batch was skipped;
        # avoids dividing by zero below
        print('No training batches were processed; nothing to average.')
        sys.stdout.flush()
        return 0.0, 0.0, 0.0

    print(
        'Finish Epoch in %d(s). Avg Segmentation Loss is %.4f. Avg Regression Loss is %.4f. Seg Acc is %.4f.'
        % (time.time() - start_time, s_losses / batch, reg_losses / batch,
           seg_acc / batch))
    sys.stdout.flush()

    return s_losses / batch, reg_losses / batch, seg_acc / batch
示例#4
0
def train_network(gpu_config):
    """Train a ``CapsNet`` on YoutubeVOS, validating and checkpointing each epoch.

    Training starts at ``config.start_at_epoch``; when that is greater than
    one, the best checkpoint of the preceding epoch is loaded first. An epoch
    whose training reports a negative loss or accuracy is retried (up to
    three attempts) after reloading the last best checkpoint; if all attempts
    fail the process exits with a non-zero status. After each epoch the
    network is validated, saved every ``config.save_every_n_epochs`` epochs,
    and additionally saved whenever the validation loss improves.

    Args:
        gpu_config: Session configuration handed to ``tf.Session``.
    """
    capsnet = CapsNet()

    with tf.Session(graph=capsnet.graph, config=gpu_config) as sess:
        tf.global_variables_initializer().run()

        get_num_params()
        best_loss = 1000000
        best_epoch = 1
        if config.start_at_epoch <= 1:
            config.clear_output()
        else:
            capsnet.load(
                sess, config.save_file_best_name % (config.start_at_epoch - 1))
            print('Loading from epoch %d.' % (config.start_at_epoch - 1))
            # the checkpoint just restored is the one to fall back on if NaNs
            # appear before a new best checkpoint has been written
            best_epoch = config.start_at_epoch - 1

        print('Training on YoutubeVOS')
        for ep in range(config.start_at_epoch, config.n_epochs + 1):
            print(20 * '*', 'epoch', ep, 20 * '*')
            sys.stdout.flush()

            # Trains network for 1 epoch
            nan_tries = 0
            while nan_tries < 3:
                data_gen = TrainDataGen(config.wait_for_data,
                                        crop_size=config.hr_frame_size,
                                        n_frames=config.n_frames,
                                        rand_frame_skip=config.rand_frame_skip,
                                        multi_objects=config.multiple_objects)
                seg_loss, reg_loss, seg_acc = train_one_epoch(
                    sess, capsnet, data_gen, ep)

                if seg_loss < 0 or seg_acc < 0:
                    nan_tries += 1
                    capsnet.load(sess, config.save_file_best_name %
                                 best_epoch)  # loads in the last best epoch
                    # drains whatever the abandoned generator still holds
                    while data_gen.has_data():
                        data_gen.get_batch(config.batch_size)
                else:
                    config.write_output(
                        'Epoch %d: SL: %.4f. RL: %.4f. SegAcc: %.4f.\n' %
                        (ep, seg_loss, reg_loss, seg_acc))
                    break

            if nan_tries == 3:
                print('Network cannot be trained. Too many NaN issues.')
                sys.stdout.flush()
                # non-zero status so that callers can tell the run failed
                sys.exit(1)

            # Validates network
            data_gen = ValidDataGen(config.wait_for_data,
                                    crop_size=config.hr_frame_size,
                                    n_frames=config.n_frames)
            seg_loss, seg_acc = validate(sess, capsnet, data_gen)

            config.write_output('Validation\tSL: %.4f. SA: %.4f.\n' %
                                (seg_loss, seg_acc))

            # saves every config.save_every_n_epochs epochs
            if ep % config.save_every_n_epochs == 0:
                try:
                    capsnet.save(sess, config.save_file_name % ep)
                except Exception as err:
                    # Exception (not a bare except) so Ctrl-C still stops the
                    # run; a failed save stays non-fatal
                    print('Failed to save network!!!')
                    print(err)
                    sys.stdout.flush()
                else:
                    config.write_output('Saved Network\n')

            # saves when validation loss becomes smaller
            t_loss = seg_loss

            if t_loss < best_loss:
                try:
                    capsnet.save(sess, config.save_file_best_name % ep)
                except Exception as err:
                    print('Failed to save network!!!')
                    print(err)
                    sys.stdout.flush()
                else:
                    # best_loss and best_epoch move together, and only once
                    # the checkpoint they describe really exists
                    best_loss = t_loss
                    best_epoch = ep
                    config.write_output('Saved Network - Minimum val\n')

    tf.reset_default_graph()
def iou():
    """
    Calculates the accuracy, f-mAP, and v-mAP over the test set

    A ``Caps3d`` network is built, restored from ``config.save_file_name``
    and run over every video of ``TestDataGen``. Each video is cut into
    8-frame clips; the class predictions of all its clips are averaged to
    obtain the video label, and the thresholded segmentations are compared
    with the ground-truth masks per frame (f-mAP) and per video (v-mAP) at
    the IoU thresholds 0.00, 0.05, ..., 0.95. Results are printed and written
    with ``config.write_output``. Nothing is returned.
    """
    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True

    capsnet = Caps3d()
    with tf.Session(graph=capsnet.graph, config=gpu_config) as sess:
        tf.global_variables_initializer().run()
        capsnet.load(sess, config.save_file_name)

        data_gen = TestDataGen(config.wait_for_data)

        # n_correct: correctly classified videos; n_vids / n_tot_frames:
        # per-class counts of evaluated videos / frames (column vectors)
        n_correct, n_vids, n_tot_frames = 0, np.zeros(
            (config.n_classes, 1)), np.zeros((config.n_classes, 1))

        # [class, k]: number of frames / videos whose IoU reaches threshold k
        frame_ious = np.zeros((config.n_classes, 20))
        video_ious = np.zeros((config.n_classes, 20))
        # thresholds 0.00, 0.05, ..., 0.95 (index 10 is 0.5)
        iou_threshs = np.arange(0, 20, dtype=np.float32) / 20

        while data_gen.has_data():
            video, bbox, label = data_gen.get_next_video()

            # cuts the video into clips of 8 frames taken every f_skip frames;
            # j shifts the start so that interleaved frames are covered too
            f_skip = config.frame_skip
            clips = []
            n_frames = video.shape[0]
            for i in range(0, video.shape[0], 8 * f_skip):
                for j in range(f_skip):
                    b_vid, b_bbox = [], []
                    for k in range(8):
                        ind = i + j + k * f_skip
                        if ind >= n_frames:
                            # pads past the end of the video with empty frames
                            b_vid.append(
                                np.zeros((1, 112, 112, 3), dtype=np.float32))
                            b_bbox.append(
                                np.zeros((1, 112, 112, 1), dtype=np.float32))
                        else:
                            b_vid.append(video[ind:ind + 1, :, :, :])
                            b_bbox.append(bbox[ind:ind + 1, :, :, :])

                    clips.append((np.concatenate(b_vid, axis=0),
                                  np.concatenate(b_bbox, axis=0), label))
                    # drops the clip again when its mask is entirely empty
                    if np.sum(clips[-1][1]) == 0:
                        clips.pop(-1)

            if len(clips) == 0:
                print('Video has no bounding boxes')
                continue

            # groups the clips into batches of at most config.batch_size
            batches, gt_segmentations = [], []
            for i in range(0, len(clips), config.batch_size):
                x_batch, bb_batch, y_batch = [], [], []
                for j in range(i, min(i + config.batch_size, len(clips))):
                    x, bb, y = clips[j]
                    x_batch.append(x)
                    bb_batch.append(bb)
                    y_batch.append(y)
                batches.append((x_batch, bb_batch, y_batch))
                gt_segmentations.append(np.stack(bb_batch))

            gt_segmentations = np.concatenate(gt_segmentations, axis=0)
            gt_segmentations = gt_segmentations.reshape(
                (-1, 112, 112, 1))  # Shape N_FRAMES, 112, 112, 1

            # runs the network in inference mode on every batch
            segmentations, predictions = [], []
            for x_batch, bb_batch, y_batch in batches:
                segmentation, pred = sess.run(
                    [capsnet.segment_layer_sig, capsnet.digit_preds],
                    feed_dict={
                        capsnet.x_input: x_batch,
                        capsnet.y_input: y_batch,
                        capsnet.m: 0.9,
                        capsnet.is_train: False
                    })
                segmentations.append(segmentation)
                predictions.append(pred)

            # video-level class: argmax of the predictions averaged over clips
            predictions = np.concatenate(predictions, axis=0)
            predictions = predictions.reshape((-1, config.n_classes))
            fin_pred = np.mean(predictions, axis=0)

            fin_pred = np.argmax(fin_pred)
            if fin_pred == label:
                n_correct += 1

            pred_segmentations = np.concatenate(segmentations, axis=0)
            pred_segmentations = pred_segmentations.reshape((-1, 112, 112, 1))

            # binarizes the predicted masks at 0.5
            pred_segmentations = (pred_segmentations >= 0.5).astype(np.int32)
            # equals 2 where prediction and ground truth overlap, provided the
            # ground-truth mask holds only 0/1 — TODO confirm
            seg_plus_gt = pred_segmentations + gt_segmentations

            vid_inter, vid_union = 0, 0
            # calculates f_map
            for i in range(gt_segmentations.shape[0]):
                frame_gt = gt_segmentations[i]
                # frames without any ground truth are not evaluated
                if np.sum(frame_gt) == 0:
                    continue

                n_tot_frames[label] += 1

                inter = np.count_nonzero(seg_plus_gt[i] == 2)
                union = np.count_nonzero(seg_plus_gt[i])
                # the video-level IoU uses the sums over all evaluated frames
                vid_inter += inter
                vid_union += union

                i_over_u = inter / union
                for k in range(iou_threshs.shape[0]):
                    if i_over_u >= iou_threshs[k]:
                        frame_ious[label, k] += 1

            # calculates v_map from the accumulated intersection and union
            n_vids[label] += 1
            i_over_u = vid_inter / vid_union
            for k in range(iou_threshs.shape[0]):
                if i_over_u >= iou_threshs[k]:
                    video_ious[label, k] += 1

            if np.sum(n_vids) % 100 == 0:
                print('Finished %d videos' % np.sum(n_vids))

        print('Accuracy:', n_correct / np.sum(n_vids))
        config.write_output('Test Accuracy: %.4f\n' %
                            float(n_correct / np.sum(n_vids)))

        # per-class fraction of frames / videos reaching each threshold, then
        # the mean over classes
        # NOTE(review): a class with no evaluated frames or videos gives 0/0
        # here — confirm every class is present in the test set
        fAP = frame_ious / n_tot_frames
        fmAP = np.mean(fAP, axis=0)
        vAP = video_ious / n_vids
        vmAP = np.mean(vAP, axis=0)

        print('IoU f-mAP:')
        config.write_output('IoU f-mAP:\n')
        for i in range(20):
            print(iou_threshs[i], fmAP[i])
            config.write_output('%.4f\t%.4f\n' % (iou_threshs[i], fmAP[i]))
        # per-class values at threshold index 10 (0.5)
        config.write_output(str(fAP[:, 10]) + '\n')
        print(fAP[:, 10])
        print('IoU v-mAP:')
        config.write_output('IoU v-mAP:\n')
        for i in range(20):
            print(iou_threshs[i], vmAP[i])
            config.write_output('%.4f\t%.4f\n' % (iou_threshs[i], vmAP[i]))
        # per-class values at threshold index 10 (0.5)
        config.write_output(str(vAP[:, 10]) + '\n')
        print(vAP[:, 10])
示例#6
0
def train_network(gpu_config):
    """Train a ``Caps3d`` network on UCF101 with periodic checkpoints.

    Training starts at ``config.start_at_epoch``; when that is greater than
    one, the checkpoint of the preceding epoch is loaded first. An epoch
    whose training reports a negative loss or accuracy is retried up to three
    times, after which the process exits with a non-zero status. A checkpoint
    is written every ``config.save_every_n_epochs`` epochs and whenever the
    evaluation condition holds; validation itself is currently disabled.

    Args:
        gpu_config: Session configuration handed to ``tf.compat.v1.Session``.
    """
    capsnet = Caps3d()

    with tf.compat.v1.Session(graph=capsnet.graph, config=gpu_config) as sess:
        tf.compat.v1.global_variables_initializer().run()

        get_num_params()
        if config.start_at_epoch <= 1:
            config.clear_output()
        else:
            capsnet.load(sess, config.save_file_name % (config.start_at_epoch - 1))
            print('Loading from epoch %d.' % (config.start_at_epoch - 1))

        n_eps_after_acc = -1
        print('Training on UCF101')
        for ep in range(config.start_at_epoch, config.n_epochs + 1):
            print(20 * '*', 'epoch', ep, 20 * '*')
            nan_tries = 0
            while nan_tries < 3:
                # trains network for one epoch
                data_gen = TrainDataGen(config.wait_for_data, frame_skip=config.frame_skip)
                margin_loss, seg_loss, acc = capsnet.train(sess, data_gen)

                if margin_loss < 0 or acc < 0:
                    # NOTE(review): the weights are not restored before the
                    # retry, so it restarts from the failed state — confirm
                    # whether an earlier checkpoint should be reloaded here
                    nan_tries += 1
                else:
                    config.write_output('CL: %.4f. SL: %.4f. Acc: %.4f\n' % (margin_loss, seg_loss, acc))
                    break
            if nan_tries == 3:
                print('Network cannot be trained. Too many NaN issues.')
                # non-zero status so that callers can tell the run failed
                raise SystemExit(1)

            saved_this_epoch = False
            if ep % config.save_every_n_epochs == 0:
                try:
                    capsnet.save(sess, config.save_file_name % ep)
                except Exception as err:
                    # Exception (not a bare except) so Ctrl-C still stops the
                    # run; a failed save stays non-fatal
                    print('Failed to save network!!!')
                    print(err)
                else:
                    saved_this_epoch = True
                    config.write_output('Saved Network\n')

            # increments the margin
            if ep % config.n_eps_for_m == 0:
                capsnet.cur_m += config.m_delta
                capsnet.cur_m = min(capsnet.cur_m, 0.9)

            # only validates after a certain number of epochs and when the training accuracy is greater than a threshold
            # this is mainly used to save time, since validation takes about 10 minutes
            if (acc >= config.acc_for_eval or n_eps_after_acc >= 0) and ep >= config.n_eps_until_eval:
                n_eps_after_acc += 1

            # validation is disabled here; the network is simply saved at the
            # points where it would have been validated
            due_for_eval = ((acc >= config.acc_for_eval and n_eps_after_acc % config.n_eps_for_eval == 0)
                            or ep == config.n_epochs)
            # the periodic save above writes the same file, so it is not
            # written a second time in the same epoch
            if due_for_eval and not saved_this_epoch:
                try:
                    capsnet.save(sess, config.save_file_name % ep)
                except Exception as err:
                    print('Failed to save network!!!')
                    print(err)
                else:
                    config.write_output('Saved Network\n')