示例#1
0
def main(_):
    """Train RetinaFace step by step, resuming from the newest checkpoint.

    Reads ``FLAGS.gpu`` and ``FLAGS.cfg_path``, builds the model, prior boxes,
    dataset, optimizer and loss from the loaded config, then runs whatever
    steps remain out of ``steps_per_epoch * cfg['epoch']``. Scalars are written
    every 10 steps and a checkpoint every ``cfg['save_steps']`` steps, plus one
    final checkpoint after the loop.

    Args:
        _: Unused.
    """
    # Environment and logging: silence TensorFlow and select the GPU(s).
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    tf_logger = tf.get_logger()
    tf_logger.disabled = True
    tf_logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # Network.
    model = RetinaFaceModel(cfg, training=True)
    model.summary(line_length=80)

    # Prior boxes for a square cfg['input_size'] x cfg['input_size'] input.
    side = cfg['input_size']
    priors = prior_box((side, side),
                       cfg['min_sizes'], cfg['steps'], cfg['clip'])

    # Training data.
    train_dataset = load_dataset(cfg, priors, shuffle=True)

    # Optimizer: SGD driven by a warm-up + multi-step schedule whose
    # boundaries are given in epochs in the config and converted to steps.
    steps_per_epoch = cfg['dataset_len'] // cfg['batch_size']
    decay_boundaries = [
        decay_epoch * steps_per_epoch for decay_epoch in cfg['lr_decay_epoch']
    ]
    lr_schedule = MultiStepWarmUpLR(
        initial_learning_rate=cfg['init_lr'],
        lr_steps=decay_boundaries,
        lr_rate=cfg['lr_rate'],
        warmup_steps=cfg['warmup_epoch'] * steps_per_epoch,
        min_lr=cfg['min_lr'])
    optimizer = tf.keras.optimizers.SGD(
        learning_rate=lr_schedule, momentum=0.9, nesterov=True)

    # Loss.
    multi_box_loss = MultiBoxLoss()

    # Checkpointing: the global step is stored alongside optimizer and model
    # so a restart can continue where the previous run stopped.
    checkpoint_dir = ('/content/drive/My Drive/Colab/checkpoints/'
                      + cfg['sub_name'])
    checkpoint = tf.train.Checkpoint(
        step=tf.Variable(0, name='step'), optimizer=optimizer, model=model)
    manager = tf.train.CheckpointManager(
        checkpoint=checkpoint, directory=checkpoint_dir, max_to_keep=3)

    latest = manager.latest_checkpoint
    if not latest:
        print("[*] training from scratch.")
    else:
        checkpoint.restore(latest)
        print('[*] load ckpt from {} at step {}.'.format(
            latest, checkpoint.step.numpy()))

    @tf.function
    def train_step(inputs, labels):
        """Run one optimization step; return (total loss, per-term losses)."""
        with tf.GradientTape() as tape:
            predictions = model(inputs, training=True)

            reg_loss = tf.reduce_sum(model.losses)
            loc_loss, landm_loss, class_loss = multi_box_loss(
                labels, predictions)
            # Key order is kept: it fixes both the summation order and the
            # order in which the terms are logged.
            losses = {
                'reg': reg_loss,
                'loc': loc_loss,
                'landm': landm_loss,
                'class': class_loss,
            }
            total_loss = tf.add_n(list(losses.values()))

        variables = model.trainable_variables
        grads = tape.gradient(total_loss, variables)
        optimizer.apply_gradients(zip(grads, variables))

        return total_loss, losses

    def save_and_report(template):
        """Save a checkpoint, then print ``template`` filled with its path."""
        manager.save()
        print(template.format(manager.latest_checkpoint))

    # Training loop.
    summary_writer = tf.summary.create_file_writer('./logs/' + cfg['sub_name'])
    start_step = checkpoint.step.numpy()
    remain_steps = max(steps_per_epoch * cfg['epoch'] - start_step, 0)
    prog_bar = ProgressBar(steps_per_epoch, start_step % steps_per_epoch)

    for inputs, labels in train_dataset.take(remain_steps):
        checkpoint.step.assign_add(1)
        steps = checkpoint.step.numpy()

        total_loss, losses = train_step(inputs, labels)

        current_epoch = ((steps - 1) // steps_per_epoch) + 1
        prog_bar.update("epoch={}/{}, loss={:.4f}, lr={:.1e}".format(
            current_epoch, cfg['epoch'],
            total_loss.numpy(),
            optimizer.lr(steps).numpy()))

        if steps % 10 == 0:
            with summary_writer.as_default():
                tf.summary.scalar('loss/total_loss', total_loss, step=steps)
                for term, value in losses.items():
                    tf.summary.scalar(
                        'loss/{}'.format(term), value, step=steps)
                tf.summary.scalar(
                    'learning_rate', optimizer.lr(steps), step=steps)

        if steps % cfg['save_steps'] == 0:
            save_and_report("\n[*] save ckpt file at {}")

    save_and_report("\n[*] training done! save ckpt file at {}")
示例#2
0
import time
import cv2
import numpy as np
from modules.dataset import load_tfrecord_dataset
from modules.anchor import prior_box, decode_tf
from modules.utils import draw_bbox_landm, draw_anchor


# Data-loading switches.
using_bin = True
batch_size = 1

# Prior-box configuration for a square img_dim x img_dim input.
img_dim = 640
min_sizes = [[16, 32], [64, 128], [256, 512]]
steps = [8, 16, 32]
clip = False
priors = prior_box((img_dim, img_dim), min_sizes, steps, clip)

# Inspection settings.
visualization = True  # set to False for time cost estimation
using_encoding = True  # batch size should be 1 when this is False
variances = [0.1, 0.2]
match_thresh = 0.45
ignore_thresh = 0.3
num_samples = 100

# Pick the TFRecord file that matches the `using_bin` switch.
tfrecord_name = ('./data/widerface_train_bin.tfrecord' if using_bin
                 else './data/widerface_train.tfrecord')

train_dataset = load_tfrecord_dataset(
    tfrecord_name, batch_size, img_dim=640, using_bin=using_bin,
def main(_):
    """Iterate the dataset and dump annotated samples as PNG files.

    Builds prior boxes for a 640x640 input, loads the dataset selected by
    ``FLAGS.dataset_path`` / ``FLAGS.split`` and prints the shape of every
    batch. When ``FLAGS.visualization`` is set, the first image of each batch
    is annotated and written to ``FLAGS.output_path/<idx>.png``:

    * without encoding, every label row is drawn directly;
    * with encoding, labels are decoded against the priors and only the
      priors whose last column equals 1 are drawn, together with the anchor.

    Args:
        _: Unused.
    """
    # Local import: the file's import header is not guaranteed to provide it.
    import os

    min_sizes = [[16, 32], [64, 128], [256, 512]]
    steps = [8, 16, 32]
    clip = False

    img_dim = 640
    priors = prior_box((img_dim, img_dim), min_sizes, steps, clip)

    variances = [0.1, 0.2]
    match_thresh = 0.45
    ignore_thresh = 0.3
    batch_size = 1
    shuffle = True
    using_flip = True
    using_distort = True
    using_bin = True
    buffer_size = 4000
    number_cycles = 2
    threads = 2

    check_dataset = load_tfrecord_dataset(dataset_root=FLAGS.dataset_path,
                                          split=FLAGS.split,
                                          threads=threads,
                                          number_cycles=number_cycles,
                                          batch_size=batch_size,
                                          hvd=[],
                                          img_dim=img_dim,
                                          using_bin=using_bin,
                                          using_flip=using_flip,
                                          using_distort=using_distort,
                                          using_encoding=FLAGS.using_encoding,
                                          priors=priors,
                                          match_thresh=match_thresh,
                                          ignore_thresh=ignore_thresh,
                                          variances=variances,
                                          shuffle=shuffle,
                                          buffer_size=buffer_size)

    if FLAGS.visualization and FLAGS.output_path:
        # cv2.imwrite does not create missing directories; it just returns
        # False, which would silently drop every image.
        os.makedirs(FLAGS.output_path, exist_ok=True)

    for idx, (inputs, labels, _unused) in enumerate(check_dataset):
        print("{} inputs:".format(idx), inputs.shape, "labels:", labels.shape)

        if not FLAGS.visualization:
            continue

        # Only the first image of the batch is visualized.
        img = np.clip(inputs.numpy()[0], 0, 255).astype(np.uint8)
        if not FLAGS.using_encoding:
            # labels includes loc, landm, landm_valid.
            for target in labels.numpy()[0]:
                draw_bbox_landm(img, target, img_dim, img_dim)
        else:
            # labels includes loc, landm, landm_valid, conf.
            targets = decode_tf(labels[0], priors, variances=variances).numpy()
            for prior_index, target in enumerate(targets):
                # Skip priors whose last column is not exactly 1.
                if target[-1] != 1:
                    continue

                draw_bbox_landm(img, target, img_dim, img_dim)
                draw_anchor(img, priors[prior_index], img_dim, img_dim)

        # Channel order is reversed before writing (cv2 expects BGR).
        out_file = '{}/{}.png'.format(FLAGS.output_path, str(idx))
        if not cv2.imwrite(out_file, img[:, :, ::-1]):
            print("[!] failed to write {}".format(out_file))
def main(_):
    """Iterate up to 100 training batches, optionally showing each sample.

    Builds prior boxes for a 640x640 input, loads the TFRecord chosen by
    ``FLAGS.using_bin`` and prints the shape of every batch. When
    ``FLAGS.visualization`` is set, the first image of each batch is annotated
    and shown in an OpenCV window; pressing ``q`` terminates the program, any
    other key advances. The measured throughput is printed at the end.

    Args:
        _: Unused.

    Raises:
        AssertionError: If ``FLAGS.using_encoding`` is set and
            ``FLAGS.batch_size`` is not 1.
        SystemExit: When ``q`` is pressed in the preview window.
    """
    # Local import: the file's import header is not guaranteed to provide it.
    import sys

    min_sizes = [[16, 32], [64, 128], [256, 512]]
    steps = [8, 16, 32]
    clip = False

    img_dim = 640
    priors = prior_box((img_dim, img_dim), min_sizes, steps, clip)

    variances = [0.1, 0.2]
    match_thresh = 0.45
    ignore_thresh = 0.3
    num_samples = 100

    # NOTE(review): it is unclear why batching is restricted in the encoded
    # case rather than the un-encoded one -- confirm the intended condition
    # against load_tfrecord_dataset before relying on it.
    if FLAGS.using_encoding:
        assert FLAGS.batch_size == 1

    if FLAGS.using_bin:
        tfrecord_name = './data/widerface_train_bin.tfrecord'
    else:
        tfrecord_name = './data/widerface_train.tfrecord'

    # img_dim is passed through (not a second literal) so the dataset and the
    # priors above can never disagree about the input size.
    train_dataset = load_tfrecord_dataset(tfrecord_name,
                                          FLAGS.batch_size,
                                          img_dim=img_dim,
                                          using_bin=FLAGS.using_bin,
                                          using_flip=True,
                                          using_distort=False,
                                          using_encoding=FLAGS.using_encoding,
                                          priors=priors,
                                          match_thresh=match_thresh,
                                          ignore_thresh=ignore_thresh,
                                          variances=variances,
                                          shuffle=False)

    start_time = time.time()
    processed = 0  # batches actually consumed; may be fewer than num_samples
    for idx, (inputs, labels) in enumerate(train_dataset.take(num_samples)):
        processed = idx + 1
        print("{} inputs:".format(idx), inputs.shape, "labels:", labels.shape)

        if not FLAGS.visualization:
            continue

        # Only the first image of the batch is visualized.
        img = np.clip(inputs.numpy()[0], 0, 255).astype(np.uint8)
        if not FLAGS.using_encoding:
            # labels includes loc, landm, landm_valid.
            for target in labels.numpy()[0]:
                draw_bbox_landm(img, target, img_dim, img_dim)
        else:
            # labels includes loc, landm, landm_valid, conf.
            targets = decode_tf(labels[0], priors, variances=variances).numpy()
            for prior_index, target in enumerate(targets):
                # Skip priors whose last column is not exactly 1.
                if target[-1] != 1:
                    continue

                draw_bbox_landm(img, target, img_dim, img_dim)
                draw_anchor(img, priors[prior_index], img_dim, img_dim)

        cv2.imshow('img', cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
        if cv2.waitKey(0) == ord('q'):
            # sys.exit() instead of the `site`-provided exit(), which is meant
            # for interactive use and may be absent (e.g. under `python -S`).
            sys.exit()

    # Throughput is computed from what was really iterated, so a dataset
    # shorter than num_samples does not inflate the figure.
    print("data fps: {:.2f}".format(processed / (time.time() - start_time)))
示例#5
0
    label[2] += label[0]
    label[3] += label[1]
    print(label)
    labels = [label]
    cv2.rectangle(img_raw, (label[0], label[1]), (label[2], label[3]), (0, 255, 0), 2)
    cv2.imshow('img_raw', img_raw)

    input_size = 320
    steps = [8, 16, 32]
    min_sizes = [[8, 16], [32, 64], [128, 256]]
    match_thresh = 0.5
    ignore_thresh = 0.3
    variances = [0.1, 0.2]

    # define prior box
    priors = prior_box((input_size, input_size), min_sizes, steps, True)

    # img = np.array(img_raw)
    labels = np.array(labels)
    img, labels = _transform_data(
        input_size, True, True, True, priors,
        match_thresh, ignore_thresh, variances)(img_raw, labels)
    img = np.array(img, np.int8)

    img_h, img_w, _ = img.shape
    # boxes = _decode_bbox(labels[:, :4], priors)
    for i in range(labels.shape[0]):
        if labels[i][-1] > 0:
            box = priors[i]
            x1 = int(box[0] * img_w - box[2] * img_w / 2)
            y1 = int(box[1] * img_h - box[3] * img_h / 2)
示例#6
0
def train_retinaface(cfg):
    """Train RetinaFace epoch by epoch, optionally distributed via Horovod.

    Builds the model, prior boxes, datasets, optimizer and loss from ``cfg``,
    restores the latest checkpoint if one exists, then runs ``cfg['epoch']``
    epochs. After every epoch the chief process (rank 0, or the only process
    when not distributed) saves a checkpoint and writes TensorBoard scalars;
    when ``cfg['evaluation_during_training']`` is set, every process also
    evaluates on up to 500 validation batches.

    A failure inside an epoch is reported (message and traceback) and training
    moves on to the next epoch instead of aborting.

    Args:
        cfg: Configuration mapping. Keys read here: ``distributed``,
            ``input_size``, ``min_sizes``, ``steps``, ``clip``,
            ``evaluation_during_training``, ``init_lr``, ``min_lr``,
            ``dataset_len``, ``batch_size``, ``lr_decay_epoch``, ``lr_rate``,
            ``warmup_epoch``, ``num_class``, ``output_path``, ``sub_name``
            and ``epoch``. The whole mapping is also handed to the model,
            the dataset loader and ``pred_to_outputs``.
    """
    # Local import: used only by the per-epoch error report below.
    import traceback

    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    distributed = cfg['distributed']
    if distributed:
        import horovod.tensorflow as hvd
        # Initialize Horovod
        hvd.init()
    else:
        # Helpers below receive an empty list as the "no Horovod" marker.
        hvd = []
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    # Exactly one process prints, checkpoints and writes summaries.
    is_chief = (not distributed) or hvd.rank() == 0

    reset_random_seeds()

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth(hvd)

    # define network
    model = RetinaFaceModel(cfg, training=True)
    model.summary(line_length=80)

    # define prior box
    priors = prior_box((cfg['input_size'], cfg['input_size']),
                       cfg['min_sizes'], cfg['steps'], cfg['clip'])

    # load dataset
    train_dataset = load_dataset(cfg, priors, 'train', hvd)
    if cfg['evaluation_during_training']:
        val_dataset = load_dataset(cfg, priors, 'val', [])

    # define optimizer
    if distributed:
        # Learning rates are scaled by the worker count, and each worker
        # runs proportionally fewer steps per epoch.
        init_lr = cfg['init_lr'] * hvd.size()
        min_lr = cfg['min_lr'] * hvd.size()
        steps_per_epoch = cfg['dataset_len'] // (cfg['batch_size'] * hvd.size())
    else:
        init_lr = cfg['init_lr']
        min_lr = cfg['min_lr']
        steps_per_epoch = cfg['dataset_len'] // cfg['batch_size']

    learning_rate = MultiStepWarmUpLR(
        initial_learning_rate=init_lr,
        lr_steps=[e * steps_per_epoch for e in cfg['lr_decay_epoch']],
        lr_rate=cfg['lr_rate'],
        warmup_steps=cfg['warmup_epoch'] * steps_per_epoch,
        min_lr=min_lr)

    optimizer = tf.keras.optimizers.SGD(
        learning_rate=learning_rate, momentum=0.9, nesterov=True)

    # define losses function
    multi_box_loss = MultiBoxLoss(num_class=cfg['num_class'])

    # load checkpoint
    checkpoint_dir = os.path.join(cfg['output_path'], 'checkpoints', cfg['sub_name'])
    checkpoint = tf.train.Checkpoint(epoch=tf.Variable(0, name='epoch'),
                                     optimizer=optimizer,
                                     model=model)
    manager = tf.train.CheckpointManager(checkpoint=checkpoint,
                                         directory=checkpoint_dir,
                                         max_to_keep=3)

    os.makedirs(checkpoint_dir, exist_ok=True)
    if is_chief:
        # Only the chief stores the config next to the checkpoints; letting
        # every rank write the same file concurrently can corrupt it.
        with open(os.path.join(checkpoint_dir, 'cfg.pickle'), 'wb') as handle:
            pickle.dump(cfg, handle, protocol=pickle.HIGHEST_PROTOCOL)

    if manager.latest_checkpoint:
        checkpoint.restore(manager.latest_checkpoint)
        print('[*] load ckpt from {}'.format(manager.latest_checkpoint))
    else:
        print("[*] training from scratch.")

    # define training step function
    @tf.function
    def train_step(inputs, labels, first_batch, first_epoch):
        """Run one optimization step; return (total loss, per-term losses).

        ``first_batch`` and ``first_epoch`` are Python booleans; together they
        select the single step after which Horovod broadcasts rank 0's state.
        """
        with tf.GradientTape() as tape:
            predictions = model(inputs, training=True)

            losses = {}
            losses['reg'] = tf.reduce_sum(model.losses)
            losses['loc'], losses['landm'], losses['class'] = \
                multi_box_loss(labels, predictions)
            total_loss = tf.add_n(list(losses.values()))

        if distributed:
            # Horovod: add Horovod Distributed GradientTape.
            tape = hvd.DistributedGradientTape(tape)

        grads = tape.gradient(total_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        if distributed and first_batch and first_epoch:
            # Broadcast after the first gradient step so that the optimizer
            # variables already exist on every worker.
            hvd.broadcast_variables(model.variables, root_rank=0)
            hvd.broadcast_variables(optimizer.variables(), root_rank=0)

        return total_loss, losses

    def test_step(inputs, img_name):
        """Predict on the first image of ``inputs``.

        Returns a float array with one ``[x, y, w, h, conf]`` row per
        detection, in pixels of the un-padded image. ``img_name`` is
        currently unused.
        """
        _, img_height_raw, img_width_raw, _ = inputs.shape
        # pad input image to avoid unmatched shape problem
        img = inputs[0].numpy()
        # NOTE: image '6_Funeral_Funeral_6_618' was once down-scaled by 0.5
        # at this point to avoid OOM; that workaround is disabled.
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))
        input_img = img[np.newaxis, ...]
        predictions = model(input_img, training=False)
        outputs = pred_to_outputs(cfg, predictions, input_img.shape).numpy()
        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        bboxs = outputs[:, :4]
        confs = outputs[:, -1]
        pred_boxes = []
        for box, conf in zip(bboxs, confs):
            # Scale the corner coordinates to pixels, then convert
            # (x1, y1, x2, y2) into (x, y, w, h).
            x1 = int(box[0] * img_width_raw)
            y1 = int(box[1] * img_height_raw)
            x2 = int(box[2] * img_width_raw)
            y2 = int(box[3] * img_height_raw)
            pred_boxes.append([x1, y1, x2 - x1, y2 - y1, conf])

        pred_boxes = np.array(pred_boxes).astype('float')

        return pred_boxes

    # training loop
    summary_writer = tf.summary.create_file_writer(os.path.join(cfg['output_path'], 'logs', cfg['sub_name']))
    prog_bar = ProgressBar(steps_per_epoch, 0)

    if cfg['evaluation_during_training']:
        widerface_eval_hard = WiderFaceEval(split='hard')

    # NOTE(review): the loop always runs cfg['epoch'] epochs, even when
    # checkpoint.epoch was restored to a non-zero value -- confirm whether a
    # resumed run is meant to skip the epochs that were already completed.
    for epoch in range(cfg['epoch']):
        try:
            actual_epoch = epoch + 1

            if is_chief:
                print("\nStart of epoch %d" % (actual_epoch,))

            checkpoint.epoch.assign_add(1)
            start_time = time.time()

            # Iterate over the batches of the dataset.
            for batch, (x_batch_train, y_batch_train, _img_name) in enumerate(train_dataset):
                total_loss, losses = train_step(x_batch_train, y_batch_train, batch == 0, epoch == 0)

                if not is_chief:
                    continue

                if distributed:
                    # A line every 100 batches instead of a progress bar.
                    if batch % 100 == 0:
                        print("batch={}/{},  epoch={}/{}, loss={:.4f}, lr={:.1e}".format(
                            batch, steps_per_epoch, checkpoint.epoch.numpy(), cfg['epoch'], total_loss.numpy(), optimizer._decayed_lr(tf.float32)))
                else:
                    prog_bar.update("epoch={}/{}, loss={:.4f}, lr={:.1e}".format(
                        checkpoint.epoch.numpy(), cfg['epoch'], total_loss.numpy(), optimizer._decayed_lr(tf.float32)))

            if is_chief:
                print("Time taken: %.2fs" % (time.time() - start_time))
                manager.save()
                print("\n[*] save ckpt file at {}".format(manager.latest_checkpoint))

            if cfg['evaluation_during_training']:
                # Run a validation loop at the end of each epoch.
                for batch, (x_batch_val, y_batch_val, img_name) in enumerate(val_dataset.take(500)):
                    raw_name = img_name.numpy()[0].decode()
                    if '/' in raw_name:
                        # Keep the second path component without extension.
                        img_name = raw_name.split('/')[1].split('.')[0]
                    else:
                        img_name = []
                    pred_boxes = test_step(x_batch_val, img_name)
                    gt_boxes = labels_to_boxes(y_batch_val)
                    widerface_eval_hard.update(pred_boxes, gt_boxes, img_name)

                ap_hard = widerface_eval_hard.calculate_ap()
                widerface_eval_hard.reset()

                if is_chief:
                    print("Validation acc: %.4f" % (float(ap_hard),))

            if is_chief:
                # One TensorBoard point per epoch, using the last batch's loss.
                with summary_writer.as_default():
                    tf.summary.scalar('loss/total_loss', total_loss, step=actual_epoch)
                    for k, l in losses.items():
                        tf.summary.scalar('loss/{}'.format(k), l, step=actual_epoch)
                    tf.summary.scalar('learning_rate', optimizer._decayed_lr(tf.float32), step=actual_epoch)
                    if cfg['evaluation_during_training']:
                        tf.summary.scalar('Val AP', ap_hard, step=actual_epoch)

        except Exception as E:
            # Best effort: keep training, but leave a full traceback so a
            # failing epoch can actually be diagnosed.
            print(E)
            traceback.print_exc()

    if is_chief:
        manager.save()
        print("\n[*] training done! save ckpt file at {}".format(
            manager.latest_checkpoint))