Example #1
def test_partial_trainability():
    #Define some model to start with
    model_keras = model_zoo.get_model("VGG_small_4", 32, 3, 2)
    #model_keras = model_zoo.get_model("LeNet5_do",32,3,2)

    #    model_keras1_c = model_keras1.get_config()
    #    model_keras2_c = model_keras2.get_config()

    Layer_types_orig = [layer.__class__.__name__ for layer in model_keras.layers]
    Layer_names_orig = [layer.name for layer in model_keras.layers]

    #Count Dense and Conv layers
    is_dense_or_conv = [
        layer_type in ["Dense", "Conv2D"] for layer_type in Layer_types_orig
    ]
    index = np.where(is_dense_or_conv)[0]
    #These are the Dense/Conv layer names that would appear in the table in AID
    Layer_names = np.array(Layer_names_orig)[index]

    #Provide the layer names to be changed and the corresponding trainabilities
    Layer_names = [Layer_names[0], Layer_names[1], Layer_names[2]]
    Layer_trainabilities = [0.2, 0.4, 0.6]

    model_keras_new = partial_trainability(model_keras, Layer_names,
                                           Layer_trainabilities)

    shape = list(model_keras_new.layers[0].input_shape)
    shape[0] = 1
    img_rndm = np.random.randint(low=0, high=255, size=shape)
    img_rndm = img_rndm.astype(float) / 255.0

    #Both models should perform identically
    p1 = model_keras.predict(img_rndm)
    p2 = model_keras_new.predict(img_rndm)
    assert np.allclose(p1, p2)

    #Also run a short fitting process
    shape_tr = list(shape)
    shape_tr[0] = 250
    train_x = np.random.randint(low=0, high=255, size=shape_tr)
    train_x = train_x.astype(float) / 255.0
    train_y = np.r_[np.repeat(0, 125), np.repeat(1, 125)]
    train_y_ = to_categorical(train_y, 2)  # * 2 - 1
    model_keras_new.fit(train_x, train_y_, epochs=1)
Example #2
def __init__(self, prefix, epoch, im_size=128, ctx_id=0):
    print('loading', prefix, epoch)
    if ctx_id >= 0:
        ctx = mx.gpu(ctx_id)
    else:
        ctx = mx.cpu()
    image_size = (im_size, im_size)
    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    all_layers = sym.get_internals()
    # Keep only the embedding output (fc1) of the pretrained symbol
    sym = all_layers['fc1_output']
    self.image_size = image_size
    model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
    model.bind(for_training=False, data_shapes=[
               ('data', (1, 3, image_size[0], image_size[1]))])
    model.set_params(arg_params, aux_params)
    self.model = model
    # RetinaFace face detector from the insightface model zoo
    self.detector = model_zoo.get_model('retinaface_mnet025_v1')
    self.detector.prepare(ctx_id=ctx_id)
Example #3
def init_model(self):
    # Load from a file if a path is given, otherwise look the name up via get_model
    if isinstance(self.model, str):
        if os.path.isfile(self.model):
            return Model(file=self.model,
                         binary=self.binary,
                         regularizers=self.regularizers,
                         class_weights=self.class_weights,
                         recompile=getattr(self, 'recompile', False),
                         optimizer=getattr(self, 'optimizer', None),
                         loss=getattr(self, 'loss', None),
                         metrics=getattr(self, 'metrics', None))
        else:
            self.model = get_model(name=self.model,
                                   in_shape=self.in_shape,
                                   n_classes=self.n_classes,
                                   backend=self.backend)
    return Model(keras_model=self.model,
                 binary=self.binary,
                 optimizer=self.optimizer,
                 loss=self.loss,
                 metrics=self.metrics,
                 regularizers=self.regularizers,
                 class_weights=self.class_weights)
Example #4
def train():
    train_input, train_target, val_input, val_target, test_input, test_target = load_data()

    img_size = train_input.shape[1]
    num_channels = train_input.shape[-1]
    num_classes = train_target.shape[1]
    num_samples = train_input.shape[0]

    if PRETRAIN:
        assert PREPROCESSING == 'auto'
    model, preprocess_input = get_model(img_size,
                                        num_channels,
                                        num_classes,
                                        model_name=MODEL_NAME,
                                        pretrain=PRETRAIN)

    if CHECKPOINT_FILE is not None:
        assert os.path.exists(CHECKPOINT_FILE)
        model.load_weights(CHECKPOINT_FILE)
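        # Checkpoints are named checkpoint_E<epoch>_F<score>.h5 (see save below),
        # so the token after 'E' is the last completed epoch; resume at the next one.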
        epoch = int(CHECKPOINT_FILE.split('_')[-2][1:]) + 1
        lr_init = learning_rate_function(LR_INIT, epoch)
        print("Loaded weights from {}".format(CHECKPOINT_FILE))
    else:
        epoch = 0
        lr_init = LR_INIT
    shape_str = "({0}, {0}, {1})".format(img_size, num_channels)
    print("Finished loading {0} with {1} input shape (pretrained: {2})".format(
        MODEL_NAME, shape_str, PRETRAIN))

    if PREPROCESSING == 'auto':
        train_input = preprocess_input(train_input)
        test_input = preprocess_input(test_input)
        val_input = preprocess_input(val_input)
    elif PREPROCESSING == 'standard':
        row_axis = 1
        col_axis = 2
        channel_axis = 3
        mean = np.mean(train_input, axis=(0, row_axis, col_axis))
        broadcast_shape = [1, 1, 1]
        broadcast_shape[channel_axis - 1] = train_input.shape[channel_axis]
        mean = np.reshape(mean, broadcast_shape)
        # x -= mean
        train_input -= mean
        test_input -= mean
        val_input -= mean
        std = np.std(train_input, axis=(0, row_axis, col_axis))
        broadcast_shape = [1, 1, 1]
        broadcast_shape[channel_axis - 1] = train_input.shape[channel_axis]
        std = np.reshape(std, broadcast_shape)
        # x /= (std + K.epsilon())
        train_input /= (std + EPSILON)
        test_input /= (std + EPSILON)
        val_input /= (std + EPSILON)
    elif PREPROCESSING == 'simple':
        train_input /= 255.
        test_input /= 255.
        val_input /= 255.
    print("Finished preprocessing data using method: {}".format(PREPROCESSING))

    optimizer = Adam(lr=lr_init, beta_1=0.9, beta_2=0.999)
    model.compile(optimizer=optimizer, loss='binary_crossentropy')
    print("Constructed and compiled model")

    if not TEST_ONLY:
        ###### Start training ######
        # _ marks an unused loop variable; the epoch counter is tracked separately
        for _ in range(EPOCHS_TRAINING):
            print()
            print("Starting training for epoch {}".format(epoch))
            shuffle([train_input, train_target])
            generator = get_data_gen(train_input,
                                     train_target,
                                     class_weights=None)
            K.set_value(model.optimizer.lr,
                        learning_rate_function(LR_INIT, epoch))

            progbar = generic_utils.Progbar(num_samples, interval=0.0)
            for input_batch, target_batch in generator:
                loss = model.train_on_batch(input_batch, target_batch)
                progbar.add(BATCH_SIZE, values=[("train_loss", loss)])
            print()

            val_auc_scores = test(model, val_input, val_target)
            mean_score = np.mean(val_auc_scores)
            if (epoch + 1) % CHECKPOINT_INTERVAL == 0:
                model_file = os.path.join(OUTDIR, "checkpoint_E%s_F%s.h5" % \
                                          (epoch, round(mean_score, 3)))
                model.save_weights(model_file)
                print("Saved weights to {}".format(model_file))

            epoch += 1
        ###### Finished training ######

    print("Test results:")
    test_auc_scores = test(model, test_input, test_target)
    print(test_auc_scores)
Example #5
    train_dataset = DistractorDataset(image_path=args.dataset,
                                      csv_data_file=os.path.join(args.csv_path, 'distractor_train.csv'),
                                      transform=train_transforms)
    val_dataset = DistractorDataset(image_path=args.dataset,
                                    csv_data_file=os.path.join(args.csv_path, 'distractor_test.csv'),
                                    transform=val_transforms)

    train_dataloader = DataLoader(train_dataset, batch_size=args.bs,
                                  shuffle=True, num_workers=args.n_threads)

    val_dataloader = DataLoader(val_dataset, batch_size=args.bs,
                                shuffle=False, num_workers=args.n_threads)


    net = model_zoo.get_model(args)
    net = nn.DataParallel(net)
    net.cuda()

    optimizer = optim.SGD(net.parameters(), lr=args.lr, weight_decay=args.wd)

    # Scheduler
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[50, 100], gamma=0.1)
    # Criterion
    criterion = nn.CrossEntropyLoss().cuda()
    
    train_losses = []
    val_losses = []
    val_acc = []
    prev_model = None
Example #6
def main():
    opt = parse_args()

    makedirs(opt.save_dir)

    filehandler = logging.FileHandler(
        os.path.join(opt.save_dir, opt.logging_file))
    streamhandler = logging.StreamHandler()
    logger = logging.getLogger('')
    logger.setLevel(logging.INFO)
    logger.addHandler(filehandler)
    logger.addHandler(streamhandler)
    logger.info(opt)

    sw = SummaryWriter(logdir=opt.save_dir, flush_secs=5, verbose=False)

    if opt.kvstore is not None:
        kv = mx.kvstore.create(opt.kvstore)
        logger.info(
            'Distributed training with %d workers and current rank is %d' %
            (kv.num_workers, kv.rank))
    if opt.use_amp:
        amp.init()

    batch_size = opt.batch_size
    classes = opt.num_classes

    num_gpus = opt.num_gpus
    batch_size *= max(1, num_gpus)
    logger.info('Total batch size is set to %d on %d GPUs' %
                (batch_size, num_gpus))
    context = [mx.gpu(i)
               for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers

    lr_decay = opt.lr_decay
    lr_decay_period = opt.lr_decay_period
    if opt.lr_decay_period > 0:
        lr_decay_epoch = list(
            range(lr_decay_period, opt.num_epochs, lr_decay_period))
    else:
        lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')]
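    # The decay schedule is applied after warmup, so shift the milestones back by warmup_epochs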
    lr_decay_epoch = [e - opt.warmup_epochs for e in lr_decay_epoch]

    if opt.slowfast:
        optimizer = 'nag'
    else:
        optimizer = 'sgd'

    if opt.clip_grad > 0:
        optimizer_params = {
            'learning_rate': opt.lr,
            'wd': opt.wd,
            'momentum': opt.momentum,
            'clip_gradient': opt.clip_grad
        }
    else:
        optimizer_params = {
            'learning_rate': opt.lr,
            'wd': opt.wd,
            'momentum': opt.momentum
        }

    if opt.dtype != 'float32':
        optimizer_params['multi_precision'] = True

    model_name = opt.model
    if opt.use_pretrained and len(opt.hashtag) > 0:
        opt.use_pretrained = opt.hashtag
    net = get_model(name=model_name,
                    nclass=classes,
                    pretrained=opt.use_pretrained,
                    use_tsn=opt.use_tsn,
                    num_segments=opt.num_segments,
                    partial_bn=opt.partial_bn,
                    input_channel=opt.input_channel)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    logger.info(net)

    if opt.resume_params != '':
        net.load_parameters(opt.resume_params, ctx=context)

    if opt.kvstore is not None:
        train_data, val_data, batch_fn = get_data_loader(
            opt, batch_size, num_workers, logger, kv)
    else:
        train_data, val_data, batch_fn = get_data_loader(
            opt, batch_size, num_workers, logger)

    num_batches = len(train_data)
    lr_scheduler = LRSequential([
        LRScheduler('linear',
                    base_lr=opt.warmup_lr,
                    target_lr=opt.lr,
                    nepochs=opt.warmup_epochs,
                    iters_per_epoch=num_batches),
        LRScheduler(opt.lr_mode,
                    base_lr=opt.lr,
                    target_lr=0,
                    nepochs=opt.num_epochs - opt.warmup_epochs,
                    iters_per_epoch=num_batches,
                    step_epoch=lr_decay_epoch,
                    step_factor=lr_decay,
                    power=2)
    ])
    optimizer_params['lr_scheduler'] = lr_scheduler

    train_metric = mx.metric.Accuracy()
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)

    def get_diff(input_data, new_length=5):
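        # Temporal frame differencing: output[t] = input[t+1] - input[t] along axis 3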
        assert input_data.shape[3] == new_length + 1
        fron = input_data.slice_axis(axis=3, begin=1,
                                     end=new_length + 1).copy()
        last = input_data.slice_axis(axis=3, begin=0, end=new_length)
        fron = fron - last
        return fron

    def test(ctx, val_data, kvstore=None):
        acc_top1.reset()
        acc_top5.reset()
        L = gluon.loss.SoftmaxCrossEntropyLoss()
        num_test_iter = len(val_data)
        val_loss_epoch = 0
        for i, batch in enumerate(val_data):
            data, label = batch_fn(batch, ctx)
            outputs = []
            for _, X in enumerate(data):
                #                X = X.reshape((-1,) + X.shape[2:])
                X = get_diff(X, new_length=opt.new_length)
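                # MXNet reshape shorthand: each -3 merges two consecutive axes, -2 keeps the rest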
                X = X.reshape((-3, -3, -2))
                pred = net(X.astype(opt.dtype, copy=False))
                outputs.append(pred)

            loss = [
                L(yhat, y.astype(opt.dtype, copy=False))
                for yhat, y in zip(outputs, label)
            ]

            acc_top1.update(label, outputs)
            acc_top5.update(label, outputs)

            val_loss_epoch += sum([l.mean().asscalar()
                                   for l in loss]) / len(loss)

            if opt.log_interval and not (i + 1) % opt.log_interval:
                logger.info('Batch [%04d]/[%04d]: evaluated' %
                            (i, num_test_iter))

        _, top1 = acc_top1.get()
        _, top5 = acc_top5.get()
        val_loss = val_loss_epoch / num_test_iter

        if kvstore is not None:
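            # Average across workers: push local values under fixed keys, pull back the
            # summed result, then divide by the number of workers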
            top1_nd = nd.zeros(1)
            top5_nd = nd.zeros(1)
            val_loss_nd = nd.zeros(1)
            kvstore.push(111111, nd.array(np.array([top1])))
            kvstore.pull(111111, out=top1_nd)
            kvstore.push(555555, nd.array(np.array([top5])))
            kvstore.pull(555555, out=top5_nd)
            kvstore.push(999999, nd.array(np.array([val_loss])))
            kvstore.pull(999999, out=val_loss_nd)
            top1 = top1_nd.asnumpy() / kvstore.num_workers
            top5 = top5_nd.asnumpy() / kvstore.num_workers
            val_loss = val_loss_nd.asnumpy() / kvstore.num_workers

        return (top1, top5, val_loss)

    def train(ctx):
        if isinstance(ctx, mx.Context):
            ctx = [ctx]

        if opt.no_wd:
            for k, v in net.collect_params('.*beta|.*gamma|.*bias').items():
                v.wd_mult = 0.0

        if opt.partial_bn:
            train_patterns = None
            if 'inceptionv3' in opt.model:
                train_patterns = '.*weight|.*bias|inception30_batchnorm0_gamma|inception30_batchnorm0_beta|inception30_batchnorm0_running_mean|inception30_batchnorm0_running_var'
            else:
                logger.info(
                    'Current model does not support partial batch normalization.'
                )

            if opt.kvstore is not None:
                trainer = gluon.Trainer(net.collect_params(train_patterns),
                                        optimizer,
                                        optimizer_params,
                                        kvstore=kv,
                                        update_on_kvstore=False)
            else:
                trainer = gluon.Trainer(net.collect_params(train_patterns),
                                        optimizer,
                                        optimizer_params,
                                        update_on_kvstore=False)
        else:
            if opt.kvstore is not None:
                trainer = gluon.Trainer(net.collect_params(),
                                        optimizer,
                                        optimizer_params,
                                        kvstore=kv,
                                        update_on_kvstore=False)
            else:
                trainer = gluon.Trainer(net.collect_params(),
                                        optimizer,
                                        optimizer_params,
                                        update_on_kvstore=False)

        if opt.accumulate > 1:
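            # Gradient accumulation: switch grad_req to 'add' so gradients sum over
            # opt.accumulate batches before each optimizer step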
            params = [
                p for p in net.collect_params().values()
                if p.grad_req != 'null'
            ]
            for p in params:
                p.grad_req = 'add'

        if opt.resume_states != '':
            trainer.load_states(opt.resume_states)

        if opt.use_amp:
            amp.init_trainer(trainer)

        L = gluon.loss.SoftmaxCrossEntropyLoss()

        best_val_score = 0
        lr_decay_count = 0

        for epoch in range(opt.resume_epoch, opt.num_epochs):
            tic = time.time()
            train_metric.reset()
            btic = time.time()
            num_train_iter = len(train_data)
            train_loss_epoch = 0
            train_loss_iter = 0

            for i, batch in enumerate(train_data):
                data, label = batch_fn(batch, ctx)

                with ag.record():
                    outputs = []
                    for _, X in enumerate(data):
                        #                        X = X.reshape((-1,) + X.shape[2:])
                        X = get_diff(X, new_length=opt.new_length)
                        X = X.reshape((-3, -3, -2))
                        pred = net(X.astype(opt.dtype, copy=False))
                        outputs.append(pred)
                    loss = [
                        L(yhat, y.astype(opt.dtype, copy=False))
                        for yhat, y in zip(outputs, label)
                    ]

                    if opt.use_amp:
                        with amp.scale_loss(loss, trainer) as scaled_loss:
                            ag.backward(scaled_loss)
                    else:
                        ag.backward(loss)

                if opt.accumulate > 1 and (i + 1) % opt.accumulate == 0:
                    if opt.kvstore is not None:
                        trainer.step(batch_size * kv.num_workers *
                                     opt.accumulate)
                    else:
                        trainer.step(batch_size * opt.accumulate)
                        net.collect_params().zero_grad()
                else:
                    if opt.kvstore is not None:
                        trainer.step(batch_size * kv.num_workers)
                    else:
                        trainer.step(batch_size)

                train_metric.update(label, outputs)
                train_loss_iter = sum([l.mean().asscalar()
                                       for l in loss]) / len(loss)
                train_loss_epoch += train_loss_iter

                train_metric_name, train_metric_score = train_metric.get()
                sw.add_scalar(tag='train_acc_top1_iter',
                              value=train_metric_score * 100,
                              global_step=epoch * num_train_iter + i)
                sw.add_scalar(tag='train_loss_iter',
                              value=train_loss_iter,
                              global_step=epoch * num_train_iter + i)
                sw.add_scalar(tag='learning_rate_iter',
                              value=trainer.learning_rate,
                              global_step=epoch * num_train_iter + i)

                if opt.log_interval and not (i + 1) % opt.log_interval:
                    logger.info(
                        'Epoch[%03d] Batch [%04d]/[%04d]\tSpeed: %f samples/sec\t %s=%f\t loss=%f\t lr=%f'
                        % (epoch, i, num_train_iter,
                           batch_size * opt.log_interval /
                           (time.time() - btic), train_metric_name,
                           train_metric_score * 100, train_loss_epoch /
                           (i + 1), trainer.learning_rate))
                    btic = time.time()

            train_metric_name, train_metric_score = train_metric.get()
            throughput = int(batch_size * i / (time.time() - tic))
            mx.ndarray.waitall()

            if opt.kvstore is not None and epoch == opt.resume_epoch:
                kv.init(111111, nd.zeros(1))
                kv.init(555555, nd.zeros(1))
                kv.init(999999, nd.zeros(1))

            if opt.kvstore is not None:
                acc_top1_val, acc_top5_val, loss_val = test(ctx, val_data, kv)
            else:
                acc_top1_val, acc_top5_val, loss_val = test(ctx, val_data)

            logger.info('[Epoch %03d] training: %s=%f\t loss=%f' %
                        (epoch, train_metric_name, train_metric_score * 100,
                         train_loss_epoch / num_train_iter))
            logger.info('[Epoch %03d] speed: %d samples/sec\ttime cost: %f' %
                        (epoch, throughput, time.time() - tic))
            logger.info(
                '[Epoch %03d] validation: acc-top1=%f acc-top5=%f loss=%f' %
                (epoch, acc_top1_val * 100, acc_top5_val * 100, loss_val))

            sw.add_scalar(tag='train_loss_epoch',
                          value=train_loss_epoch / num_train_iter,
                          global_step=epoch)
            sw.add_scalar(tag='val_loss_epoch',
                          value=loss_val,
                          global_step=epoch)
            sw.add_scalar(tag='val_acc_top1_epoch',
                          value=acc_top1_val * 100,
                          global_step=epoch)

            if acc_top1_val > best_val_score:
                best_val_score = acc_top1_val
                net.save_parameters('%s/%.4f-%s-%s-%03d-best.params' %
                                    (opt.save_dir, best_val_score, opt.dataset,
                                     model_name, epoch))
                trainer.save_states('%s/%.4f-%s-%s-%03d-best.states' %
                                    (opt.save_dir, best_val_score, opt.dataset,
                                     model_name, epoch))
            else:
                if opt.save_frequency and opt.save_dir and (
                        epoch + 1) % opt.save_frequency == 0:
                    net.save_parameters(
                        '%s/%s-%s-%03d.params' %
                        (opt.save_dir, opt.dataset, model_name, epoch))
                    trainer.save_states(
                        '%s/%s-%s-%03d.states' %
                        (opt.save_dir, opt.dataset, model_name, epoch))

        # save the last model
        net.save_parameters(
            '%s/%s-%s-%03d.params' %
            (opt.save_dir, opt.dataset, model_name, opt.num_epochs - 1))
        trainer.save_states(
            '%s/%s-%s-%03d.states' %
            (opt.save_dir, opt.dataset, model_name, opt.num_epochs - 1))

    if opt.mode == 'hybrid':
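        # Compile the Gluon network into a symbolic graph with static memory allocation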
        net.hybridize(static_alloc=True, static_shape=True)

    train(context)
    sw.close()
Example #7
optimizer_params = {
    'learning_rate': opt.lr,
    'wd': opt.wd,
    'momentum': opt.momentum
}

if opt.dtype != 'float32':
    optimizer_params['multi_precision'] = True

model_name = opt.model

net = get_model(name=model_name,
                nclass=classes,
                pretrained=opt.use_pretrained,
                use_tsn=True,
                num_segments=opt.num_segments,
                partial_bn=opt.partial_bn,
                input_channel=opt.input_channel)
net.cast(opt.dtype)
net.collect_params().reset_ctx(ctx)
print(net)
net.hybridize(static_alloc=True, static_shape=True)
train_data, val_data, batch_fn = get_data_loader(opt,
                                                 batch_size,
                                                 num_workers,
                                                 logger=None)

# Stochastic gradient descent
optimizer = 'sgd'
# Set parameters