def main(arg=None):
    images, labels = image_loader.read_batch()
    logits = inference.inference(images)
    loss = ls.loss(logits, labels)

    saver = tf.train.Saver()

    summary_opt = tf.summary.merge_all()

    init = tf.global_variables_initializer()

    sess = tf.InteractiveSession()

    sess.run(init)
    tf.train.start_queue_runners(sess=sess)

    summary_writer = tf.summary.FileWriter(flag.log_dir, graph=sess.graph)

    train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    for i in range(5001):
        if i % 100 == 0:
            print('step {0}, loss: {1}'.format(i, sess.run(ls.get_loss())))
        sess.run(train_step)
        if i % 50 == 0:
            summary_str = sess.run(summary_opt)
            summary_writer.add_summary(summary_str, i)

    saver.save(sess=sess, save_path=flag.save_dir)
    summary_writer.close()
Example No. 2
def train(arguments):
    dls = UpscalerDataset(arguments.dataset_path,
                          arguments.resized_dataset_path)
    feat_loss = get_loss(arguments.loss_model_path)

    learner = unet_learner(dls.get_dataloaders(bs, size),
                           arch,
                           loss_func=feat_loss,
                           metrics=LossMetrics(feat_loss.metric_names),
                           blur=True,
                           norm_type=NormType.Weight)

    # stage 1
    print('stage 1')
    do_fit(learner, wd, slice(lr * 10))
    learner.unfreeze()
    do_fit(learner, wd, slice(1e-5, lr))

    # checkpoint
    learner.save('checkpoint')
    learner.load('checkpoint')
    # stage 2
    print('stage 2')
    del learner.dls
    learner.dls = dls.get_dataloaders(5, size * 2)
    learner.freeze()
    do_fit(learner, wd, slice(lr))
    learner.unfreeze()
    do_fit(learner, wd, slice(1e-6, 1e-4), pct_start=0.3)

    # save
    torch.save({
        'model': learner.model,
        'meta': make_meta(arguments)
    }, arguments.output)
Example No. 3
def initialize_training(model_id, save_path):
    # The get_model method is in charge of
    # setting the same seed for each loaded model.
    # Thus, for each inner loop we train the same initialized model
    # Load model_0 to continue training with it
    if str(model_id).lower() == 'same':
        final_slash = save_path.rindex('/')
        model_0_path = os.path.join(save_path[:final_slash],
                                    "Model_0/model.pt")
        model = torch.load(model_0_path, map_location=parameters.device)
    else:
        model = get_model(model_id).to(parameters.device)

    print(model)
    writer = SummaryWriter(save_path)
    writer.add_scalar('batch_size', parameters.BATCH_SIZE)
    writer.add_scalar(
        'weight_decay',
        parameters.HYPERPARAMETERS[parameters.MODEL_ID]['l2_reg'])

    loss_func, include_boundaries = get_loss()

    # Honestly probably do not need to have hyper-parameters per model, but leave it for now.
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=parameters.HYPERPARAMETERS[parameters.MODEL_ID]['lr'],
        weight_decay=parameters.HYPERPARAMETERS[parameters.MODEL_ID]['l2_reg'])
    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer,
        parameters.HYPERPARAMETERS[parameters.MODEL_ID]['lr_decay_step'],
        gamma=parameters.HYPERPARAMETERS[parameters.MODEL_ID]['lr_decay'])

    return model, loss_func, include_boundaries, optimizer, scheduler, writer
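
The comment at the top of initialize_training() relies on get_model() fixing the random seeds before building the network, so every inner loop starts from an identically initialized model. The snippet below is only a hedged sketch of that idea, not code from the original repository; the seed value and the stand-in architecture are assumptions.

import random

import numpy as np
import torch
import torch.nn as nn


def get_model(model_id, seed=8):
    # Fix all RNG sources so repeated calls build identically initialized weights.
    # The seed value is an illustrative assumption.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Stand-in architecture; the real get_model() selects a network based on model_id.
    return nn.Sequential(nn.Linear(77, 128), nn.ReLU(), nn.Linear(128, 1))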
Example No. 4
def train(args):
    args, model, iters, vocab, ckpt_available = get_model_ckpt(args)

    if ckpt_available:
        print("loaded checkpoint {}".format(args.ckpt_name))
    loss_fn = get_loss(args, vocab)
    optimizer = get_optimizer(args, model)

    trainer = get_trainer(args, model, loss_fn, optimizer)
    metrics = get_metrics(args, vocab)
    evaluator = get_evaluator(args, model, loss_fn, metrics)

    logger = get_logger(args)
    @trainer.on(Events.STARTED)
    def on_training_started(engine):
        print("Begin Training")

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_iter_results(engine):
        log_results(logger, 'train/iter', engine.state, engine.state.iteration)

    @trainer.on(Events.EPOCH_COMPLETED)
    def evaluate_epoch(engine):
        log_results(logger, 'train/epoch', engine.state, engine.state.epoch)
        state = evaluate_once(evaluator, iterator=iters['val'])
        log_results(logger, 'valid/epoch', state, engine.state.epoch)
        save_ckpt(args, engine.state.epoch, engine.state.metrics['loss'], model, vocab)

    trainer.run(iters['train'], max_epochs=args.max_epochs)
Example No. 5
def main():
    # Parse command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("command", help="Either 'plot' or 'write'.")
    parser.add_argument("folder", help="Folder containing log file.")
    args = parser.parse_args()

    # Verify command
    if args.command not in ['plot', 'write']:
        raise Exception("Unknown command {}".format(args.command))

    # Get log filepath
    log_filepath = os.path.join(args.folder, "train.log")

    # Get loss values from file
    with open(log_filepath, "r") as log_file:
        step_loss = loss.get_loss(log_file)

    # Branch by command
    if args.command == "write":
        # Output the step loss as a CSV
        step_loss.to_csv(os.path.join(args.folder, "train.csv"), header=True)
    elif args.command == "plot":
        # Show plot of loss
        step_loss.plot()
        plt.ylim(0)
        plt.show()
Example No. 6
def main():
    '''
    Main Function

    '''

    #Set up the Arguments, Tensorboard Writer, Dataloader, Loss Fn, Optimizer
    assert_and_infer_cfg(args)
    writer = prep_experiment(args, parser)
    train_loader, val_loader, train_obj = datasets.setup_loaders(args)
    criterion, criterion_val = loss.get_loss(args)
    net = network.get_net(args, criterion)
    optim, scheduler = optimizer.get_optimizer(args, net)

    torch.cuda.empty_cache()

    if args.evaluate:
        # Early evaluation for benchmarking
        validate(val_loader, net, criterion_val, optim, 0, writer)  # epoch 0: no training yet
        evaluate(val_loader, net)
        return

    #Main Loop
    for epoch in range(args.start_epoch, args.max_epoch):
        # Update EPOCH CTR
        cfg.immutable(False)
        cfg.EPOCH = epoch
        cfg.immutable(True)

        scheduler.step()

        train(train_loader, net, criterion, optim, epoch, writer)
        validate(val_loader, net, criterion_val, optim, epoch, writer)
Example No. 7
def train(args):
    iters, vocab = get_iterator(args)

    model = get_model(args, vocab)
    loss_fn = get_loss(args, vocab)
    optimizer = get_optimizer(args, model)

    trainer = get_trainer(args, model, loss_fn, optimizer)
    metrics = get_metrics(args, vocab)
    evaluator = get_evaluator(args, model, loss_fn, metrics)

    logger = get_logger(args)
    @trainer.on(Events.STARTED)
    def on_training_started(engine):
        print("Begin Training")

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_iter_results(engine):
        log_results(logger, 'train/iter', engine.state, engine.state.iteration)

    @trainer.on(Events.EPOCH_COMPLETED)
    def evaluate_epoch(engine):
        log_results(logger, 'train/epoch', engine.state, engine.state.epoch)
        state = evaluate_once(evaluator, iterator=iters['val'])
        log_results(logger, 'valid/epoch', state, engine.state.epoch)

    trainer.run(iters['train'], max_epochs=args.max_epochs)
Example No. 8
def main():
    """
    Main Function
    """
    # Set up the Arguments, Tensorboard Writer, Dataloader, Loss Fn, Optimizer
    assert_and_infer_cfg(args)
    prep_experiment(args, parser)
    writer = None

    _, _, _, extra_val_loaders, _ = datasets.setup_loaders(args)

    criterion, criterion_val = loss.get_loss(args)
    criterion_aux = loss.get_loss_aux(args)
    net = network.get_net(args, criterion, criterion_aux)

    optim, scheduler = optimizer.get_optimizer(args, net)

    net = torch.nn.SyncBatchNorm.convert_sync_batchnorm(net)
    net = network.warp_network_in_dataparallel(net, args.local_rank)
    epoch = 0
    i = 0

    if args.snapshot:
        epoch, mean_iu = optimizer.load_weights(net, optim, scheduler,
                            args.snapshot, args.restore_optimizer)

    print("#### iteration", i)
    torch.cuda.empty_cache()
    # Main Loop
    # for epoch in range(args.start_epoch, args.max_epoch):

    for dataset, val_loader in extra_val_loaders.items():
        print("Extra validating... This won't save pth file")
        validate(val_loader, dataset, net, criterion_val, optim, scheduler, epoch, writer, i, save_pth=False)
Example No. 9
    def forward(self, document_embeddings, question_embeddings, answer,
                document):
        U = self.encoder(document_embeddings, question_embeddings)
        start_logits, end_logits = self.decoder(U)

        loss, baseline, em = get_loss(start_logits, end_logits, answer,
                                      document, self.sigma_ce[0],
                                      self.sigma_rl[0])

        return loss, baseline, em
Example No. 10
    def update(self, input, label):
        predicted = self.classifier(input)

        loss = get_loss(predicted, label, avg_meters=self.avg_meters)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        return {'predicted': predicted}
Example No. 11
def train(args):
    args, model, iters, vocab, ckpt_available = get_model_ckpt(args)

    if ckpt_available:
        print("loaded checkpoint {}".format(args.ckpt_name))
    loss_fn = get_loss(args, vocab)
    optimizer = get_optimizer(args, model)

    pretrainer = get_pretrainer(args, model, loss_fn, optimizer)
    trainer = get_trainer(args, model, loss_fn, optimizer)

    metrics = get_metrics(args, vocab)
    evaluator = get_evaluator(args, model, loss_fn, metrics)

    logger = get_logger(args)

    @pretrainer.on(Events.STARTED)
    def on_training_started(engine):
        print("Begin Pretraining")

    @pretrainer.on(Events.ITERATION_COMPLETED)
    def log_iter_results(engine):
        log_results(logger, 'pretrain/iter', engine.state, engine.state.iteration)

    @pretrainer.on(Events.EPOCH_COMPLETED)
    def evaluate_epoch(engine):
        log_results(logger, 'pretrain/epoch', engine.state, engine.state.epoch)

    """
    @pretrainer.on(Events.COMPLETED)
    def unfreeze_language_model(engine):
        for param in model.module.language_model.base_model.parameters():
            param.requires_grad = True
    """

    @trainer.on(Events.STARTED)
    def on_training_started(engine):
        print("Begin Training")

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_iter_results(engine):
        log_results(logger, 'train/iter', engine.state, engine.state.iteration)

    @trainer.on(Events.EPOCH_COMPLETED)
    def evaluate_epoch(engine):
        log_results(logger, 'train/epoch', engine.state, engine.state.epoch)
        state = evaluate_once(evaluator, iterator=iters['val'])
        log_results(logger, 'valid/epoch', state, engine.state.epoch)
        log_results_cmd('valid/epoch', state, engine.state.epoch)
        save_ckpt(args, engine.state.epoch, engine.state.metrics['loss'], model, vocab)
        evaluate_by_logic_level(args, model, iterator=iters['val'])

    if args.pretrain_epochs > 0:
        pretrainer.run(iters['pretrain'], max_epochs=args.pretrain_epochs) 
    trainer.run(iters['train'], max_epochs=args.max_epochs)
Example No. 12
def evaluate(args):
    args, model, iters, vocab, ckpt_available = get_model_ckpt(args)
    if ckpt_available:
        print("loaded checkpoint {}".format(args.ckpt_name))
    loss_fn = get_loss(args, vocab)

    metrics = get_metrics(args, vocab)
    evaluator = get_evaluator(args, model, loss_fn, metrics)

    state = evaluate_once(evaluator, iterator=iters['val'])
    log_results_cmd('valid/epoch', state, 0)
Example No. 13
def main():

    """
    Main Function
    """

    # Set up the Arguments, Tensorboard Writer, Dataloader, Loss Fn, Optimizer
    assert_and_infer_cfg(args)
    writer = prep_experiment(args, parser)
    train_loader, val_loader, train_obj = datasets.setup_loaders(args)
    criterion, criterion_val = loss.get_loss(args)
    net = network.get_net(args, criterion)
    optim, scheduler = optimizer.get_optimizer(args, net)

    if args.fix_bn:
        net.apply(set_bn_eval)
        print("Fix bn for finetuning")

    if args.fp16:
        net, optim = amp.initialize(net, optim, opt_level="O1")

    net = network.wrap_network_in_dataparallel(net, args.apex)
    if args.snapshot:
        optimizer.load_weights(net, optim,
                               args.snapshot, args.restore_optimizer)
    if args.evaluateF:
        assert args.snapshot is not None, "must load weights for evaluation"
        evaluate(val_loader, net, args)
        return
    torch.cuda.empty_cache()
    # Main Loop
    for epoch in range(args.start_epoch, args.max_epoch):
        # Update EPOCH CTR
        cfg.immutable(False)
        cfg.EPOCH = epoch
        cfg.immutable(True)

        scheduler.step()
        train(train_loader, net, optim, epoch, writer)
        if args.apex:
            train_loader.sampler.set_epoch(epoch + 1)
        validate(val_loader, net, criterion_val,
                 optim, epoch, writer)
        if args.class_uniform_pct:
            if epoch >= args.max_cu_epoch:
                train_obj.build_epoch(cut=True)
                if args.apex:
                    train_loader.sampler.set_num_samples()
            else:
                train_obj.build_epoch()
Example No. 14
def train():
    filename_queue = tf.train.string_input_producer([tfrecord_filename])
    images, labels = reader.read_and_decode(filename_queue=filename_queue,
                                            batch_size=FLAGS.batch_size)
    with tf.device('/gpu:0'):
        global_step = slim.create_global_step()
    with tf.device('/cpu:0'):
        num_batches_per_epoch = FLAGS.data_size / FLAGS.batch_size
        decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
        lrn_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                              global_step,
                                              decay_steps,
                                              LEARNING_RATE_DECAY_FACTOR,
                                              staircase=True)
    with tf.device('/gpu:0'):
        fc8 = alexnet.model(input_data=images,
                            n_classes=FLAGS.n_classes,
                            keep_prob=FLAGS.keep_prob)
        total_loss = loss.get_loss(input_data=fc8, grdtruth=labels)
        loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
        losses = tf.get_collection('losses')
        loss_averages_op = loss_averages.apply(losses + [total_loss])
        with tf.control_dependencies([loss_averages_op]):
            optimizer = tf.train.AdamOptimizer(lrn_rate)
        train_step = optimizer.minimize(loss=total_loss,
                                        global_step=global_step)
        variable_averages = tf.train.ExponentialMovingAverage(
            MOVING_AVERAGE_DECAY, global_step)
        variable_averages_op = variable_averages.apply(
            tf.trainable_variables())
        with tf.control_dependencies([train_step, variable_averages_op]):
            train_step = tf.no_op(name='train')
        prediction = alexnet.classify(fc8)
    with tf.Session() as sess:
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        for j in range(FLAGS.max_epoch):
            for i in range(200):
                _, loss_value, pre, grd = sess.run(
                    [train_step, total_loss, prediction, labels])
                print('{} epoch {} minibatch: {}'.format(j + 1, i, loss_value))
                print('{} epoch {} minibatch: {} {}'.format(j + 1, i, pre, grd))
        coord.request_stop()
        coord.join(threads)
Example No. 15
def infer(args):
    args, model, iters, vocab, ckpt_available = get_model_ckpt(args)
    if ckpt_available:
        print("loaded checkpoint {}".format(args.ckpt_name))
    loss_fn = get_loss(args, vocab)

    evaluator = get_evaluator(args, model, loss_fn)

    answers = evaluate_once(evaluator, iterator=iters['test'])
    keys = sorted(list(answers.keys()))
    answers = [{"correct_idx": answers[key], "qid": key} for key in keys]
    path = str(args.data_path.parent / 'answers.json')
    with open(path, 'w') as f:
        json.dump(answers, f, indent=4)

    print("saved outcome at {}".format(path))
Example No. 16
def main(eval_args=None):
    '''
    Main Function

    '''
    # Parse arguments from rest_communication.py
    #args = parser.parse_args(eval_args)
    if args.snapshot is None:
        args.snapshot = "checkpoints/best_cityscapes_checkpoint.pth"

    train_loader, val_loader, train_obj = datasets.setup_loaders(args)
    criterion, criterion_val = loss.get_loss(args)
    net = network.get_net(args, criterion)
    net = restore_snapshot(net)
    torch.cuda.empty_cache()

    return evaluate(val_loader, net)
Example No. 17
def train(args):
    args, model, iters, ckpt_available = get_model_ckpt(args)

    if ckpt_available:
        print("loaded checkpoint {}".format(args.ckpt_name))
    loss_fn = get_loss(args)
    optimizer = get_optimizer(args, model)

    trainer = get_trainer(args, model, loss_fn, optimizer)

    metrics = get_metrics(args)
    evaluator = get_evaluator(args, model, loss_fn, metrics)

    logger = get_logger(args)


    trainer.run(iters['train'], max_epochs=args.max_epochs)
Example No. 18
def main():
    # Create dataset
    content_ds = ImageDataset(CONTENT_DS_PATH, batch_size=BATCH_SIZE)
    style_ds = ImageDataset(STYLE_DS_PATH, batch_size=BATCH_SIZE)

    # Build model
    vgg19 = build_vgg19(INPUT_SHAPE, VGG_PATH)  # encoder
    decoder = build_decoder(vgg19.output.shape[1:])  # input shape == encoder output shape
    model = build_model(vgg19, decoder, INPUT_SHAPE)

    #model.load_weights(SAVE_PATH)

    # Get loss
    vgg19_relus = build_vgg19_relus(vgg19)
    loss = get_loss(vgg19, vgg19_relus, epsilon=EPSILON, style_weight=STYLE_WEIGHT, color_weight=COLOR_LOSS)

    # Train model
    train(model, content_ds, style_ds, loss, n_epochs=EPOCHS, save_path=SAVE_PATH)
Example No. 19
def pretrain(args):
    tf = get_transform(args, 'none')
    ds = get_dataset(args, tf, 'none')

    args, model, ckpt_available = get_model_ckpt(args)

    if ckpt_available:
        print("loaded checkpoint {} in pretraining stage".format(args.ckpt_name))
    loss_fn = get_loss(args)
    sub_optimizer = get_sub_optimizer(args, model)
    optimizer = get_optimizer(args, sub_optimizer)
    scheduler = get_scheduler(args, optimizer)

    # setup nvidia/apex amp
    # model, optimizer = amp.initialize(model, optimizer, opt_level=args.fp16_opt_level, num_losses=1)
    # model = idist.auto_model(model)

    trainer = get_trainer(args, model, loss_fn, optimizer, scheduler)
Example No. 20
def create_model(config):
    model_checkpoint_path = config.project_config['model_checkpoint_path']
    model_name = config.model_name

    model_path = f'{model_checkpoint_path}/{model_name}'

    model, loss_inp = DeepICPBuilder(config.net_config).build()
    loss = get_loss(config.train_config["loss_alpha"])
    source_pts, target_pts, GT = loss_inp
    model.add_loss(loss(source_pts, target_pts, GT))

    optimizer = Adam(learning_rate=config.train_config['learning_rate'])
    model.compile(optimizer=optimizer)

    save_model(model, config)
    config.save_current_model_net_config()

    return model
Example No. 21
def train():
    filename_queue = tf.train.string_input_producer([tfrecord_filename])
    images, labels = reader.read_and_decode(filename_queue=filename_queue,
                                            batch_size=FLAGS.batch_size)
    with tf.device('/gpu:0'):
        global_step = slim.create_global_step()
    with tf.device('/cpu:0'):
        num_batches_per_epoch = FLAGS.data_size / FLAGS.batch_size
        decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
        lrn_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                              global_step,
                                              decay_steps,
                                              LEARNING_RATE_DECAY_FACTOR,
                                              staircase=True)
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = 0.6
    with tf.device('/gpu:0'):
        optimizer = tf.train.AdamOptimizer(lrn_rate)
        fc8 = vgg_m.model(input_data=images,
                          n_classes=FLAGS.n_classes,
                          keep_prob=FLAGS.keep_prob)
        losses = loss.get_loss(input_data=fc8, grdtruth=labels)
        train_step = optimizer.minimize(loss=losses, global_step=global_step)
        prediction = vgg_m.classify(fc8)
    with tf.device('/cpu:0'):
        saver = tf.train.Saver()
    with tf.Session(config=config) as sess:
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        for j in range(FLAGS.max_epoch):
            for i in range(200):
                _, loss_value, pre, grd = sess.run(
                    [train_step, losses, prediction, labels])
                print('{} epoch {} minibatch: {}'.format(j + 1, i, loss_value))
                print('{} epoch {} minibatch: {} {}'.format(j + 1, i, pre, grd))
        save_path = saver.save(sess, model_path)
        print("Model saved in file:%s" % save_path)
        coord.request_stop()
        coord.join(threads)
Example No. 22
def train(epoch):
    print('#' * 15)
    print('Epoch {}, Latent Size {}'.format(epoch, model.latent_size))
    print('#' * 15)
    model.train()
    for index, (x, _) in enumerate(loader):
        x = x.mean(dim=1, keepdim=True).to(device)
        optimizer.zero_grad()
        x_generated, mu, logvar = model(x)
        loss = get_loss(x_generated, x, mu, logvar)
        loss.backward()
        optimizer.step()
        if index % 100 == 0:
            print('Loss at iteration {0}: {1:.4f}'.format(index, loss.item()))
    if epoch == 4:
        filename = 'epoch{}_ls{}.pkl'.format(epoch, model.latent_size)
        torch.save(model.state_dict(), os.path.join(weights_dir, filename))
    if epoch < 4:
        scheduler.step()
Example No. 23
def pretrain(args):
    tf = get_transform(args, 'none')
    ds = get_dataset(args, tf, 'none')

    args, model, ckpt_available = get_model_ckpt(args)

    if ckpt_available:
        print("loaded checkpoint {} in pretraining stage".format(
            args.ckpt_name))
    loss_fn = get_loss(args)
    sub_optimizer = get_sub_optimizer(args, model)
    optimizer = get_optimizer(args, sub_optimizer)
    scheduler = get_scheduler(args, optimizer)

    # setup nvidia/apex amp
    # model, optimizer = amp.initialize(model, optimizer, opt_level=args.fp16_opt_level, num_losses=1)
    # model = idist.auto_model(model)

    trainer = get_trainer(args, model, loss_fn, optimizer, scheduler)

    metrics = get_metrics(args)
    logger = get_logger(args)

    @trainer.on(Events.STARTED)
    def on_training_started(engine):
        print("Begin Pretraining")

        # batch-wise
    @trainer.on(Events.ITERATION_COMPLETED)
    def log_iter_results(engine):
        log_results(logger, 'pretrain/iter', engine.state,
                    engine.state.iteration)

    # epoch-wise (ckpt)
    @trainer.on(Events.EPOCH_COMPLETED)
    def save_epoch(engine):
        log_results(logger, 'pretrain/epoch', engine.state, engine.state.epoch)
        log_results_cmd(logger, 'pretrain/epoch', engine.state,
                        engine.state.epoch)
        save_ckpt(args, engine.state.epoch, engine.state.metrics['loss'],
                  model)

    trainer.run(ds, max_epochs=args.epoch)
Example No. 24
def run(args):

    set_seed(args.seed)
    xp = create_experiment(args)
    train_loader, val_loader, test_loader = get_loaders(args)
    loss = get_loss(xp, args)

    model = get_model(args)
    if args.load_model:
        load_model(model, args.load_model)

    if args.cuda:
        if args.parallel_gpu:
            model = torch.nn.DataParallel(model).cuda()
        else:
            torch.cuda.set_device(args.device)
            model.cuda()
        loss.cuda()

    optimizer = get_optimizer(model, args.mu, args.lr_0, xp)
    if args.load_optimizer:
        load_optimizer(optimizer, args.load_optimizer, args.lr_0)

    with logger.stdout_to("{}_log.txt".format(args.out_name)):
        clock = -time.time()
        for _ in range(args.epochs):
            xp.Epoch.update(1).log()
            optimizer = update_optimizer(args.lr_schedule, optimizer, model,
                                         loss, xp)

            xp.Learning_Rate.update().log()
            xp.Mu.update().log()
            xp.Temperature.update().log()

            train(model, loss, optimizer, train_loader, xp, args)
            test(model, loss, val_loader, xp, args)

        test(model, loss, test_loader, xp, args)
        clock += time.time()

        print("\nEvaluation time:  \t {0:.2g} min".format(clock * 1. / 60))
Example No. 25
def valid(loader, dt):
    return 0  # NOTE: this early return disables validation; the code below is unreachable
    # validation
    dt.eval()
    valid_loss = []
    for j, vld_data in enumerate(tqdm(loader)):
        imgs, flows, inv_flows, masks, labels, n_clusters, _ = vld_data
        if imgs.dim() == 1:
            continue
        imgs, flows, inv_flows, masks, labels, n_clusters = \
            imgs.cuda(), flows.cuda(), inv_flows.cuda(), masks.cuda(), labels.cuda(), n_clusters.cuda()
        with torch.no_grad():
            fgmask, emb, tail = dt(imgs, flows, inv_flows)
            loss, fg_loss, var_loss, dist_loss = get_loss(
                fgmask, emb, tail, masks, labels, n_clusters)
            valid_loss.append(loss.detach().cpu())

    valid_loss = np.mean(valid_loss)
    print(f"Validation Loss: {valid_loss}")
    dt.train()
    return valid_loss
Example No. 26
def eval_linear(pretrain_args, args):
    # get pretrained model
    pt_args, pt_model, ckpt_available = get_model_ckpt(pretrain_args)
    
    tf = get_transform(args, 'train')
    ds = get_dataset(args, tf, 'train')

    if ckpt_available:
        print("loaded pretrained model {} in eval linear".format(args.ckpt_name))

    model = get_linear(args, pt_model, args.num_classes)
    loss_fn = get_loss(args)
    optimizer = get_sub_optimizer(args, model)
    scheduler = get_scheduler(args, optimizer)

    trainer = get_trainer(args, model, loss_fn, optimizer, scheduler)
    evaluator = get_evaluator(args, model, loss_fn)

    # metrics = get_metrics(args)
    logger = get_logger(args)
    trainer.run(ds, max_epochs=args.epoch)
Example No. 27
def test():
    filename_queue = tf.train.string_input_producer([tfrecord_filename])
    images, labels = reader.read_and_decode(filename_queue=filename_queue,
                                            batch_size=FLAGS.batch_size,
                                            shuffle_batch=False)
    # with tf.device('/gpu:0'):
    #     global_step = slim.create_global_step()
    # with tf.device('/cpu:0'):
    #     num_batches_per_epoch = FLAGS.data_size / FLAGS.batch_size
    #     decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
    #     lrn_rate = tf.train.exponential_decay(
    #         FLAGS.learning_rate, global_step, decay_steps, LEARNING_RATE_DECAY_FACTOR, staircase=True)
    with tf.device('/gpu:0'):
        # optimizer = tf.train.AdamOptimizer(lrn_rate)
        fc8 = alexnet.model(input_data=images,
                            n_classes=FLAGS.n_classes,
                            keep_prob=FLAGS.keep_prob)
        losses = loss.get_loss(input_data=fc8, grdtruth=labels)
        # train_step = optimizer.minimize(loss=losses, global_step=global_step)
        prediction = alexnet.classify(fc8)
    with tf.device('/cpu:0'):
        saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, model_path)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        total = 0.0
        right = 0.0
        for i in range(80):
            loss_value, pre, grd = sess.run([losses, prediction, labels])
            print('{} image loss: {}'.format(i + 1, loss_value))
            print('{} result: {} {}'.format(i + 1, pre, grd))
            right += np.sum(np.equal(pre, grd))
            total += 10
            print('accuracy: ' + str(right / total))

        coord.request_stop()
        coord.join(threads)
Example No. 28
def load_model(config):
    model_checkpoint_path = config.project_config['model_checkpoint_path']
    model_name = config.model_name
    weights = config.weights

    # NOTE: model_path and the weight-file layout below are inferred from
    # create_model(); adjust to the actual checkpoint naming scheme if it differs.
    model_path = f'{model_checkpoint_path}/{model_name}'

    optimizer = Adam(learning_rate=config.train_config['learning_rate'])
    optimizer_weights_path = f'{model_path}/optimizer_{weights}.pkl'
    with open(optimizer_weights_path, 'rb') as f:
        weight_values = pickle.load(f)
    optimizer.set_weights(weight_values)

    model, loss_inp = DeepICPBuilder(config.net_config).build()
    loss = get_loss(config.train_config["loss_alpha"])
    source_pts, target_pts, GT = loss_inp
    model.add_loss(loss(source_pts, target_pts, GT))

    model.compile(optimizer=optimizer)

    model.load_weights(f'{model_path}/{weights}')

    return model
Example No. 29
def main(args):

    set_cuda(args)
    set_seed(args)

    loader_train, loader_val, loader_test = get_data_loaders(args)
    loss = get_loss(args)
    model = get_model(args)
    optimizer = get_optimizer(args, model, loss, parameters=model.parameters())
    xp = setup_xp(args, model, optimizer)

    for i in range(args.epochs):
        xp.epoch.update(i)

        train(model, loss, optimizer, loader_train, args, xp)
        test(model, optimizer, loader_val, args, xp)

        if (i + 1) in args.T:
            decay_optimizer(optimizer, args.decay_factor)

    load_best_model(model, '{}/best_model.pkl'.format(args.xp_name))
    test(model, optimizer, loader_val, args, xp)
    test(model, optimizer, loader_test, args, xp)
Example No. 30
def initialize_training(model_id, save_path, model_type=0, pre_train_path=None):
    # The get_model method is in charge of 
    # setting the same seed for each loaded model.
    # Thus, for each inner loop we train the same initialized model
    # Load model_0 to continue training with it
    if str(model_id).lower() == 'same':
        final_slash  = save_path.rindex('/')
        model_0_path = os.path.join(save_path[:final_slash], "Model_0/model.pt")
        model = torch.load(model_0_path, map_location=parameters.device)
    elif parameters.PRE_TRAIN and model_type == 0: # Load a pre-trained model
        print ("Loading Pre-Trained Model 0")
        model = torch.load(pre_train_path, map_location=parameters.device)
    elif parameters.HIERARCHICAL_PRE_TRAIN and model_type == 1:
        print ("Loading Pre-Trained Model 1")
        model = torch.load(pre_train_path, map_location=parameters.device)
    else:
        model = get_model(model_id).to(parameters.device)

    print(model)
    writer = SummaryWriter(save_path)
    writer.add_scalar('batch_size', parameters.BATCH_SIZE)
    writer.add_scalar('weight_decay', parameters.HYPERPARAMETERS[parameters.MODEL_ID]['l2_reg'])

    # Include whether we are using the second stage model
    second_stage = (model_type == 1)
    loss_func, include_boundaries = get_loss(is_second_stage=second_stage)

    # Honestly probably do not need to have hyper-parameters per model, but leave it for now.
    if str(model_id).lower() == 'same':
        model_id = parameters.MODEL_ID

    optimizer = torch.optim.Adam(model.parameters(), lr=parameters.HYPERPARAMETERS[model_id]['lr'],
                                 weight_decay=parameters.HYPERPARAMETERS[model_id]['l2_reg'])
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, parameters.HYPERPARAMETERS[model_id]['lr_decay_step'], 
                                            gamma=parameters.HYPERPARAMETERS[model_id]['lr_decay'])

    return model, loss_func, include_boundaries, optimizer, scheduler, writer