Example #1
def train(obj, optimizer, dataset, xp, args, epoch):

    xp.Timer_Train.reset()
    stats = {}

    for i, x, y in tqdm(optimizer.get_sampler(dataset),
                        desc='Train Epoch',
                        leave=False,
                        total=optimizer.get_sampler_len(dataset)):

        oracle_info = obj.oracle(optimizer.variables.w, x, y)
        oracle_info['i'] = i
        optimizer.step(oracle_info)

        # track statistics for monitoring
        stats['obj'] = float(oracle_info['obj'])
        stats['error'] = float(obj.task_error(optimizer.variables.w, x, y))
        stats['size'] = float(x.size(0))
        update_metrics(xp, stats)

    xp.Timer_Train.update()

    print('\nEpoch: [{0}] (Train) \t'
          '({timer:.2f}s) \t'
          'Obj {obj:.3f}\t'
          'Error {error:.2f}\t'.format(
              int(xp.Epoch.value),
              timer=xp.Timer_Train.value,
              error=xp.Error_Train.value,
              obj=xp.Obj_Train.value,
          ))
    log_metrics(xp, epoch)
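Examples #1, #5, and #14 all log through an mlogger-style experiment container xp. Its API is only partially visible here (metric objects expose .update() and .log(), cf. xp.Epoch.update(1).log() in Example #14), so the following is only a minimal sketch of the shared log_metrics helper; which metrics get logged is an assumption.

def log_metrics(xp, epoch=None):
    # Sketch only: assumes each xp metric exposes .log(), as the
    # xp.Epoch.update(1).log() call in Example #14 suggests. The set of
    # metrics on xp varies per example and is assumed here.
    for metric in (xp.Timer_Train, xp.Obj_Train, xp.Error_Train):
        metric.log()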
Example #2
def test_log_metrics(capsys):
    engine = Engine(lambda e, b: None)
    engine.logger = setup_logger(format="%(message)s")
    engine.run(list(range(100)), max_epochs=2)
    log_metrics(engine, "train")
    captured = capsys.readouterr()
    assert captured.err.split("\n")[-2] == "train [2/200]: {}"
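The assertion pins down the log format exactly: after two epochs over 100 batches, engine.state.epoch is 2, engine.state.iteration is 200, and the metrics dict is empty. A log_metrics compatible with this test could therefore be as simple as the sketch below (an assumption, not necessarily the helper under test):

def log_metrics(engine, tag):
    # Produces lines like "train [2/200]: {}" on the engine's logger.
    engine.logger.info("%s [%s/%s]: %s", tag, engine.state.epoch,
                       engine.state.iteration, engine.state.metrics)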
Example #3
def run_validation():
    epoch = trainer.state.epoch
    state = train_evaluator.run(train_eval_loader)
    utils.log_metrics(logger, epoch, state.times["COMPLETED"], "Train",
                      state.metrics)
    state = evaluator.run(val_loader)
    utils.log_metrics(logger, epoch, state.times["COMPLETED"], "Test",
                      state.metrics)
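Here utils.log_metrics takes a logger, an epoch number, the elapsed time from state.times["COMPLETED"], a tag, and the metrics dict (Example #4 calls it the same way). A plausible implementation, sketched under those assumptions:

def log_metrics(logger, epoch, elapsed, tag, metrics):
    # Sketch: one info line per evaluation run; the exact wording is assumed.
    metrics_output = "\n".join(f"\t{k}: {v}" for k, v in metrics.items())
    logger.info(f"Epoch {epoch} - {tag} metrics ({elapsed:.3f}s):\n{metrics_output}")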
Example #4
def evaluation(local_rank, config, logger, with_clearml):

    rank = idist.get_rank()
    device = idist.device()
    manual_seed(config.seed + local_rank)

    data_loader = config.data_loader
    model = config.model.to(device)

    # Load weights:
    state_dict = get_model_weights(config, logger, with_clearml)
    model.load_state_dict(state_dict)

    # Adapt model to dist config
    model = idist.auto_model(model)

    # Setup evaluators
    num_classes = config.num_classes
    cm_metric = ConfusionMatrix(num_classes=num_classes)

    val_metrics = {
        "IoU": IoU(cm_metric),
        "mIoU_bg": mIoU(cm_metric),
    }

    if ("val_metrics" in config) and isinstance(config.val_metrics, dict):
        val_metrics.update(config.val_metrics)

    evaluator = create_evaluator(model,
                                 val_metrics,
                                 config,
                                 with_clearml,
                                 tag="val")

    # Setup Tensorboard logger
    if rank == 0:
        tb_logger = common.TensorboardLogger(
            log_dir=config.output_path.as_posix())
        tb_logger.attach_output_handler(
            evaluator,
            event_name=Events.COMPLETED,
            tag="validation",
            metric_names="all",
        )

    # Log confusion matrix to ClearML:
    if with_clearml:
        evaluator.add_event_handler(Events.COMPLETED, compute_and_log_cm,
                                    cm_metric, evaluator.state.iteration)

    state = evaluator.run(data_loader)
    utils.log_metrics(logger, 0, state.times["COMPLETED"], "Validation",
                      state.metrics)

    if rank == 0:
        tb_logger.close()
Example #5
def train(model, loss, optimizer, loader, xp, args):

    model.train()

    xp.Timer_Train.reset()
    stats_dict = {}

    for x, y in tqdm(loader, disable=not args.tqdm, desc='Train Epoch',
                     leave=False, total=len(loader)):
        (x, y) = (x.cuda(), y.cuda()) if args.cuda else (x, y)

        # forward pass
        scores = model(x)

        # compute the loss function, possibly using smoothing
        with set_smoothing_enabled(args.smooth_svm):
            loss_value = loss(scores, y)

        # backward pass
        optimizer.zero_grad()
        loss_value.backward()

        # optimization step
        optimizer.step(lambda: float(loss_value))

        # monitoring
        stats_dict['loss'] = float(loss(scores, y))
        stats_dict['acc'] = float(accuracy(scores, y))
        stats_dict['gamma'] = float(optimizer.gamma)
        stats_dict['size'] = float(scores.size(0))
        update_metrics(xp, stats_dict)

    xp.Eta.update(optimizer.eta)
    xp.Reg.update(regularization(model, args.l2))
    xp.Obj_Train.update(xp.Reg.value + xp.Loss_Train.value)
    xp.Timer_Train.update()

    print('\nEpoch: [{0}] (Train) \t'
          '({timer:.2f}s) \t'
          'Obj {obj:.3f}\t'
          'Loss {loss:.3f}\t'
          'Acc {acc:.2f}%\t'
          .format(int(xp.Epoch.value),
                  timer=xp.Timer_Train.value,
                  acc=xp.Acc_Train.value,
                  obj=xp.Obj_Train.value,
                  loss=xp.Loss_Train.value))

    log_metrics(xp)
Example #6
def run_val(epoch):
    running_labels = []
    running_predicted = []
    for timestep, example in tqdm(enumerate(val_dataloader)):
        pred, labels = forward_pass(example)
        predicted = torch.round(pred)
        # accumulate flattened predictions and labels for the epoch summary
        running_labels += labels.view(-1).cpu().detach().tolist()
        running_predicted += predicted.view(-1).cpu().detach().tolist()

    log_metrics(writer, epoch, running_predicted, running_labels, 'val', config)
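Examples #6 and #11 share a writer-based log_metrics that receives the accumulated predictions and labels, plus (in Example #11) a running loss and sample count, and returns a (bleu, loss) pair. Its body is not shown here; a sketch that fits both call sites, with the BLEU computation left out, might be:

def log_metrics(writer, epoch, predicted, labels, split, config,
                running_loss=None, running_samples=None):
    # Sketch under assumed semantics: log average loss and element-wise
    # accuracy to TensorBoard. BLEU is returned as None since its
    # computation is not shown in these examples.
    loss = None
    if running_loss is not None and running_samples:
        loss = running_loss / running_samples
        writer.add_scalar(f"{split}/loss", loss, epoch)
    if predicted and labels:
        acc = sum(int(p == t) for p, t in zip(predicted, labels)) / len(labels)
        writer.add_scalar(f"{split}/accuracy", acc, epoch)
    return None, loss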
Example #7
def gan_forward(inputs, labels, model, loss_fn, writer, device, batch_num, N):
    metrics = {}
    sketches, photos = torch.split(inputs, N)
    logits_real, logits_fake = model(sketches, photos)

    d_loss, g_loss = loss_fn(logits_real, logits_fake, device)

    metrics['d_loss'] = d_loss
    metrics['g_loss'] = g_loss

    metrics['loss'] = 0
    for metric_name in metrics:
        if metric_name.endswith('loss') and metric_name != 'loss':
            metrics['loss'] += metrics[metric_name]

    log_metrics(metrics, writer, "batch", batch_num)

    return metrics
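Examples #7, #8, #18, and #19 call log_metrics(metrics, writer, tag, step) with a dict of (possibly tensor-valued) losses. A minimal sketch consistent with those call sites, assuming writer is a TensorBoard SummaryWriter:

import torch

def log_metrics(metrics, writer, tag, step):
    # Sketch: one scalar per entry, grouped under the tag ("batch", "train",
    # "val", ...). Tensor values are unwrapped; plain numbers pass through.
    for name, value in metrics.items():
        if torch.is_tensor(value):
            value = value.item()
        writer.add_scalar(f"{tag}/{name}", value, step)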
Example #8
def classify_contrast_forward(inputs, labels, model, loss_fn, writer, device,
                              batch_num, alpha, loss_type, N):
    metrics = {}
    features = model.extract_features(inputs)
    indices = torch.tensor(range(0, 2 * N)).to(device)
    selected_features = torch.index_select(features, 0, indices)
    logits = model.make_predictions(selected_features)
    sketch_logits, photo_logits = torch.split(logits, N)

    if loss_type == "classify":
        metrics['loss'] = loss_fn(sketch_logits, photo_logits, labels)
    else:
        # reorganize into photo embeds and sketch embeds
        # feed in embed for photo and sketch
        metrics['embedding_loss'], metrics['classification_loss'] = loss_fn(
            *torch.split(features, N), sketch_logits, photo_logits, labels)

        metrics['loss'] = alpha * metrics['embedding_loss'] + (
            1 - alpha) * metrics['classification_loss']

    _, sketch_preds = torch.max(sketch_logits, 1)
    _, photo_preds = torch.max(photo_logits, 1)

    sketch_cor = sum(sketch_preds.cpu().numpy() == labels.cpu().numpy())
    photo_cor = sum(photo_preds.cpu().numpy() == labels.cpu().numpy())

    metrics['sketch_cor'] = sketch_cor
    metrics['photo_cor'] = photo_cor

    log_metrics(metrics, writer, "batch", batch_num)

    # TODO Change this to be args.verbose
    if True:
        print("=" * 100)
        print("Predicted classes for sketches: {}".format(
            sketch_preds.cpu().tolist()))
        print("Predicted classes for photos: {}".format(
            photo_preds.cpu().tolist()))
        print("Ground truth: {}".format(labels.cpu().tolist()))
        print("=" * 100)

    return metrics
Example #9
def _log_epoch_summary(self, epoch_summary: EpochSummaryType,
                       epoch_number: int) -> None:
    utils.log_metrics(self.logger, epoch_summary['valid_f1'], "valid",
                      epoch_number + 1)
    utils.log_metrics(self.logger, epoch_summary['train_f1'], "train",
                      epoch_number + 1)
    self.logger.scalar_summary("train loss_avg",
                               epoch_summary['train_loss_avg'],
                               epoch_number + 1)
    self.logger.scalar_summary("valid_accuracy",
                               epoch_summary['valid_accuracy'],
                               epoch_number + 1)
    self.logger.scalar_summary("train_accuracy",
                               epoch_summary['train_accuracy'],
                               epoch_number + 1)
    self.logger.scalar_summary("valid_f1_avg",
                               epoch_summary['valid_f1_avg'],
                               epoch_number + 1)
    self.logger.scalar_summary("train_f1_avg",
                               epoch_summary['train_f1_avg'],
                               epoch_number + 1)
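This logger exposes scalar_summary, and Example #20 below calls utils.log_metrics(logger, f1_data, "active valid", len(train_data)) with a per-label F1 dict. Under the assumption that the dict maps labels to scores, the shared helper could look like:

def log_metrics(logger, f1_data, prefix, step):
    # Sketch: one scalar_summary per label's F1 score; the key layout and
    # the shape of f1_data are assumptions.
    for label, f1 in f1_data.items():
        logger.scalar_summary(f"{prefix} f1 ({label})", f1, step)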
Example #10
def fit(model,
        optimizer,
        scheduler,
        criterion,
        train_loader,
        val_loader,
        start_epoch=0,
        end_epoch=24):
    metrics = []
    for epoch in range(start_epoch, end_epoch):
        epoch_metrics = {}
        start_time = time()
        train_loss = train(model, optimizer, criterion, train_loader)
        end_time = time()
        epoch_metrics, _ = evaluate(model, criterion, val_loader)
        epoch_metrics['train_loss'] = train_loss
        epoch_metrics['epoch'] = epoch
        epoch_metrics['time'] = end_time - start_time
        epoch_metrics['lr'] = optimizer.param_groups[0]["lr"]
        metrics.append(epoch_metrics)
        log_metrics(epoch_metrics, TRAIN_LOG)
        if scheduler is not None:
            scheduler.step(epoch_metrics['train_loss'])
    return metrics
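Here log_metrics persists one record per epoch to TRAIN_LOG. Assuming TRAIN_LOG is a file path, an append-only JSON-lines writer fits the call:

import json

def log_metrics(metrics, log_path):
    # Sketch: append the epoch's metrics dict as one JSON line.
    with open(log_path, "a") as f:
        f.write(json.dumps(metrics) + "\n")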
Example #11
def train_one_epoch(epoch, experiment_id):
    epoch_start_time = time.time()

    running_loss = 0
    running_labels = []
    running_predicted = []
    running_samples = 0

    for timestep, example in tqdm(enumerate(train_dataloader)):
        loss, pred, labels = forwardbackwardpass(example)

        # TODO: convert predicted tokens to strings to compute the BLEU score

        # step the scheduler every 5 batches
        if timestep % 5 == 0:
            scheduler.step()

        running_loss += loss.item()
        running_samples += example[0].shape[0]

    bleu, loss = log_metrics(writer, epoch, running_predicted, running_labels,
                             'train', config, running_loss, running_samples)
    trainer.eval()
    with torch.no_grad():
        run_val(epoch)

    if epoch % config['model_save_frequency_in_epochs'] == 0:
        save_model(epoch, loss, experiment_id)
    print("time taken for the epoch is ", time.time() - epoch_start_time)
Example #12
def train(model, triples, ent_num):
    logging.info("Start Training...")
    logging.info("batch_size = %d" % config.batch_size)
    logging.info("dim = %d" % config.ent_dim)
    logging.info("gamma = %f" % config.gamma)

    current_lr = config.learning_rate
    train_triples, valid_triples, test_triples = triples
    all_true_triples = train_triples + valid_triples + test_triples
    rtp = rel_type(train_triples)

    optimizer = get_optim("Adam", model, current_lr)
    train_iterator = train_data_iterator(train_triples, ent_num)

    if config.init_checkpoint:
        logging.info("Loading checkpoint...")
        checkpoint = torch.load(os.path.join(config.save_path, "checkpoint"))
        init_step = checkpoint["step"] + 1
        model.load_state_dict(checkpoint["model_state_dict"])
        if config.use_old_optimizer:
            current_lr = checkpoint["current_lr"]
            optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    else:
        init_step = 1

    max_hit1 = 0.0
    max_mrr = 0.0
    training_logs = []
    # Training Loop
    for step in range(init_step, config.max_step):
        log = train_step(model, optimizer, next(train_iterator))
        training_logs.append(log)

        # log
        if step % config.log_step == 0:
            metrics = {}
            for metric in training_logs[0].keys():
                metrics[metric] = sum([log[metric] for log in training_logs
                                       ]) / len(training_logs)
            log_metrics("Training average", step, metrics)
            training_logs = []

        # valid
        if step % config.valid_step == 0:
            logging.info(
                "---------------Evaluating on Valid Dataset---------------")
            metrics = test_step(model, valid_triples, all_true_triples,
                                ent_num, rtp)
            metrics, *per_type_metrics = metrics
            logging.info("----------------Overall Results----------------")
            log_metrics("Valid", step, metrics)
            tags = ["Head... 1-1", "Head... 1-M", "Head... M-1", "Head... M-M",
                    "Tail... 1-1", "Tail... 1-M", "Tail... M-1", "Tail... M-M"]
            for tag, m in zip(tags, per_type_metrics):
                logging.info("-----------Prediction %s -------------" % tag)
                log_metrics("Valid", step, m)
            if metrics["HITS@1"] >= max_hit1 or metrics["MRR"] >= max_mrr:
                if metrics["HITS@1"] > max_hit1:
                    max_hit1 = metrics["HITS@1"]
                if metrics["MRR"] > max_mrr:
                    max_mrr = metrics["MRR"]
                save_variable_list = {
                    "step": step,
                    "current_lr": current_lr,
                }
                save_model(model, optimizer, save_variable_list)
            elif current_lr > 0.0000011:
                current_lr *= 0.1
                logging.info("Change learning_rate to %f at step %d" %
                             (current_lr, step))
                optimizer = get_optim("Adam", model, current_lr)
            else:
                logging.info(
                    "-------------------Training End-------------------")
                break
    # best state
    checkpoint = torch.load(os.path.join(config.save_path, "checkpoint"))
    model.load_state_dict(checkpoint["model_state_dict"])
    step = checkpoint["step"]
    logging.info(
        "-----------------Evaluating on Test Dataset-------------------")
    metrics = test_step(model, test_triples, all_true_triples, ent_num, rtp)
    metrics, *per_type_metrics = metrics
    logging.info("----------------Overall Results----------------")
    log_metrics("Test", step, metrics)
    tags = ["Head... 1-1", "Head... 1-M", "Head... M-1", "Head... M-M",
            "Tail... 1-1", "Tail... 1-M", "Tail... M-1", "Tail... M-M"]
    for tag, m in zip(tags, per_type_metrics):
        logging.info("-----------Prediction %s -------------" % tag)
        log_metrics("Test", step, m)
Example #13
def train(model, triples, ent_num):
    logging.info("Start Training...")
    logging.info("batch_size = %d" % config.batch_size)
    logging.info("dim = %d" % config.ent_dim)
    logging.info("gamma = %f" % config.gamma)

    current_lr = config.learning_rate
    train_triples, valid_triples, test_triples, symmetry_test, inversion_test, composition_test, others_test = triples
    all_true_triples = train_triples + valid_triples + test_triples
    r_tp = rel_type(train_triples)

    optimizer = get_optim("Adam", model, current_lr)

    if config.init_checkpoint:
        logging.info("Loading checkpoint...")
        checkpoint = torch.load(os.path.join(config.save_path, "checkpoint"),
                                map_location=torch.device("cuda:0"))
        init_step = checkpoint["step"] + 1
        model.load_state_dict(checkpoint["model_state_dict"])
        if config.use_old_optimizer:
            current_lr = checkpoint["current_lr"]
            optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    else:
        init_step = 1

    true_all_heads, true_all_tails = get_true_ents(all_true_triples)
    train_iterator = train_data_iterator(train_triples, ent_num)
    test_data_list = test_data_sets(valid_triples, true_all_heads,
                                    true_all_tails, ent_num, r_tp)

    max_mrr = 0.0
    training_logs = []
    modes = ["Prediction Head", "Prediction Tail"]
    rtps = ["1-1", "1-M", "M-1", "M-M"]
    # Training Loop
    for step in range(init_step, config.max_step + 1):
        log = train_step(model, optimizer, next(train_iterator))
        training_logs.append(log)

        # log
        if step % config.log_step == 0:
            metrics = {}
            for metric in training_logs[0].keys():
                metrics[metric] = sum([log[metric] for log in training_logs
                                       ]) / len(training_logs)
            log_metrics("Training", step, metrics)
            training_logs.clear()

        # valid
        if step % config.valid_step == 0:
            logging.info("-" * 10 + "Evaluating on Valid Dataset" + "-" * 10)
            metrics = test_step(model, test_data_list, True)
            log_metrics("Valid", step, metrics[0])
            cnt_mode_rtp = 1
            for mode in modes:
                for rtp in rtps:
                    logging.info("-" * 10 + mode + "..." + rtp + "-" * 10)
                    log_metrics("Valid", step, metrics[cnt_mode_rtp])
                    cnt_mode_rtp += 1
            if metrics[0]["MRR"] >= max_mrr:
                max_mrr = metrics[0]["MRR"]
                save_variable_list = {
                    "step": step,
                    "current_lr": current_lr,
                }
                save_model(model, optimizer, save_variable_list)
            if step / config.max_step in [0.2, 0.5, 0.8]:
                current_lr *= 0.1
                logging.info("Change learning_rate to %f at step %d" %
                             (current_lr, step))
                optimizer = get_optim("Adam", model, current_lr)

    # load best state
    checkpoint = torch.load(os.path.join(config.save_path, "checkpoint"))
    model.load_state_dict(checkpoint["model_state_dict"])
    step = checkpoint["step"]

    # relation patterns
    test_datasets = [
        symmetry_test, inversion_test, composition_test, others_test
    ]
    test_datasets_str = ["Symmetry", "Inversion", "Composition", "Other"]
    for i in range(len(test_datasets)):
        dataset = test_datasets[i]
        dataset_str = test_datasets_str[i]
        if len(dataset) == 0:
            continue
        test_data_list = test_data_sets(dataset, true_all_heads,
                                        true_all_tails, ent_num, r_tp)
        logging.info("-" * 10 + "Evaluating on " + dataset_str + " Dataset" +
                     "-" * 10)
        metrics = test_step(model, test_data_list)
        log_metrics("Valid", step, metrics)

    # finally test
    test_data_list = test_data_sets(test_triples, true_all_heads,
                                    true_all_tails, ent_num, r_tp)
    logging.info("----------Evaluating on Test Dataset----------")
    metrics = test_step(model, test_data_list, True)
    log_metrics("Test", step, metrics[0])
    cnt_mode_rtp = 1
    for mode in modes:
        for rtp in rtps:
            logging.info("-" * 10 + mode + "..." + rtp + "-" * 10)
            log_metrics("Test", step, metrics[cnt_mode_rtp])
            cnt_mode_rtp += 1
Example #14
def trainepoch(epoch):
    xp.Epoch.update(1).log()
    print('\nTRAINING : Epoch ' + str(epoch))
    nli_net.train()
    # shuffle the data
    permutation = np.random.permutation(len(train['s1']))

    s1 = train['s1'][permutation]
    s2 = train['s2'][permutation]
    target = train['label'][permutation]

    if epoch > 1 and params.opt == 'sgd':
        optimizer.param_groups[0]['lr'] *= params.decay
        optimizer.eta = optimizer.param_groups[0]['lr']

    xp.Timer_Train.reset()
    stats = {}

    for stidx in tqdm(range(0, len(s1), params.batch_size),
                      disable=not params.tqdm,
                      desc='Train Epoch',
                      leave=False):
        # prepare batch
        s1_batch, s1_len = get_batch(s1[stidx:stidx + params.batch_size],
                                     word_vec)
        s2_batch, s2_len = get_batch(s2[stidx:stidx + params.batch_size],
                                     word_vec)
        s1_batch, s2_batch = s1_batch.cuda(), s2_batch.cuda()
        tgt_batch = torch.LongTensor(target[stidx:stidx +
                                            params.batch_size]).cuda()

        # model forward
        scores = nli_net((s1_batch, s1_len), (s2_batch, s2_len))
        with set_smoothing_enabled(params.smooth_svm):
            loss = loss_fn(scores, tgt_batch)

        # backward
        optimizer.zero_grad()
        loss.backward()
        if params.opt != 'dfw':
            adapt_grad_norm(nli_net, params.max_norm)
        # necessary information for the step-size of some optimizers -> provide closure
        optimizer.step(lambda: float(loss))

        # track statistics for monitoring
        stats['loss'] = float(loss_fn(scores, tgt_batch))
        stats['acc'] = float(accuracy(scores, tgt_batch))
        stats['gamma'] = float(optimizer.gamma)
        stats['size'] = float(tgt_batch.size(0))
        update_metrics(xp, stats)

    xp.Eta.update(optimizer.eta)
    xp.Reg.update(regularization(nli_net, params.l2))
    xp.Obj_Train.update(xp.Reg.value + xp.Loss_Train.value)
    xp.Timer_Train.update()

    print('results : epoch {0} ; mean accuracy train : {1}'.format(
        epoch, xp.acc_train))
    print('\nEpoch: [{0}] (Train) \t'
          '({timer:.2f}s) \t'
          'Obj {obj:.3f}\t'
          'Loss {loss:.3f}\t'
          'Acc {acc:.2f}%\t'.format(int(xp.Epoch.value),
                                    timer=xp.Timer_Train.value,
                                    acc=xp.Acc_Train.value,
                                    obj=xp.Obj_Train.value,
                                    loss=xp.Loss_Train.value))

    log_metrics(xp)
Example #15
def train(model, triples, entities, un_ents, un_rels, test_pairs):
    logging.info("---------------Start Training---------------")

    ht_1, ht_2 = get_r_hts(triples, un_rels)
    rel_seeds = relation_seeds({}, ht_1, ht_2, un_rels)

    current_lr = config.learning_rate
    optimizer = get_optim(model, current_lr)
    if config.init_checkpoint:
        logging.info("Loading checkpoint...")
        checkpoint = torch.load(os.path.join(config.save_path, "checkpoint"))
        init_step = checkpoint["step"] + 1
        model.load_state_dict(checkpoint["model_state_dict"])
        if config.use_old_optimizer:
            current_lr = checkpoint["current_lr"]
            optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    else:
        init_step = 1

    training_logs = []
    train_iterator = train_data_iterator(entities,
                                         new_triples(triples, rel_seeds, {}))
    # Training Loop
    for step in range(init_step, config.max_step):
        log = train_step(model, optimizer, next(train_iterator))
        training_logs.append(log)

        # log
        if step % config.log_step == 0:
            metrics = {}
            for metric in training_logs[0].keys():
                metrics[metric] = sum([log[metric] for log in training_logs
                                       ]) / len(training_logs)
            log_metrics("Training average", step, metrics)
            training_logs.clear()

        # warm up
        if step % config.warm_up_step == 0:
            current_lr *= 0.1
            logging.info("Change learning_rate to %f at step %d" %
                         (current_lr, step))
            optimizer = get_optim(model, current_lr)

        if step % config.update_step == 0:
            logging.info("Align entities and relations, swap parameters")
            seeds, align_e_1, align_e_2 = entity_seeds(model, un_ents)
            rel_seeds = relation_seeds(seeds, ht_1, ht_2, un_rels)
            new_entities = (entities[0] + align_e_2, entities[1] + align_e_1)
            train_iterator = train_data_iterator(
                new_entities, new_triples(triples, rel_seeds, seeds))
            save_variable_list = {
                "step": step,
                "current_lr": current_lr,
            }
            save_model(model, optimizer, save_variable_list)

    logging.info("---------------Test on test dataset---------------")
    metrics = test_step(model, test_pairs, un_ents)
    log_metrics("Test", config.max_step, metrics)

    logging.info("---------------Training End---------------")
Example #16
def _():
    evaluator.run(eval_dataloader, epoch_length=config.eval_epoch_length)
    log_metrics(evaluator, "eval")
Example #17
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.learningRate,
                                weight_decay=args.weightDecay)

    # Learning
    for epoch_num in range(args.initEpochNum,
                           args.initEpochNum + args.nEpochs):
        trn_metrics = runModel(trn_data_gen,
                               model,
                               optimizer,
                               class_wts,
                               'trn',
                               args.batchSize,
                               trn_num_batches,
                               loss_wts=loss_wts)
        utils.log_metrics(epoch_num, trn_metrics, 'trn', log_file,
                          args.savename)
        torch.save(model.state_dict(), args.savename + '.pt')
        val_metrics = runModel(val_data_gen, model, optimizer, class_wts,
                               'val', args.batchSize, val_num_batches, None)
        utils.log_metrics(epoch_num, val_metrics, 'val', log_file,
                          args.savename)
        if best_val_record and val_metrics.AUROC > best_val:
            best_val = utils.save_chkpt(best_val_record, best_val, val_metrics,
                                        model, args.savename)
    tst_metrics = runModel(tst_data_gen, model, optimizer, class_wts, 'tst',
                           args.batchSize, tst_num_batches, None)
    utils.log_metrics(0, tst_metrics, 'tst', log_file, args.savename)
    # val_aggregator = Aggregator('val', task, val_data_loader)
    # val_aggregator.aggregate()
    # tst_aggregator = Aggregator('tst', task, tst_data_loader)
    # tst_aggregator.aggregate()
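utils.log_metrics in this snippet takes the epoch number, a metrics record (it exposes an AUROC attribute above), a phase tag, a log file, and the run's save name. A hypothetical helper matching that signature:

def log_metrics(epoch_num, metrics, phase, log_file, savename):
    # Sketch: assumes log_file is a path and metrics is a namedtuple-like
    # record whose repr carries the field names.
    with open(log_file, "a") as f:
        f.write("{} {} epoch {}: {}\n".format(savename, phase, epoch_num, metrics))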
Example #18
def train_model(args):
    dataloaders = get_dataloaders(args)

    dataset_sizes = {
        'train': len(dataloaders['train'].dataset),
        'val': len(dataloaders['val'].dataset),
        'test': len(dataloaders['test'].dataset)
    }

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # set up
    model = load_model(args, device)
    loss_fn = get_loss_fn(args.dataset, args.loss_type)

    if args.train_decoders:
        parameters = list(model.photo_decoder.parameters()) + list(
            model.sketch_decoder.parameters())
    elif args.model in ['EmbedGAN']:
        parameters = list(model.G.parameters()) + list(model.D.parameters())
    else:
        parameters = model.parameters()

    if args.optim == 'sgd':
        optimizer = optim.SGD(parameters,
                              lr=args.lr,
                              weight_decay=args.wd,
                              momentum=.9,
                              nesterov=True)
    elif args.optim == 'adam':
        optimizer = optim.Adam(parameters, lr=args.lr, weight_decay=args.wd)

    scheduler = optim.lr_scheduler.StepLR(
        optimizer, step_size=len(dataloaders['train']) // 10, gamma=.9)
    writer = SummaryWriter(args.log_dir + "/{}".format(args.name))

    save_dir = Path(args.save_dir) / ('{}'.format(args.name))
    if not save_dir.exists():
        os.mkdir(save_dir)

    best_model = None
    best_loss = float('inf')
    batch_num = 0

    for epoch in range(args.num_epochs):
        print('Epoch {}/{}'.format(epoch, args.num_epochs - 1))
        print('-' * 10)
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                scheduler.step()
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            epoch_metrics = defaultdict(float)

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                # zero the parameter gradients
                optimizer.zero_grad()

                N = len(inputs)

                # converts list of tuples of images paths of length N into flattened
                # tensor of size N * args.loss_type
                inputs = load_sketchy_images(inputs, args.loss_type, device,
                                             args.img_size)
                labels = labels.to(device)
                with torch.set_grad_enabled(phase == 'train'):
                    if args.loss_type in [
                            "vae", "vae+embed", "vae+embed+classify"
                    ]:
                        batch_metrics = vae_forward(
                            inputs,
                            labels,
                            model,
                            loss_fn,
                            writer,
                            device,
                            batch_num,
                            args.alpha,
                            N,
                            args.name,
                            modality=args.modality,
                            compare_embed=args.loss_type
                            in ["vae+embed", "vae+embed+classify"],
                            classify=args.loss_type
                            in ['vae+embed+classify', 'single_vae'])
                    elif args.loss_type in [
                            "ae", "ae+embed", "ae+embed+classify"
                    ]:
                        batch_metrics = ae_forward(
                            inputs,
                            labels,
                            model,
                            loss_fn,
                            writer,
                            device,
                            batch_num,
                            args.alpha,
                            N,
                            args.name,
                            modality=args.modality,
                            compare_embed=args.loss_type
                            in ["ae+embed", "ae+embed+classify"],
                            classify=args.loss_type
                            in ['ae+embed+classify', 'single_ae'])
                    elif args.loss_type in ['gan']:
                        batch_metrics = gan_forward(inputs, labels, model,
                                                    loss_fn, writer, device,
                                                    batch_num, N)
                    else:
                        batch_metrics = classify_contrast_forward(
                            inputs, labels, model, loss_fn, writer, device,
                            batch_num, args.alpha, args.loss_type, N)

                    for criteria_name in batch_metrics:
                        epoch_metrics[criteria_name] += batch_metrics[
                            criteria_name] / dataset_sizes[phase]

                    loss = batch_metrics['loss']

                    del batch_metrics

                    if phase == "train":
                        batch_num += 1
                        loss.backward()
                        optimizer.step()

            epoch_loss = epoch_metrics['loss'].item()
            log_metrics(epoch_metrics, writer, phase, epoch)

        # deep copy the model
        if phase == 'val' and epoch_loss < best_loss:
            best_loss = epoch_loss
            now = datetime.datetime.now()
            torch.save(
                model.state_dict(), save_dir /
                f"{now.month}{now.day}{now.hour}{now.minute}_{best_loss}")
            best_model = copy.deepcopy(model.state_dict())

    writer.close()
    now = datetime.datetime.now()
    torch.save(
        model.state_dict(), save_dir /
        f"end_{now.month}{now.day}{now.hour}{now.minute}_{best_loss}")

    # load best model weights
    model.load_state_dict(best_model)
    torch.save(model.state_dict(), save_dir / "best")
Example #19
def ae_forward(inputs,
               labels,
               model,
               loss_fn,
               writer,
               device,
               batch_num,
               alpha,
               N,
               name,
               modality,
               compare_embed=False,
               classify=False):

    metrics = {}
    if modality in ['both', 'sketch']:
        sketches = torch.index_select(inputs, 0,
                                      torch.tensor(range(0, N)).to(device))
        sketch_embed, recon_sketch = model.forward(sketches, is_sketch=True)

    if modality in ['both', 'photo']:
        photos = torch.index_select(inputs, 0,
                                    torch.tensor(range(N, 2 * N)).to(device))
        photo_embed, recon_photo = model.forward(photos, is_sketch=False)

    if batch_num >= 500 and batch_num % 500 == 0:
        if modality in ['both', 'sketch']:
            tvutils.save_image(
                recon_sketch,
                '/home/robincheong/sketch2img/generated/{}_recon_sketch_{}.png'
                .format(name, batch_num))
            tvutils.save_image(
                sketches,
                '/home/robincheong/sketch2img/generated/{}_sketches_{}.png'.
                format(name, batch_num))
        if modality in ['both', 'photo']:
            tvutils.save_image(
                recon_photo,
                '/home/robincheong/sketch2img/generated/{}_recon_photo_{}.png'.
                format(name, batch_num))
            tvutils.save_image(
                photos,
                '/home/robincheong/sketch2img/generated/{}_photos_{}.png'.
                format(name, batch_num))

    if modality in ['both', 'sketch']:
        metrics['sketch_recon_loss'] = loss_fn(recon_sketch, sketches) * alpha
    if modality in ['both', 'photo']:
        metrics['photo_recon_loss'] = loss_fn(recon_photo, photos) * alpha

    if compare_embed and modality in ['both']:
        metrics['embed_loss'] = torch.sum(
            (sketch_embed - photo_embed)**2 / len(sketch_embed))
    else:
        metrics['embed_loss'] = 0

    if classify:
        ce_loss = nn.CrossEntropyLoss()
        if modality == 'photo':
            metrics['classify_loss'] = ce_loss(
                model.make_predictions(photo_embed), labels) * 10
        elif modality == 'sketch':
            metrics['classify_loss'] = ce_loss(
                model.make_predictions(sketch_embed), labels) * 10
        else:
            metrics['classify_loss'] = (ce_loss(model.make_predictions(photo_embed), labels) \
                                       + ce_loss(model.make_predictions(sketch_embed), labels)) * 10
    else:
        metrics['classify_loss'] = 0

    metrics['loss'] = 0
    for metric_name in metrics:
        if metric_name.endswith('loss') and metric_name != 'loss':
            metrics['loss'] += metrics[metric_name]

    log_metrics(metrics, writer, "batch", batch_num)

    return metrics
Example #20
def active_train(
    log_dir: str,
    model: torch.nn.Module,
    model_path: str,
    unlabeled_dataset: conlldataloader.ConllDataSetUnlabeled,
    test_dataset: conlldataloader.ConllDataSet,

    # active learning parameters
    iterations: int,
    heuristic: active_heuristic.ActiveHeuristic,
    oracle: oracle.Oracle,
    sample_size: int,
    sampling_strategy: str,  # sampling, top_k

    # train parameters
    vocab: Vocab,
    tag_vocab: Vocab,
    batch_size: int,
    shuffle: bool,
    num_workers: int,
    num_epochs: float,
    learning_rate: float,
    weight_decay: float,
    momentum: float,
    optimizer_type: str,
    device: str,
    summary_file: str,
) -> None:
    logger = Logger(
        os.path.join(log_dir, "{}/".format(model_path)),
        summary_file=summary_file,
    )

    # labeled training points accumulated over active-learning iterations
    train_data = []

    test_data_loader = conlldataloader.get_data_loader(
        vocab,
        tag_vocab,
        test_dataset,
        1,  # batch_size
        False,  # no shuffle
        1,  # 1 worker
    )

    start_model = copy.deepcopy(model)

    iteration_samples = [1, 5, 10, 25, 50, 100]

    labeled_indexes = []

    for i, sample_size in enumerate(iteration_samples):
        if len(train_data) > 0:
            trainer = Trainer(
                model=copy.deepcopy(model),
                learning_rate=learning_rate,
                weight_decay=weight_decay,
                momentum=momentum,
                optimizer_type=optimizer_type,
                vocab=vocab,
                tags=tag_vocab,
                batch_size=batch_size,
                shuffle=shuffle,
                num_workers=num_workers,
                train_dataset=train_data,
                test_dataset=test_dataset,
                logger=logger,
                device=device,
                verbose_print=True,
                verbose_log=True,
                train_label_fn=lambda data, index: (data[index]))

            train_data_loader = conlldataloader.get_data_loader(
                vocab,
                tag_vocab,
                train_data,
                batch_size,
                shuffle,
                num_workers,
                label_fn=lambda data, index: (data[index]),
            )

            trainer.train(num_epochs)
            model = trainer.get_best_model()
            ts = len(train_data)
            if constants.LOG_ANALYSIS_FILES:
                with open(f'help/{model_path}_analyze_test_{ts}.txt',
                          'w') as log_file:
                    with open(f'help/{model_path}_analyze_test_{ts}.csv',
                              'w') as csv_file:
                        utils.analyze_predictions(
                            trainer.get_best_model(),
                            test_data_loader,
                            vocab,
                            tag_vocab,
                            log_file,
                            csv_file,
                            device,
                        )
                with open(f'help/{model_path}_analyze_train_{ts}.txt',
                          'w') as log_file:
                    with open(f'help/{model_path}_analyze_train_{ts}.csv',
                              'w') as csv_file:
                        utils.analyze_predictions(
                            trainer.get_best_model(),
                            train_data_loader,
                            vocab,
                            tag_vocab,
                            log_file,
                            csv_file,
                            device,
                        )

        if (i == len(iteration_samples) - 1):
            break
        # compute valid metrics
        f1_data, acc = utils.compute_f1_dataloader(model,
                                                   test_data_loader,
                                                   tag_vocab,
                                                   device=device)
        f1_avg_valid = utils.compute_avg_f1(f1_data)

        # log valid metrics
        logger.scalar_summary("active valid f1", f1_avg_valid, len(train_data))
        logger.scalar_summary("active valid accuracy", acc, len(train_data))
        utils.log_metrics(logger, f1_data, "active valid", len(train_data))

        # select new points from distribution
        if isinstance(heuristic, active_heuristic.KNNEmbeddings):
            distribution = heuristic.evaluate_with_labeled(
                model=model,
                dataset=unlabeled_dataset,
                labeled_indexes=labeled_indexes,
                labeled_points=train_data,
                device=device)
        else:
            distribution = heuristic.evaluate(model, unlabeled_dataset, device)
        new_points = []
        sample_size = min(sample_size, len(distribution) - 1)
        if sampling_strategy == constants.ACTIVE_LEARNING_SAMPLE:
            new_points = torch.multinomial(distribution, sample_size)
        elif sampling_strategy == constants.ACTIVE_LEARNING_TOP_K:
            new_points = sorted(range(len(distribution)),
                                reverse=True,
                                key=lambda ind: distribution[ind])
        new_points = new_points[:sample_size]

        # use new points to augment train_dataset
        # remove points from the unlabeled corpus
        query = [unlabeled_dataset.data[ind] for ind in new_points]

        labeled_indexes.extend(ind for (ind, _) in query)

        outputs = [oracle.get_label(q) for q in query]

        # move unlabeled points to labeled points
        for q in query:
            unlabeled_dataset.remove(q)

        train_data.extend(outputs)
    logger.flush()
Example #21
def _():
    evaluator.run(test_loader, epoch_length=config.eval_epoch_length)
    log_metrics(evaluator, tag="eval")