    def train_model(self):
        best_acc = 0.0

        print("Beginning Training for", self.epochs, " Epochs")
        for epoch in range(1, self.epochs + 1):
            if epoch == 80:
                self.lr = 0.01
                for group in self.optimizer.param_groups:
                    group['lr'] = self.lr
            elif epoch == 140:
                self.lr = 0.001
                for group in self.optimizer.param_groups:
                    group['lr'] = self.lr

            train_utils.train(self, epoch)
            acc, loss = train_utils.evaluate(self)
            # acc = round(acc.item(), 4)

            # Save best performance model
            if best_acc < acc:
                best_model_wts = copy.deepcopy(self.model.state_dict())
                best_epoch = epoch
                best_acc = acc
                best_loss = loss
        print(f"Saving best model: Loss={best_loss}, Acc={best_acc}, Ep={best_epoch}")
        # Save Best model
        torch.save(best_model_wts, self.checkpoint_path.format(epoch=best_epoch, acc=best_acc))

        # Record Metrics
        self.overall_log.append(
            {"Experiment": self.exp_name, "Epoch": best_epoch, "Test_Acc": round(best_acc * 100, 2),
             "Test_Loss": best_loss})
        train_utils.record_overall_metrics(self, ['Experiment', 'Epoch', "Test_Acc", "Test_Loss"])

    def fine_tune(self):
        best_acc = 0.0
        self.lr = 0.01
        self.optimizer = torch.optim.SGD(self.model.parameters(), lr=self.lr, momentum=0.9, weight_decay=5e-4)
        print("Beginning Training for", self.epochs, " Epochs")
        for epoch in range(1, 41):
            if epoch == 10:
                self.lr = self.lr * 0.1
                for group in self.optimizer.param_groups:
                    group['lr'] = self.lr
            elif epoch == 20:
                self.lr = self.lr * 0.1
                for group in self.optimizer.param_groups:
                    group['lr'] = self.lr

            train_utils.train(self, epoch)
            acc, loss = train_utils.evaluate(self)
            acc = round(acc.item(), 4)

            # Save best performance model
            if best_acc < acc:
                best_model_wts = copy.deepcopy(self.model.state_dict())
                best_epoch = epoch
                best_acc = acc
                best_loss = loss
        # Save Best model
        # torch.save(best_model_wts, self.checkpoint_path.format(epoch=best_epoch, acc=round(best_acc * 100, 2)))

        # Record Metrics
        self.overall_log.append(
            {"Experiment": self.exp_name, "Epoch": best_epoch, "Test_Acc": best_acc,
             "Test_Loss": best_loss})
        train_utils.record_overall_metrics(self, ['Experiment', 'Epoch', "Test_Acc", "Test_Loss"])
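
Note: both methods above step the learning rate by hand at fixed epochs. A minimal sketch (not the author's code) of the same 80/140 schedule using PyTorch's built-in MultiStepLR, assuming the run starts from lr=0.1 so that each drop is a factor of 10:

import torch
import torch.nn as nn

model = nn.Linear(10, 2)  # stand-in for self.model
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[80, 140], gamma=0.1)

for epoch in range(1, 201):  # stand-in for self.epochs
    # ... train_utils.train / train_utils.evaluate would run here ...
    scheduler.step()  # lr: 0.1 -> 0.01 -> 0.001 (one epoch later than the manual version above)
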
Example No. 3
    def train_model(self):
        best_acc = 0.0
        print("Beginning Training for", self.epochs, " Epochs")

        for epoch in range(1, self.epochs + 1):
            if epoch == 80:
                self.lr = 0.01
                for group in self.optimizer.param_groups:
                    group['lr'] = self.lr
            elif epoch == 140:
                self.lr = 0.001
                for group in self.optimizer.param_groups:
                    group['lr'] = self.lr

            train_utils.train(self, epoch)
            acc, loss = train_utils.evaluate(self)
            acc = round(acc.item(), 4)
            loss = round(loss, 4)

            # Save best performance model
            if best_acc < acc:
                best_model_wts = copy.deepcopy(self.model.state_dict())
                best_epoch = epoch
                best_acc = acc
                best_loss = loss

        # Save Best model
        # torch.save(best_model_wts, self.model_path.format(task=self.task, epoch=best_epoch, acc=round(best_acc * 100, 2)))

        # Record Metrics
        train_utils.record_metrics(self)

        self.overall_log.append(
            {"Task": self.task, "Epoch": best_epoch, "Test_Acc": round(best_acc * 100, 2), "Test_Loss": best_loss})
        train_utils.record_overall_metrics(self)
Example No. 4
def main():
    start_time = time()

    in_arg = get_args_train()

    data_dir = in_arg.data_dir

    device = get_device(in_arg.gpu)
    #     print(device)
    dataloaders = get_dataloaders(data_dir)

    criterion = get_criterion()

    model = get_model(device=device,
                      arch=in_arg.arch,
                      hidden_units=in_arg.hidden_units,
                      data_dir=in_arg.data_dir,
                      save_dir=in_arg.save_dir)
    # print(model)

    optimizer = get_optimizer(model, in_arg.learning_rate)
    #     print(optimizer)

    train(model,
          criterion,
          optimizer,
          epochs=in_arg.epochs,
          device=device,
          train_loader=dataloaders['train'],
          valid_loader=dataloaders['valid'])

    tot_time = time() - start_time
    print(f"\n** Total Elapsed Runtime: {tot_time:.3f} seconds")
Example No. 5
    def finetune_classifier(self, task, ittr="0"):
        print('-' * 50)
        print("Training task:\t", task)
        self.data_loaders = train_utils.CIFAR_dl_task(self, task,
                                                      self.per_task_norm)
        best_acc = 0.0

        # Setup Model
        model = self.backbone_model
        for param in model.parameters():
            param.requires_grad = False
        model.fc = nn.Linear(512, 5)
        self.model = model.to(self.device)
        self.lr = 0.01
        self.optimizer = torch.optim.SGD(self.model.parameters(),
                                         lr=self.lr,
                                         momentum=0.9,
                                         weight_decay=5e-4)

        print("Finetuning for", self.epochs, " Epochs")
        for epoch in range(1, self.epochs + 1):
            if epoch == 10:
                self.lr = self.lr * 0.1
                for group in self.optimizer.param_groups:
                    group['lr'] = self.lr
            elif epoch == 20:
                self.lr = self.lr * 0.1
                for group in self.optimizer.param_groups:
                    group['lr'] = self.lr

            train_utils.train(self, epoch)
            acc, loss = train_utils.evaluate(self)
            acc = round(acc.item(), 4)
            loss = round(loss, 4)

            # Save best performance model
            if best_acc < acc:
                best_model_wts = copy.deepcopy(self.model.state_dict())
                best_acc = acc
                best_loss = loss
                best_epoch = epoch

        # Save Best model
        torch.save(
            best_model_wts,
            self.classifier_path.format(exp=ittr,
                                        task=task,
                                        epoch=best_epoch,
                                        acc=round(best_acc * 100, 2)))
        # Record Metrics
        self.classifier_results.append({
            "Task": task,
            "Acc": round(best_acc * 100, 2),
            "Loss": best_loss
        })
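
In finetune_classifier above, every backbone parameter is frozen before the new fc head is attached, yet the optimizer is still given model.parameters(). A common equivalent (a sketch, not the author's code) is to hand the optimizer only the parameters that still require gradients:

import torch
import torch.nn as nn
import torchvision

model = torchvision.models.resnet18()  # stand-in for self.backbone_model
for param in model.parameters():
    param.requires_grad = False
model.fc = nn.Linear(512, 5)  # new head; its parameters require gradients by default

trainable = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(trainable, lr=0.01, momentum=0.9, weight_decay=5e-4)
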
Example No. 6
def main():
    config = [(64, 3, 1, 1), (64, 3, 1, 1), (1, 3, 1, 1)]
    #config = [(64, 9, 1, 4), (32, 1, 1, 0), (3, 5, 1, 2)]
    #config = [(64, 9, 1, 0), (32, 1, 1, 0), (3, 5, 1, 0)]
    # config: (output_ch, kernel_size, stride, padding_size)
    model = SRCNN(config).to(DEVICE)
    loss_function = nn.MSELoss(reduction='mean')
    optimizer = optim.Adam(model.parameters(), lr=0.0001)
    test_data = generate_data('test')
    for epoch in range(EPOCH):
        train_data = generate_data('train')
        train(model, train_data, loss_function, optimizer, DEVICE)
        test(model, test_data, loss_function, epoch, DEVICE)
Example No. 7
def training():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data_root = "/home/glazkova/ProbabilisticUnet/data"

    img_transform_func = transforms.Compose([
        transforms.Resize((256, 512), interpolation=PIL.Image.BILINEAR),
        transforms.ToTensor(),
    ])

    labels_transform_func = transforms.Compose([
        transforms.Resize((256, 512), interpolation=PIL.Image.NEAREST),
        transforms.Lambda(lambda x: id_to_train_id[x]),
        transforms.ToTensor()
    ])

    train_dataset = TransformedCityDataset(root=data_root,
                                           mode="fine",
                                           split="train",
                                           target_type="semantic")

    test_dataset = datasets.Cityscapes(root=data_root,
                                       mode="fine",
                                       split="val",
                                       target_type="semantic",
                                       transform=img_transform_func,
                                       target_transform=labels_transform_func)

    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=train_batch_size)

    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=test_batch_size)

    #iter_num = 240000
    #n_epochs = iter_num // (len(train_dataset) // batch_size)
    n_epochs = 100

    model = ProbUNet(num_classes, latent_space_size)
    #model.load_state_dict(torch.load("results/model"))
    model.cuda()
    opt = torch.optim.Adam(model.parameters(), lr=0.0001)
    scheduler = StepLR(opt, step_size=5, gamma=0.9)
    train_utils.train(model,
                      opt,
                      scheduler,
                      n_epochs,
                      train_loader,
                      test_loader,
                      save_path="results/final_3D/")
Example No. 8
def main_(model_main):
#def main(CFG): 
    model = model_main.to(CFG['device'])     
      
# Model training, resumed training, and testing #################################################################
    if CFG['train_or_test'] == 'train':
        #optimizer = optim.Adadelta(model.parameters(), lr=CFG['lr'])          # alternative solver
        optimizer = optim.SGD(model.parameters(), lr=CFG['lr'], momentum=CFG['momentum'])
        scheduler = StepLR(optimizer, step_size=2, gamma=CFG['gamma'])        # learning-rate decay schedule
        result_trace = np.zeros([1,7])
        loss_trace = np.zeros([1,12])    # record each epoch's loss
        acc_trace = np.zeros([1,12])     # record each epoch's accuracy
        for epoch in range(0, 20):                                # iterate over epochs
            start_time = datetime.datetime.now()          # training start time
            train_loss_epoch_i, train_acc_epoch_i = train(CFG, model, train_loader, optimizer, epoch)
            end_time = datetime.datetime.now()
            time_cost = (end_time - start_time).seconds   # time spent on this training epoch
            print('Time cost:', time_cost)
            test_loss_epoch_i, test_acc_epoch_i, f4t_and_label = test(CFG, model, test_loader, is_print=True)

            result_epoch_i = [epoch, train_acc_epoch_i[0], train_loss_epoch_i[0], test_acc_epoch_i[0], test_loss_epoch_i[0], scheduler.get_lr()[0], time_cost]

            result_trace = np.vstack([result_trace, np.array(result_epoch_i).reshape(1, len(result_epoch_i))])
            loss_trace = np.vstack([loss_trace, np.array([train_loss_epoch_i + test_loss_epoch_i]).reshape(1, 12)])
            acc_trace = np.vstack([acc_trace, np.array([train_acc_epoch_i + test_acc_epoch_i]).reshape(1, 12)])
            if epoch > 0:
                scheduler.step()
        if CFG['save_model']:
            pt_name = '[STI]_' + model.name + '_' + CFG['dataset_choose'] + '_epoch' + str(0) + 'to' + str(epoch) + '_sample' + str(CFG['NO_train'][0]) + 'to' + str(CFG['NO_train'][1])     # base file name; STI means Source Trained In
        #    plot_curve(result_trace[:,0],[result_trace[:,1],result_trace[:,3]],'结果图/'+pt_name+'_ACC.png',  xlabel='Epoch',ylabel='ACC',title='ACC',legend=['Training_Accuracy','Testing_Accuracy'])
            pt = {'model':model.state_dict(), 'optimizer':optimizer.state_dict(),'scheduler':scheduler.state_dict(),'CFG':CFG,
                  'model_name':model.name, 'result_trace':result_trace, 'loss_trace':loss_trace, 'acc_trace':acc_trace }
            torch.save(pt, pt_name + '_' + device.type + '_rand' + str(CFG['random_seed']) + '.pt')
Example No. 9
def main():
    generator = srgan.SRGAN_gen().to(device)
    discriminator = srgan.SRGAN_dis().to(device)

    params = list(generator.parameters()) + list(discriminator.parameters())
    optimizer = optim.Adam(params, lr=1e-4)
    trainset = TrainDataset()
    train_loader = DataLoader(dataset=trainset,
                              batch_size=BATCH_SIZE,
                              shuffle=True)
    test_data = Image.open('./SR_dataset/Set5/001_HR.png')
    test_data = transforms.ToTensor()(test_data)
    test_data = test_data.unsqueeze(0)
    test_data = test_data.to(device)
    for epoch in range(10000):
        train(generator, discriminator, optimizer, train_loader, device, epoch)
        if epoch % 1000 == 0:
            test(generator, discriminator, test_data, epoch, device)
Example No. 10
def fit():
    (
        train_img,
        test_img,
        train_labels,
        test_labels,
        train_orig_labels,
        test_orig_targets,
    ) = model_selection.train_test_split(IMAGES,
                                         LABELS_ENCODED,
                                         LABELS_NAMES,
                                         test_size=0.1,
                                         random_state=2020)

    train_dataset = OcrDataset(image_path=train_img,
                               labels=train_labels,
                               resize=(IMAGE_HEIGHT, IMAGE_WIDTH))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=BATCH_SIZE,
                                               num_workers=NUM_WORKERS,
                                               shuffle=True)

    test_dataset = OcrDataset(image_path=test_img,
                              labels=test_labels,
                              resize=(IMAGE_HEIGHT, IMAGE_WIDTH))

    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=BATCH_SIZE,
                                              num_workers=NUM_WORKERS,
                                              shuffle=False)

    model = OcrModel_v0(num_characters=len(labels_encoded.classes_))
    model.to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           factor=0.8,
                                                           patience=2,
                                                           verbose=True)

    for epoch in range(EPOCHS):
        train_loss = train(model, train_loader, optimizer)
        valid_preds, valid_loss = evaluate(model, test_loader)
        valid_final_preds = []

        for pred in valid_preds:
            # print(pred)
            cur_preds = decode_preds(pred, labels_encoded)
            valid_final_preds.extend(cur_preds)
        show_preds_list = list(zip(test_orig_targets, valid_final_preds))[1:3]
        pprint(show_preds_list)
        pprint("-" * 90)
        pprint(
            f"Epoch: {epoch} | Train loss = {train_loss} | Valid loss = {valid_loss} |"
        )
        pprint("-" * 90)
Example No. 11
def main(args):
    train_loader, val_loader = custom_data_loader.customDataloader(args)

    model = custom_model.buildModel(args)
    optimizer, scheduler, records = solver_utils.configOptimizer(args, model)
    criterion = solver_utils.Criterion(args)
    recorder  = recorders.Records(args.log_dir, records)

    tf_train_writer, tf_test_writer = tfboard.tensorboard_init()

    for epoch in range(args.start_epoch, args.epochs+1):
        scheduler.step()
        recorder.insertRecord('train', 'lr', epoch, scheduler.get_lr()[0])

        train_utils.train(args, train_loader, model, criterion, optimizer, log, epoch, recorder, tf_train_writer)
        if epoch % args.save_intv == 0: 
            model_utils.saveCheckpoint(args.cp_dir, epoch, model, optimizer, recorder.records, args)

        if epoch % args.val_intv == 0:
            test_utils.test(args, 'val', val_loader, model, log, epoch, recorder, tf_test_writer)
Example No. 12
def my_main(_run, lr, weight_decay, message, use_gpu, epochs, save_images,
            experiment_folder):
    print(message)
    print("Use gpu: {}".format(use_gpu))
    # print(_run)
    # create_dirs()
    model = ColorNet()
    criterion = nn.MSELoss()
    if use_gpu:
        criterion = criterion.cuda()
        model = model.cuda()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 weight_decay=weight_decay)
    train_folder = "places365_standard/train"
    val_folder = "places365_standard/val"
    train_loader = get_train_loader(train_folder)
    validation_loader = get_val_loader(val_folder)
    os.makedirs(experiment_folder + "outputs/color", exist_ok=True)
    os.makedirs(experiment_folder + "outputs/gray", exist_ok=True)
    os.makedirs(experiment_folder + "checkpoints", exist_ok=True)
    best_losses = 1e10

    print("Epochs: {}".format(epochs))

    for epoch in range(epochs):
        # Train for one epoch, then validate
        train(train_loader, model, criterion, optimizer, epoch, _run)
        with torch.no_grad():
            losses = validate(validation_loader, model, criterion, save_images,
                              epoch, _run)
        # Save checkpoint and replace old best model if current model is better
        if losses < best_losses:
            best_losses = losses
            torch.save(
                model.state_dict(),
                experiment_folder +
                "checkpoints/model-epoch-{}-losses-{:.3f}.pth".format(
                    epoch + 1, losses),
            )
Example No. 13
print("train_dataset.labels.shape", train_dataset.labels.shape)
print("test_dataset.labels.shape", test_dataset.labels.shape)
print("train_dataset",train_dataset)
print("test_dataset",test_dataset)

# create models
if "densenet" in cfg.model:
    model = xrv.models.DenseNet(num_classes=train_dataset.labels.shape[1], in_channels=1,
                                **xrv.models.get_densenet_params(cfg.model))
elif "resnet101" in cfg.model:
    model = torchvision.models.resnet101(num_classes=train_dataset.labels.shape[1], pretrained=False)
    #patch for single channel
    model.conv1 = torch.nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)

elif "shufflenet_v2_x2_0" in cfg.model:
    model = torchvision.models.shufflenet_v2_x2_0(num_classes=train_dataset.labels.shape[1], pretrained=False)
    #patch for single channel
    model.conv1[0] = torch.nn.Conv2d(1, 24, kernel_size=3, stride=2, padding=1, bias=False)
else:
    raise Exception("no model")


train_utils.train(model, train_dataset, cfg)


print("Done")
# test_loader = torch.utils.data.DataLoader(test_dataset,
#                                            batch_size=cfg.batch_size,
#                                            shuffle=cfg.shuffle,
#                                            num_workers=0, pin_memory=False)
Example No. 14
def main(tiny_images=None,
         model="cnn",
         augment=False,
         use_scattering=False,
         batch_size=2048,
         mini_batch_size=256,
         lr=1,
         lr_start=None,
         optim="SGD",
         momentum=0.9,
         noise_multiplier=1,
         max_grad_norm=0.1,
         epochs=100,
         bn_noise_multiplier=None,
         max_epsilon=None,
         data_size=550000,
         delta=1e-6,
         logdir=None):
    logger = Logger(logdir)

    device = get_device()

    bs = batch_size
    assert bs % mini_batch_size == 0
    n_acc_steps = bs // mini_batch_size

    train_data, test_data = get_data("cifar10", augment=augment)
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=100,
                                               shuffle=False,
                                               num_workers=4,
                                               pin_memory=True)

    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=100,
                                              shuffle=False,
                                              num_workers=4,
                                              pin_memory=True)

    if isinstance(tiny_images, torch.utils.data.Dataset):
        train_data_aug = tiny_images
    else:
        print("loading tiny images...")
        train_data_aug, _ = get_data("cifar10_500K",
                                     augment=augment,
                                     aux_data_filename=tiny_images)

    scattering, K, (h, w) = None, None, (None, None)
    pre_scattered = False
    if use_scattering:
        scattering, K, (h, w) = get_scatter_transform("cifar10_500K")
        scattering.to(device)

    # if the whole dataset fits in memory, pre-compute the scattering transform
    if use_scattering and data_size <= 50000:
        loader = torch.utils.data.DataLoader(train_data_aug,
                                             batch_size=100,
                                             shuffle=False,
                                             num_workers=4)
        train_data_aug = get_scattered_dataset(loader, scattering, device,
                                               data_size)
        pre_scattered = True

    assert data_size <= len(train_data_aug)
    num_sup = min(data_size, 50000)
    num_batches = int(np.ceil(50000 / mini_batch_size))  # cifar-10 equivalent

    train_batch_sampler = SemiSupervisedSampler(data_size, num_batches,
                                                mini_batch_size)
    train_loader_aug = torch.utils.data.DataLoader(
        train_data_aug,
        batch_sampler=train_batch_sampler,
        num_workers=0 if pre_scattered else 4,
        pin_memory=not pre_scattered)

    rdp_norm = 0
    if model == "cnn":
        if use_scattering:
            save_dir = f"bn_stats/cifar10_500K"
            os.makedirs(save_dir, exist_ok=True)
            bn_stats, rdp_norm = scatter_normalization(
                train_loader,
                scattering,
                K,
                device,
                data_size,
                num_sup,
                noise_multiplier=bn_noise_multiplier,
                orders=ORDERS,
                save_dir=save_dir)
            model = CNNS["cifar10"](K, input_norm="BN", bn_stats=bn_stats)
            model = model.to(device)

            if not pre_scattered:
                model = nn.Sequential(scattering, model)
        else:
            model = CNNS["cifar10"](in_channels=3, internal_norm=False)

    elif model == "linear":
        save_dir = f"bn_stats/cifar10_500K"
        os.makedirs(save_dir, exist_ok=True)
        bn_stats, rdp_norm = scatter_normalization(
            train_loader,
            scattering,
            K,
            device,
            data_size,
            num_sup,
            noise_multiplier=bn_noise_multiplier,
            orders=ORDERS,
            save_dir=save_dir)
        model = ScatterLinear(K, (h, w), input_norm="BN", bn_stats=bn_stats)
        model = model.to(device)

        if not pre_scattered:
            model = nn.Sequential(scattering, model)
    else:
        raise ValueError(f"Unknown model {model}")
    model.to(device)

    if pre_scattered:
        test_loader = get_scattered_loader(test_loader, scattering, device)

    print(f"model has {get_num_params(model)} parameters")

    if optim == "SGD":
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr,
                                    momentum=momentum)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    privacy_engine = PrivacyEngine(
        model,
        bs,
        data_size,
        alphas=ORDERS,
        noise_multiplier=noise_multiplier,
        max_grad_norm=max_grad_norm,
    )
    privacy_engine.attach(optimizer)

    best_acc = 0
    flat_count = 0

    for epoch in range(0, epochs):

        print(f"\nEpoch: {epoch} ({privacy_engine.steps} steps)")
        train_loss, train_acc = train(model,
                                      train_loader_aug,
                                      optimizer,
                                      n_acc_steps=n_acc_steps)
        test_loss, test_acc = test(model, test_loader)

        if noise_multiplier > 0:
            print(f"sample_rate={privacy_engine.sample_rate}, "
                  f"mul={privacy_engine.noise_multiplier}, "
                  f"steps={privacy_engine.steps}")
            rdp_sgd = get_renyi_divergence(
                privacy_engine.sample_rate,
                privacy_engine.noise_multiplier) * privacy_engine.steps
            epsilon, _ = get_privacy_spent(rdp_norm + rdp_sgd,
                                           target_delta=delta)
            epsilon2, _ = get_privacy_spent(rdp_sgd, target_delta=delta)
            print(f"ε = {epsilon:.3f} (sgd only: ε = {epsilon2:.3f})")

            if max_epsilon is not None and epsilon >= max_epsilon:
                return
        else:
            epsilon = None

        logger.log_epoch(epoch, train_loss, train_acc, test_loss, test_acc,
                         epsilon)
        logger.log_scalar("epsilon/train", epsilon, epoch)
        logger.log_scalar("cifar10k_loss/train", train_loss, epoch)
        logger.log_scalar("cifar10k_acc/train", train_acc, epoch)

        if test_acc > best_acc:
            best_acc = test_acc
            flat_count = 0
        else:
            flat_count += 1
            if flat_count >= 20:
                print("plateau...")
                return
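
train() above takes n_acc_steps = batch_size // mini_batch_size but is not shown; a plain gradient-accumulation loop consistent with that signature looks roughly like the sketch below. The real DP version additionally relies on the attached privacy engine for per-sample clipping and noise, which this sketch omits:

import torch
import torch.nn.functional as F

def train_accum(model, loader, optimizer, n_acc_steps=8, device="cpu"):
    model.train()
    optimizer.zero_grad()
    for i, (x, y) in enumerate(loader):
        x, y = x.to(device), y.to(device)
        loss = F.cross_entropy(model(x), y)
        (loss / n_acc_steps).backward()      # accumulate scaled gradients
        if (i + 1) % n_acc_steps == 0:
            optimizer.step()                 # one update per n_acc_steps mini-batches
            optimizer.zero_grad()
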
Example No. 15
                         batch_size=BATCH_SIZE,
                         shuffle=True,
                         collate_fn=pad_collate)
model = BilstmAspectAttPool(Configs1())
initialize_weights(model)
print(model)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

best_valid_loss = float('inf')
for epoch in range(EPOCHS):

    start_time = time.time()

    train_loss = train(model, train_loader, optimizer, criterion, CLIP, device)
    valid_loss = evaluate(model, test_loader, criterion, device)

    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), model_name)

    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(
        f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}'
    )
    print(
Example No. 16
def main():
    args = parse_args()
    args.num_gpus = len(get_available_gpus())
    eval(args.NET + '_setup')(args)
    set_seed(args.seed)
    setup(args)

    # Create model and optimizer
    if args.resume or args.eval or args.benchmark:
        last_epoch, best_epoch, best_val_loss, num_params, \
            enc_params, dec_params = parse_experiment(args.odir)
        i = last_epoch
        if args.eval or args.benchmark:
            i = best_epoch
        args.resume = model_at(args, i)
        model, stats = tf_resume(args, i)
    else:
        check_overwrite(os.path.join(args.odir, 'trainlog.txt'))
        model = eval(args.NET + '_create_model')(args)
        stats = []

    print('Will save to ' + args.odir)
    if not os.path.exists(args.odir):
        os.makedirs(args.odir)
    if not os.path.exists(args.odir + '/models'):
        os.makedirs(args.odir + '/models')
    with open(os.path.join(args.odir, 'cmdline.txt'), 'w') as f:
        f.write(" ".join([
            "'" + a + "'" if (len(a) == 0 or a[0] != '-') else a
            for a in sys.argv
        ]))

    args.model = model
    args.step = eval(args.NET + '_step')

    # Training loop
    epoch = args.start_epoch
    train_data_queue, train_data_processes = data_setup(args,
                                                        'train',
                                                        args.nworkers,
                                                        repeat=True)
    if args.eval == 0:
        for epoch in range(args.start_epoch, args.epochs):
            print('Epoch {}/{} ({}):'.format(epoch + 1, args.epochs,
                                             args.odir))

            loss = train(args, epoch, train_data_queue,
                         train_data_processes)[0]

            if (epoch +
                    1) % args.test_nth_epoch == 0 or epoch + 1 == args.epochs:
                loss_val = test('val', args)[0]
                print('-> Train Loss: {}, \tVal loss: {}'.format(
                    loss, loss_val))
                stats.append({
                    'epoch': epoch + 1,
                    'loss': loss,
                    'loss_val': loss_val
                })
            else:
                loss_val = 0
                print('-> Train loss: {}'.format(loss))
                stats.append({'epoch': epoch + 1, 'loss': loss})

            if (epoch +
                    1) % args.save_nth_epoch == 0 or epoch + 1 == args.epochs:
                with open(os.path.join(args.odir, 'trainlog.txt'),
                          'w') as outfile:
                    json.dump(stats, outfile)

                save_model(args, epoch)
            if (epoch +
                    1) % args.test_nth_epoch == 0 and epoch + 1 < args.epochs:
                split = 'val'
                predictions = samples(split, args, 20)
                cache_pred(predictions, split, args)
                metrics(split, args, epoch)

            if math.isnan(loss): break

        if len(stats) > 0:
            with open(os.path.join(args.odir, 'trainlog.txt'), 'w') as outfile:
                json.dump(stats, outfile)

    kill_data_processes(train_data_queue, train_data_processes)

    split = 'val'
    predictions = samples(split, args, 20)
    cache_pred(predictions, split, args)
    metrics(split, args, epoch)
    if args.benchmark:
        benchmark_results('test', args)
Example No. 17
    def fit(self, X, y):
        x_train, x_valid, y_train, y_valid = train_test_split(X,
                                                              y,
                                                              test_size=0.2)

        # from some github repo...
        torch.multiprocessing.set_sharing_strategy('file_system')

        args = self.args

        args.input_dim = X.shape[1]
        args.output_dim = 1
        args.task = 'regression'

        use_cuda = not args.no_cuda and torch.cuda.is_available()

        torch.manual_seed(args.seed)
        np.random.seed(args.seed)
        device = torch.device("cuda" if use_cuda else "cpu")
        self.device = device

        train_loader = basic_loader(x_train, y_train, args.batch_size)
        valid_loader = basic_loader(x_valid,
                                    y_valid,
                                    args.batch_size,
                                    train_shuffle=False)

        # train_loader, valid_loader, test_loader = get_data_loaders(args.dataset, args.batch_size,
        #                                                            sub_task=args.sub_task, dim=args.input_dim)

        # if args.dataset in ['sider_split/', 'tox21_split/']:
        #     args.dataset = args.dataset[:-1] + '-' + str(args.sub_task)

        print('batch number: train={}, valid={}'.format(
            len(train_loader), len(valid_loader)))

        model = Net(input_dim=args.input_dim,
                    output_dim=args.output_dim,
                    hidden_dim=args.hidden_dim,
                    num_layer=args.depth,
                    num_back_layer=args.back_n,
                    dense=True,
                    drop_type=args.drop_type,
                    net_type=args.net_type,
                    approx=args.anneal,
                    device=device).to(device)
        self.model = model

        if args.optimizer == 'SGD':
            optimizer = optim.SGD(model.parameters(),
                                  lr=args.lr,
                                  momentum=args.momentum,
                                  nesterov=True)
        elif args.optimizer == 'AMSGrad':
            optimizer = optim.Adam(model.parameters(),
                                   lr=args.lr,
                                   amsgrad=True)
        scheduler = StepLR(optimizer,
                           step_size=args.lr_step_size,
                           gamma=args.gamma)

        best_score = -1e30
        start_epoch = 1  # start from epoch 1 or last checkpoint epoch
        if args.anneal == 'approx':
            args.net_type = 'approx_' + args.net_type

        best_model_name = './checkpoint/{}/{}/best_seed{}_depth{}_ckpt.t7'.format(
            args.dataset.strip('/'), args.net_type, args.seed, args.depth)
        last_model_name = './checkpoint/{}/{}/last_seed{}_depth{}_ckpt.t7'.format(
            args.dataset.strip('/'), args.net_type, args.seed, args.depth)

        best_log_file = 'log/' + args.dataset.strip(
            '/') + '/{}/depth{}_backn{}_drop{}_p{}_best.log'.format(
                args.net_type, args.depth, args.back_n, args.drop_type, args.p)
        last_log_file = 'log/' + args.dataset.strip(
            '/') + '/{}/depth{}_backn{}_drop{}_p{}_last.log'.format(
                args.net_type, args.depth, args.back_n, args.drop_type, args.p)

        model_dir = './checkpoint/{}/{}/'.format(args.dataset.strip('/'),
                                                 args.net_type)
        if not os.path.exists(model_dir):
            os.makedirs(model_dir)
        log_dir = 'log/' + args.dataset.strip('/') + '/{}/'.format(
            args.net_type)
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)

        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')

        for epoch in range(start_epoch, args.epochs + start_epoch):
            scheduler.step(epoch)

            alpha = get_alpha(epoch, args.epochs)
            train_approximate_loss = train(args, model, device, train_loader,
                                           optimizer, epoch, args.anneal,
                                           alpha)

            # used for plotting learning curves
            train_loss, train_score = test(args, model, device, train_loader,
                                           'train')
            valid_loss, valid_score = test(args, model, device, valid_loader,
                                           'valid')
            # test_loss, test_score = test(args, model, device, test_loader, 'test')

            print(train_score, valid_score)
            # early stopping version
            if valid_score > best_score:
                self.best_state = model.state_dict()
                state = {'model': model.state_dict()}
                torch.save(state, best_model_name)
                best_score = valid_score

            # "convergent" version
            state = {'model': model.state_dict()}
            torch.save(state, last_model_name)

        # print('Training finished. Loading models from validation...')
        # for model_name, log_file, setting in zip([best_model_name, last_model_name], [best_log_file, last_log_file],
        #                                          ['best', 'last']):
        #     print('\nLoading the {} model...'.format(setting))
        #
        #     checkpoint = torch.load(model_name)
        #     model.load_state_dict(checkpoint['model'])
        #     train_loss, train_score = test(args, model, device, train_loader, 'train')
        #     valid_loss, valid_score = test(args, model, device, valid_loader, 'valid')
        # test_loss, test_score = test(args, model, device, test_loader, 'test ')
        return self
Example No. 18
def main(dataset,
         augment=False,
         use_scattering=False,
         size=None,
         batch_size=2048,
         mini_batch_size=256,
         sample_batches=False,
         lr=1,
         optim="SGD",
         momentum=0.9,
         nesterov=False,
         noise_multiplier=1,
         max_grad_norm=0.1,
         epochs=100,
         input_norm=None,
         num_groups=None,
         bn_noise_multiplier=None,
         max_epsilon=None,
         logdir=None,
         early_stop=True,
         seed=0):
    torch.manual_seed(seed)
    logger = Logger(logdir)
    device = get_device()

    train_data, test_data = get_data(dataset, augment=augment)

    if use_scattering:
        scattering, K, _ = get_scatter_transform(dataset)
        scattering.to(device)
    else:
        scattering = None
        K = 3 if len(train_data.data.shape) == 4 else 1

    bs = batch_size
    assert bs % mini_batch_size == 0
    n_acc_steps = bs // mini_batch_size

    # Batch accumulation and data augmentation with Poisson sampling aren't implemented
    if sample_batches:
        assert n_acc_steps == 1
        assert not augment

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=mini_batch_size,
                                               shuffle=True,
                                               num_workers=1,
                                               pin_memory=True)

    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=mini_batch_size,
                                              shuffle=False,
                                              num_workers=1,
                                              pin_memory=True)

    rdp_norm = 0
    if input_norm == "BN":
        # compute noisy data statistics or load from disk if pre-computed
        save_dir = f"bn_stats/{dataset}"
        os.makedirs(save_dir, exist_ok=True)
        bn_stats, rdp_norm = scatter_normalization(
            train_loader,
            scattering,
            K,
            device,
            len(train_data),
            len(train_data),
            noise_multiplier=bn_noise_multiplier,
            orders=ORDERS,
            save_dir=save_dir)
        model = CNNS[dataset](K, input_norm="BN", bn_stats=bn_stats, size=size)
    else:
        model = CNNS[dataset](K,
                              input_norm=input_norm,
                              num_groups=num_groups,
                              size=size)

    model.to(device)

    if use_scattering and augment:
        model = nn.Sequential(scattering, model)
        train_loader = torch.utils.data.DataLoader(train_data,
                                                   batch_size=mini_batch_size,
                                                   shuffle=True,
                                                   num_workers=1,
                                                   pin_memory=True,
                                                   drop_last=True)
    else:
        # pre-compute the scattering transform if necessary
        train_loader = get_scattered_loader(train_loader,
                                            scattering,
                                            device,
                                            drop_last=True,
                                            sample_batches=sample_batches)
        test_loader = get_scattered_loader(test_loader, scattering, device)

    print(f"model has {get_num_params(model)} parameters")

    if optim == "SGD":
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr,
                                    momentum=momentum,
                                    nesterov=nesterov)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    privacy_engine = PrivacyEngine(
        model,
        batch_size=bs,
        sample_size=len(train_data),
        alphas=ORDERS,
        noise_multiplier=noise_multiplier,
        max_grad_norm=max_grad_norm,
    )
    privacy_engine.attach(optimizer)

    best_acc = 0
    flat_count = 0

    results = dict(train_zeon=[],
                   train_xent=[],
                   test_zeon=[],
                   test_xent=[],
                   epoch=[])
    for epoch in range(0, epochs):
        print(f"\nEpoch: {epoch}")

        train_loss, train_acc = train(model,
                                      train_loader,
                                      optimizer,
                                      n_acc_steps=n_acc_steps)
        test_loss, test_acc = test(model, test_loader)

        results['train_zeon'].append(train_acc)
        results['train_xent'].append(train_loss)
        results['test_zeon'].append(test_acc)
        results['test_xent'].append(test_loss)
        results['epoch'].append(epoch)

        if noise_multiplier > 0:
            rdp_sgd = get_renyi_divergence(
                privacy_engine.sample_rate,
                privacy_engine.noise_multiplier) * privacy_engine.steps
            epsilon, _ = get_privacy_spent(rdp_norm + rdp_sgd)
            epsilon2, _ = get_privacy_spent(rdp_sgd)
            print(f"ε = {epsilon:.3f} (sgd only: ε = {epsilon2:.3f})")

            if max_epsilon is not None and epsilon >= max_epsilon:
                return
        else:
            epsilon = None

        logger.log_epoch(epoch, train_loss, train_acc, test_loss, test_acc,
                         epsilon)
        logger.log_scalar("epsilon/train", epsilon, epoch)

        # stop if we're not making progress
        if test_acc > best_acc:
            best_acc = test_acc
            flat_count = 0
        else:
            flat_count += 1
            if flat_count >= 20 and early_stop:
                print("plateau...")
                break

    # Write to file.
    record = {
        **results,
        **{
            'best_acc': best_acc,
            'seed': seed,
            'dataset': dataset
        }
    }
    record_path = os.path.join('.', 'record', f'{dataset}-{seed}.json')
    os.makedirs(os.path.dirname(record_path), exist_ok=True)
    with open(record_path, 'w') as f:
        json.dump(record, f, indent=4)
    import logging
    logging.warning(f'Wrote to file: {record_path}')
Example No. 19
########################################################################

batch_size = 400  # batch size used for computation
load_batch = 100  # batch size used for data loading (not computation)
device = torch.device("cuda:0")  # device

lr = 0.001  # learning_rate
# load train and test file names; train:test = 4:1
if os.path.exists(r'./train_test_names.data'):
    train_test = pickle.load(open('./train_test_names.data', "rb"))
else:
    train_test = train_utils.get_train_test_name(dns_home)
train_noisy_names, train_clean_names, test_noisy_names, test_clean_names = \
    train_utils.get_all_names(train_test, dns_home=dns_home)

train_dataset = loader.WavDataset(train_noisy_names, train_clean_names, frame_dur=37.5)
test_dataset = loader.WavDataset(test_noisy_names, test_clean_names, frame_dur=37.5)
# dataloader
train_dataloader = DataLoader(train_dataset, batch_size=load_batch, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=load_batch, shuffle=True)

dccrn = model_cov_bn.DCCRN_(
    n_fft=512, hop_len=int(6.25 * 16000 / 1000), net_params=net_config.get_net_params(), batch_size=batch_size,
    device=device, win_length=int((25 * 16000 / 1000))).to(device)

optimizer = torch.optim.Adam(dccrn.parameters(), lr=lr)
criterion = SiSnr()
train_utils.train(model=dccrn, optimizer=optimizer, criterion=criterion, train_iter=train_dataloader,
                  test_iter=test_dataloader, max_epoch=500, device=device, batch_size=batch_size, log_path=save_file,
                  just_test=False)
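
SiSnr() is defined elsewhere in that project; it presumably implements the usual scale-invariant SNR objective. A sketch of that loss (negated so it can be minimized), under the standard definition s_target = <est, ref> ref / ||ref||^2 and si_snr = 10 log10(||s_target||^2 / ||est - s_target||^2):

import torch
import torch.nn as nn

class SiSnrLoss(nn.Module):
    def forward(self, est, ref, eps=1e-8):
        est = est - est.mean(dim=-1, keepdim=True)   # zero-mean per signal
        ref = ref - ref.mean(dim=-1, keepdim=True)
        dot = torch.sum(est * ref, dim=-1, keepdim=True)
        s_target = dot * ref / (torch.sum(ref ** 2, dim=-1, keepdim=True) + eps)
        e_noise = est - s_target
        si_snr = 10 * torch.log10(
            torch.sum(s_target ** 2, dim=-1) / (torch.sum(e_noise ** 2, dim=-1) + eps) + eps)
        return -si_snr.mean()
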
Example No. 20
    args = parser.parse_args()
    cifar_dir = args.cifar_root
    fig_path = args.fig_path
    validation_split = args.val_split
    batch_size = args.batch_size
    epochs = args.epochs
    weight_path = args.weight_path
    weight_decay = args.weight_decay
    lr = args.lr

    SEED = args.seed # set random seed (default as 1234)

    # split train, val, test from `get_data` function
    train_loader, val_loader, test_loader = get_data(cifar_dir=cifar_dir, batch_size=batch_size, augment=True, validation_split=validation_split)

    # load model
    model = VGG_lite()
    # define loss
    loss = nn.CrossEntropyLoss()
    # train the model
    model, history = train(model, train_loader, val_loader, epochs, loss, batch_size, optimizer='adam', weight_decay=weight_decay, lr=lr)

    # save the model according to `weight_path` from parser (defaults to './weights/final.pth')
    torch.save(model.state_dict(), weight_path)

    plot_history(history, fig_path) # save figures

    acc, cm, cm_norm = evaluate(model, test_loader) # evaluate trained model
    plot_cm(cm, cm_norm, fig_path) # save confusion matrix figures
    print('Test Accuracy: {}%'.format(round(acc*100, 4))) # print the model test accuracy
Example No. 21
def my_main(
    _run,
    lr,
    weight_decay,
    message,
    use_gpu,
    epochs,
    save_images,
    experiment_folder,
    batch_size,
    save_exp,
):
    print("Epochs: {}".format(epochs))
    # args["seed"] = _run.config["seed"]

    device = torch.device("cuda" if use_gpu else "cpu")
    dataloader_kwargs = {"pin_memory": True} if use_gpu else {}
    # if save_exp:
    os.makedirs(experiment_folder + "outputs/color")
    os.makedirs(experiment_folder + "outputs/gray")
    os.makedirs(experiment_folder + "checkpoints")
    best_losses = 1e10

    seed = int(time.time())

    args = {
        "num_processes": 4,
        "batch_size": 64,
        "lr": lr,
        "weight_decay": weight_decay,
        "log_interval": 100,
        "use_gpu": use_gpu,
        "epochs": epochs,
        "seed": seed,
        "experiment_folder": experiment_folder,
    }

    train_folder = "places365_standard/train"
    val_folder = "places365_standard/val"
    trained = False
    options = dict({"num_classes": (2 * 224 * 224)})
    model = AlexNet().to(device)
    print(model)
    # model = nn.DataParallel(model)
    # model.share_memory()  # gradients are allocated lazily, so they are not shared here

    processes = []
    time1 = time.time()
    train(
        1,
        args,
        model,
        device,
        dataloader_kwargs,
        train_folder,
        nn.CrossEntropyLoss,
        val_folder,
    )
    time2 = time.time()
    print("{:s} function took {:.3f} ms".format("train",
                                                (time2 - time1) * 1000.0))
Example No. 22
def main(feature_path=None,
         batch_size=2048,
         mini_batch_size=256,
         lr=1,
         optim="SGD",
         momentum=0.9,
         nesterov=False,
         noise_multiplier=1,
         max_grad_norm=0.1,
         max_epsilon=None,
         epochs=100,
         logdir=None):

    logger = Logger(logdir)

    device = get_device()

    # get pre-computed features
    x_train = np.load(f"{feature_path}_train.npy")
    x_test = np.load(f"{feature_path}_test.npy")

    train_data, test_data = get_data("cifar10", augment=False)
    y_train = np.asarray(train_data.targets)
    y_test = np.asarray(test_data.targets)

    trainset = torch.utils.data.TensorDataset(torch.from_numpy(x_train),
                                              torch.from_numpy(y_train))
    testset = torch.utils.data.TensorDataset(torch.from_numpy(x_test),
                                             torch.from_numpy(y_test))

    bs = batch_size
    assert bs % mini_batch_size == 0
    n_acc_steps = bs // mini_batch_size
    train_loader = torch.utils.data.DataLoader(trainset,
                                               batch_size=mini_batch_size,
                                               shuffle=True,
                                               num_workers=1,
                                               pin_memory=True,
                                               drop_last=True)
    test_loader = torch.utils.data.DataLoader(testset,
                                              batch_size=mini_batch_size,
                                              shuffle=False,
                                              num_workers=1,
                                              pin_memory=True)

    n_features = x_train.shape[-1]
    try:
        mean = np.load(f"{feature_path}_mean.npy")
        var = np.load(f"{feature_path}_var.npy")
    except FileNotFoundError:
        mean = np.zeros(n_features, dtype=np.float32)
        var = np.ones(n_features, dtype=np.float32)

    bn_stats = (torch.from_numpy(mean).to(device),
                torch.from_numpy(var).to(device))

    model = nn.Sequential(StandardizeLayer(bn_stats),
                          nn.Linear(n_features, 10)).to(device)

    if optim == "SGD":
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr,
                                    momentum=momentum,
                                    nesterov=nesterov)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    privacy_engine = PrivacyEngine(
        model,
        sample_rate=bs / len(train_data),
        alphas=ORDERS,
        noise_multiplier=noise_multiplier,
        max_grad_norm=max_grad_norm,
    )
    privacy_engine.attach(optimizer)

    for epoch in range(0, epochs):
        print(f"\nEpoch: {epoch}")

        train_loss, train_acc = train(model,
                                      train_loader,
                                      optimizer,
                                      n_acc_steps=n_acc_steps)
        test_loss, test_acc = test(model, test_loader)

        if noise_multiplier > 0:
            rdp_sgd = get_renyi_divergence(
                privacy_engine.sample_rate,
                privacy_engine.noise_multiplier) * privacy_engine.steps
            epsilon, _ = get_privacy_spent(rdp_sgd)
            print(f"ε = {epsilon:.3f}")

            if max_epsilon is not None and epsilon >= max_epsilon:
                return
        else:
            epsilon = None

        logger.log_epoch(epoch, train_loss, train_acc, test_loss, test_acc,
                         epsilon)
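
StandardizeLayer is not shown here; given that it receives the (mean, var) feature statistics, it is presumably a fixed feature-wise standardization in front of the linear classifier. A minimal sketch under that assumption:

import torch
import torch.nn as nn

class StandardizeLayer(nn.Module):
    def __init__(self, bn_stats, eps=1e-5):
        super().__init__()
        mean, var = bn_stats
        self.register_buffer("mean", mean)   # fixed, non-trainable statistics
        self.register_buffer("var", var)
        self.eps = eps

    def forward(self, x):
        return (x - self.mean) / torch.sqrt(self.var + self.eps)
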
Example No. 23
start = time.time()
now = datetime.datetime.today().strftime("%Y-%m-%d %H:%M:%S")

# Print training info
print("----------------")
print("Starting training.")
print("Current time:", now)
print("Training data: %d samples" % len(train_data))
print("----------------")
print()

# Start training
try:
    for iter in range(1, n_iter + 1):
        input, target = get_batch_set()
        loss = train(input, target)
        total_loss += loss

        # Print current training progress
        if iter % print_every == 0:
            avg_loss = total_loss / print_every
            sys.stdout.write(
                "%d %d%% (%s) %.4f\n" %
                (iter, iter / n_iter * 100, time_since(start), avg_loss))
            losses.append(avg_loss)
            total_loss = 0
            lyrics = generate_lyrics(['사랑', '발라드'])  # seed words: 'love', 'ballad'
            print(lyrics)
            print()

    sys.stdout.write("Training complete.\n")
Example No. 24
File: main.py  Project: sacmehta/PRU
def trainEvalLM(args):
    fn = 'corpus.{}.data'.format(hashlib.md5(args.data.encode()).hexdigest())
    if os.path.exists(fn):
        print('Loading cached dataset...')
        corpus = torch.load(fn)
    else:
        print('Producing dataset...')
        corpus = data.Corpus(args.data)
        torch.save(corpus, fn)

    if torch.cuda.is_available():
        args.cuda = True

    ntokens = len(corpus.dictionary)
    eval_batch_size = 10
    train_data = batchify(corpus.train, args.batch_size, args)
    val_data = batchify(corpus.valid, eval_batch_size, args)

    # Build the model and loss function
    model = lmModel.RNNModel(args.model,
                             ntokens,
                             args.emsize,
                             args.nhid,
                             args.nlayers,
                             args.dropout,
                             args.tied,
                             g=args.g,
                             k=args.k)
    criterion = nn.CrossEntropyLoss()
    if torch.cuda.is_available():
        model = model.cuda()
        criterion = criterion.cuda()

    #compute network parameters
    params = list(model.parameters())
    total_params = np.sum([np.prod(p.size()) for p in params])
    print(
        '\033[1;32;40mTotal parameters (in million):\033[0m\033[1;31;40m {:0.2f} \033[0m\n'
        .format(total_params / 1e6, 2))

    optimizer = torch.optim.SGD(params, lr=args.lr, weight_decay=args.wdecay)
    start_epoch = 1
    if args.resume:
        print('Resuming model ...')
        model, criterion, optimizer, start_epoch = model_load(args.resume)
        optimizer.param_groups[0]['lr'] = args.lr
        model.dropout = args.dropout

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        #Create folder for saving model and log files
        args.saveDir += '_' + args.model
        # =====================
        if not os.path.isdir(args.saveDir):
            os.mkdir(args.saveDir)

        save_str = 'nl_' + str(args.nlayers) + '_nh_' + str(
            args.nhid) + '_g_' + str(args.g) + '_k_' + str(args.k)
        args.save = args.saveDir + '/model_' + save_str + '.pt'

        logFileLoc = args.saveDir + '/logs_' + save_str + '.txt'
        logger = open(logFileLoc, 'w')
        logger.write(str(args))
        logger.write('\n Total parameters (in million): {:0.2f}'.format(
            total_params / 1e6, 2))
        logger.write('\n\n')
        logger.write(
            "\n%s\t%s\t%s\t%s\t%s" %
            ('Epoch', 'Loss(Tr)', 'Loss(val)', 'ppl (tr)', 'ppl (val)'))
        logger.flush()

        best_val_loss = []
        stored_loss = 100000000
        # Loop over epochs.
        for epoch in range(start_epoch, args.epochs + 1):
            epoch_start_time = time.time()
            train_loss = train(args, model, criterion, optimizer, epoch,
                               train_data, ntokens)

            ### TRAIN WITH ASGD
            if 't0' in optimizer.param_groups[0]:
                tmp = {}
                for prm in model.parameters():
                    tmp[prm] = prm.data.clone()
                    prm.data = optimizer.state[prm]['ax'].clone()

                val_loss = evaluate(args, model, criterion, val_data, ntokens,
                                    eval_batch_size)

                print('-' * 89)
                print(
                    '| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
                    'valid ppl {:8.2f}'.format(
                        epoch, (time.time() - epoch_start_time), val_loss,
                        math.exp(val_loss)))
                print('-' * 89)

                logger.write("\n%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f" %
                             (epoch, train_loss, val_loss,
                              math.exp(train_loss), math.exp(val_loss)))
                logger.flush()

                if val_loss < stored_loss:
                    model_save(args.save, model, criterion, optimizer, epoch)
                    print('Saving Averaged (new best validation)')
                    stored_loss = val_loss

                for prm in model.parameters():
                    prm.data = tmp[prm].clone()

            else:
                val_loss = evaluate(args, model, criterion, val_data, ntokens,
                                    eval_batch_size)

                print('-' * 89)
                print(
                    '| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
                    'valid ppl {:8.2f}'.format(
                        epoch, (time.time() - epoch_start_time), val_loss,
                        math.exp(val_loss)))
                print('-' * 89)

                logger.write("\n%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f" %
                             (epoch, train_loss, val_loss,
                              math.exp(train_loss), math.exp(val_loss)))
                logger.flush()

                if val_loss < stored_loss:
                    model_save(args.save, model, criterion, optimizer, epoch)
                    print('Saving model (new best validation)')
                    stored_loss = val_loss

                if 't0' not in optimizer.param_groups[0] and (
                        len(best_val_loss) > args.nonmono
                        and val_loss > min(best_val_loss[:-args.nonmono])):
                    print('Switching to ASGD')
                    optimizer = torch.optim.ASGD(model.parameters(),
                                                 lr=args.lr,
                                                 t0=0,
                                                 lambd=0.,
                                                 weight_decay=args.wdecay)
                best_val_loss.append(val_loss)
    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')
Exemplo n.º 25
def main():
	parser = argparse.ArgumentParser()
	parser.add_argument('--DATASET_PATH', type=str, default='/home/zhangdong/database/DUTS/')
	parser.add_argument('--WEIGHTS_PATH', type=str, default='/home/yangle/DAVIS/result/models/')
	parser.add_argument('--EXPERIMENT', type=str, default='/home/yangle/DAVIS/result/TrainNet/')
	parser.add_argument('--N_EPOCHS', type=int, default=200)
	parser.add_argument('--MAX_PATIENCE', type=int, default=30)
	parser.add_argument('--batch_size', type=int, default=32)
	parser.add_argument('--seed', type=int, default=0)
	parser.add_argument('--N_CLASSES', type=int, default=2)
	parser.add_argument('--LEARNING_RATE', type=float, default=1e-4)
	parser.add_argument('--LR_DECAY', type=float, default=0.995)
	parser.add_argument('--DECAY_LR_EVERY_N_EPOCHS', type=int, default=1)
	parser.add_argument('--WEIGHT_DECAY', type=float, default=0.0001)
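	# note: argparse's type=bool treats any non-empty string as True, so '--CUDNN False' would still enable cudnn.benchmark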
	parser.add_argument('--CUDNN', type=bool, default=True)
	args = parser.parse_args()

	torch.cuda.manual_seed(args.seed)
	cudnn.benchmark = args.CUDNN

	normalize = transforms.Normalize(mean=saliency.mean, std=saliency.std)
	train_joint_transformer_img = transforms.Compose([joint_transforms.JointResize(224)])
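	# target mask resolutions, presumably one per decoder stage for multi-scale supervision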
	mask_size_list = [14, 28, 56, 112, 224]

	train_dset = saliency.Saliency(
		args.DATASET_PATH, 'train', train_joint_transformer_img, mask_size_list,
		transform=transforms.Compose([transforms.ToTensor(), normalize]))
	train_loader = torch.utils.data.DataLoader(
		train_dset, batch_size=args.batch_size, shuffle=True)

	test_joint_transforms_img = transforms.Compose([joint_transforms.JointResize(224)])
	val_dset = saliency.Saliency(
		args.DATASET_PATH, 'val', test_joint_transforms_img, mask_size_list,
		transform=transforms.Compose([transforms.ToTensor(), normalize]))
	val_loader = torch.utils.data.DataLoader(
		val_dset, batch_size=args.batch_size, shuffle=False)

	print("TrainImages: %d" % len(train_loader.dataset.imgs))
	print("ValImages: %d" % len(val_loader.dataset.imgs))

	# example_inputs, example_targets = next(iter(train_loader))
	# print("InputsBatchSize: ", example_inputs.size())
	# print("TargetsBatchSize: ", len(example_targets))
	# print("\nInput (size, max, min) ---")
	# # input
	# i = example_inputs[0]
	# print(i.size())
	# print(i.max())
	# print(i.min())
	# print("Target (size, max, min) ---")
	# # target
	# for mask in example_targets:
	# 	print(mask.size())
	# 	print(mask.max())
	# 	print(mask.min())


	# initialize the ResNet-50 backbone from the pre-trained classification model
	resnet = torchvision.models.resnet50(pretrained=True)
	pre_trained_dict = resnet.state_dict()
	model = SegNet.resnet50()
	model_dict = model.state_dict()

	# 1. filter out unnecessary keys
	pre_trained_dict = {k: v for k, v in pre_trained_dict.items() if k in model_dict}
	# 2. overwrite entries in the existing state dict
	model_dict.update(pre_trained_dict)
	# 3. load the new state dict
	model.load_state_dict(model_dict)
	model = model.cuda()
	#model = torch.nn.DataParallel(model).cuda()

	print('  + Number of params: {}'.format(
		sum([p.data.nelement() for p in model.parameters()])))
	# model.apply(utils.weights_init)
	optimizer = optim.RMSprop(model.parameters(), lr=args.LEARNING_RATE,
							  weight_decay=args.WEIGHT_DECAY, eps=1e-12)
	criterion = nn.NLLLoss2d().cuda()  # deprecated alias; nn.NLLLoss handles 2D targets in recent PyTorch

	exp_dir = args.EXPERIMENT + 'test'
	if os.path.exists(exp_dir):
		shutil.rmtree(exp_dir)

	exp = experiment.Experiment('test', args.EXPERIMENT)
	exp.init()

	START_EPOCH = exp.epoch
	END_EPOCH = START_EPOCH + args.N_EPOCHS

	for epoch in range(START_EPOCH, END_EPOCH):

		since = time.time()

		# ### Train ###
		trn_loss, trn_err = utils.train(model, train_loader, optimizer, criterion, epoch)
		print('Epoch {:d}: Train - Loss: {:.4f}\tErr: {:.4f}'.format(epoch, trn_loss, trn_err))
		time_elapsed = time.time() - since
		print('Train Time {:.0f}m {:.0f}s'.format(
			time_elapsed // 60, time_elapsed % 60))

		### Test ###
		val_loss, val_err = utils.test(model, val_loader, criterion, epoch)
		print('Val - Loss: {:.4f}, Error: {:.4f}'.format(val_loss, val_err))
		time_elapsed = time.time() - since
		print('Total Time {:.0f}m {:.0f}s\n'.format(
			time_elapsed // 60, time_elapsed % 60))

		### Save Metrics ###
		exp.save_history('train', trn_loss, trn_err)
		exp.save_history('val', val_loss, val_err)

		### Checkpoint ###
		exp.save_weights(model, trn_loss, val_loss, trn_err, val_err)
		exp.save_optimizer(optimizer, val_loss)

		## Early Stopping ##
		if (epoch - exp.best_val_loss_epoch) > args.MAX_PATIENCE:
			print(("Early stopping at epoch %d since no "
				   +"better loss found since epoch %.3").format(epoch, exp.best_val_loss))
			break

		### Adjust LR (old method; see the hedged sketch of this helper below main) ###
		utils.adjust_learning_rate(args.LEARNING_RATE, args.LR_DECAY, optimizer,
							 epoch, args.DECAY_LR_EVERY_N_EPOCHS)

		exp.epoch += 1
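
# For reference, a minimal sketch of what utils.adjust_learning_rate could look like,
# consistent with the call inside the loop above (exponential decay applied every
# DECAY_LR_EVERY_N_EPOCHS epochs). This is an assumption, not the original utils code.
def adjust_learning_rate(base_lr, decay, optimizer, epoch, decay_every_n_epochs):
	new_lr = base_lr * (decay ** (epoch // decay_every_n_epochs))
	for param_group in optimizer.param_groups:
		param_group['lr'] = new_lr
	return new_lr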
Exemplo n.º 26
                        default=10,
                        help='The number of iterations between every logging.')
    parser.add_argument(
        '--loaderjob',
        type=int,
        default=4,
        help='The number of processes to launch for MultiprocessIterator.')
    parser.add_argument(
        '--resume',
        help='The path to the trainer snapshot to resume from. '
        'If unspecified, no snapshot will be resumed')
    args = parser.parse_args()

    with open(args.label_names, 'r') as f:
        label_names = tuple(yaml.safe_load(f))

    if args.val is not None:
        train_data = OriginalDetectionDataset(args.train, label_names)
        val_data = OriginalDetectionDataset(args.val, label_names)
    else:
        # If --val is not supplied, the train data is split into two
        # with ratio 8:2.
        dataset = OriginalDetectionDataset(args.train, label_names)
        train_data, val_data = chainer.datasets.split_dataset_random(
            dataset, int(len(dataset) * 0.8))

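    # step_points: iteration(s) at which the learning-rate schedule presumably steps down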
    step_points = [args.step_size]
    train(train_data, val_data, label_names, args.iteration, args.lr,
          step_points, args.batchsize, args.gpu, args.out, args.val_iteration,
          args.log_iteration, args.loaderjob, args.resume)
Exemplo n.º 27

# Main
optimizer = optim.Adam(model.parameters(), lr=args.lr)

info = {'highest F1': 0.0, 'saved epoch': None}  # start from 0.0 so the first improvement is actually saved


print('STARTING TRAINING')

for epoch in range(1, args.epochs + 1):
    train(args,
          model,
          device,
          train_loader,
          optimizer,
          epoch,
          start_time=time.time())
    f1 = get_mean_F1(model, validation_loader)
    print('after epoch {} got f1 score of {}'.format(epoch, f1))
    if f1 > info['highest F1']:
        info['highest F1'] = float(f1)
        info['saved epoch'] = epoch
        test(args, model, device, test_loader, epoch, trainDataset,
             testDataset, path_submission)
        torch.save(model, path_model)
        print('currently best model --> saved')

print('TRAINING DONE')
print(info)
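
# For reference, a minimal sketch of a get_mean_F1 helper consistent with the loop above,
# assuming a binary task where the model emits one logit per element; both the shapes and
# the use of scikit-learn are assumptions, not part of the original script.
import numpy as np
import torch
from sklearn.metrics import f1_score

def get_mean_F1(model, loader):
    model.eval()
    device = next(model.parameters()).device
    scores = []
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            preds = (torch.sigmoid(model(data)) > 0.5).long()
            y_true = target.view(-1).long().cpu().numpy()
            y_pred = preds.view(-1).cpu().numpy()
            scores.append(f1_score(y_true, y_pred))
    return float(np.mean(scores))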
Exemplo n.º 28
    if args.snapshot is not None:
        with open(args.snapshot + '.vocab', 'rb') as f:
            vocab = pickle.load(f)
    else:
        vocab = None

    # load data
    train_data_dict, dev_data_dict, test_data_dict, vocab = data_utils.load_dataset(args, vocab)

    # Load model
    model = model_utils.get_model(vocab, args)

    if args.mode == 'train_r2a':
        '''
        Training R2A on labeled source and unlabeled target
        '''
        dev_res, saved_path, model = train_utils.train(train_data_dict, dev_data_dict, model, args)

        # saving the vocabulary
        if args.save:
            with open(saved_path+'.vocab', 'wb') as f:
                pickle.dump(vocab, f, pickle.HIGHEST_PROTOCOL)

        # evaluate performance on the source train & dev set
        tar_train = None if args.tar_dataset == '' else train_data_dict[args.tar_dataset]
        tar_dev   = None if args.tar_dataset == '' else dev_data_dict[args.tar_dataset]

        print("\n=== train ====")
        train_res = []
        for task in args.src_dataset:
            cur_res = train_utils.evaluate_task(
                    train_data_dict[task], task, tar_train, model, None, args)
Exemplo n.º 29
    # ------------------------------------------------------------------------
    if args.snapshot is None:
        model = model_utils.get_model(vocab, args)
    else:
        # load saved model
        print('\nLoading model from [%s]...' % args.snapshot)
        try:
            model = torch.load(args.snapshot)
        except Exception as e:
            print(e)
            exit(1)

    print("Load complete")

    # Train the model on train_data, use dev_data for early stopping
    model, dev_res = train_utils.train(train_data, dev_data, model, args)

    # Evaluate the trained model
    print("Evaluate on train set")
    train_res = train_utils.evaluate(train_data, model, args)

    print("Evaluate on test set")
    test_res = train_utils.evaluate(test_data, model, args, roc=True)

    if args.result_path:
        directory = args.result_path[:args.result_path.rfind('/')]
        if not os.path.exists(directory):
            os.makedirs(directory)

        result = {
            'train_loss': train_res[0],
                        required=False,
                        help='Freeze the model after training.')

    parser.add_argument(
        '--binarization',
        choices=['deterministic-binary', 'stochastic-binary', 'disabled'],
        action='store',
        required=False,
        default='deterministic-binary',
        help='binarization mode')

    return parser.parse_args()


if __name__ == '__main__':

    tf.set_random_seed(_RANDOM_SEED)

    parsed_args = args_parser(sys.argv)

    dataset = get_dataset(parsed_args.dataset, parsed_args.epochs,
                          parsed_args.batch_size)

    train(
        parsed_args.epochs, parsed_args.batch_size, dataset,
        get_model_fn(parsed_args.model, parsed_args.binarization),
        get_optimiser_fn(parsed_args.model, parsed_args.epochs,
                         parsed_args.batch_size, dataset),
        parsed_args.resume_from_latest_checkpoint, parsed_args.tag,
        parsed_args.freeze)