Example #1
 def init_optimizer(self):
     self.optimizer = SGD(self.model.parameters(), lr=INIT_LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
     self.optimizer_scheduler = lr_scheduler.StepLR(self.optimizer, step_size=STEP_SIZE,
                                                    gamma=0.5 ** (STEP_SIZE / LR_HALF_LIFE))
     self.optimizer_wrapper = OptimizerWrapper(self.model, self.optimizer, self.optimizer_scheduler)
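A quick note on the gamma above: StepLR multiplies the learning rate by gamma once every step_size epochs, so gamma = 0.5 ** (STEP_SIZE / LR_HALF_LIFE) makes the rate halve roughly every LR_HALF_LIFE epochs. A minimal self-contained sketch of that behavior (the constants are illustrative, not taken from the original project):

import torch.nn as nn
from torch.optim import SGD, lr_scheduler

INIT_LR, STEP_SIZE, LR_HALF_LIFE = 0.1, 10, 50  # illustrative values
model = nn.Linear(4, 2)
optimizer = SGD(model.parameters(), lr=INIT_LR)
scheduler = lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE,
                                gamma=0.5 ** (STEP_SIZE / LR_HALF_LIFE))

for epoch in range(1, 101):
    optimizer.step()  # placeholder for a real training step
    scheduler.step()
    if epoch % LR_HALF_LIFE == 0:
        # prints roughly INIT_LR * 0.5 ** (epoch / LR_HALF_LIFE): 0.05, then 0.025
        print(epoch, optimizer.param_groups[0]['lr'])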
Example #2
def Train():
    print('********************load data********************')
    if args.dataset == 'NIHCXR':
        dataloader_train = get_train_dataloader_NIH(batch_size=config['BATCH_SIZE'], shuffle=True, num_workers=8)
        dataloader_val = get_test_dataloader_NIH(batch_size=config['BATCH_SIZE'], shuffle=False, num_workers=8)
    elif args.dataset == 'VinCXR':
        dataloader_train = get_train_dataloader_VIN(batch_size=config['BATCH_SIZE'], shuffle=True, num_workers=8)
        dataloader_val = get_val_dataloader_VIN(batch_size=config['BATCH_SIZE'], shuffle=False, num_workers=8)
    else:
        print('Unsupported dataset')
        return
    print('********************load data succeed!********************')

    print('********************load model********************')
    if args.model == 'CXRNet' and args.dataset == 'NIHCXR':
        N_CLASSES = len(CLASS_NAMES_NIH)
        model = CXRNet(num_classes=N_CLASSES, is_pre_trained=True)#initialize model
        CKPT_PATH = config['CKPT_PATH'] + args.model + '_' + args.dataset + '_best.pkl'
        if os.path.exists(CKPT_PATH):
            checkpoint = torch.load(CKPT_PATH)
            model.load_state_dict(checkpoint) #strict=False
            print("=> Loaded well-trained CXRNet model checkpoint of NIH-CXR dataset: "+CKPT_PATH)
    elif args.model == 'CXRNet' and args.dataset == 'VinCXR':
        N_CLASSES = len(CLASS_NAMES_Vin)
        model = CXRNet(num_classes=N_CLASSES, is_pre_trained=True)#initialize model
        CKPT_PATH = config['CKPT_PATH'] + args.model + '_' + args.dataset + '_best.pkl'
        if os.path.exists(CKPT_PATH):
            checkpoint = torch.load(CKPT_PATH)
            model.load_state_dict(checkpoint) #strict=False
            print("=> Loaded well-trained CXRNet model checkpoint of NIH-CXR dataset: "+CKPT_PATH)
    else:
        print('Unsupported model')
        return
    model = nn.DataParallel(model).cuda()  # enable multi-GPU training
    optimizer_model = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999), eps=1e-08, weight_decay=1e-5)
    lr_scheduler_model = lr_scheduler.StepLR(optimizer_model , step_size = 10, gamma = 1)
    torch.backends.cudnn.benchmark = True  # improve train speed slightly
    bce_criterion = nn.BCELoss() #define binary cross-entropy loss
    print('********************load model succeed!********************')

    print('********************begin training!********************')
    AUROC_best = 0.50
    for epoch in range(config['MAX_EPOCHS']):
        since = time.time()
        print('Epoch {}/{}'.format(epoch+1 , config['MAX_EPOCHS']))
        print('-' * 10)
        model.train()  #set model to training mode
        train_loss = []
        with torch.autograd.enable_grad():
            for batch_idx, (image, label, _) in enumerate(dataloader_train):
                var_image = torch.autograd.Variable(image).cuda()
                var_label = torch.autograd.Variable(label).cuda()

                optimizer_model.zero_grad()
                _, var_output = model(var_image)
                loss_tensor = bce_criterion(var_output, var_label)  # compute loss
                loss_tensor.backward()
                optimizer_model.step()  # update parameters
                
                sys.stdout.write('\r Epoch: {} / Step: {} : train loss = {}'.format(epoch+1, batch_idx+1, float('%0.6f'%loss_tensor.item())))
                sys.stdout.flush()
                train_loss.append(loss_tensor.item())
        lr_scheduler_model.step()  # decay the learning rate by gamma every step_size epochs
        print("\r Epoch: %5d train loss = %.6f" % (epoch + 1, np.mean(train_loss)))

        model.eval()#turn to test mode
        val_loss = []
        gt = torch.FloatTensor().cuda()
        pred = torch.FloatTensor().cuda()
        with torch.autograd.no_grad():
            for batch_idx, (image, label, _) in enumerate(dataloader_val):
                var_image = torch.autograd.Variable(image).cuda()
                var_label = torch.autograd.Variable(label).cuda()
                _, var_output = model(var_image)#forward
                loss_tensor = bce_criterion(var_output, var_label)  # compute loss
                sys.stdout.write('\r Epoch: {} / Step: {} : validation loss = {}'.format(epoch+1, batch_idx+1, float('%0.6f'%loss_tensor.item())))
                sys.stdout.flush()
                val_loss.append(loss_tensor.item())
                gt = torch.cat((gt, label.cuda()), 0)
                pred = torch.cat((pred, var_output.data), 0)
        AUROCs = compute_AUCs(gt, pred, N_CLASSES)
        AUROC_avg = np.array(AUROCs).mean()
        logger.info("\r Epoch: %5d validation loss = %.6f, Validation AUROC = %.4f" % (epoch + 1, np.mean(val_loss), AUROC_avg))

        if AUROC_best < AUROC_avg:
            AUROC_best = AUROC_avg
            CKPT_PATH = config['CKPT_PATH'] + args.model + '_' + args.dataset + '_best.pkl'
            torch.save(model.module.state_dict(), CKPT_PATH) #Saving torch.nn.DataParallel Models
            print(' Epoch: {} model has been saved!'.format(epoch+1))

        time_elapsed = time.time() - since
        print('Training epoch: {} completed in {:.0f}m {:.0f}s'.format(epoch+1, time_elapsed // 60 , time_elapsed % 60))
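compute_AUCs above is defined elsewhere in that project; a minimal per-class AUROC helper with the same call signature could look like the sketch below (an assumption built on scikit-learn, expecting gt and pred to be (N, n_classes) tensors of binary labels and predicted probabilities):

import numpy as np
from sklearn.metrics import roc_auc_score

def compute_AUCs(gt, pred, n_classes):
    """Per-class AUROC for multi-label predictions (sketch)."""
    gt_np, pred_np = gt.cpu().numpy(), pred.cpu().numpy()
    aurocs = []
    for i in range(n_classes):
        # AUROC is undefined when only one class value appears in the ground truth
        if len(np.unique(gt_np[:, i])) == 2:
            aurocs.append(roc_auc_score(gt_np[:, i], pred_np[:, i]))
    return aurocs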
Example #3
# model name passed as arguments
model_name = models[in_args.m]
model = getModel(class_names, model_name)

#learning rate
lr = in_args.lr

# Criterion: NLLLoss, which is recommended with a LogSoftmax final layer
# (negative log-likelihood loss)
criteria = nn.NLLLoss()
# Only the parameters of the final classification layer are optimized.
# resnet exposes that layer as model.fc; the other models use model.classifier.
if in_args.m == "resnet":
    optimizer = optim.Adam(model.fc.parameters(), lr)
else:
    optimizer = optim.Adam(model.classifier.parameters(), lr)
# Decay LR by a factor of 0.1 every 4 epochs
sched = lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.1)

# Number of epochs
eps = in_args.eps

model_ft = train_model(model, dataloaders, criteria, optimizer, sched,
                       dataset_sizes, eps)
print("Model trained")

calc_accuracy(model_ft, 'test', dataloaders)
print("Accuracy")
Example #4
rel_rec = np.array(encode_onehot(np.where(off_diag)[1]), dtype=np.float32)
rel_send = np.array(encode_onehot(np.where(off_diag)[0]), dtype=np.float32)
rel_rec = torch.FloatTensor(rel_rec)
rel_send = torch.FloatTensor(rel_send)

if args.encoder == 'mlp':
    model = MLPEncoder(args.timesteps * args.dims, args.hidden,
                       args.edge_types, args.dropout, args.factor)
elif args.encoder == 'cnn':
    model = CNNEncoder(args.dims, args.hidden, args.edge_types, args.dropout,
                       args.factor)

optimizer = optim.Adam(model.parameters(), lr=args.lr)
scheduler = lr_scheduler.StepLR(optimizer,
                                step_size=args.lr_decay,
                                gamma=args.gamma)

# Linear indices of an upper triangular mx, used for loss calculation
triu_indices = get_triu_offdiag_indices(args.num_atoms)

if args.cuda:
    model.cuda()
    rel_rec = rel_rec.cuda()
    rel_send = rel_send.cuda()
    triu_indices = triu_indices.cuda()

rel_rec = Variable(rel_rec)
rel_send = Variable(rel_send)

best_model_params = model.state_dict()
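rel_rec and rel_send above one-hot encode the two endpoints of every off-diagonal edge of a fully connected interaction graph. A hedged sketch of how off_diag and an encode_onehot helper are typically constructed for this pattern (the helper below is an illustration, not the project's own implementation):

import numpy as np

def encode_onehot(labels):
    classes = sorted(set(labels))
    mapping = {c: np.eye(len(classes))[i] for i, c in enumerate(classes)}
    return np.array([mapping[l] for l in labels], dtype=np.int32)

num_atoms = 5
off_diag = np.ones([num_atoms, num_atoms]) - np.eye(num_atoms)
# np.where(off_diag) yields the (row, col) index pair of every directed edge
rel_rec = np.array(encode_onehot(np.where(off_diag)[1]), dtype=np.float32)
rel_send = np.array(encode_onehot(np.where(off_diag)[0]), dtype=np.float32)
print(rel_rec.shape)  # (num_atoms * (num_atoms - 1), num_atoms)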
Example #5
def main(args):
    device = torch.device("cuda")
    r = args.r

    # load data
    train_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '../data',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
        ])),
                                               batch_size=100,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '../data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
        ])),
                                              batch_size=100,
                                              shuffle=True)

    # create the NICE model
    model = NICE().to(device)
    model.cnn.load_state_dict(torch.load('pretrained_model/model.pth'))

    # freeze target model
    for p in model.cnn.parameters():
        p.requires_grad = False

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    scheduler = lrsch.StepLR(optimizer, step_size=5, gamma=0.1)

    # Train the model
    for epoch in range(10):

        model.train()
        for batch_idx, (xb, yb) in enumerate(train_loader):
            # Move the batch to the configured device
            inputs = xb.to(device)
            targets = yb.to(device)

            # Forward pass
            outputs, z, loss2 = model(inputs)

            loss1 = F.nll_loss(outputs, targets)
            loss2 = torch.mean(loss2)
            loss = loss1 + r * loss2

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        scheduler.step()
        if (epoch + 1) % 1 == 0:
            print('Epoch [{}/{}], Loss: {:.4f}, Loss1: {:.4f}, Loss2: {:.4f}'.
                  format(epoch + 1, 10, loss.item(), loss1.item(),
                         loss2.item() * r))

        model.eval()
        test_loss = 0
        correct = 0
        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                output, _, _ = model(data)
                test_loss += F.nll_loss(output, target).item()  # accumulate mean batch loss
                pred = output.argmax(
                    dim=1,
                    keepdim=True)  # get the index of the max log-probability
                correct += pred.eq(target.view_as(pred)).sum().item()

        test_loss /= len(test_loader.dataset)

        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.
              format(test_loss, correct, len(test_loader.dataset),
                     100. * correct / len(test_loader.dataset)))

    if not os.path.exists('nice_model'):
        os.makedirs('nice_model')
    torch.save(model.state_dict(), 'nice_model/model_r_{}.pth'.format(r))
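In the example above the frozen cnn parameters are still handed to Adam, which is harmless because parameters that never receive gradients are skipped, but passing only the trainable parameters makes the intent explicit. A sketch of that variant with a stand-in model:

import torch
from torch import nn
from torch.optim import lr_scheduler as lrsch

# stand-in: a frozen feature extractor followed by a trainable head
cnn = nn.Sequential(nn.Conv2d(1, 8, 3), nn.ReLU(), nn.Flatten())
head = nn.Linear(8 * 26 * 26, 10)
model = nn.Sequential(cnn, head)

for p in cnn.parameters():
    p.requires_grad = False  # freeze the target sub-module, as in the example

trainable = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(trainable, lr=0.001)
scheduler = lrsch.StepLR(optimizer, step_size=5, gamma=0.1)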
Example #6
def main():
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu: use_gpu = False

    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))

    # tensorboardX
    writer = SummaryWriter(log_dir=osp.join(args.save_dir, 'summary'))

    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_img_dataset(
        root=args.root,
        name=args.dataset,
        split_id=args.split_id,
        cuhk03_labeled=args.cuhk03_labeled,
        cuhk03_classic_split=args.cuhk03_classic_split,
    )

    transform_train = T.Compose([
        T.Random2DTranslation(args.height, args.width),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    if args.random_erasing:
        transform_train = T.Compose([
            T.Random2DTranslation(args.height, args.width),
            T.RandomHorizontalFlip(),
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            RandomErasing(probability=args.probability, mean=[0.0, 0.0, 0.0]),
        ])

    # transform_test = T.Compose([
    #     T.Resize((args.height, args.width)),
    #     T.ToTensor(),
    #     T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    # ])

    pin_memory = True if use_gpu else False

    if args.loss == 'xent,htri':
        trainloader = DataLoader(
            ImageDataset(dataset.train, transform=transform_train),
            sampler=RandomIdentitySampler(dataset.train,
                                          num_instances=args.num_instances),
            batch_size=args.train_batch,
            num_workers=args.workers,
            pin_memory=pin_memory,
            drop_last=True,
        )
    elif args.loss == 'xent':
        trainloader = DataLoader(
            ImageDataset(dataset.train, transform=transform_train),
            batch_size=args.train_batch,
            shuffle=True,
            num_workers=args.workers,
            pin_memory=pin_memory,
            drop_last=True,
        )

    # queryloader = DataLoader(
    #     ImageDataset(dataset.query, transform=transform_test),
    #     batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
    #     pin_memory=pin_memory, drop_last=False,
    # )

    # galleryloader = DataLoader(
    #     ImageDataset(dataset.gallery, transform=transform_test),
    #     batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
    #     pin_memory=pin_memory, drop_last=False,
    # )

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dataset.num_train_pids,
                              loss=args.loss)
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))

    criterion_xent = CrossEntropyLabelSmooth(
        num_classes=dataset.num_train_pids, use_gpu=use_gpu)
    criterion_htri = TripletLoss(margin=args.margin)

    optimizer = init_optim(args.optim, model.parameters(), args.lr,
                           args.weight_decay)
    if args.stepsize > 0:
        if not args.warmup:
            scheduler = lr_scheduler.StepLR(optimizer,
                                            step_size=args.stepsize,
                                            gamma=args.gamma)
    start_epoch = args.start_epoch

    if args.resume:
        print("Loading checkpoint from '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch']

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")
        # NOTE: queryloader/galleryloader are defined in the data loaders commented
        # out above; re-enable them before running evaluation.
        test(model, queryloader, galleryloader, use_gpu)
        return

    def adjust_lr(optimizer, ep):
        if ep < 20:
            lr = 1e-4 * (ep + 1) / 2
        elif ep < 80:
            #lr = 1e-3 * len(args.gpu_devices)
            lr = 1e-3
        elif ep < 180:
            #lr = 1e-4 * len(args.gpu_devices)
            lr = 1e-4
        elif ep < 300:
            #lr = 1e-5 * len(args.gpu_devices)
            lr = 1e-5
        elif ep < 320:
            #lr = 1e-5 * 0.1 ** ((ep - 320) / 80) * len(args.gpu_devices)
            lr = 1e-5 * 0.1**((ep - 320) / 80)
        elif ep < 400:
            lr = 1e-6
        elif ep < 480:
            #lr = 1e-4 * len(args.gpu_devices)
            lr = 1e-4
        else:
            #lr = 1e-5 * len(args.gpu_devices)
            lr = 1e-5
        for p in optimizer.param_groups:
            p['lr'] = lr

    length = len(trainloader)
    start_time = time.time()
    train_time = 0
    best_rank1 = -np.inf
    best_epoch = 0
    #best_rerank1 = -np.inf
    #best_rerankepoch = 0
    print("==> Start training")

    for epoch in range(start_epoch, args.max_epoch):
        start_train_time = time.time()
        if args.stepsize > 0:
            if args.warmup:
                adjust_lr(optimizer, epoch + 1)
            else:
                scheduler.step()
        train(epoch,
              model,
              criterion_xent,
              criterion_htri,
              optimizer,
              trainloader,
              use_gpu=use_gpu,
              summary=writer,
              length=length)
        train_time += round(time.time() - start_train_time)

        if (epoch + 1) > args.start_eval and args.eval_step > 0 and (
                epoch + 1) % args.eval_step == 0 or (epoch +
                                                     1) == args.max_epoch:
            # print("==> Test")
            # rank1 = test(epoch, model, queryloader, galleryloader, use_gpu=True, summary=writer)
            # is_best = rank1 > best_rank1
            # if is_best:
            #     best_rank1 = rank1
            #     best_epoch = epoch + 1
            # ####### Best Rerank
            # #is_rerankbest = rerank1 > best_rerank1
            # #if is_rerankbest:
            # #    best_rerank1 = rerank1
            # #    best_rerankepoch = epoch + 1

            # NOTE: the rank-1 evaluation above is commented out, so fall back to
            # placeholder values to keep checkpoint saving runnable.
            rank1, is_best = best_rank1, False
            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint(
                {
                    'state_dict': state_dict,
                    'rank1': rank1,
                    'epoch': epoch,
                }, is_best,
                osp.join(args.save_dir,
                         'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    writer.close()
    # print("==> Best Rank-1 {:.1%}, achieved at epoch {}".format(best_rank1, best_epoch))
    #print("==> Best Rerank-1 {:.1%}, achieved at epoch {}".format(best_rerank1, best_rerankepoch))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".
        format(elapsed, train_time))
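The hand-written adjust_lr warm-up schedule above can also be expressed with lr_scheduler.LambdaLR so the scheduler owns the bookkeeping. The sketch below mirrors the breakpoints of adjust_lr and assumes a base learning rate of 1.0 so that the lambda returns absolute rates:

import torch
from torch import nn, optim
from torch.optim import lr_scheduler

def piecewise_lr(ep):
    # mirrors adjust_lr above and returns the absolute learning rate
    if ep < 20:
        return 1e-4 * (ep + 1) / 2
    if ep < 80:
        return 1e-3
    if ep < 180:
        return 1e-4
    if ep < 300:
        return 1e-5
    if ep < 320:
        return 1e-5 * 0.1 ** ((ep - 320) / 80)
    if ep < 400:
        return 1e-6
    if ep < 480:
        return 1e-4
    return 1e-5

model = nn.Linear(4, 2)
optimizer = optim.SGD(model.parameters(), lr=1.0)  # base lr 1.0: the lambda sets the real rate
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=piecewise_lr)

for epoch in range(5):
    optimizer.step()   # placeholder for one epoch of training
    scheduler.step()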
Example #7
    def train(model, optimizer, data_loaders, metrics, transformers_dict, prot_desc_dict, tasks, view,
              n_iters=5000, is_hsearch=False, sim_data_node=None, epoch_ckpt=(2, 1.0), tb_writer=None):
        tb_writer = tb_writer()
        comp_view, prot_view = view
        start = time.time()
        best_model_wts = model.state_dict()
        best_score = -10000
        best_epoch = -1
        terminate_training = False
        e_avg = ExpAverage(.01)
        n_epochs = n_iters // len(data_loaders["train"])
        scheduler = sch.StepLR(optimizer, step_size=400, gamma=0.01)
        criterion = torch.nn.MSELoss()

        # sub-nodes of sim data resource
        loss_lst = []
        train_loss_node = DataNode(label="training_loss", data=loss_lst)
        metrics_dict = {}
        metrics_node = DataNode(label="validation_metrics", data=metrics_dict)
        scores_lst = []
        scores_node = DataNode(label="validation_score", data=scores_lst)

        # add sim data nodes to parent node
        if sim_data_node:
            sim_data_node.data = [train_loss_node, metrics_node, scores_node]
        try:
            # Main training loop
            tb_idx = {'train': Count(), 'val': Count(), 'test': Count()}
            for epoch in range(n_epochs):
                if terminate_training:
                    print("Terminating training...")
                    break
                for phase in ["train", "val" if is_hsearch else "test"]:
                    if phase == "train":
                        print("Training....")
                        # Training mode
                        model.train()
                    else:
                        print("Validation...")
                        # Evaluation mode
                        model.eval()

                    data_size = 0.
                    epoch_losses = []
                    epoch_scores = []

                    # Iterate through mini-batches
                    i = 0
                    with TBMeanTracker(tb_writer, 10) as tracker:
                        for batch in tqdm(data_loaders[phase]):
                            batch_size, data = batch_collator(batch, prot_desc_dict, spec=comp_view)
                            # Data
                            if prot_view in ["p2v", "rnn", "pcnn", "pcnna"]:
                                protein_x = data[comp_view][0][2]
                            else:  # then it's psc
                                protein_x = data[comp_view][0][1]
                            if comp_view == "gconv":
                                # graph data structure is: [(compound data, batch_size), protein_data]
                                X = ((data[comp_view][0][0], batch_size), protein_x)
                            else:
                                X = (data[comp_view][0][0], protein_x)
                            y = data[comp_view][1]
                            w = data[comp_view][2]
                            y = np.array([k for k in y], dtype=float)
                            w = np.array([k for k in w], dtype=float)

                            optimizer.zero_grad()

                            # forward propagation
                            # track history if only in train
                            with torch.set_grad_enabled(phase == "train"):
                                outputs = model(X)
                                target = torch.from_numpy(y).float()
                                weights = torch.from_numpy(w).float()
                                if cuda:
                                    target = target.cuda()
                                    weights = weights.cuda()
                                outputs = outputs * weights
                                target = target * weights
                                loss = criterion(outputs, target)

                            if np.isnan(loss.item()):
                                terminate_training = True
                                break

                            # metrics
                            eval_dict = {}
                            score = SingleViewDTI.evaluate(eval_dict, y, outputs, w, metrics, tasks,
                                                           transformers_dict[comp_view])

                            # TBoard info
                            tracker.track("%s/loss" % phase, loss.item(), tb_idx[phase].IncAndGet())
                            tracker.track("%s/score" % phase, score, tb_idx[phase].i)
                            for k in eval_dict:
                                tracker.track('{}/{}'.format(phase, k), eval_dict[k], tb_idx[phase].i)

                            if phase == "train":
                                print("\tEpoch={}/{}, batch={}/{}, loss={:.4f}".format(epoch + 1, n_epochs, i + 1,
                                                                                       len(data_loaders[phase]),
                                                                                       loss.item()))
                                # for epoch stats
                                epoch_losses.append(loss.item())

                                # for sim data resource
                                loss_lst.append(loss.item())

                                # optimization ops
                                loss.backward()
                                optimizer.step()
                            else:
                                # for epoch stats
                                epoch_scores.append(score)

                                # for sim data resource
                                scores_lst.append(score)
                                for m in eval_dict:
                                    if m in metrics_dict:
                                        metrics_dict[m].append(eval_dict[m])
                                    else:
                                        metrics_dict[m] = [eval_dict[m]]

                                print("\nEpoch={}/{}, batch={}/{}, "
                                      "evaluation results= {}, score={}".format(epoch + 1, n_epochs, i + 1,
                                                                                len(data_loaders[phase]),
                                                                                eval_dict, score))

                            i += 1
                            data_size += batch_size
                    # End of mini-batch iterations.
                    if phase == "train":
                        ep_loss = np.nanmean(epoch_losses)
                        e_avg.update(ep_loss)
                        if epoch % (epoch_ckpt[0] - 1) == 0 and epoch > 0:
                            if e_avg.value > epoch_ckpt[1]:
                                terminate_training = True

                        # Adjust the learning rate.
                        scheduler.step()
                        print("\nPhase: {}, avg task loss={:.4f}, ".format(phase, np.nanmean(epoch_losses)))
                    else:
                        mean_score = np.mean(epoch_scores)
                        if best_score < mean_score:
                            best_score = mean_score
                            best_model_wts = copy.deepcopy(model.state_dict())
                            best_epoch = epoch
        except Exception as e:
            print(str(e))
        duration = time.time() - start
        print('\nModel training duration: {:.0f}m {:.0f}s'.format(duration // 60, duration % 60))
        model.load_state_dict(best_model_wts)
        return {'model': model, 'score': best_score, 'epoch': best_epoch}
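ExpAverage above smooths the epoch loss and drives the early-termination check; its implementation is not shown, so the following is only a plausible stand-in (the update rule and the meaning of the 0.01 factor are assumptions):

class ExpAverage:
    """Exponential moving average with smoothing factor beta (assumed behavior)."""

    def __init__(self, beta, initial=0.0):
        self.beta = beta
        self.value = initial

    def update(self, x):
        self.value = self.beta * x + (1.0 - self.beta) * self.value

e_avg = ExpAverage(0.01)
for loss in (1.2, 0.9, 0.7):
    e_avg.update(loss)
print(e_avg.value)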
Example #8
def main(args):
    def train_model(model, criterion, optimizer, scheduler, num_epochs):
        since = time()
        best_model_wts = copy.deepcopy(model.state_dict())

        for epoch in range(num_epochs):
            print('Epoch {}/{}'.format(epoch, num_epochs - 1))
            print('-' * 10)

            running_loss = 0.0
            running_corrects = []

            if args.matrix == "yes":
                true = [[] for _ in range(11)]
                pred = [[] for _ in range(11)]
            # true_p = 0
            # true_n = 0
            # false_p = 0
            # false_n = 0

            for j, data in enumerate(train_loader):
                inputs, labels = data

                if torch.cuda.is_available():
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                if args.matrix == "yes":
                    # collect per-class ground truth (move to CPU first if needed)
                    for row in labels.cpu():
                        for l in range(11):
                            true[l].append(row.numpy()[l])

                optimizer.zero_grad()
                outputs = model(inputs)

                preds = outputs > 0.5

                if args.matrix == "yes":
                    for row in preds:
                        for l in range(11):
                            pred[l].append(row.float().numpy()[l])

                loss = criterion(
                    outputs.view(-1).float(),
                    labels.view(-1).float())

                loss.backward()
                optimizer.step()

                # for q in range(preds.shape[0]):
                # 	for w in range(preds[0].shape[0]):
                # 		if preds[q][w].float().item() == 1:
                # 			if labels[q][w].float().item() == 1:
                # 				true_p += 1
                # 			elif labels[q][w].float().item() == 0:
                # 				false_p += 1
                # 		elif preds[q][w].float().item() == 0:
                # 			if labels[q][w].float().item() == 1:
                # 				false_n += 1
                # 			elif labels[q][w].float().item() == 0:
                # 				true_n += 1

                running_loss += loss.item()
                running_corrects.append(
                    torch.sum((preds.float() == labels.float()) *
                              (labels.float() > 0)).item() /
                    (1e-5 + (preds > 0).sum().item()))

            scheduler.step()

            epoch_loss = running_loss / len(running_corrects)
            epoch_acc = sum(running_corrects) / len(running_corrects)

            model.eval()
            # val_loss, val_acc = evaluate(model, valid_loader)
            model.train()

            plot_train_acc.append(epoch_acc)
            # plot_valid_acc.append(val_acc)
            plot_train_loss.append(epoch_loss)
            # plot_valid_loss.append(val_loss)
            nRec.append(epoch)

            # precision = true_p / (true_p + false_p)
            # recall = true_p / (true_p + false_n)

            print('Train Loss: {:.4f} Train Acc: {:.4f}'.format(
                epoch_loss, epoch_acc))
            # print('Train Loss: {:.4f} Train Acc: {:.4f} Val Loss: {:.4f} Val Acc: {:.4f}'.format(epoch_loss, epoch_acc, val_loss, val_acc))
            # print('TP: %d TN: %d FP: %d FN: %d' % (true_p,true_n,false_p,false_n))
            # print('Precision:  {:.4f} Recall  {:.4f}'.format(precision, recall))
            print()

        test_loss, test_acc = evaluate(model, test_loader)
        print('test Loss: {:.4f} test Acc: {:.4f}'.format(test_loss, test_acc))
        time_elapsed = time() - since
        print('Training complete in {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60))
        # print('Best val Acc: {:4f}'.format(max(plot_valid_acc)))

        model.load_state_dict(best_model_wts)
        return model

    data = pd.read_pickle('%s.pkl' % args.pkl_file)

    data = pd.read_pickle("string_test.pkl")
    str_data = pd.read_pickle("string_test_pt2.pkl")

    labels = data["instruments"].values
    music_data = data["normalized"].values

    str_labels = str_data["instruments"].values
    str_music_data = str_data["normalized"].values

    music_data = np.stack(music_data).reshape(-1, 128 * 65)  #65*128, 1025 * 65
    str_music_data = np.stack(str_music_data).reshape(-1, 128 * 65)  #65*128, 1025 * 65

    train_data, valid_data, train_labels, valid_labels = train_test_split(
        music_data, labels, test_size=0.1, random_state=1)
    # train_data, valid_data, train_labels, valid_labels = train_data[0:100], valid_data[0:100], train_labels[0:100], valid_labels[0:100]

    train_set = MusicDataset(train_data, train_labels)
    valid_set = MusicDataset(valid_data, valid_labels)
    test_set = MusicDataset(str_music_data, str_labels)
    train_loader = DataLoader(train_set,
                              batch_size=args.batch_size,
                              shuffle=True)
    valid_loader = DataLoader(valid_set,
                              batch_size=args.batch_size,
                              shuffle=True)
    test_loader = DataLoader(test_set,
                             batch_size=args.batch_size,
                             shuffle=True)

    model_ft = MultiInstrumClass(128 * 65, 11, args.emb_dim, args.hidden_dim,
                                 args.model)
    # model_ft = MultiLP(128*64)

    if torch.cuda.is_available():
        model_ft.cuda()

    plot_train_acc, plot_valid_acc, plot_train_loss, plot_valid_loss, nRec = [], [], [], [], []

    # NOTE: `criterion` is not defined in this snippet; a BCE loss is a plausible
    # choice for the multi-label targets used below (assumption).
    criterion = nn.BCELoss()
    optimizer_ft = torch.optim.Adam(model_ft.parameters(),
                                    lr=args.lr,
                                    weight_decay=.04)
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft,
                                           step_size=7,
                                           gamma=0.1)
    model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                           args.epochs)

    fig = plt.figure()
    ax = plt.subplot(1, 2, 1)
    plt.plot(nRec, plot_train_acc, label='Training')
    plt.plot(nRec, plot_valid_acc, label='Validation')
    plt.title('Accuracy vs. Epoch')
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    ax.legend()

    bx = plt.subplot(1, 2, 2)
    bx.plot(nRec, plot_train_loss, label='Training')
    bx.plot(nRec, plot_valid_loss, label='Validation')
    plt.title('Loss vs. Epoch')
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    bx.legend()
    plt.show()
    plt.savefig("%s.png" % args.model)
    plt.clf()
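evaluate above is not shown in the snippet; the sketch below is one way to implement it consistently with how it is called and with the thresholded multi-label accuracy used in the training loop (the BCE criterion is an assumption):

import torch
from torch import nn

def evaluate(model, loader, device=torch.device('cpu')):
    """Mean loss and thresholded multi-label accuracy over a DataLoader (sketch)."""
    criterion = nn.BCELoss()  # assumed; the original criterion is not shown
    model.eval()
    losses, accs = [], []
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            losses.append(criterion(outputs.view(-1).float(),
                                    labels.view(-1).float()).item())
            preds = outputs > 0.5
            correct = torch.sum((preds.float() == labels.float()) *
                                (labels.float() > 0)).item()
            accs.append(correct / (1e-5 + (preds > 0).sum().item()))
    return sum(losses) / len(losses), sum(accs) / len(accs)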
Example #9
def main():
    args.cuda = True
    # 1 choose the data you want to use
    # using_data = {'my_sp': False,
    #               'my_cm': False,
    #               'template_casia_casia': False,
    #               'template_coco_casia': False,
    #               'cod10k': True,
    #               'casia': False,
    #               'coverage': False,
    #               'columb': False,
    #               'negative_coco': False,
    #               'negative_casia': False,
    #               'texture_sp': False,
    #               'texture_cm': False,
    #               }
    using_data = {
        'my_sp': True,
        'my_cm': True,
        'template_casia_casia': True,
        'template_coco_casia': True,
        'cod10k': True,
        'casia': False,
        'coverage': False,
        'columb': False,
        'negative_coco': True,
        'negative_casia': False,
        'texture_sp': True,
        'texture_cm': True,
    }
    using_data_test = {
        'my_sp': False,
        'my_cm': False,
        'template_casia_casia': False,
        'template_coco_casia': False,
        'cod10k': False,
        'casia': False,
        'coverage': True,
        'columb': False,
        'negative_coco': False,
        'negative_casia': False,
    }
    # 2 define 3 types
    trainData = TamperDataset(stage_type='stage2',
                              using_data=using_data,
                              train_val_test_mode='train')
    valData = TamperDataset(stage_type='stage2',
                            using_data=using_data,
                            train_val_test_mode='val')
    testData = TamperDataset(stage_type='stage2',
                             using_data=using_data_test,
                             train_val_test_mode='test')

    # 3 specific dataloader
    trainDataLoader = torch.utils.data.DataLoader(trainData,
                                                  batch_size=args.batch_size,
                                                  num_workers=3,
                                                  shuffle=True,
                                                  pin_memory=False)
    valDataLoader = torch.utils.data.DataLoader(valData,
                                                batch_size=args.batch_size,
                                                num_workers=3)

    testDataLoader = torch.utils.data.DataLoader(testData,
                                                 batch_size=args.batch_size,
                                                 num_workers=0)
    # model
    model1 = Net1()
    model2 = Net2()
    if torch.cuda.is_available():
        model1.cuda()
        model2.cuda()
    else:
        model1.cpu()
        model2.cpu()

    # Model weight initialization
    # (without this step, weights fall back to the default normal initialization)
    model1.apply(weights_init)
    model2.apply(weights_init)

    # Model persistence / resume
    # optimizer1 = optim.Adam(model1.parameters(), lr=1e-5, betas=(0.9, 0.999), eps=1e-8)
    optimizer2 = optim.Adam(model2.parameters(),
                            lr=args.lr,
                            betas=(0.9, 0.999),
                            eps=1e-8)
    if args.resume[0]:
        if isfile(args.resume[0]):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint1 = torch.load(args.resume[0])
            # checkpoint2 = torch.load(args.resume[1])
            model1.load_state_dict(checkpoint1['state_dict'])
            # optimizer1.load_state_dict(checkpoint1['optimizer'])
            ################################################
            # model2.load_state_dict(checkpoint2['state_dict'])
            # optimizer2.load_state_dict(checkpoint2['optimizer'])
            print("=> loaded checkpoint '{}'".format(args.resume))

        else:
            print("=> Error!!!! checkpoint found at '{}'".format(args.resume))

    else:
        print("=> no checkpoint found at '{}'".format(args.resume))

    # Adjust the learning rate
    # scheduler1 = lr_scheduler.StepLR(optimizer1, step_size=args.stepsize, gamma=args.gamma)
    scheduler2 = lr_scheduler.StepLR(optimizer2,
                                     step_size=args.stepsize,
                                     gamma=args.gamma)
    # scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=3, verbose=True)
    # Data iterators

    for epoch in range(args.start_epoch, args.maxepoch):
        train_avg = train(model1=model1,
                          model2=model2,
                          optimizer2=optimizer2,
                          dataParser=trainDataLoader,
                          epoch=epoch)

        val_avg = val(model1=model1,
                      model2=model2,
                      dataParser=valDataLoader,
                      epoch=epoch)
        test_avg = test(model1=model1,
                        model2=model2,
                        dataParser=testDataLoader,
                        epoch=epoch)
        """""" """""" """""" """""" """"""
        "          写入图             "
        """""" """""" """""" """""" """"""
        try:
            writer.add_scalars('tr/val/test_avg_loss_per_epoch', {
                'train': train_avg['loss_avg'],
                'val': val_avg['loss_avg'],
                'test': test_avg['loss_avg']
            },
                               global_step=epoch)
            writer.add_scalars('tr/val/test_avg_f1_per_epoch', {
                'train': train_avg['f1_avg_stage2'],
                'val': val_avg['f1_avg_stage2'],
                'test': test_avg['f1_avg_stage2']
            },
                               global_step=epoch)

            writer.add_scalars('tr/val/test_avg_precision_per_epoch', {
                'train': train_avg['precision_avg_stage2'],
                'val': val_avg['precision_avg_stage2'],
                'test': test_avg['precision_avg_stage2']
            },
                               global_step=epoch)
            writer.add_scalars('tr/val/test_avg_acc_per_epoch', {
                'train': train_avg['accuracy_avg_stage2'],
                'val': val_avg['accuracy_avg_stage2'],
                'test': test_avg['accuracy_avg_stage2']
            },
                               global_step=epoch)
            writer.add_scalars('tr/val/test_avg_recall_per_epoch', {
                'train': train_avg['recall_avg_stage2'],
                'val': val_avg['recall_avg_stage2'],
                'test': test_avg['recall_avg_stage2']
            },
                               global_step=epoch)

            writer.add_scalar('lr_per_epoch_stage2',
                              scheduler2.get_lr()[0],
                              global_step=epoch)
        except Exception as e:
            print(e)
        """""" """""" """""" """""" """"""
        "          写入图            "
        """""" """""" """""" """""" """"""

        output_name = output_name_file_name % \
                      (epoch, val_avg['loss_avg'],
                       val_avg['f1_avg_stage2'],
                       val_avg['precision_avg_stage2'],
                       val_avg['accuracy_avg_stage2'],
                       val_avg['recall_avg_stage2'])

        try:
            # send_msn(epoch, f1=val_avg['f1_avg'])
            email_output_train = 'The train epoch:%d,f1:%f,loss:%f,precision:%f,accuracy:%f,recall:%f' % \
                                 (epoch, train_avg['loss_avg'], train_avg['f1_avg'], train_avg['precision_avg'],
                                  train_avg['accuracy_avg'], train_avg['recall_avg'])
            email_output_val = 'The val epoch:%d,f1:%f,loss:%f,precision:%f,accuracy:%f,recall:%f' % \
                               (epoch, val_avg['loss_avg'], val_avg['f1_avg'], val_avg['precision_avg'],
                                val_avg['accuracy_avg'], val_avg['recall_avg'])
            email_output_test = 'The test epoch:%d,f1:%f,loss:%f,precision:%f,accuracy:%f,recall:%f' % \
                                (epoch, test_avg['loss_avg'], test_avg['f1_avg'], test_avg['precision_avg'],
                                 test_avg['accuracy_avg'], test_avg['recall_avg'])

            email_output = email_output_train + '\n' + email_output_val + '\n' + email_output_test + '\n\n\n'
            email_list.append(email_output)
            send_email(str(email_header), context=str(email_list))

        except:
            pass
        if epoch % 1 == 0:
            # save_model_name_stage1 = os.path.join(args.model_save_dir, 'stage1' + output_name)
            save_model_name_stage2 = os.path.join(args.model_save_dir,
                                                  'stage2' + output_name)
            # torch.save({'epoch': epoch, 'state_dict': model1.state_dict(), 'optimizer': optimizer1.state_dict()},
            #            save_model_name_stage1)
            torch.save(
                {
                    'epoch': epoch,
                    'state_dict': model2.state_dict(),
                    'optimizer': optimizer2.state_dict()
                }, save_model_name_stage2)

        # scheduler1.step(epoch=epoch)
        scheduler2.step(epoch=epoch)
    print('Training finished!')
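writer.add_scalars above puts the train/val/test curves of one metric on a single chart; a self-contained sketch of that call with dummy values (using torch.utils.tensorboard, while the original likely uses tensorboardX, whose add_scalars has the same signature):

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(log_dir='runs/demo')
for epoch in range(3):
    writer.add_scalars('tr/val/test_avg_loss_per_epoch',
                       {'train': 1.0 / (epoch + 1),
                        'val': 1.2 / (epoch + 1),
                        'test': 1.1 / (epoch + 1)},
                       global_step=epoch)
writer.close()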
Example #10
def main(args):
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    #add logging
    print(args)
    log_path, model_path = do_logging(args.logs_base_path,
                                      args.models_base_path)

    #original transforms:
    #data_transforms = {
    #    'train': transforms.Compose([
    #        # transforms.RandomResizedCrop(224),
    #        # transforms.RandomHorizontalFlip(),
    #        transforms.ToTensor(),
    #        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    #    ]),
    #    'val': transforms.Compose([
    #        # transforms.Resize(256),
    #        # transforms.CenterCrop(224),
    #        transforms.ToTensor(),
    #        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    #    ]),
    #}

    #load data
    data_transforms = {
        'train':
        transforms.Compose([
            transforms.ToPILImage(),  # might have to leave this out when using cifar10
            transforms.Resize(224),
            transforms.CenterCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'val':
        transforms.Compose([
            transforms.ToPILImage(),  # might have to leave this out when using cifar10
            transforms.Resize(224),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
    }
    datasets_path = os.path.join(os.path.expanduser(args.root_path),
                                 "datasets")
    dataset_depth = {
        'train': args.train_dataset_depth,
        'val': args.val_dataset_depth
    }
    # If I use my own datasets they are always .pid files, so I want to keep these
    # in a separate folder from the jpeg files loaded from torchvision.
    if args.torchvision_dataset:
        datasets_path = os.path.join(os.path.expanduser(args.root_path),
                                     "jpg_datasets")
    dataloaders, dataset_sizes, num_classes = None, None, None

    dataset_names_csvs = [{
        'name': 'omniglot_1_folder_splits',
        'csv_train':
        '/nfs/home4/mhouben/facenet_pytorch/datasets/omniglot_train_1_folder.csv',
        'csv_val':
        '/nfs/home4/mhouben/facenet_pytorch/datasets/omniglot_val_1_folder.csv',
        'best_models_path': ''
    }, {
        'name': 'vggface2',
        'csv_train':
        '/nfs/home4/mhouben/facenet_pytorch/datasets/train_vggface2.csv',
        'csv_val':
        '/nfs/home4/mhouben/facenet_pytorch/datasets/test_vggface2.csv',
        'best_models_path': ''
    }, {
        'name': 'CASIA_aligned',
        'csv_train':
        '/nfs/home4/mhouben/facenet_pytorch/datasets/CASIA_train.csv',
        'csv_val':
        '/nfs/home4/mhouben/facenet_pytorch/datasets/CASIA_test.csv',
        'best_models_path': ''
    }, {
        'name': 'inat_reptiles',
        'csv_train':
        '/nfs/home4/mhouben/facenet_pytorch/datasets/inaturalist2019_alphabet_csvs/train/Reptiles.csv',
        'csv_val':
        '/nfs/home4/mhouben/facenet_pytorch/datasets/inaturalist2019_alphabet_csvs/val/Reptiles.csv',
        'best_models_path': ''
    }, {
        'name': 'inat_amphibians',
        'csv_train':
        '/nfs/home4/mhouben/facenet_pytorch/datasets/inaturalist2019_alphabet_csvs/train/Amphibians.csv',
        'csv_val':
        '/nfs/home4/mhouben/facenet_pytorch/datasets/inaturalist2019_alphabet_csvs/val/Amphibians.csv',
        'best_models_path': ''
    }]
    #if args.pure_validation_all_models_all_datasets:
    #    for pretraining_set in dataset_names_csvs:
    #        model = load_model(pretraining_set['best_model_path'])
    #        for validation_set in dataset_names_csvs:
    #            pure_validation(model, validation_set['csv_val'])

    if args.dataset_name == 'omniglot_1_folder_splits':
        dataset_path = os.path.join(os.path.expanduser(datasets_path),
                                    args.dataset_name)
        dataloaders, dataset_sizes, num_classes = load_own_data(
            dataset_path, args.train_csv_path, args.val_csv_path,
            args.image_count, args.train_format, args.valid_format,
            args.train_dataset_depth, args.val_dataset_depth, data_transforms,
            args.batch_size)
    elif args.dataset_name == 'vggface2':
        dataset_path = os.path.join(os.path.expanduser(datasets_path),
                                    args.dataset_name)
        dataloaders, dataset_sizes, num_classes = load_own_data(
            dataset_path, args.train_csv_path, args.val_csv_path,
            args.image_count, args.train_format, args.valid_format,
            args.train_dataset_depth, args.val_dataset_depth, data_transforms,
            args.batch_size)
    elif args.dataset_name == 'CASIA_aligned':
        dataset_path = os.path.join(os.path.expanduser(datasets_path),
                                    args.dataset_name)
        dataloaders, dataset_sizes, num_classes = load_own_data(
            dataset_path, args.train_csv_path, args.val_csv_path,
            args.image_count, args.train_format, args.valid_format,
            args.train_dataset_depth, args.val_dataset_depth, data_transforms,
            args.batch_size)
    elif args.dataset_name == 'inat_reptiles':
        dataset_path = os.path.join(os.path.expanduser(datasets_path),
                                    args.dataset_name)
        dataloaders, dataset_sizes, num_classes = load_own_data(
            dataset_path, args.train_csv_path, args.val_csv_path,
            args.image_count, args.train_format, args.valid_format,
            args.train_dataset_depth, args.val_dataset_depth, data_transforms,
            args.batch_size)
    elif args.dataset_name == 'inat_amphibians':
        dataset_path = os.path.join(os.path.expanduser(datasets_path),
                                    args.dataset_name)
        dataloaders, dataset_sizes, num_classes = load_own_data(
            dataset_path, args.train_csv_path, args.val_csv_path,
            args.image_count, args.train_format, args.valid_format,
            args.train_dataset_depth, args.val_dataset_depth, data_transforms,
            args.batch_size)
    elif args.torchvision_dataset:
        dataset_path = os.path.join(os.path.expanduser(datasets_path),
                                    args.dataset_name)
        dataloaders, dataset_sizes, num_classes = load_torchvision_data(
            args.dataset_name, dataset_path, data_transforms, dataset_depth,
            args.batch_size)
    else:
        raise Exception("This dataset is not known.")

    print("num_classes: ", num_classes)
    #load model(s)
    if args.run_all_models:
        MODEL_NAMES = [
            #'squeezenetv10',
            #'squeezenetv11',
            '5_layer_net',
            'resnet34',
            'vgg16',
            'googlenet',
            'alexnet'
        ]
        #model_parameters = {
        #    'model_name': 'squeezenetv10',
        #    'learning_rate': 0.001, #perhaps some scheduler here that works well for the given network
        #    'criterion': nn.CrossEntropyLoss()
        #}

        for model_name in MODEL_NAMES:
            model = load_tv_model(model_name, num_classes,
                                  args.pretrained_imagenet)

            # print("resnet34 model: ", model)
            # model = vgg16(num_classes)
            model = model.to(device)
            print("model: ", model)
            criterion = nn.CrossEntropyLoss()
            optimizer = optim.SGD(model.parameters(),
                                  lr=args.learning_rate,
                                  momentum=0.9)
            scheduler = lr_scheduler.StepLR(optimizer,
                                            step_size=args.num_epochs / 4,
                                            gamma=0.1)
            try:
                model_ft = train_model(device,
                                       model,
                                       model_name,
                                       criterion,
                                       optimizer,
                                       scheduler,
                                       args.dataset_name,
                                       dataloaders,
                                       dataset_sizes,
                                       log_path,
                                       model_path,
                                       num_epochs=args.num_epochs)
            except:
                print("traceback: ", traceback.format_exc())
                print("something went wrong with model ", model_name)

    else:

        model = load_tv_model(args.model_name, num_classes,
                              args.pretrained_imagenet)

        # print("resnet34 model: ", model)
        # model = vgg16(num_classes)
        model = model.to(device)
        print("model: ", model)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(model.parameters(),
                              lr=args.learning_rate,
                              momentum=0.9)
        scheduler = lr_scheduler.StepLR(optimizer,
                                        step_size=args.num_epochs / 4,
                                        gamma=0.1)

        model_ft = train_model(device,
                               model,
                               args.model_name,
                               criterion,
                               optimizer,
                               scheduler,
                               args.dataset_name,
                               dataloaders,
                               dataset_sizes,
                               log_path,
                               model_path,
                               num_epochs=args.num_epochs)
Example #11
def train(args):
    """Trains model for args.nepochs (default = 30)"""

    t_start = time.time()
    train_data = coco_loader(args.coco_root,
                             split='train',
                             ncap_per_img=args.ncap_per_img)
    print('[DEBUG] Loading train data ... %f secs' % (time.time() - t_start))

    train_data_loader = DataLoader(dataset=train_data, num_workers=args.nthreads,\
      batch_size=args.batchsize, shuffle=True, drop_last=True)

    lang_model = Seq2Seq(train_data.numwords)
    lang_model = lang_model.cuda()
    lang_model.load_state_dict(
        torch.load('log_model/bestmodel.pth')['lang_state_dict'])
    lang_model.train()
    #Load pre-trained imgcnn
    model_imgcnn = Vgg16Feats()
    model_imgcnn.cuda()
    model_imgcnn.train(True)
    model_imgcnn.load_state_dict(
        torch.load('log_reg/bestmodel.pth')['img_state_dict'])
    #Convcap model
    model_convcap = convcap(train_data.numwords,
                            args.num_layers,
                            is_attention=args.attention)
    model_convcap.cuda()
    model_convcap.load_state_dict(
        torch.load('log_reg/bestmodel.pth')['state_dict'])
    model_convcap.train(True)

    optimizer = optim.RMSprop(model_convcap.parameters(),
                              lr=args.learning_rate)
    scheduler = lr_scheduler.StepLR(optimizer,
                                    step_size=args.lr_step_size,
                                    gamma=.1)
    img_optimizer = None

    batchsize = args.batchsize
    ncap_per_img = args.ncap_per_img
    batchsize_cap = batchsize * ncap_per_img
    max_tokens = train_data.max_tokens
    nbatches = np.int_(np.floor((len(train_data.ids) * 1.) / batchsize))
    bestscore = .0

    for epoch in range(args.epochs):
        loss_train = 0.

        if (epoch == args.finetune_after):
            img_optimizer = optim.RMSprop(model_imgcnn.parameters(), lr=1e-5)
            img_scheduler = lr_scheduler.StepLR(img_optimizer,
                                                step_size=args.lr_step_size,
                                                gamma=.1)

        scheduler.step()
        if (img_optimizer):
            img_scheduler.step()
        it = 0
        #One epoch of train
        for batch_idx, (imgs, captions, wordclass, mask, _) in \
          tqdm(enumerate(train_data_loader), total=nbatches):
            it = it + 1
            imgs = imgs.view(batchsize, 3, 224, 224)
            wordclass = wordclass.view(batchsize_cap, max_tokens).cuda()
            mask = mask.view(batchsize_cap, max_tokens)

            captions = utils.decode_sequence(train_data.wordlist, wordclass,
                                             None)
            captions_all = []
            for index, caption in enumerate(captions):
                captions_all.append(caption)

            imgs_v = Variable(imgs).cuda()
            wordclass_v = Variable(wordclass).cuda()

            optimizer.zero_grad()
            if (img_optimizer):
                img_optimizer.zero_grad()

            imgsfeats, imgsfc7 = model_imgcnn(imgs_v)
            imgsfeats, imgsfc7 = repeat_img_per_cap(imgsfeats, imgsfc7,
                                                    ncap_per_img)
            _, _, feat_h, feat_w = imgsfeats.size()

            if (args.attention == True):
                wordact, attn = model_convcap(imgsfeats, imgsfc7, wordclass_v)
                attn = attn.view(batchsize_cap, max_tokens, feat_h, feat_w)
            else:
                wordact, _ = model_convcap(imgsfeats, imgsfc7, wordclass_v)

            wordact = wordact[:, :, :-1]
            wordclass_v = wordclass_v[:, 1:]
            mask = mask[:, 1:].contiguous()

            wordact_t = wordact.permute(0, 2, 1).contiguous().view(\
              batchsize_cap*(max_tokens-1), -1)
            wordclass_t = wordclass_v.contiguous().view(\
              batchsize_cap*(max_tokens-1), 1)

            maskids = torch.nonzero(mask.view(-1)).numpy().reshape(-1)

            if (args.attention == True):
                #Cross-entropy loss and attention loss of Show, Attend and Tell
                loss_xe = F.cross_entropy(wordact_t[maskids, ...], \
                  wordclass_t[maskids, ...].contiguous().view(maskids.shape[0])) \
                  + (torch.sum(torch.pow(1. - torch.sum(attn, 1), 2)))\
                  /(batchsize_cap*feat_h*feat_w)
            else:
                loss_xe = F.cross_entropy(wordact_t[maskids, ...], \
                  wordclass_t[maskids, ...].contiguous().view(maskids.shape[0]))

            wordact = lang_model(wordclass_v.transpose(1, 0),
                                 wordclass_v.transpose(1, 0), imgs)
            wordact = wordact.transpose(1, 0)[:, :-1, :]
            wordclass_v = wordclass_v[:, 1:]

            wordact_t = wordact.contiguous().view(\
              batchsize_cap*wordact.size(1), -1)

            wordclass_t = wordclass_v.contiguous().view(\
              batchsize_cap*wordclass_v.size(1), 1)

            loss_xe_lang = F.cross_entropy(wordact_t[...], \
                wordclass_t[...].contiguous().view(-1))

            with torch.no_grad():
                outcap, sampled_ids, sample_logprobs, x_all_langauge, outputs = lang_model.sample(
                    wordclass.transpose(1, 0), wordclass.transpose(1, 0),
                    imgsfeats.transpose(1, 0), train_data.wordlist)

            logprobs_input, _ = model_convcap(imgsfeats, imgsfc7,
                                              sampled_ids.long().cuda())
            log_probs = F.log_softmax(
                logprobs_input.transpose(2, 1)[:, :-1, :], -1)

            sample_logprobs_true = log_probs.gather(
                2, sampled_ids[:, 1:].cuda().long().unsqueeze(2))
            with torch.no_grad():
                reward = get_self_critical_reward(batchsize_cap, lang_model,
                                                  wordclass.transpose(1, 0),
                                                  imgsfeats.transpose(1, 0),
                                                  outcap, captions_all,
                                                  train_data.wordlist, 16)

            loss_rl1 = rl_crit(
                torch.exp(sample_logprobs_true.squeeze()) /
                torch.exp(sample_logprobs[:, 1:]).cuda().detach(),
                sampled_ids[:, 1:].cpu(),
                torch.from_numpy(reward).float().cuda())
            #loss_rl2 = rl_crit(sample_logprobs[:,1:].cuda(), sampled_ids[:, 1:].cpu(), torch.from_numpy(reward).float().cuda())

            loss = 0.0 * loss_xe + loss_rl1  # + loss_xe_lang + loss_rl2

            if it % 500 == 0:
                modelfn = osp.join(args.model_dir, 'model.pth')
                # Save the current weights first so the 'cp' below always has
                # a checkpoint file to copy.
                torch.save(
                    {
                        'epoch': epoch,
                        'state_dict': model_convcap.state_dict(),
                        'img_state_dict': model_imgcnn.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'lang_state_dict': lang_model.state_dict()
                    }, modelfn)
                scores = test(args,
                              'val',
                              model_convcap=model_convcap,
                              model_imgcnn=model_imgcnn)
                score = scores[0][args.score_select]
                if (score > bestscore):
                    bestscore = score
                    print('[DEBUG] Saving model at epoch %d with %s score of %f'\
                       % (epoch, args.score_select, score))
                    bestmodelfn = osp.join(args.model_dir, 'bestmodel.pth')
                    os.system('cp %s %s' % (modelfn, bestmodelfn))

            loss_train = loss_train + loss.item()

            loss.backward()

            optimizer.step()
            if (img_optimizer):
                img_optimizer.step()

        loss_train = loss_train / (batch_idx + 1)
        print('[DEBUG] Training epoch %d has loss %f' % (epoch, loss_train))

        modelfn = osp.join(args.model_dir, 'model.pth')

        if (img_optimizer):
            img_optimizer_dict = img_optimizer.state_dict()
        else:
            img_optimizer_dict = None

        torch.save(
            {
                'epoch': epoch,
                'state_dict': model_convcap.state_dict(),
                'img_state_dict': model_imgcnn.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lang_state_dict': lang_model.state_dict()
            }, modelfn)

        #Run on validation and obtain score
        scores = test(args,
                      'val',
                      model_convcap=model_convcap,
                      model_imgcnn=model_imgcnn)
        score = scores[0][args.score_select]

        if (score > bestscore):
            bestscore = score
            print('[DEBUG] Saving model at epoch %d with %s score of %f'\
              % (epoch, args.score_select, score))
            bestmodelfn = osp.join(args.model_dir, 'bestmodel.pth')
            os.system('cp %s %s' % (modelfn, bestmodelfn))
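
# A minimal sketch of the deferred fine-tuning pattern used in the loop above:
# the caption decoder trains from epoch 0, while the image CNN only gets its
# own optimizer and StepLR once `finetune_after` is reached. All module and
# hyperparameter names below are illustrative placeholders.
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler

cnn = nn.Sequential(nn.Conv2d(3, 8, 3, padding=1),
                    nn.AdaptiveAvgPool2d(1), nn.Flatten())
decoder = nn.Linear(8, 100)

optimizer = optim.RMSprop(decoder.parameters(), lr=5e-5)
scheduler = lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.1)
img_optimizer, img_scheduler = None, None
finetune_after = 8

for epoch in range(20):
    if epoch == finetune_after:
        img_optimizer = optim.RMSprop(cnn.parameters(), lr=1e-5)
        img_scheduler = lr_scheduler.StepLR(img_optimizer, step_size=15, gamma=0.1)
    # ... run the batches here, stepping `optimizer` and, once it exists,
    # `img_optimizer` ...
    scheduler.step()
    if img_optimizer is not None:
        img_scheduler.step()
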
num_classes = 100

trainset = Dataset(train_dir, dirname_to_classname_path, num_classes)
testset = Dataset(val_dir, dirname_to_classname_path, num_classes)
train_dataloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size_train, shuffle=True, num_workers=8)
test_dataloader = torch.utils.data.DataLoader(testset, batch_size=batch_size_train, shuffle=False, num_workers=8)

net = GoogLeNet(num_classes, mode='train').cuda()
net.init_weights('KAMING')
if pretrained_weights is not None:
    net_pretrain = torch.load(pretrained_weights)
    net.load_state_dict(net_pretrain)
    
criterion = nn.CrossEntropyLoss().cuda()
optimizer= optim.SGD(net.parameters(), lr=learning_rate, momentum=momentum, weight_decay=0.0001)
scheduler = lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.5) # original = 0.96

train_loss_list = list()
train_accuracy_list = list()
test_loss_list = list()
test_accuracy_list = list()

for epoch in range(num_epoch):
    time_s = time.time()
    print('Epoch : ', epoch + 1, optimizer)

    net.train()
    
    for batch_idx, (img, y_GT) in enumerate(train_dataloader):
        img = img.permute(0, 3, 1, 2).float()
         
Exemplo n.º 13
0
def main():

    # Configurations
    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    n_class = 1
    batch_size = args.batchsize
    epochs = 10
    lr = 1e-4
    momentum = 0
    w_decay = 1e-5
    step_size = 50
    gamma = 0.5
    configs = "FCNs-MSE_batch{}_epoch{}_RMSprop_scheduler-step{}-gamma{}_lr{}_momentum{}_w_decay{}".format(
        batch_size, epochs, step_size, gamma, lr, momentum, w_decay)
    print("Configs:", configs)

    # Create dir for model
    output_dir = os.path.join(args.exp_dir, args.exp_name)

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    if not os.path.exists(os.path.join(output_dir, 'checkpoints')):
        os.makedirs(os.path.join(output_dir, 'checkpoints'))

    use_gpu = torch.cuda.is_available()
    num_gpu = list(range(torch.cuda.device_count()))

    # Training and validation loaders
    train_data = DHF1KDualDataset('../dataset/DHF1K/train/data_fix',
                                  '../dataset/DHF1K/train/flows_fix',
                                  '../dataset/DHF1K/train/target_fix', 640,
                                  360)
    train_loader = DataLoader(train_data,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=1)

    val_data = DHF1KDualDataset('../dataset/DHF1K/val/data',
                                '../dataset/DHF1K/val/flows_fix',
                                '../dataset/DHF1K/val/target_fix',
                                640,
                                360,
                                small_part=10)
    val_loader = DataLoader(val_data, batch_size=16, num_workers=1)

    test_data = DHF1KDualDataset('../dataset/DHF1K/test/data',
                                 '../dataset/DHF1K/test/flows_fix',
                                 '../dataset/DHF1K/test/target', 640, 360)
    test_loader = DataLoader(test_data, batch_size=16, num_workers=4)

    fcn_model = SaliencyDualFCN(n_class=n_class)

    if use_gpu:
        ts = time.time()
        fcn_model = fcn_model.cuda()
        fcn_model = nn.DataParallel(fcn_model, device_ids=num_gpu)
        print("Finish cuda loading, time elapsed {}".format(time.time() - ts))

    # Optimizer and Loss Function
    criterion = nn.MSELoss()
    optimizer = optim.RMSprop(fcn_model.parameters(),
                              lr=lr,
                              momentum=momentum,
                              weight_decay=w_decay)
    scheduler = lr_scheduler.StepLR(
        optimizer, step_size=step_size,
        gamma=gamma)  # decay LR by a factor of `gamma` (0.5) every `step_size` (50) epochs

    train(args, train_loader, val_loader, test_loader, fcn_model, scheduler,
          optimizer, output_dir, use_gpu, epochs, criterion)
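
# A minimal, standalone sketch (not part of the training code above) of what
# StepLR(step_size=50, gamma=0.5) does here: every 50 calls to
# scheduler.step(), i.e. every 50 epochs, the RMSprop learning rate is halved.
import torch
from torch import nn, optim
from torch.optim import lr_scheduler

params = [nn.Parameter(torch.zeros(1))]
optimizer = optim.RMSprop(params, lr=1e-4)
scheduler = lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

for epoch in range(150):
    optimizer.step()                 # the real training epoch would go here
    scheduler.step()
    if (epoch + 1) % 50 == 0:
        print(epoch + 1, optimizer.param_groups[0]['lr'])
# expected: 5e-05 after 50 epochs, 2.5e-05 after 100, 1.25e-05 after 150
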
Exemplo n.º 14
0
def Train(Model, args):

    writer = SummaryWriter()
    beta1_Adam = args.beta1
    beta2_Adam = args.beta2


    if args.cuda:
        Model.cuda()

    #optimizer = optim.Adam(Model.parameters(), lr=args.lr, betas=(beta1_Adam, beta2_Adam))
    optimizer = optim.SGD(Model.parameters(), lr=args.lr)

    if args.resume:
        checkpoint = torch.load(args.resume)
        optimizer.load_state_dict(checkpoint['optimizer'])

    Model.train()


    steps = 0
    #loss_criterion_Angular = AngleLoss().cuda()
    CUDNN.benchmark = True
    if args.stepsize > 0:
        scheduler = lr_scheduler.StepLR(optimizer, step_size=args.stepsize, gamma=args.gamma)
    if args.dynamic_lr == True:
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3000, verbose=False,
                                                   threshold=0.00001, threshold_mode='rel', cooldown=2000, min_lr=0,
                                                   eps=1e-08)
    for epoch in range(args.start_epoch, args.epochs+1):
        #if epoch==3:
            #optimizer = optim.SGD(Model.parameters(), lr=args.lr)
        # Every args.lr_step epochs, the learning rate is multiplied by args.lr_decay
        # adjust_learning_rate(optimizer, epoch, args)

        # Load augmented data
        #transformed_dataset = FaceIdPoseDataset(args.train_csv_file, args.data_place,
                                        #transform = transforms.Compose([Resize((256,256)), RandomCrop((224,224))])) #for ResNet256x256->224x224 for VGG110x110->96x96
        # transformed_dataset = FaceIdPoseDataset(args.train_csv_file, args.data_place,
        #                                             transforms.Compose([transforms.Resize(256), transforms.RandomCrop(224),transforms.ToTensor()]))  # for ResNet256x256->224x224 for VGG110x110->96x96
        transformed_dataset = FaceIdPoseDataset(args.train_csv_file, args.data_place,transforms.Compose([transforms.Resize(256),
                                                                                                         transforms.RandomCrop(224),
                                                                                                         transforms.ToTensor()
                                                                                                         ]))  # for ResNet256x256->224x224 for VGG110x110->96x96

        dataloader = DataLoader(transformed_dataset, batch_size=args.Train_Batch, shuffle=True, num_workers=8)
        if args.stepsize > 0:
            scheduler.step()

        for i, batch_data in enumerate(dataloader):
            # backward() function accumulates gradients, however we don't want to mix up gradients between minibatches
            optimizer.zero_grad()
            batch_image = torch.FloatTensor(batch_data[0].float())

            batch_id_label = batch_data[2]

            if args.cuda:
                batch_image, batch_id_label = batch_image.cuda(), batch_id_label.cuda()

            batch_image, batch_id_label = Variable(batch_image), Variable(batch_id_label)

            steps += 1

            Prediction = Model(batch_image)
            Loss = Model.ID_Loss(Prediction, batch_id_label)
            #Loss = loss_criterion_Angular(Prediction, batch_id_label)

            Loss.backward()
            optimizer.step()
            if args.dynamic_lr == True:
                scheduler.step(Loss)
            log_learning(epoch, steps, 'ResNet50_Model', args.lr, Loss.item(), args)
            writer.add_scalar('Train/Train_Loss', Loss, steps)
            writer.add_scalar('Train/Model_Lr', optimizer.param_groups[0]['lr'], epoch)

            # Validation_Process(Model, epoch, writer, args)
        Validation_Process(Model, epoch, writer, args)

        if epoch % args.save_freq == 0:
            if not os.path.isdir(args.snapshot_dir): os.makedirs(args.snapshot_dir)
            save_path = os.path.join(args.snapshot_dir, 'epoch{}.pt'.format(epoch))
            torch.save(Model.state_dict(), save_path)
            save_checkpoint({
                'epoch': epoch + 1,
                'Model': Model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, save_dir=os.path.join(args.snapshot_dir, 'epoch{}'.format(epoch)))

    # export scalar data to JSON for external processing
    writer.export_scalars_to_json("./all_scalars.json")
    writer.close()
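
# A minimal sketch of the dynamic_lr branch above: ReduceLROnPlateau is stepped
# with the quantity it monitors (a loss here), unlike StepLR, which is stepped
# unconditionally. The patience/factor values below are illustrative only.
import torch
from torch import nn, optim
from torch.optim import lr_scheduler

model = nn.Linear(4, 2)
optimizer = optim.SGD(model.parameters(), lr=0.1)
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                           factor=0.1, patience=5)

for step in range(20):
    loss = torch.tensor(1.0)      # stand-in for a training loss that plateaus
    scheduler.step(loss)          # LR is cut only after `patience` bad steps
print(optimizer.param_groups[0]['lr'])
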
Exemplo n.º 15
0
def experience_mnist(config, path, param):
    print("START MNIST")
    use_cuda = config.general.use_cuda and torch.cuda.is_available()
    torch.manual_seed(config.general.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    print("START TRAINING TARGET MODEL")
    data_train_target = custum_MNIST(True,
                                     0,
                                     config,
                                     '../data',
                                     train=True,
                                     download=True,
                                     transform=transforms.Compose([
                                         transforms.ToTensor(),
                                         transforms.Normalize((0.1307, ),
                                                              (0.3081, ))
                                     ]))
    data_test_target = custum_MNIST(True,
                                    0,
                                    config,
                                    '../data',
                                    train=False,
                                    transform=transforms.Compose([
                                        transforms.ToTensor(),
                                        transforms.Normalize((0.1307, ),
                                                             (0.3081, ))
                                    ]))
    criterion = nn.CrossEntropyLoss()
    train_loader_target = torch.utils.data.DataLoader(
        data_train_target, batch_size=config.learning.batch_size, shuffle=True)
    test_loader_target = torch.utils.data.DataLoader(
        data_test_target, batch_size=config.learning.batch_size, shuffle=True)
    dataloaders_target = {
        "train": train_loader_target,
        "val": test_loader_target
    }
    dataset_sizes_target = {
        "train": len(data_train_target),
        "val": len(data_test_target)
    }
    print("TAILLE dataset", dataset_sizes_target)
    model_target = Net_mnist().to(device)
    optimizer = optim.SGD(model_target.parameters(),
                          lr=config.learning.learning_rate,
                          momentum=config.learning.momentum)
    exp_lr_scheduler = lr_scheduler.StepLR(
        optimizer,
        step_size=config.learning.decrease_lr_every,  # epochs between LR decays
        gamma=config.learning.decrease_lr_factor)  # multiplicative decay factor
    model_target, best_acc_target, data_test_set, label_test_set, class_test_set = train_model(
        model_target,
        criterion,
        optimizer,
        exp_lr_scheduler,
        dataloaders_target,
        dataset_sizes_target,
        num_epochs=config.learning.epochs)
    np.save(path + "/res_train_target_" + str(param) + ".npy", best_acc_target)
    print("START TRAINING SHADOW MODEL")
    all_shadow_models = []
    all_dataloaders_shadow = []
    data_train_set = []
    label_train_set = []
    class_train_set = []
    for num_model_sahdow in range(config.general.number_shadow_model):
        criterion = nn.CrossEntropyLoss()

        data_train_shadow = custum_MNIST(False,
                                         num_model_sahdow,
                                         config,
                                         '../data',
                                         train=True,
                                         download=True,
                                         transform=transforms.Compose([
                                             transforms.ToTensor(),
                                             transforms.Normalize((0.1307, ),
                                                                  (0.3081, ))
                                         ]))
        data_test_shadow = custum_MNIST(False,
                                        num_model_sahdow,
                                        config,
                                        '../data',
                                        train=False,
                                        transform=transforms.Compose([
                                            transforms.ToTensor(),
                                            transforms.Normalize((0.1307, ),
                                                                 (0.3081, ))
                                        ]))
        train_loader_shadow = torch.utils.data.DataLoader(
            data_train_shadow,
            batch_size=config.learning.batch_size,
            shuffle=True)
        test_loader_shadow = torch.utils.data.DataLoader(
            data_test_shadow,
            batch_size=config.learning.batch_size,
            shuffle=True)
        dataloaders_shadow = {
            "train": train_loader_shadow,
            "val": test_loader_shadow
        }
        dataset_sizes_shadow = {
            "train": len(data_train_shadow),
            "val": len(data_test_shadow)
        }
        print("TAILLE dataset", dataset_sizes_shadow)
        model_shadow = Net_mnist().to(device)
        optimizer = optim.SGD(model_shadow.parameters(),
                              lr=config.learning.learning_rate,
                              momentum=config.learning.momentum)
        exp_lr_scheduler = lr_scheduler.StepLR(
            optimizer,
            step_size=config.learning.decrease_lr_every,  # epochs between LR decays
            gamma=config.learning.decrease_lr_factor)  # multiplicative decay factor
        model_shadow, best_acc_sh, data_train_set_unit, label_train_set_unit, class_train_set_unit = train_model(
            model_shadow,
            criterion,
            optimizer,
            exp_lr_scheduler,
            dataloaders_shadow,
            dataset_sizes_shadow,
            num_epochs=config.learning.epochs)
        data_train_set.append(data_train_set_unit)
        label_train_set.append(label_train_set_unit)
        class_train_set.append(class_train_set_unit)
        np.save(
            path + "/res_train_shadow_" + str(num_model_sahdow) + "_" +
            str(param) + ".npy", best_acc_sh)
        all_shadow_models.append(model_shadow)
        all_dataloaders_shadow.append(dataloaders_shadow)
    print("START GETTING DATASET ATTACK MODEL")
    data_train_set = np.concatenate(data_train_set)
    label_train_set = np.concatenate(label_train_set)
    class_train_set = np.concatenate(class_train_set)
    #data_test_set, label_test_set, class_test_set = get_data_for_final_eval([model_target], [dataloaders_target], device)
    #data_train_set, label_train_set, class_train_set = get_data_for_final_eval(all_shadow_models, all_dataloaders_shadow, device)
    data_train_set, label_train_set, class_train_set = shuffle(
        data_train_set,
        label_train_set,
        class_train_set,
        random_state=config.general.seed)
    data_test_set, label_test_set, class_test_set = shuffle(
        data_test_set,
        label_test_set,
        class_test_set,
        random_state=config.general.seed)
    print("Taille dataset train", len(label_train_set))
    print("Taille dataset test", len(label_test_set))
    print("START FITTING ATTACK MODEL")
    model = lgb.LGBMClassifier(objective='binary',
                               reg_lambda=config.learning.ml.reg_lambd,
                               n_estimators=config.learning.ml.n_estimators)
    model.fit(data_train_set, label_train_set)
    y_pred_lgbm = model.predict(data_test_set)
    precision_general, recall_general, _, _ = precision_recall_fscore_support(
        y_pred=y_pred_lgbm, y_true=label_test_set, average="macro")
    accuracy_general = accuracy_score(y_true=label_test_set,
                                      y_pred=y_pred_lgbm)
    precision_per_class, recall_per_class, accuracy_per_class = [], [], []
    for idx_class, classe in enumerate(data_train_target.classes):
        all_index_class = np.where(class_test_set == idx_class)
        precision, recall, _, _ = precision_recall_fscore_support(
            y_pred=y_pred_lgbm[all_index_class],
            y_true=label_test_set[all_index_class],
            average="macro")
        accuracy = accuracy_score(y_true=label_test_set[all_index_class],
                                  y_pred=y_pred_lgbm[all_index_class])
        precision_per_class.append(precision)
        recall_per_class.append(recall)
        accuracy_per_class.append(accuracy)
    print("END MNIST")
    return (precision_general, recall_general, accuracy_general,
            precision_per_class, recall_per_class, accuracy_per_class)
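
# A minimal sketch of the per-class evaluation loop above: attack metrics are
# computed on the subset of predictions whose underlying sample belongs to
# each target class. The arrays below are tiny toy values, not MNIST outputs.
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

y_true  = np.array([1, 0, 1, 0, 1, 0])   # member / non-member ground truth
y_pred  = np.array([1, 0, 0, 1, 1, 0])   # attack-model predictions
classes = np.array([0, 0, 1, 1, 2, 2])   # class of the underlying sample

for c in np.unique(classes):
    idx = np.where(classes == c)
    prec, rec, _, _ = precision_recall_fscore_support(
        y_pred=y_pred[idx], y_true=y_true[idx], average="macro")
    acc = accuracy_score(y_true=y_true[idx], y_pred=y_pred[idx])
    print(c, prec, rec, acc)
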
Exemplo n.º 16
0
def train_net(net, train_dataset, valid_dataset, use_gpu, config):
    epoch = config.getint("train", "epoch")
    learning_rate = config.getfloat("train", "learning_rate")
    task_loss_type = config.get("train", "type_of_loss")

    output_time = config.getint("output", "output_time")
    test_time = config.getint("output", "test_time")
    model_path = os.path.join(config.get("output", "model_path"),
                              config.get("output", "model_name"))

    try:
        trained_epoch = config.get("train", "pre_train")
        trained_epoch = int(trained_epoch)
    except Exception as e:
        trained_epoch = 0

    os.makedirs(os.path.join(config.get("output", "tensorboard_path")),
                exist_ok=True)

    if trained_epoch == 0:
        shutil.rmtree(
            os.path.join(config.get("output", "tensorboard_path"),
                         config.get("output", "model_name")), True)

    # writer = SummaryWriter(
    #    os.path.join(config.get("output", "tensorboard_path"), config.get("output", "model_name")),
    #    config.get("output", "model_name"))
    writer = None

    criterion = get_loss(task_loss_type)

    optimizer_type = config.get("train", "optimizer")
    if optimizer_type == "adam":
        optimizer = optim.Adam(net.parameters(),
                               lr=learning_rate,
                               weight_decay=config.getfloat(
                                   "train", "weight_decay"))
    elif optimizer_type == "sgd":
        optimizer = optim.SGD(net.parameters(),
                              lr=learning_rate,
                              momentum=config.getfloat("train", "momentum"),
                              weight_decay=config.getfloat(
                                  "train", "weight_decay"))
    else:
        raise NotImplementedError

    step_size = config.getint("train", "step_size")
    gamma = config.getfloat("train", "gamma")
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer,
                                           step_size=step_size,
                                           gamma=gamma)

    print('** start training here! **')
    print(
        '----------------|----------TRAIN-----------|----------VALID-----------|----------------|'
    )
    print(
        '  lr    epoch   |   loss           top-1   |   loss           top-1   |      time      |'
    )
    print(
        '----------------|--------------------------|--------------------------|----------------|'
    )
    start = timer()

    for epoch_num in range(trained_epoch, epoch):
        cnt = 0

        train_cnt = 0
        train_loss = 0
        train_acc = 0

        exp_lr_scheduler.step(epoch_num)
        lr = 0
        for g in optimizer.param_groups:
            lr = float(g['lr'])
            break

        while True:
            cnt += 1
            data = train_dataset.fetch_data(config)
            if data is None:
                break
            '''
            for key in data.keys():
                if isinstance(data[key], torch.Tensor):
                    if torch.cuda.is_available() and use_gpu:
                        data[key] = Variable(data[key].cuda())
                    else:
                        data[key] = Variable(data[key])
            
            '''
            data = DataCuda(data, use_gpu)

            optimizer.zero_grad()

            results = net(data, criterion, config, use_gpu)

            outputs, loss, accu = results["x"], results["loss"], results[
                "accuracy"]

            loss.backward()
            train_loss += loss.item()
            train_acc += accu.item()
            train_cnt += 1

            loss = loss.item()
            accu = accu.item()
            optimizer.step()

            if cnt % output_time == 0:
                print('\r', end='', flush=True)
                print(
                    '%.4f   % 3d    |  %.4f         % 2.2f   |   ????           ?????   |  %s  | %d'
                    % (lr, epoch_num + 1, train_loss / train_cnt, train_acc /
                       train_cnt * 100, time_to_str((timer() - start)), cnt),
                    end='',
                    flush=True)

        train_loss /= train_cnt
        train_acc /= train_cnt

        # writer.add_scalar(config.get("output", "model_name") + " train loss", train_loss, epoch_num + 1)
        # writer.add_scalar(config.get("output", "model_name") + " train accuracy", train_acc, epoch_num + 1)

        if not os.path.exists(model_path):
            os.makedirs(model_path)
        torch.save(net.state_dict(),
                   os.path.join(model_path, "model-%d.pkl" % (epoch_num + 1)))

        valid_loss, valid_accu, auc_result = valid_net(net, valid_dataset,
                                                       use_gpu, config,
                                                       epoch_num + 1, writer)
        print('\r', end='', flush=True)
        print(
            '%.4f   % 3d    |  %.4f          %.2f   |  %.4f         % 2.2f   |  %s  | auc_result: %.4f'
            % (lr, epoch_num + 1, train_loss, train_acc * 100, valid_loss,
               valid_accu * 100, time_to_str((timer() - start)), auc_result))
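
# A minimal sketch of the per-epoch scheduling used above, written in the form
# current PyTorch recommends: scheduler.step() is called once per epoch, after
# the optimizer updates, and without the explicit epoch argument. Model and
# optimizer here are placeholders, not the ones built in train_net().
import torch
from torch import nn, optim
from torch.optim import lr_scheduler

net = nn.Linear(16, 4)
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

for epoch_num in range(10):
    for _ in range(3):                       # stand-in for the batch loop
        optimizer.zero_grad()
        loss = net(torch.randn(2, 16)).pow(2).mean()
        loss.backward()
        optimizer.step()
    exp_lr_scheduler.step()                  # once per epoch, no epoch argument
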
Exemplo n.º 17
0
def main():
    # so other functions can access these variables
    global args, dataloaders, data_sizes, image_sets
    args = get_user_args()

    # defining processing device, if cuda is available then GPU else CPU
    device = torch.device("cuda:0" if (
        torch.cuda.is_available() and args.gpu) else "cpu")

    print('=> beginning training using {}'.format(str(device).upper()))

    # lets the user know which model is being trained
    print('=> creating model: {}'.format(args.arch))
    model = models.__dict__[args.arch](pretrained=True)

    print('* ' * 20)

    model.to(device)  # send device to processor

    # image location with child folders of train, valid, test
    data_dir = Path(args.data)
    train_dir = data_dir / 'train'
    valid_dir = data_dir / 'valid'
    test_dir = data_dir / 'test'

    # variable for various iterations later
    states = ['train', 'valid', 'test']

    # for easy iteration later
    dirs_dict = {'train': train_dir, 'valid': valid_dir, 'test': test_dir}

    # image normalization parameters, predefined
    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])

    # transforms for valid and test data, use same parameters
    valid_test_transforms = [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize]

    data_transforms = {
        'train':  # vector manipulation for generalized learning
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomRotation(30),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.ToTensor(),
            normalize
        ]),
        'valid':
        transforms.Compose(valid_test_transforms),
        'test':
        transforms.Compose(valid_test_transforms)
    }

    image_sets = {
        i_set: datasets.ImageFolder(
            dirs_dict[i_set], transform=data_transforms[i_set])
        for i_set in states
    }

    dataloaders = {
        'train': torch.utils.data.DataLoader(
            image_sets['train'], batch_size=args.batch_size, shuffle=True),
        'valid': torch.utils.data.DataLoader(image_sets['valid'], batch_size=args.batch_size),
        'test': torch.utils.data.DataLoader(image_sets['test'], batch_size=args.batch_size)
    }
    classes = image_sets['train'].classes
    data_sizes = {x: len(image_sets[x]) for x in states}

    for p in model.parameters():
        p.requires_grad = False  # ensures gradients aren't calculated for parameters

    classifier = nn.Sequential(
        OrderedDict([
            ('fc1', nn.Linear(
                model.classifier[0].in_features, args.hidden_units)),
            ('relu1', nn.ReLU()),
            ('dropout', nn.Dropout(args.dropout)),
            ('fc2', nn.Linear(args.hidden_units, len(classes))),
            ('output', nn.LogSoftmax(dim=1)),
        ]))

    model.classifier = classifier

    criterion = nn.NLLLoss()

    optimizer = optim.Adam(model.classifier.parameters(),
                           lr=args.learning_rate)

    scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.0125)

    model_trained = train(model, optimizer, criterion,
                          scheduler, args.epochs, device)

    save_checkpoint(model_trained, args.epochs, args.save_dir,
                    args.arch, args.learning_rate, optimizer, args.hidden_units)
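
# A minimal sketch of the transfer-learning setup above: freeze the pretrained
# backbone, attach a new classifier head, and give only the head's parameters
# to the optimizer. The backbone, sizes and hyperparameters are illustrative
# placeholders, not the torchvision model used in the example.
import torch
from torch import nn, optim
from torch.optim import lr_scheduler

backbone = nn.Sequential(nn.Conv2d(3, 16, 3, padding=1),
                         nn.AdaptiveAvgPool2d(1), nn.Flatten())
for p in backbone.parameters():
    p.requires_grad = False                      # backbone stays frozen

head = nn.Sequential(nn.Linear(16, 8), nn.ReLU(), nn.Dropout(0.5),
                     nn.Linear(8, 5), nn.LogSoftmax(dim=1))
model = nn.Sequential(backbone, head)

criterion = nn.NLLLoss()
optimizer = optim.Adam(head.parameters(), lr=1e-3)   # only the head is updated
scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

x, y = torch.randn(4, 3, 64, 64), torch.randint(0, 5, (4,))
loss = criterion(model(x), y)
loss.backward()
optimizer.step()
scheduler.step()
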
Exemplo n.º 18
0
        shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        torchvision.datasets.FashionMNIST(
            root="data",
            train=False,
            transform=torchvision.transforms.Compose([
                torchvision.transforms.ToTensor(),
                torchvision.transforms.Normalize([0], [1]),
            ]),
        ),
        batch_size=32)
    cnn = LeNet(input_shape=(28, 28, 1), num_classes=10)
    cnn = cnn.cuda() if simulation_config['use_gpu'] else cnn

    optimizer = torch.optim.Adam(cnn.parameters(), lr=0.005)
    lr_sch = lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.2)
    criterion = torch.nn.CrossEntropyLoss()

    # for i in range(20):
    #     train_ann(model=cnn, train_loader=train_loader, optimizer=optimizer, criterion=criterion, epoch=i+1, simulation_config=simulation_config)
    #
    #     test_ann(model=cnn, test_loader=test_loader, criterion=criterion, simulation_config=simulation_config)
    #     lr_sch.step()
    # torch.save(cnn.state_dict(), 'model_output/LeNet_fashionmnist_params.pkl')

    device = torch.device('cuda')
    scnn = SpikingLeNet(input_shape=(28, 28, 1),
                        num_classes=10,
                        if_param=IFParameters(),
                        device=device,
                        seq_length=simulation_config['seq_length'],
Exemplo n.º 19
0
                            num_workers=num_workers)
    folds_training_losses = []
    folds_val_losses = []
    folds_val_mean_losses = []

    for i in range(2):
        print("CV: ", i)
        train_epochs_mean_losses = []
        val_epochs_mean_losses = []
        val_epochs = []
        i_fold_val_scores = []

        model = MRIRegressor(feats, dropout_p).to(device=device)
        optimizer = Adam(model.parameters(), lr, weight_decay=weight_decay)
        scheduler = lr_scheduler.StepLR(optimizer,
                                        step_size=1,
                                        gamma=gamma,
                                        last_epoch=-1)

        for epoch in range(num_epochs):
            epoch_train_batch_losses = train_epoch(model, train_loader,
                                                   loss_function, optimizer,
                                                   scheduler, device)
            train_mean_loss = np.mean(epoch_train_batch_losses)
            train_max_loss = np.max(epoch_train_batch_losses)
            train_epochs_mean_losses.append(train_mean_loss)
            if epoch % 5 == 0 or epoch == num_epochs - 1:
                val_epochs.append(epoch)
                epoch_val_batch_losses = eval_epoch(model, val_loader,
                                                    loss_function, device)
                val_mean_loss = np.mean(epoch_val_batch_losses)
                val_epochs_mean_losses.append(val_mean_loss)
Exemplo n.º 20
0
            nn.LeakyReLU(0.2, True),
            # layer 4
            nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
            nn.Sigmoid())
        self.to(DEVICE)
        print_network(self)

    def forward(self, x):
        return nn.parallel.data_parallel(self.net, x).view(-1, 1).squeeze(1)


G = generator(nz, nc, ngf)
D = discriminator(3, ndf)
opt_G = optim.Adam(G.parameters(), lr, betas=[0.5, 0.999])
opt_D = optim.Adam(D.parameters(), lr, betas=[0.5, 0.999])
scheduler_lr = lr_scheduler.StepLR(opt_G, step_size=1, gamma=0.9)
criterion = nn.BCELoss()

for epoch in range(0, n_epochs):
    G.train()
    D.train()
    _batch = 0
    scheduler_lr.step()
    for X, _ in train_iter:
        _batch += 1

        real_x = X.to(DEVICE)
        z = T.randn(real_x.size(0), nz, 1, 1, device=DEVICE)
        fake_x = G(z)

        # instance noise trick
Exemplo n.º 21
0
    def __init__(self, opt):
        print('SRRaGANModel', opt)
        super(SRRaGANModel, self).__init__(opt)
        train_opt = opt['train']

        if self.is_train:
            if opt['datasets']['train']['znorm']:
                z_norm = opt['datasets']['train']['znorm']
            else:
                z_norm = False

        # define networks and load pretrained models
        self.netG = networks.define_G(opt).to(self.device)  # G
        if self.is_train:
            self.netG.train()
            if train_opt['gan_weight']:
                self.netD = networks.define_D(opt).to(self.device)  # D
                self.netD.train()
        self.load()  # load G and D if needed

        # define losses, optimizer and scheduler
        if self.is_train:
            # Define if the generator will have a final capping mechanism in the output
            self.outm = None
            if train_opt['finalcap']:
                self.outm = train_opt['finalcap']

            # G pixel loss
            # """
            if train_opt['pixel_weight']:
                if train_opt['pixel_criterion']:
                    l_pix_type = train_opt['pixel_criterion']
                else:  # default to cb
                    l_pix_type = 'cb'

                if l_pix_type == 'l1':
                    self.cri_pix = nn.L1Loss().to(self.device)
                elif l_pix_type == 'l2':
                    self.cri_pix = nn.MSELoss().to(self.device)
                elif l_pix_type == 'cb':
                    self.cri_pix = CharbonnierLoss().to(self.device)
                elif l_pix_type == 'elastic':
                    self.cri_pix = ElasticLoss().to(self.device)
                elif l_pix_type == 'relativel1':
                    self.cri_pix = RelativeL1().to(self.device)
                elif l_pix_type == 'l1cosinesim':
                    self.cri_pix = L1CosineSim().to(self.device)
                else:
                    raise NotImplementedError(
                        'Loss type [{:s}] not recognized.'.format(l_pix_type))
                self.l_pix_w = train_opt['pixel_weight']
            else:
                logger.info('Remove pixel loss.')
                self.cri_pix = None
            # """

            # G feature loss
            # """
            if train_opt['feature_weight']:
                if train_opt['feature_criterion']:
                    l_fea_type = train_opt['feature_criterion']
                else:  # default to l1
                    l_fea_type = 'l1'

                if l_fea_type == 'l1':
                    self.cri_fea = nn.L1Loss().to(self.device)
                elif l_fea_type == 'l2':
                    self.cri_fea = nn.MSELoss().to(self.device)
                elif l_fea_type == 'cb':
                    self.cri_fea = CharbonnierLoss().to(self.device)
                elif l_fea_type == 'elastic':
                    self.cri_fea = ElasticLoss().to(self.device)
                else:
                    raise NotImplementedError(
                        'Loss type [{:s}] not recognized.'.format(l_fea_type))
                self.l_fea_w = train_opt['feature_weight']
            else:
                logger.info('Remove feature loss.')
                self.cri_fea = None
            if self.cri_fea:  # load VGG perceptual loss
                self.netF = networks.define_F(opt,
                                              use_bn=False).to(self.device)
            # """

            # HFEN loss
            # """
            if train_opt['hfen_weight']:
                l_hfen_type = train_opt['hfen_criterion']
                if train_opt['hfen_presmooth']:
                    pre_smooth = train_opt['hfen_presmooth']
                else:
                    pre_smooth = False  # train_opt['hfen_presmooth']
                if l_hfen_type:
                    if l_hfen_type == 'rel_l1' or l_hfen_type == 'rel_l2':
                        relative = True
                    else:
                        relative = False  # True #train_opt['hfen_relative']
                if l_hfen_type:
                    self.cri_hfen = HFENLoss(loss_f=l_hfen_type,
                                             device=self.device,
                                             pre_smooth=pre_smooth,
                                             relative=relative).to(self.device)
                else:
                    raise NotImplementedError(
                        'Loss type [{:s}] not recognized.'.format(l_hfen_type))
                self.l_hfen_w = train_opt['hfen_weight']
            else:
                logger.info('Remove HFEN loss.')
                self.cri_hfen = None
            # """

            # TV loss
            # """
            if train_opt['tv_weight']:
                self.l_tv_w = train_opt['tv_weight']
                l_tv_type = train_opt['tv_type']
                if train_opt['tv_norm']:
                    tv_norm = train_opt['tv_norm']
                else:
                    tv_norm = 1

                if l_tv_type == 'normal':
                    self.cri_tv = TVLoss(self.l_tv_w,
                                         p=tv_norm).to(self.device)
                elif l_tv_type == '4D':
                    # Total Variation regularization in 4 directions
                    self.cri_tv = TVLoss4D(self.l_tv_w).to(self.device)
                else:
                    raise NotImplementedError(
                        'Loss type [{:s}] not recognized.'.format(l_tv_type))
            else:
                logger.info('Remove TV loss.')
                self.cri_tv = None
            # """

            # SSIM loss
            # """
            if train_opt['ssim_weight']:
                self.l_ssim_w = train_opt['ssim_weight']

                if train_opt['ssim_type']:
                    l_ssim_type = train_opt['ssim_type']
                else:  # default to ms-ssim
                    l_ssim_type = 'ms-ssim'

                if l_ssim_type == 'ssim':
                    self.cri_ssim = SSIM(win_size=11,
                                         win_sigma=1.5,
                                         size_average=True,
                                         data_range=1.,
                                         channel=3).to(self.device)
                elif l_ssim_type == 'ms-ssim':
                    self.cri_ssim = MS_SSIM(win_size=11,
                                            win_sigma=1.5,
                                            size_average=True,
                                            data_range=1.,
                                            channel=3).to(self.device)
            else:
                logger.info('Remove SSIM loss.')
                self.cri_ssim = None
            # """

            # LPIPS loss
            """
            lpips_spatial = False
            if train_opt['lpips_spatial']:
                #lpips_spatial = True if train_opt['lpips_spatial'] == True else False
                lpips_spatial = True if train_opt['lpips_spatial'] else False
            lpips_GPU = False
            if train_opt['lpips_GPU']:
                #lpips_GPU = True if train_opt['lpips_GPU'] == True else False
                lpips_GPU = True if train_opt['lpips_GPU'] else False
            #"""
            # """
            lpips_spatial = True  # False # Return a spatial map of perceptual distance. Needs to use .mean() for the backprop if True; the mean distance is approximately the same as the non-spatial distance
            lpips_GPU = True  # Whether to use GPU for LPIPS calculations
            if train_opt['lpips_weight']:
                if z_norm == True:  # if images are in [-1,1] range
                    # images are already in the [-1,1] range
                    self.lpips_norm = False
                else:
                    # normalize images from [0,1] range to [-1,1]
                    self.lpips_norm = True

                self.l_lpips_w = train_opt['lpips_weight']
                # Can use original off-the-shelf uncalibrated networks 'net' or Linearly calibrated models (LPIPS) 'net-lin'
                if train_opt['lpips_type']:
                    lpips_type = train_opt['lpips_type']
                else:  # Default use linearly calibrated models, better results
                    lpips_type = 'net-lin'
                # Can set net = 'alex', 'squeeze' or 'vgg' or Low-level metrics 'L2' or 'ssim'
                if train_opt['lpips_net']:
                    lpips_net = train_opt['lpips_net']
                else:  # Default use VGG for feature extraction
                    lpips_net = 'vgg'
                self.cri_lpips = models.PerceptualLoss(
                    model=lpips_type,
                    net=lpips_net,
                    use_gpu=lpips_GPU,
                    model_path=None,
                    spatial=lpips_spatial)  # .to(self.device)
                # Linearly calibrated models (LPIPS)
                # self.cri_lpips = models.PerceptualLoss(model='net-lin', net='alex', use_gpu=lpips_GPU, model_path=None, spatial=lpips_spatial) #.to(self.device)
                # self.cri_lpips = models.PerceptualLoss(model='net-lin', net='vgg', use_gpu=lpips_GPU, model_path=None, spatial=lpips_spatial) #.to(self.device)
                # Off-the-shelf uncalibrated networks
                # Can set net = 'alex', 'squeeze' or 'vgg'
                # self.cri_lpips = models.PerceptualLoss(model='net', net='alex', use_gpu=lpips_GPU, model_path=None, spatial=lpips_spatial)
                # Low-level metrics
                # self.cri_lpips = models.PerceptualLoss(model='L2', colorspace='Lab', use_gpu=lpips_GPU)
                # self.cri_lpips = models.PerceptualLoss(model='ssim', colorspace='RGB', use_gpu=lpips_GPU)
            else:
                logger.info('Remove LPIPS loss.')
                self.cri_lpips = None
            # """

            # SPL loss
            # """
            if train_opt['spl_weight']:
                self.l_spl_w = train_opt['spl_weight']
                l_spl_type = train_opt['spl_type']
                # SPL Normalization (from [-1,1] images to [0,1] range, if needed)
                if z_norm == True:  # if images are in [-1,1] range
                    self.spl_norm = True  # normalize images to [0, 1]
                else:
                    self.spl_norm = False  # images are already in [0, 1] range
                # YUV Normalization (from [-1,1] images to [0,1] range, if needed, but mandatory)
                if z_norm == True:  # if images are in [-1,1] range
                    # normalize images to [0, 1] for yuv calculations
                    self.yuv_norm = True
                else:
                    self.yuv_norm = False  # images are already in [0, 1] range
                if l_spl_type == 'spl':  # Both GPL and CPL
                    # Gradient Profile Loss
                    self.cri_gpl = spl.GPLoss(spl_norm=self.spl_norm)
                    # Color Profile Loss
                    # You can define the desired color spaces in the initialization
                    # default is True for all
                    self.cri_cpl = spl.CPLoss(rgb=True,
                                              yuv=True,
                                              yuvgrad=True,
                                              spl_norm=self.spl_norm,
                                              yuv_norm=self.yuv_norm)
                elif l_spl_type == 'gpl':  # Only GPL
                    # Gradient Profile Loss
                    self.cri_gpl = spl.GPLoss(spl_norm=self.spl_norm)
                    self.cri_cpl = None
                elif l_spl_type == 'cpl':  # Only CPL
                    # Color Profile Loss
                    # You can define the desired color spaces in the initialization
                    # default is True for all
                    self.cri_cpl = spl.CPLoss(rgb=True,
                                              yuv=True,
                                              yuvgrad=True,
                                              spl_norm=self.spl_norm,
                                              yuv_norm=self.yuv_norm)
                    self.cri_gpl = None
            else:
                logger.info('Remove SPL loss.')
                self.cri_gpl = None
                self.cri_cpl = None
            # """

            # GD gan loss
            # """
            if train_opt['gan_weight']:
                self.cri_gan = GANLoss(train_opt['gan_type'], 1.0,
                                       0.0).to(self.device)
                self.l_gan_w = train_opt['gan_weight']
                # D_update_ratio and D_init_iters are for WGAN
                self.D_update_ratio = train_opt['D_update_ratio'] if train_opt[
                    'D_update_ratio'] else 1
                self.D_init_iters = train_opt['D_init_iters'] if train_opt[
                    'D_init_iters'] else 0

                if train_opt['gan_type'] == 'wgan-gp':
                    self.random_pt = torch.Tensor(1, 1, 1, 1).to(self.device)
                    # gradient penalty loss
                    self.cri_gp = GradientPenaltyLoss(device=self.device).to(
                        self.device)
                    self.l_gp_w = train_opt['gp_weigth']
            else:
                logger.info('Remove GAN loss.')
                self.cri_gan = None
            # """

            # optimizers
            # G
            wd_G = train_opt['weight_decay_G'] if train_opt[
                'weight_decay_G'] else 0

            optim_params = []
            for k, v in self.netG.named_parameters(
            ):  # can optimize for a part of the model
                if v.requires_grad:
                    optim_params.append(v)
                else:
                    logger.warning(
                        'Params [{:s}] will not optimize.'.format(k))
            self.optimizer_G = torch.optim.Adam(optim_params,
                                                lr=train_opt['lr_G'],
                                                weight_decay=wd_G,
                                                betas=(train_opt['beta1_G'],
                                                       0.999))
            self.optimizers.append(self.optimizer_G)

            # D
            if self.cri_gan:
                wd_D = train_opt['weight_decay_D'] if train_opt[
                    'weight_decay_D'] else 0
                self.optimizer_D = torch.optim.Adam(
                    self.netD.parameters(),
                    lr=train_opt['lr_D'],
                    weight_decay=wd_D,
                    betas=(train_opt['beta1_D'], 0.999))
                self.optimizers.append(self.optimizer_D)

            # schedulers
            if train_opt['lr_scheme'] == 'MultiStepLR':
                for optimizer in self.optimizers:
                    self.schedulers.append(
                        lr_scheduler.MultiStepLR(optimizer,
                                                 train_opt['lr_steps'],
                                                 train_opt['lr_gamma']))
            elif train_opt['lr_scheme'] == 'MultiStepLR_Restart':
                for optimizer in self.optimizers:
                    self.schedulers.append(
                        lr_schedulerR.MultiStepLR_Restart(
                            optimizer,
                            train_opt['lr_steps'],
                            restarts=train_opt['restarts'],
                            weights=train_opt['restart_weights'],
                            gamma=train_opt['lr_gamma'],
                            clear_state=train_opt['clear_state']))
            elif train_opt['lr_scheme'] == 'StepLR':
                for optimizer in self.optimizers:
                    self.schedulers.append(
                        lr_scheduler.StepLR(optimizer,
                                            train_opt['lr_step_size'],
                                            train_opt['lr_gamma']))
            elif train_opt['lr_scheme'] == 'StepLR_Restart':
                for optimizer in self.optimizers:
                    self.schedulers.append(
                        lr_schedulerR.StepLR_Restart(
                            optimizer,
                            step_sizes=train_opt['lr_step_sizes'],
                            restarts=train_opt['restarts'],
                            weights=train_opt['restart_weights'],
                            gamma=train_opt['lr_gamma'],
                            clear_state=train_opt['clear_state']))
            elif train_opt['lr_scheme'] == 'CosineAnnealingLR_Restart':
                for optimizer in self.optimizers:
                    self.schedulers.append(
                        lr_schedulerR.CosineAnnealingLR_Restart(
                            optimizer,
                            train_opt['T_period'],
                            eta_min=train_opt['eta_min'],
                            restarts=train_opt['restarts'],
                            weights=train_opt['restart_weights']))
            elif train_opt['lr_scheme'] == 'ReduceLROnPlateau':
                for optimizer in self.optimizers:
                    self.schedulers.append(
                        #lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.2, threshold=0.01, patience=5)
                        lr_scheduler.ReduceLROnPlateau(
                            optimizer,
                            mode=train_opt['plateau_mode'],
                            factor=train_opt['plateau_factor'],
                            threshold=train_opt['plateau_threshold'],
                            patience=train_opt['plateau_patience']))
            else:
                raise NotImplementedError(
                    'Learning rate scheme ("lr_scheme") not defined or not recognized.'
                )

            self.log_dict = OrderedDict()
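
# A minimal sketch of the scheduler bookkeeping used above: one scheduler is
# built per optimizer (here G and D) and collected in a list so they can all
# be stepped together during training. Learning rates and step sizes are
# illustrative values, not values read from train_opt.
import torch
from torch import nn, optim
from torch.optim import lr_scheduler

netG, netD = nn.Linear(8, 8), nn.Linear(8, 1)
optimizers = [optim.Adam(netG.parameters(), lr=1e-4, betas=(0.9, 0.999)),
              optim.Adam(netD.parameters(), lr=1e-4, betas=(0.9, 0.999))]
schedulers = [lr_scheduler.StepLR(opt, step_size=200000, gamma=0.5)
              for opt in optimizers]

def update_learning_rate():
    # one call per training iteration steps every scheduler in the list
    for s in schedulers:
        s.step()
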
Exemplo n.º 22
0
    model_ft.classifier[6] = nn.Linear(num_ftrs, len(CLASSES))

# Continue from last saved model
if CONTINUE_FLAG == 1: 
    print('Loading Saved Model: ', MODEL_PATH)
    model_ft.load_state_dict(torch.load(MODEL_PATH))

# Assign model to device (CPU if local)
model_ft = model_ft.to(device)

# Set up optimizer
if OPTIMIZER == 'SGD':
    optimizer_ft = optim.SGD(model_ft.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY, momentum=SGD_SETTINGS[1], nesterov=SGD_SETTINGS[0])
elif OPTIMIZER == 'ADAM':
    optimizer_ft = optim.Adam(model_ft.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

# Set up learning rate decay schedule (decays LR by LR_DECAY_FACTOR every LR_DECAY_EPOCHS epochs)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=LR_DECAY_EPOCHS, gamma=LR_DECAY_FACTOR)

# Train and return model
criterion = nn.CrossEntropyLoss()

model_ft, best_acc = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, EVAL_FLAG, num_epochs=NUM_EPOCHS)

# Save model
torch.save(model_ft.state_dict(), MODEL_PATH)

# Visualize model
# visualize_model(model_ft)
# input('press <ENTER> to continue')
Exemplo n.º 23
0
 def on_start(state):
     if os.path.isfile(trace_file):
         os.remove(trace_file)
     state['scheduler'] = lr_scheduler.StepLR(state['optimizer'],
                                              opt['train.decay_every'],
                                              gamma=0.5)
Exemplo n.º 24
0
    def init_optimizer(self, net_params):
        optimizer = None
        if self.configer.get('optim', 'optim_method') == 'sgd':
            optimizer = SGD(
                net_params,
                lr=self.configer.get('lr', 'base_lr'),
                momentum=self.configer.get('optim', 'sgd')['momentum'],
                weight_decay=self.configer.get('optim', 'sgd')['weight_decay'],
                nesterov=self.configer.get('optim', 'sgd')['nesterov'])

        elif self.configer.get('optim', 'optim_method') == 'adam':
            optimizer = Adam(net_params,
                             lr=self.configer.get('lr', 'base_lr'),
                             betas=self.configer.get('optim', 'adam')['betas'],
                             eps=self.configer.get('optim', 'adam')['eps'],
                             weight_decay=self.configer.get(
                                 'optim', 'adam')['weight_decay'])

        else:
            Log.error('Optimizer {} is not valid.'.format(
                self.configer.get('optim', 'optim_method')))
            exit(1)

        policy = self.configer.get('lr', 'lr_policy')

        scheduler = None
        if policy == 'step':
            scheduler = lr_scheduler.StepLR(
                optimizer,
                self.configer.get('lr', 'step')['step_size'],
                gamma=self.configer.get('lr', 'step')['gamma'])

        elif policy == 'multistep':
            scheduler = lr_scheduler.MultiStepLR(
                optimizer,
                self.configer.get('lr', 'multistep')['stepvalue'],
                gamma=self.configer.get('lr', 'multistep')['gamma'])

        elif policy == 'lambda_poly':
            lambda_poly = lambda iters: pow(
                (1.0 - iters / self.configer.get('solver', 'max_iters')), 0.9)
            scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_poly)

        elif policy == 'lambda_cosine':
            lambda_cosine = lambda iters: (
                math.cos(math.pi * iters / self.configer.get(
                    'solver', 'max_iters')) + 1.0) / 2
            scheduler = lr_scheduler.LambdaLR(optimizer,
                                              lr_lambda=lambda_cosine)

        elif policy == 'plateau':
            scheduler = lr_scheduler.ReduceLROnPlateau(
                optimizer,
                mode=self.configer.get('lr', 'plateau')['mode'],
                factor=self.configer.get('lr', 'plateau')['factor'],
                patience=self.configer.get('lr', 'plateau')['patience'],
                threshold=self.configer.get('lr', 'plateau')['threshold'],
                threshold_mode=self.configer.get('lr', 'plateau')['thre_mode'],
                cooldown=self.configer.get('lr', 'plateau')['cooldown'],
                min_lr=self.configer.get('lr', 'plateau')['min_lr'],
                eps=self.configer.get('lr', 'plateau')['eps'])

        else:
            Log.error('Policy:{} is not valid.'.format(policy))
            exit(1)

        return optimizer, scheduler
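
# A minimal sketch of the 'lambda_poly' policy above: LambdaLR multiplies the
# base LR by a factor computed from the iteration count, here the common
# (1 - iters / max_iters) ** 0.9 polynomial decay. max_iters is illustrative,
# not a value read from the configer.
import torch
from torch import nn, optim
from torch.optim import lr_scheduler

max_iters = 1000
net = nn.Linear(4, 4)
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
scheduler = lr_scheduler.LambdaLR(
    optimizer, lr_lambda=lambda iters: (1.0 - iters / max_iters) ** 0.9)

for it in range(max_iters):
    optimizer.step()        # the actual parameter update would go here
    scheduler.step()
print(optimizer.param_groups[0]['lr'])  # reaches 0 once iters == max_iters
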
Exemplo n.º 25
0
def main(args):
    #load hyperparameters from configuration file
    with open(args.config) as config_file:
        hyp = json.load(config_file)['hyperparams'][args.model]
    #override configuration dropout
    if args.dropout > 0:
        hyp['dropout'] = args.dropout
    if args.question_injection >= 0:
        hyp['question_injection_position'] = args.question_injection

    print('Loaded hyperparameters from configuration {}, model: {}: {}'.format(
        args.config, args.model, hyp))

    args.model_dirs = './model_{}_drop{}_bstart{}_bstep{}_bgamma{}_bmax{}_lrstart{}_'+ \
                      'lrstep{}_lrgamma{}_lrmax{}_invquests-{}_clipnorm{}_glayers{}_qinj{}_fc1{}_fc2{}_seed{}'
    args.model_dirs = args.model_dirs.format(
        args.model, hyp['dropout'], args.batch_size, args.bs_step,
        args.bs_gamma, args.bs_max, args.lr, args.lr_step, args.lr_gamma,
        args.lr_max, args.invert_questions, args.clip_norm, hyp['g_layers'],
        hyp['question_injection_position'], hyp['f_fc1'], hyp['f_fc2'],
        args.seed)
    if not os.path.exists(args.model_dirs):
        os.makedirs(args.model_dirs)
    #create a file in this folder containing the overall configuration
    args_str = str(args)
    hyp_str = str(hyp)
    all_configuration = args_str + '\n\n' + hyp_str
    filename = os.path.join(args.model_dirs, 'config.txt')
    with open(filename, 'w') as config_file:
        config_file.write(all_configuration)

    args.features_dirs = './features'
    args.test_results_dir = './test_results'
    if not os.path.exists(args.test_results_dir):
        os.makedirs(args.test_results_dir)

    args.cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    print('Building word dictionaries from all the words in the dataset...')
    dictionaries = utils.build_dictionaries(args.clevr_dir)
    print('Word dictionary completed!')

    print('Initializing CLEVR dataset...')
    clevr_dataset_train, clevr_dataset_test = initialize_dataset(
        args.clevr_dir, dictionaries, hyp['state_description'])
    print('CLEVR dataset initialized!')

    # Build the model
    args.qdict_size = len(dictionaries[0])
    args.adict_size = len(dictionaries[1])

    model = RN(args, hyp)

    if torch.cuda.device_count() > 1 and args.cuda:
        model = torch.nn.DataParallel(model)
        model.module.cuda()  # call cuda() overridden method

    if args.cuda:
        model.cuda()

    start_epoch = 1
    if args.resume:
        filename = args.resume
        if os.path.isfile(filename):
            print('==> loading checkpoint {}'.format(filename))
            checkpoint = torch.load(filename)

            #removes 'module' from dict entries, pytorch bug #3805
            if torch.cuda.device_count() == 1 and any(
                    k.startswith('module.') for k in checkpoint.keys()):
                checkpoint = {
                    k.replace('module.', ''): v
                    for k, v in checkpoint.items()
                }
            if torch.cuda.device_count() > 1 and not any(
                    k.startswith('module.') for k in checkpoint.keys()):
                checkpoint = {'module.' + k: v for k, v in checkpoint.items()}

            model.load_state_dict(checkpoint)
            print('==> loaded checkpoint {}'.format(filename))
            start_epoch = int(
                re.match(r'.*epoch_(\d+).pth', args.resume).groups()[0]) + 1

    if args.conv_transfer_learn:
        if os.path.isfile(args.conv_transfer_learn):
            # TODO: there may be problems caused by pytorch issue #3805 if using DataParallel

            print('==> loading conv layer from {}'.format(
                args.conv_transfer_learn))
            # pretrained dict is the dictionary containing the already trained conv layer
            pretrained_dict = torch.load(args.conv_transfer_learn)

            if torch.cuda.device_count() == 1:
                conv_dict = model.conv.state_dict()
            else:
                conv_dict = model.module.conv.state_dict()

            # filter only the conv layer from the loaded dictionary
            conv_pretrained_dict = {
                k.replace('conv.', '', 1): v
                for k, v in pretrained_dict.items() if 'conv.' in k
            }

            # overwrite entries in the existing state dict
            conv_dict.update(conv_pretrained_dict)

            # load the new state dict
            if torch.cuda.device_count() == 1:
                model.conv.load_state_dict(conv_dict)
                params = model.conv.parameters()
            else:
                model.module.conv.load_state_dict(conv_dict)
                params = model.module.conv.parameters()

            # freeze the weights for the convolutional layer by disabling gradient evaluation
            # for param in params:
            #     param.requires_grad = False

            print("==> conv layer loaded!")
        else:
            print('Cannot load file {}'.format(args.conv_transfer_learn))

    progress_bar = trange(start_epoch, args.epochs + 1)
    if args.test:
        # perform a single test
        print('Testing epoch {}'.format(start_epoch))
        _, clevr_test_loader = reload_loaders(clevr_dataset_train,
                                              clevr_dataset_test,
                                              args.batch_size,
                                              args.test_batch_size,
                                              hyp['state_description'])
        test(clevr_test_loader, model, start_epoch, dictionaries, args)
    else:
        bs = args.batch_size

        # perform a full training
        #TODO: find a better solution for general lr scheduling policies
        # gamma is applied once per completed lr_step interval before the resume point
        candidate_lr = args.lr * args.lr_gamma**(
            (start_epoch - 1) // args.lr_step)
        lr = candidate_lr if candidate_lr <= args.lr_max else args.lr_max

        optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                      model.parameters()),
                               lr=lr,
                               weight_decay=1e-4)
        # scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5, min_lr=1e-6, verbose=True)
        scheduler = lr_scheduler.StepLR(optimizer,
                                        args.lr_step,
                                        gamma=args.lr_gamma)
        scheduler.last_epoch = start_epoch
        print('Training ({} epochs) is starting...'.format(args.epochs))
        for epoch in progress_bar:

            if (((args.bs_max > 0 and bs < args.bs_max) or args.bs_max < 0)
                    and (epoch % args.bs_step == 0 or epoch == start_epoch)):
                bs = math.floor(args.batch_size *
                                (args.bs_gamma**(epoch // args.bs_step)))
                if bs > args.bs_max and args.bs_max > 0:
                    bs = args.bs_max
                clevr_train_loader, clevr_test_loader = reload_loaders(
                    clevr_dataset_train, clevr_dataset_test, bs,
                    args.test_batch_size, hyp['state_description'])

                #restart optimizer in order to restart learning rate scheduler
                #for param_group in optimizer.param_groups:
                #    param_group['lr'] = args.lr
                #scheduler = lr_scheduler.CosineAnnealingLR(optimizer, step, min_lr)
                print('Dataset reinitialized with batch size {}'.format(bs))

            if ((args.lr_max > 0 and scheduler.get_lr()[0] < args.lr_max)
                    or args.lr_max < 0):
                scheduler.step()

            print('Current learning rate: {}'.format(
                optimizer.param_groups[0]['lr']))

            # TRAIN
            progress_bar.set_description('TRAIN')
            train(clevr_train_loader, model, optimizer, epoch, args)

            # TEST
            progress_bar.set_description('TEST')
            test(clevr_test_loader, model, epoch, dictionaries, args)

            # SAVE MODEL
            filename = 'RN_epoch_{:02d}.pth'.format(epoch)
            torch.save(model.state_dict(),
                       os.path.join(args.model_dirs, filename))
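
Note: when resuming, the code above recomputes the learning rate by hand and then overwrites scheduler.last_epoch, which only works for this particular StepLR setup. A more general sketch is to checkpoint the optimizer and scheduler state dicts together with the model and restore them on resume; the file name and checkpoint keys below are illustrative and not part of the example.

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler

model = nn.Linear(8, 2)                                  # stand-in for RN(args, hyp)
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)
scheduler = lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)

ckpt_path = 'checkpoint_last.pth'                        # illustrative file name

# save at the end of an epoch
torch.save({'epoch': 5,
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict()}, ckpt_path)

# resume: the state dicts carry the step count, so no manual lr arithmetic is needed
ckpt = torch.load(ckpt_path)
model.load_state_dict(ckpt['model'])
optimizer.load_state_dict(ckpt['optimizer'])
scheduler.load_state_dict(ckpt['scheduler'])
start_epoch = ckpt['epoch'] + 1
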
Exemplo n.º 26
0
    def run(self):
        dataset_fn = self.config['dataset_fn']
        dataset_root = self.config['dataset_root']
        learn_rate = self.config['learn_rate']
        learn_rate_step = self.config['learn_rate_step']
        log_dir = self.config['log_dir']
        model_fn = self.config['model_fn']
        num_epochs = self.config['num_epochs']
        report_scalar_freq = self.config['report_scalar_freq']
        save_epoch_freq = self.config['save_epoch_freq']
        save_step_freq = self.config['save_step_freq']
        valid_freq = self.config['valid_freq']
        weight_decay = self.config['weight_decay']

        save_prefix = dataset_fn + '_' + model_fn
        if self.run_name():
            save_prefix = save_prefix + '_' + self.run_name()

        if self.reporter is None:
            self.reporter = SummaryWriter(log_dir)

        train_data = {
            m: DATASETS[dataset_fn](dataset_root, m)
            for m in self.modes
        }
        self.prepare_dataset(train_data)
        num_categories = train_data[self.modes[0]].num_categories()

        print('[*] Number of categories:', num_categories)

        net = self.create_model(num_categories)
        net.print_params()

        data_loaders = self.create_data_loaders(train_data)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(net.params_to_optimize(
            weight_decay, self.weight_decay_excludes()),
                               lr=learn_rate)
        if learn_rate_step > 0:
            lr_exp_scheduler = lr_scheduler.StepLR(optimizer,
                                                   step_size=learn_rate_step,
                                                   gamma=0.5)
        else:
            lr_exp_scheduler = None

        best_accu = 0.0
        best_net = -1

        ckpt_prefix = self.checkpoint_prefix()
        ckpt_nets = self.config['ckpt_nets']
        if ckpt_prefix is not None:
            loaded_paths = net.load(ckpt_prefix, ckpt_nets)
            print('[*] Loaded pretrained model from {}'.format(loaded_paths))

        for epoch in range(1, num_epochs + 1):
            print('-' * 20)
            print('[*] Epoch {}/{}'.format(epoch, num_epochs))

            for mode in self.modes:
                is_train = mode == 'train'
                if not is_train and epoch % valid_freq != 0:
                    continue
                print('[*] Starting {} mode'.format(mode))

                if is_train:
                    if lr_exp_scheduler is not None:
                        lr_exp_scheduler.step()
                    net.train_mode()
                else:
                    net.eval_mode()

                running_corrects = 0
                num_samples = 0
                pbar = tqdm.tqdm(total=len(data_loaders[mode]))
                for bid, data_batch in enumerate(data_loaders[mode]):
                    self.step_counters[mode] += 1

                    logits, loss, gt_category = self.forward_batch(
                        net, data_batch, mode, optimizer, criterion)
                    _, predicts = torch.max(logits, 1)
                    predicts_accu = torch.sum(predicts == gt_category)
                    running_corrects += predicts_accu.item()

                    sampled_batch_size = gt_category.size(0)
                    num_samples += sampled_batch_size

                    if report_scalar_freq > 0 and self.step_counters[
                            mode] % report_scalar_freq == 0:
                        self.reporter.add_scalar('{}/loss'.format(mode),
                                                 loss.item(),
                                                 self.step_counters[mode])
                        self.reporter.add_scalar(
                            '{}/accuracy'.format(mode),
                            float(predicts_accu.data) / sampled_batch_size,
                            self.step_counters[mode])

                    if is_train and save_step_freq > 0 and self.step_counters[
                            mode] % save_step_freq == 0:
                        net.save(log_dir, self.step_counters[mode],
                                 save_prefix)
                    pbar.update()
                pbar.close()
                epoch_accu = float(running_corrects) / float(num_samples)

                if is_train:
                    if epoch % save_epoch_freq == 0:
                        print('[*]  {} accu: {:.4f}'.format(mode, epoch_accu))
                        net.save(log_dir, 'epoch_{}'.format(epoch),
                                 save_prefix)
                else:
                    print('[*]  {} accu: {:.4f}'.format(mode, epoch_accu))
                    if epoch_accu > best_accu:
                        best_accu = epoch_accu
                        best_net = epoch
        print('[*] Best accu: {:.4f}, corresponding epoch: {}'.format(
            best_accu, best_net))

        for m in self.modes:
            train_data[m].dispose()

        return best_accu
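
Note: run() above steps the scheduler at the start of each training epoch, which follows the pre-1.1 PyTorch convention. Since PyTorch 1.1 the documented order is to call scheduler.step() after the epoch's optimizer.step() calls. A minimal sketch of that ordering with a dummy model and random data (everything here is illustrative):

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler

model = nn.Linear(16, 4)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

for epoch in range(30):
    model.train()
    for _ in range(5):                      # dummy mini-batches of random data
        inputs = torch.randn(8, 16)
        targets = torch.randint(0, 4, (8,))
        optimizer.zero_grad()
        loss = criterion(model(inputs), targets)
        loss.backward()
        optimizer.step()
    scheduler.step()                        # once per epoch, after the optimizer steps
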
Exemplo n.º 27
0
import datetime
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader
from torchvision import transforms
from config import Config
from train import Trainer
from loss import BCFocalLoss
# xceptionAx3, CloudDataset and ToTensor come from project modules not shown in this snippet

cfig = Config()
net = xceptionAx3(num_classes=1)  #create CNN model.
criterion = nn.BCEWithLogitsLoss()  #define the loss

optimizer = optim.SGD(net.parameters(),
                      lr=0.0001,
                      momentum=0.9,
                      weight_decay=0.0001)  #select the optimizer

exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1)
# create the train_dataset_loader and val_dataset_loader.

train_transformed_dataset = CloudDataset(img_dir='data/images224',
                                         labels_dir='data/masks224/',
                                         transform=transforms.Compose(
                                             [ToTensor()]))

val_transformed_dataset = CloudDataset(img_dir='data/images224',
                                       labels_dir='data/masks224/',
                                       val=True,
                                       transform=transforms.Compose(
                                           [ToTensor()]))

train_dataloader = DataLoader(train_transformed_dataset,
                              batch_size=8,
Exemplo n.º 28
0
                                          shuffle=False,
                                          num_workers=0)
len_testdata = len(test_loader)

#dir_model = model_dir + "\\model_epoch400"
#fcn_model = torch.load(dir_model)

fcn_model = FCNmodel_3pool(n_class)
fcn_model.cuda()
criterion = nn.CrossEntropyLoss()
criterion.cuda()
optimizer = optim.SGD(fcn_model.parameters(),
                      lr=lr,
                      momentum=momentum,
                      weight_decay=L2_factor)
scheduler = lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)

use_gpu = torch.cuda.is_available()
num_gpu = list(range(torch.cuda.device_count()))
print('use_gpu:', use_gpu)
print('num_gpu:', num_gpu)
#pdb.set_trace()


def train_model():
    fcn_model.train()
    for epoch in epochs:
        scheduler.step()

        train_data = dataloader(training=True)
        train_loader = torch.utils.data.DataLoader(train_data,
Exemplo n.º 29
0
    model.eval()
    original_labels = []
    pred_lst = []

    with torch.no_grad():
        for i, (inputs, labels) in enumerate(dataloaders['val']):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            
            # move tensors to the CPU so sklearn's metrics can consume them
            original_labels.extend(labels.cpu().numpy())
            pred_lst.extend(preds.cpu().numpy())
            
    precision, recall, f1, support = prfs(original_labels, pred_lst, average='weighted')

    print("Precision: {:.2%}\nRecall: {:.2%}\nF1 score: {:.2%}".format(precision, recall, f1))


model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 16)
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,  num_epochs=20)
visualize_model(model_ft, num_images=20)
success_metrics(model_ft)
torch.save(model_ft, "./tlmodelv3.1")
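
Note: the snippet above fine-tunes every parameter of resnet18. A common variant, sketched below under the assumption of the same 16-class head (it is not part of the example), freezes the backbone and optimizes only the new classifier while keeping the same StepLR schedule.

import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import models

model_conv = models.resnet18(pretrained=True)
for param in model_conv.parameters():
    param.requires_grad = False                       # freeze the backbone

# the replaced head is trainable by default
model_conv.fc = nn.Linear(model_conv.fc.in_features, 16)

# only the head's parameters are handed to the optimizer
optimizer_conv = optim.SGD(model_conv.fc.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)

Saving model_conv.state_dict() rather than the whole module with torch.save is also the more portable way to persist the trained weights.
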
Exemplo n.º 30
0
    elif args.optim == 'SGD':
        optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                     model.parameters()),
                              lr=rate,
                              momentum=args.momentum)
        optimizer_pretrain = optim.SGD(filter(lambda p: p.requires_grad,
                                              model.parameters()),
                                       lr=rate_pretrain,
                                       momentum=0.9)
    else:
        raise Exception(f'Invalid optimizer {args.optim} selected.')

    optimizers = [optimizer, optimizer_pretrain]

    scheduler = lr_scheduler.StepLR(optimizer,
                                    step_size=sched_step,
                                    gamma=sched_gamma)
    scheduler_pretrain = lr_scheduler.StepLR(optimizer_pretrain,
                                             step_size=sched_step_pretrain,
                                             gamma=sched_gamma_pretrain)

    schedulers = [scheduler, scheduler_pretrain]

    utils.print_both(f, 'Mode: {}\n'.format(args.mode))

    if args.mode == 'train_full':
        model = training_functions.train_model(model, dataloader, criteria,
                                               optimizers, schedulers, epochs,
                                               params)
    elif args.mode == 'pretrain':
        model = training_functions.pretraining(model, dataloader, criteria[0],