Example #1
def augment(out_dir, chkpt_path, train_loader, valid_loader, model, writer,
            logger, device, config):

    w_optim = utils.get_optim(model.weights(), config.w_optim)

    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        w_optim, config.epochs, eta_min=config.w_optim.lr_min)

    init_epoch = -1

    if chkpt_path is not None:
        logger.info("Resuming from checkpoint: %s" % chkpt_path)
        checkpoint = torch.load(chkpt_path)
        model.load_state_dict(checkpoint['model'])
        w_optim.load_state_dict(checkpoint['w_optim'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        init_epoch = checkpoint['epoch']
    else:
        logger.info("Starting new training run")

    logger.info("Model params count: {:.3f} M, size: {:.3f} MB".format(
        utils.param_size(model), utils.param_count(model)))

    # training loop
    logger.info('begin training')
    best_top1 = 0.
    tot_epochs = config.epochs
    for epoch in range(init_epoch + 1, tot_epochs):

        drop_prob = config.drop_path_prob * epoch / tot_epochs
        model.drop_path_prob(drop_prob)

        lr = lr_scheduler.get_lr()[0]

        # training
        train(train_loader, None, model, writer, logger, None, w_optim, None,
              lr, epoch, tot_epochs, device, config)
        lr_scheduler.step()

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        top1 = validate(valid_loader, model, writer, logger, epoch, tot_epochs,
                        cur_step, device, config)

        # save
        if best_top1 < top1:
            best_top1 = top1
            is_best = True
        else:
            is_best = False

        if config.save_freq != 0 and epoch % config.save_freq == 0:
            save_checkpoint(out_dir, model, w_optim, None, lr_scheduler, epoch,
                            logger)

        print("")

    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
    tprof.stat_acc('model_' + NASModule.get_device()[0])
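
Every example in this listing reports the model footprint through utils.param_size (and, here, utils.param_count as well). A minimal sketch of what such helpers typically compute, assuming 4-byte float32 weights; the exact implementation differs per repository:

import torch.nn as nn

def param_count(model: nn.Module) -> float:
    # number of trainable parameters, in millions
    return sum(p.numel() for p in model.parameters() if p.requires_grad) / 1e6

def param_size(model: nn.Module) -> float:
    # approximate size of trainable parameters in MB, assuming float32 (4 bytes each)
    n_bytes = 4 * sum(p.numel() for p in model.parameters() if p.requires_grad)
    return n_bytes / (1024 ** 2)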
Example #2
    def __init__(self):
        self.config = SearchConfig()
        self.writer = None
        if self.config.tb_dir != "":
            from torch.utils.tensorboard import SummaryWriter
            self.writer = SummaryWriter(self.config.tb_dir, flush_secs=20)
        init_gpu_params(self.config)
        set_seed(self.config)
        self.logger = FileLogger('./log', self.config.is_master,
                                 self.config.is_master)
        self.load_data()
        self.logger.info(self.config)
        self.model = SearchCNNController(self.config, self.n_classes,
                                         self.output_mode)
        self.load_model()
        self.init_kd_component()
        if self.config.n_gpu > 0:
            self.model.to(device)
        if self.config.n_gpu > 1:
            self.model = torch.nn.parallel.DistributedDataParallel(
                self.model,
                device_ids=[self.config.local_rank],
                find_unused_parameters=True)
        self.model_to_print = self.model if self.config.multi_gpu is False else self.model.module
        self.architect = Architect(self.model, self.teacher_model, self.config,
                                   self.emd_tool)
        mb_params = param_size(self.model)
        self.logger.info("Model size = {:.3f} MB".format(mb_params))
        self.eval_result_map = []
        self.init_optim()
Example #3
def main():
    if not torch.cuda.is_available():
        logger.info("no gpu device available")
        sys.exit(1)

    logger.info("*** Begin {} ***".format(config.stage))

    # set default gpu device
    torch.cuda.set_device(config.gpus[0])

    # set random seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True

    # get data with meta info
    logger.info("preparing data...")
    input_size, channels_in, num_classes, train_data, valid_data = \
        load_dataset(dataset=config.dataset,
                     data_dir=config.data_dir,
                     cutout_length=config.cutout_length,
                     validation=True,
                     auto_aug=config.auto_aug)

    valid_loader = torch.utils.data.DataLoader(dataset=valid_data,
                                               batch_size=config.batch_size,
                                               shuffle=False,
                                               num_workers=config.num_workers,
                                               pin_memory=True)

    logger.info("loading model...")
    if config.load_model_dir is not None:
        model = torch.load(config.load_model_dir)
    else:
        model = utils.load_checkpoint(config.model_dir)
    model = model.to(device)

    model_size = utils.param_size(model)
    logger.info("model_size: {:.3f} MB".format(model_size))

    if config.label_smooth > 0:
        criterion = utils.CrossEntropyLabelSmooth(num_classes,
                                                  config.label_smooth)
    else:
        criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(device)

    logger.info("start testing...")
    best_top1 = test(valid_loader, model, criterion)

    logger.info("Final Prec@1: {:.4%}".format(best_top1))
    logger.info("*** Finish {} ***".format(config.stage))
Example #4
    def make_param_declaration(self, assign_node):
        lhs = assign_node.targets[0].id
        size = u.param_size(assign_node)

        decl = u.stmt_from_str(
            "self._m_%s = tf.Variable(init_params(%s), name='%s')" %
            (lhs, size, lhs))
        softmax = u.stmt_from_str(
            "%s = tpt.softmax(self._m_%s, scope='%s_softmax')" %
            (lhs, lhs, lhs))
        return [decl, softmax], ("self._m_%s" % lhs, lhs)
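
Here u.param_size extracts a shape from the AST assignment rather than measuring a model. For a hypothetical assignment to a variable named alpha whose extracted size is (14, 8), the two generated statements would render roughly as:

self._m_alpha = tf.Variable(init_params((14, 8)), name='alpha')
alpha = tpt.softmax(self._m_alpha, scope='alpha_softmax')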
Example #5
def get_model(config, device, dev_list, genotype=None):
    mtype = config.type
    configure_ops(config)
    if mtype in model_creator:
        config.augment = genotype is not None
        net, arch = model_creator[mtype](config)
        crit = get_net_crit(config).to(device)
        prim = gt.get_primitives()
        model = NASController(config, net, crit, prim, dev_list).to(device)
        if config.augment:
            print("genotype = {}".format(genotype))
            model.build_from_genotype(genotype)
            model.to(device=device)
        if config.verbose: print(model)
        mb_params = param_size(model)
        n_params = param_count(model)
        print("Model params count: {:.3f} M, size: {:.3f} MB".format(n_params, mb_params))
        NASModule.set_device(dev_list)
        return model, arch
    else:
        raise Exception("invalid model type")
Example #6
def evaluation(sample, name):
    geno = eval(convert_sample_to_genotype(sample))
    logger.info('Model sample: {}'.format(sample))
    logger.info('Genotype: {}'.format(str(geno)))

    # get data with meta info
    input_size, input_channels, n_classes, train_data, valid_data = utils.get_data(
        'cifar10',
        args.data_path,
        config['image_size'],
        config['cutout_length'],
        validation=True)

    criterion = nn.CrossEntropyLoss().to(device)
    use_aux = True

    # change size of input image
    input_size = config['image_size']

    model = AugmentCNN(input_size, input_channels, config['init_channels'], 10,
                       config['layers'], True, geno)
    mb_params = utils.param_size(model)
    logger.info("Model size = {:.3f} MB".format(mb_params))
    model = nn.DataParallel(model, device_ids=[0]).to(device)

    # weights optimizer
    optimizer = torch.optim.SGD(model.parameters(),
                                config['lr'],
                                momentum=0.9,
                                weight_decay=3e-4)

    # get data loader
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=config['batch_size'], \
                                               shuffle=True, num_workers=4, pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=config['batch_size'], \
                                               shuffle=True, num_workers=4, pin_memory=True)

    # lr scheduler
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, config['epochs'])

    best_top1 = 0.
    len_train_loader = len(train_loader)

    # training loop
    for epoch in range(config['epochs']):
        lr_scheduler.step()
        drop_prob = 0.2 * epoch / config['epochs']
        model.module.drop_path_prob(drop_prob, config['fp'])

        train(train_loader, model, optimizer, criterion, epoch)

        cur_step = (epoch + 1) * len_train_loader
        top1 = validate(valid_loader, model, criterion, epoch, cur_step)

        if best_top1 < top1:
            best_top1 = top1
            is_best = True
        else:
            is_best = False
        # utils.save_checkpoint(model, config.path, is_best)

    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
    return best_top1, geno
Example #7
def main():
    logger.info("Logger is set - training start")

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True

    # get data with meta info
    input_size, input_channels, n_classes, train_data, valid_data = utils.get_data(
        config.dataset, config.data_path, config.cutout_length, validation=True)

    criterion = nn.CrossEntropyLoss().to(device)
    use_aux = config.aux_weight > 0.
    model = AugmentCNN(input_size, input_channels, config.init_channels, n_classes, config.layers,
                       use_aux, config.genotype)
    model = nn.DataParallel(model, device_ids=config.gpus).to(device)

    # model size
    mb_params = utils.param_size(model)
    logger.info("Model size = {:.3f} MB".format(mb_params))

    # weights optimizer
    optimizer = torch.optim.SGD(model.parameters(), config.lr, momentum=config.momentum,
                                weight_decay=config.weight_decay)

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               shuffle=True,
                                               num_workers=config.workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(valid_data,
                                               batch_size=config.batch_size,
                                               shuffle=False,
                                               num_workers=config.workers,
                                               pin_memory=True)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, config.epochs)

    best_top1 = 0.
    # training loop
    for epoch in range(config.epochs):
        lr_scheduler.step()
        drop_prob = config.drop_path_prob * epoch / config.epochs
        model.module.drop_path_prob(drop_prob)

        # training
        train(train_loader, model, optimizer, criterion, epoch)

        # validation
        cur_step = (epoch+1) * len(train_loader)
        top1 = validate(valid_loader, model, criterion, epoch, cur_step)

        # save
        if best_top1 < top1:
            best_top1 = top1
            is_best = True
        else:
            is_best = False

        timebudget.report()

        utils.save_checkpoint(model, config.path, is_best)

        print("")

    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
Example #8
def main():
    config = RetrainConfig()
    main_proc = not config.distributed or config.local_rank == 0
    if config.distributed:
        torch.cuda.set_device(config.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method=config.dist_url,
                                             rank=config.local_rank,
                                             world_size=config.world_size)
    if main_proc:
        os.makedirs(config.output_path, exist_ok=True)
    if config.distributed:
        torch.distributed.barrier()
    logger = utils.get_logger(os.path.join(config.output_path, 'search.log'))
    if main_proc:
        config.print_params(logger.info)
    utils.reset_seed(config.seed)

    loaders, samplers = get_augment_datasets(config)
    train_loader, valid_loader = loaders
    train_sampler, valid_sampler = samplers
    train_loader = CyclicIterator(train_loader, train_sampler)
    # valid_loader = CyclicIterator(valid_loader, valid_sampler, False)

    model = Model(config.dataset,
                  config.layers,
                  in_channels=config.input_channels,
                  channels=config.init_channels,
                  retrain=True).cuda()
    if config.label_smooth > 0:
        criterion = utils.CrossEntropyLabelSmooth(config.n_classes,
                                                  config.label_smooth)
    else:
        criterion = nn.CrossEntropyLoss()

    fixed_arc_path = os.path.join('', config.arc_checkpoint)
    with open(fixed_arc_path, "r") as f:
        fixed_arc = json.load(f)
    fixed_arc = utils.encode_tensor(fixed_arc, torch.device("cuda"))
    genotypes = utils.parse_results(fixed_arc, n_nodes=4)
    genotypes_dict = {i: genotypes for i in range(3)}
    apply_fixed_architecture(model, fixed_arc_path)
    param_size = utils.param_size(model, criterion, [3, 512, 512])

    if main_proc:
        logger.info("Param size: %.6f", param_size)
        logger.info("Genotype: %s", genotypes)

    # change training hyper parameters according to cell type
    if 'cifar' in config.dataset:
        if param_size < 3.0:
            config.weight_decay = 3e-4
            config.drop_path_prob = 0.2
        elif param_size < 3.5:
            config.weight_decay = 3e-4
            config.drop_path_prob = 0.3
        else:
            config.weight_decay = 5e-4
            config.drop_path_prob = 0.3

    if config.distributed:
        model = apex.parallel.convert_syncbn_model(model)
        model = DistributedDataParallel(model, delay_allreduce=True)

    optimizer = torch.optim.AdamW(model.parameters(), config.lr)
    # lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, config.epochs, eta_min=1E-6)

    best_top1 = 0.
    epoch = 0
    try:
        checkpoint = torch.load(config.model_checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        epoch = checkpoint['epoch']
        loss = checkpoint['loss']

        model.eval()
        print("----------------------------")
        print("MODEL LOADED FROM CHECKPOINT" + config.model_checkpoint)
        print("----------------------------")
    except Exception:
        print("----------------------------")
        print("MODEL NOT LOADED FROM CHECKPOINT")
        print("----------------------------")

    # for epoch in range(0, epoch):
    # lr_scheduler.step()

    for epoch in range(epoch, config.epochs):
        drop_prob = config.drop_path_prob * epoch / config.epochs
        if config.distributed:
            model.module.drop_path_prob(drop_prob)
        else:
            model.drop_path_prob(drop_prob)
        # training
        if config.distributed:
            train_sampler.set_epoch(epoch)
        train(logger, config, train_loader, model, optimizer, criterion, epoch,
              main_proc)
        if (epoch % config.log_frequency == 0):
            # validation
            top1 = validate(logger, config, valid_loader, model, criterion,
                            epoch, main_proc)
            best_top1 = max(best_top1, top1)
            # lr_scheduler.step()
            logger.info("Final best Prec@1 = %.4f", best_top1)
Example #9
def exp(args, fold_idx, train_set, test_set):

    path = args.save_root + args.result_dir

    if not os.path.isdir(path):
        os.makedirs(path)
        os.makedirs(path + '/models')
        os.makedirs(path + '/logs')

    logger = eegdg_logger(path + f'/logs/{fold_idx}')

    with open(path + '/args.txt', 'w') as f:
        f.write(str(args))

    import torch.cuda
    cuda = torch.cuda.is_available()
    # check if GPU is available, if True chooses to use it
    device = 'cuda' if cuda else 'cpu'

    if cuda:
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    seed = args.seed
    random.seed(seed)
    torch.manual_seed(seed)
    if cuda:
        torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)

    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=args.batch_size,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=args.batch_size,
                                              shuffle=False)

    model = models.get_model(args)
    # model = FcClfNet(embedding_net)
    # model  = torch.nn.DataParallel(model)

    mb_params = utils.param_size(model)
    print(f"Model size = {mb_params:.4f} MB")
    if cuda:
        model.cuda(device=device)
    print(model)
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=args.lr,
                                    weight_decay=0.01)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                           T_max=args.epochs -
                                                           1)

    results_columns = [
        f'valid_loss', f'test_loss', f'valid_accuracy', f'test_accuracy'
    ]
    df = pd.DataFrame(columns=results_columns)

    valid_acc = 0
    best_acc = 0
    max_acc = 0

    for epochidx in range(1, args.epochs):
        print(epochidx)
        start = time.time()
        train(10, model, device, train_loader, optimizer, scheduler, cuda,
              args.gpuidx)
        print(f'total time: {time.time()-start}')
        # utils.blockPrint()
        train_loss, train_score = eval(model, device, train_loader)
        test_loss, test_score = eval(model, device, test_loader)
        # utils.enablePrint()
        scheduler.step()
        lr = scheduler.get_last_lr()[0]
        #
        # lrs = []
        # for i in range(100):
        #     scheduler.step()
        #     lr = scheduler.get_last_lr()[0]
        #     lrs.append(lr)
        #
        # import matplotlib.pyplot as plt
        # plt.plot(lrs)
        # plt.show()

        print(f'LR : {lr}')
        logger.log_training(train_loss, train_score, test_loss, test_score, lr,
                            epochidx)

        if test_score >= max_acc:
            max_acc = test_score
            torch.save(
                model.state_dict(),
                os.path.join(path, 'models', f"model_fold{fold_idx}_max.pt"))
            max_epoch = epochidx

        torch.save(
            model.state_dict(),
            os.path.join(path, 'models', f"model_fold{fold_idx}_last.pt"))
        print(f'current max acc : {max_acc:.4f} at epoch {max_epoch}')

    best_model = models.get_model(args)
    best_model.load_state_dict(
        torch.load(os.path.join(path, 'models',
                                f"model_fold{fold_idx}_last.pt"),
                   map_location=device))
    if cuda:
        best_model.cuda(device=device)

    print("last accuracy")
    _, _ = eval(best_model, device, test_loader)

    df = utils.get_testset_accuracy(best_model, device, test_set, args)
    logger.close()
    return df
Example #10
def exp(args, fold_idx, train_set, valid_set, test_set):
    path = args.save_root + args.result_dir
    if not os.path.isdir(path):
        os.makedirs(path)
        os.makedirs(path + '/models')

    with open(path + '/args.txt', 'w') as f:
        f.write(str(args))

    import torch.cuda
    cuda = torch.cuda.is_available()
    device = 'cuda' if cuda else 'cpu'

    if cuda:
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    seed = args.seed
    random.seed(seed)
    torch.manual_seed(seed)
    if cuda:
        torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)

    train_loader = torch.utils.data.DataLoader(train_set, batch_size=args.batch_size, shuffle=True, pin_memory=False)
    valid_loader = torch.utils.data.DataLoader(valid_set, batch_size=args.batch_size, shuffle=False)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=args.batch_size, shuffle=False)

    temp = torch.utils.data.Subset(train_set, range(100, 200))

    model = Net(args.n_class, args.n_ch, args.n_time)

    mb_params = utils.param_size(model)
    print(f"Model size = {mb_params:.4f} MB")
    if cuda:
        model.cuda(device=device)
    print(model)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epochs)

    results_columns = [f'valid_loss', f'test_loss', f'valid_accuracy', f'test_accuracy']
    df = pd.DataFrame(columns=results_columns)

    valid_min_loss = float('inf')
    best_acc_loss = 0

    for epochidx in range(1, args.epochs):
        print(epochidx)
        train(10, model, device, train_loader, optimizer, scheduler, cuda, args.gpuidx)
        valid_loss, valid_score, _ = eval(model, device, valid_loader)
        test_loss, test_score, _ = eval(model, device, test_loader)

        results = {f'valid_loss': valid_loss, f'test_loss': test_loss, f'valid_accuracy': valid_score,
                   f'test_accuracy': test_score}
        df = df.append(results, ignore_index=True)
        print(results)

        scheduler.step()
        lr = scheduler.get_last_lr()[0]

        print(f'LR : {lr}')
        if valid_loss < valid_min_loss:
            valid_min_loss = valid_loss
            best_acc_loss = test_score
            torch.save(model.state_dict(), os.path.join(
                path, 'models',
                f"model_fold{fold_idx}.pt"))
            best_loss_epoch = epochidx
        print(f'current best(loss) acc : {best_acc_loss:.4f} at epoch {best_loss_epoch}')

    best_model = Net(args.n_class, args.n_ch, args.n_time)
    best_model.load_state_dict(torch.load(os.path.join(
        path, 'models',
        f"model_fold{fold_idx}.pt"), map_location=device))
    if cuda:
        best_model.cuda(device=device)

    print("best accuracy")
    _, test_score, _ = eval(best_model, device, test_loader)

    utils.enablePrint()
    print(f"subject:{fold_idx}, acc:{test_score}")

    df = pd.DataFrame(np.array(test_score).reshape(-1, 1), columns=['sess2-on'])
    print(f"all acc: {np.mean(test_score):.4f}")

    return df
Example #11
def main():
    logger.info("Logger is set - training start")
    fileRoot = r'/home/hlu/Data/VIPL'
    saveRoot = r'/home/hlu/Data/VIPL_STMap' + str(config.fold_num) + str(config.fold_index)
    n_classes = 1
    input_channels = 3
    input_size = np.array([64, 300])
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    toTensor = transforms.ToTensor()
    resize = transforms.Resize(size=(64, 300))
    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])
    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)
    # net acc
    torch.backends.cudnn.benchmark = True
    # get data with meta info
    if config.reData == 1:
        test_index, train_index = MyDataset.CrossValidation(fileRoot, fold_num=config.fold_num,
                                                            fold_index=config.fold_index)
        Train_Indexa = MyDataset.getIndex(fileRoot, train_index, saveRoot + '_Train', 'STMap_YUV_Align_CSI_POS.png', 15, 300)
        Test_Indexa = MyDataset.getIndex(fileRoot, test_index, saveRoot + '_Test', 'STMap_YUV_Align_CSI_POS.png', 15, 300)
    train_data = MyDataset.Data_STMap(root_dir=(saveRoot + '_Train'), frames_num=300,
                                      transform=transforms.Compose([resize, toTensor, normalize]))
    valid_data = MyDataset.Data_STMap(root_dir=(saveRoot + '_Test'), frames_num=300,
                                      transform=transforms.Compose([resize, toTensor, normalize]))
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               shuffle=True,
                                               num_workers=config.workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(valid_data,
                                               batch_size=config.batch_size,
                                               shuffle=False,
                                               num_workers=config.workers,
                                               pin_memory=True)
    # loss
    criterion = nn.L1Loss().to(device)
    # net
    Model_name = config.name + 'fn' + str(config.fold_num) + 'fi' + str(config.fold_index)
    use_aux = config.aux_weight > 0.
    if config.reTrain == 1:
        model = torch.load(os.path.join(config.path, Model_name + 'best.pth.tar'), map_location=device)
        print('load ' + Model_name + ' right')
        model = nn.DataParallel(model, device_ids=config.gpus).to(device)
    else:
        model = AugmentCNN(input_size, input_channels, config.init_channels, n_classes, config.layers,
                           use_aux, config.genotype)
        model._init_weight()
        model = nn.DataParallel(model, device_ids=config.gpus).to(device)

    # model size
    mb_params = utils.param_size(model)
    logger.info("Model size = {:.3f} MB".format(mb_params))
    # weights optimizer
    optimizer = torch.optim.Adam(model.parameters(), config.lr)
    best_losses = 10

    # training loop
    for epoch in range(config.epochs):
        # training
        train(train_loader, model, optimizer, criterion, epoch)
        # validation
        cur_step = (epoch+1) * len(train_loader)
        best_losses = validate(valid_loader, model, criterion, epoch, cur_step, best_losses)
    logger.info("Final best Losses@1 = {:.4%}".format(best_losses))
Example #12
def main():
    logger.info("Logger is set - training start")
    logger.info("Torch version is: {}".format(torch.__version__))
    logger.info("Torch_vision version is: {}".format(torchvision.__version__))

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    # using deterministic
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.enabled = True

    # get data with meta info
    input_size, input_channels, n_classes, train_data, valid_data = utils.get_data(
        config.dataset,
        config.data_path,
        config.cutout_length,
        validation=True)

    criterion = nn.CrossEntropyLoss().to(device)
    use_aux = config.aux_weight > 0.
    model = AugmentCNN(input_size, input_channels, config.init_channels,
                       n_classes, config.layers, use_aux, config.genotype)
    # model size
    mb_params = utils.param_size(model)
    logger.info("Model size = {:.3f} MB".format(mb_params))
    model = nn.DataParallel(model, device_ids=config.gpus).to(device)

    # weights optimizer
    optimizer = torch.optim.SGD(model.parameters(),
                                config.lr,
                                momentum=config.momentum,
                                weight_decay=config.weight_decay)

    # get data loader
    if config.data_loader_type == 'Torch':
        train_loader = torch.utils.data.DataLoader(
            train_data,
            batch_size=config.batch_size,
            shuffle=True,
            num_workers=config.workers,
            pin_memory=True)
        valid_loader = torch.utils.data.DataLoader(
            valid_data,
            batch_size=config.batch_size,
            shuffle=False,
            num_workers=config.workers,
            pin_memory=True)
    elif config.data_loader_type == 'DALI':
        config.dataset = config.dataset.lower()
        if config.dataset == 'cifar10':
            from DataLoaders_DALI import cifar10
            train_loader = cifar10.get_cifar_iter_dali(
                type='train',
                image_dir=config.data_path,
                batch_size=config.batch_size,
                num_threads=config.workers)
            valid_loader = cifar10.get_cifar_iter_dali(
                type='val',
                image_dir=config.data_path,
                batch_size=config.batch_size,
                num_threads=config.workers)
        else:
            raise NotImplementedError
    else:
        raise NotImplementedError
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, config.epochs)

    best_top1 = 0.
    if config.data_loader_type == 'DALI':
        len_train_loader = get_train_loader_len(config.dataset.lower(),
                                                config.batch_size,
                                                is_train=True)
    else:
        len_train_loader = len(train_loader)
    # training loop
    for epoch in range(config.epochs):
        lr_scheduler.step()
        drop_prob = config.drop_path_prob * epoch / config.epochs
        model.module.drop_path_prob(drop_prob)

        # training
        train(train_loader, model, optimizer, criterion, epoch)

        # validation
        cur_step = (epoch + 1) * len_train_loader
        top1 = validate(valid_loader, model, criterion, epoch, cur_step)

        # save
        if best_top1 < top1:
            best_top1 = top1
            is_best = True
        else:
            is_best = False
        utils.save_checkpoint(model, config.path, is_best)

        print("")

    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
Example #13
def exp(args, fold_idx, train_set, valid_set, test_set):

    path = args.save_root + args.result_dir
    if not os.path.isdir(path):
        os.makedirs(path)
        os.makedirs(path + '/models')
        os.makedirs(path + '/logs')

    logger = eegdg_logger(path + f'/logs/{fold_idx}')

    with open(path + '/args.txt', 'w') as f:
        f.write(str(args))

    import torch.cuda
    cuda = torch.cuda.is_available()
    # check if GPU is available, if True chooses to use it
    device = 'cuda' if cuda else 'cpu'

    if cuda:
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    seed = args.seed
    random.seed(seed)
    torch.manual_seed(seed)
    if cuda:
        torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)

    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=args.batch_size,
                                               shuffle=True)
    valid_loader = torch.utils.data.DataLoader(valid_set,
                                               batch_size=args.batch_size,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=args.batch_size,
                                              shuffle=False)

    model = models.get_model(args)
    # model = FcClfNet(embedding_net)
    # model  = torch.nn.DataParallel(model)

    mb_params = utils.param_size(model)
    print(f"Model size = {mb_params:.4f} MB")
    if cuda:
        model.cuda(device=device)
    print(model)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                weight_decay=1e-4,
                                momentum=0.9)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                           T_max=args.epochs -
                                                           1)

    results_columns = [
        f'valid_loss', f'test_loss', f'valid_accuracy', f'test_accuracy'
    ]
    df = pd.DataFrame(columns=results_columns)

    valid_acc = 0
    valid_min_loss = 100
    best_acc = 0
    best_acc_loss = 0
    max_acc = 0

    n_epochs_stop = 200
    epochs_no_improve = 0
    early_stop = False

    for epochidx in range(1, args.epochs):
        print(epochidx)
        start = time.time()
        train(10, model, device, train_loader, optimizer, scheduler, cuda,
              args.gpuidx)
        print(f'total time: {time.time()-start}')
        utils.blockPrint()
        train_loss, train_score = eval(model, device, train_loader)
        valid_loss, valid_score = eval(model, device, valid_loader)
        test_loss, test_score = eval(model, device, test_loader)
        utils.enablePrint()

        scheduler.step()
        lr = scheduler.get_last_lr()[0]

        print(f'LR : {lr}')
        logger.log_training(train_loss, train_score, test_loss, test_score, lr,
                            epochidx)

        results = {
            f'valid_loss': valid_loss,
            f'test_loss': test_loss,
            f'valid_accuracy': valid_score,
            f'test_accuracy': test_score
        }
        df = df.append(results, ignore_index=True)
        print(results)

        if valid_score > valid_acc:
            valid_acc = valid_score
            best_acc = test_score
            torch.save(
                model.state_dict(),
                os.path.join(path, 'models', f"model_fold{fold_idx}_best.pt"))
            best_epoch = epochidx

        if valid_loss < valid_min_loss:  # model has improved
            valid_min_loss = valid_loss
            best_acc_loss = test_score
            torch.save(
                model.state_dict(),
                os.path.join(path, 'models',
                             f"model_fold{fold_idx}_best(loss).pt"))
            best_loss_epoch = epochidx
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        if test_score > max_acc:
            max_acc = test_score
            torch.save(
                model.state_dict(),
                os.path.join(path, 'models', f"model_fold{fold_idx}_max.pt"))
            max_epoch = epochidx

        print(f'current best acc : {best_acc:.4f} at epoch {best_epoch}')
        print(
            f'current best(loss) acc : {best_acc_loss:.4f} at epoch {best_loss_epoch}'
        )
        print(f'current max acc : {max_acc:.4f} at epoch {max_epoch}')

        if epochidx > 5 and epochs_no_improve == n_epochs_stop:
            print('Early stopping!')
            early_stop = True
            break
        else:
            continue

    if early_stop:
        print("Stopped")

    best_model = models.get_model(args)
    best_model.load_state_dict(
        torch.load(os.path.join(path, 'models',
                                f"model_fold{fold_idx}_best.pt"),
                   map_location=device))
    if cuda:
        best_model.cuda(device=device)

    print("best accuracy")
    _, _ = eval(best_model, device, test_loader)

    df = utils.get_testset_accuracy(best_model, device, test_set, args)

    return df
Example #14
    def evaluate(self, trial_no, trial_hyperparams):
        """Evaluates objective function

        Trains the child model k times with same augmentation hyperparameters.
        k is determined by the user by `opt_samples` argument.

        Args:
            trial_no (int): trial number; needed for recording to the notebook
            trial_hyperparams (list): augmentation hyperparameters to evaluate
        Returns:
            float: trial-cost = 1 - avg. rewards from samples
        """

        augmented_data = augment_by_policy(self.data["X_train"],
                                           self.data["y_train"],
                                           *trial_hyperparams)

        sample_rewards = []
        #pytorch
        layers = 2
        init_channels = 24
        use_aux = True
        epochs = 30
        lr = 0.01
        momentum = 0.995
        weight_decay = 0.995
        drop_path_prob = 0.2
        genotype = "Genotype(normal=[[('dil_conv_3x3', 0), ('sep_conv_5x5', 1)], [('sep_conv_3x3', 1), ('avg_pool_3x3', 0)],[('dil_conv_3x3', 1), ('dil_conv_3x3', 0)], [('sep_conv_3x3', 3), ('skip_connect', 1)]], normal_concat=range(2, 6), reduce=[[('sep_conv_3x3', 1), ('dil_conv_5x5', 0)], [('skip_connect', 0), ('sep_conv_5x5', 1)], [('sep_conv_5x5', 1),('sep_conv_5x5', 0)], [('max_pool_3x3', 1), ('sep_conv_3x3', 0)]], reduce_concat=range(2, 6))"
        model = AugmentCNN(self.input_size, self.input_channels, init_channels,
                           self.n_classes, layers, use_aux, genotype)
        model = nn.DataParallel(model, device_ids=[0]).to(device)

        # model size
        mb_params = utils.param_size(model)
        logger.info("Model size = {:.3f} MB".format(mb_params))

        # weights optimizer
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr,
                                    momentum=momentum,
                                    weight_decay=weight_decay)
        a = 2 / 0
        """
        for sample_no in range(1, self.opt_samples + 1):
            self.child_model.load_pre_augment_weights()
            # TRAIN
            history = self.child_model.fit(self.data, augmented_data)
            #
            reward = self.calculate_reward(history)
            sample_rewards.append(reward)
            self.notebook.record(
                trial_no, trial_hyperparams, sample_no, reward, history
            )

        """
        best_top1 = -9999
        for epoch in range(epochs):
            lr_scheduler.step()
            drop_prob = drop_path_prob * epoch / epochs
            model.module.drop_path_prob(drop_prob)

            # training
            train(train_loader, model, optimizer, criterion, epoch)

            # validation
            cur_step = (epoch + 1) * len(train_loader)
            top1 = validate(valid_loader, model, criterion, epoch, cur_step)

            # save
            if best_top1 < top1:
                best_top1 = top1
                is_best = True
            else:
                is_best = False
        print('best_top1:', best_top1)
        #sample_rewards.append(reward)
        #self.notebook.record(
        #    trial_no, trial_hyperparams, sample_no, reward, history
        #)
        #trial_cost = 1 - np.mean(sample_rewards)
        #self.notebook.save()

        log_and_print(
            f"{str(trial_no)}, {str(trial_cost)}, {str(trial_hyperparams)}",
            self.logging,
        )

        #return trial_cost
        return best_top1
Example #15
def main():
    logger.info("Logger is set - training start")

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True

    # get data with meta info
    input_size, input_channels, n_classes, train_data, valid_data = utils.get_data(
        config.dataset,
        config.data_path,
        config.cutout_length,
        validation=True,
        autoaugment=config.autoaugment)

    if config.label_smooth != 0:
        criterion = utils.CrossEntropyLabelSmooth(
            10, config.label_smooth).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)
    use_aux = config.aux_weight > 0.
    if config.dataset in utils.LARGE_DATASETS:
        model = AugmentCNNImageNet(input_size, input_channels,
                                   config.init_channels, n_classes,
                                   config.layers, use_aux, config.genotype)
    else:
        model = AugmentCNN(input_size,
                           input_channels,
                           config.init_channels,
                           n_classes,
                           config.layers,
                           use_aux,
                           config.genotype,
                           SSC=config.SSC)
    model = nn.DataParallel(model, device_ids=config.gpus).to(device)
    # model size
    mb_params = utils.param_size(model)
    logger.info("Model size = {:.3f} MB".format(mb_params))

    # weights optimizer
    if config.p != 1:
        optimizer = torch.optim.SGD(model.parameters(),
                                    1.,
                                    momentum=config.momentum,
                                    weight_decay=config.weight_decay)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    config.lr,
                                    momentum=config.momentum,
                                    weight_decay=config.weight_decay)

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               shuffle=True,
                                               num_workers=config.workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(valid_data,
                                               batch_size=config.batch_size,
                                               shuffle=False,
                                               num_workers=config.workers,
                                               pin_memory=True)
    if config.p == 1:
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, config.epochs)
    else:
        lr_cpa = utils.cosine_power_annealing_lr(nepochs=config.epochs,
                                                 min_lr=config.lr_min,
                                                 max_lr=config.lr,
                                                 p=config.p)
        lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, [lr_cpa])
    best_top1 = 0.
    # training loop
    for epoch in range(config.epochs):
        lr_scheduler.step()
        drop_prob = config.drop_path_prob * epoch / config.epochs
        model.module.drop_path_prob(drop_prob)

        # training
        train(train_loader, model, optimizer, criterion, epoch)

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        top1 = validate(valid_loader, model, criterion, epoch, cur_step)

        # save
        if best_top1 < top1:
            best_top1 = top1
            is_best = True
        else:
            is_best = False
        utils.save_checkpoint(model, config.path, is_best)

        print("")
    logger.info("Final best Prec@1 = {:.4%} for job {}".format(
        best_top1, config.name))
Example #16
def main():
    if not torch.cuda.is_available():
        logger.info("no gpu device available")
        sys.exit(1)

    logger.info("*** Begin {} ***".format(config.stage))

    # set default gpu device
    torch.cuda.set_device(config.gpus[0])

    # set random seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True

    # get data with meta info
    logger.info("preparing data...")
    input_size, channels_in, num_classes, train_data, valid_data = \
        load_dataset(dataset=config.dataset,
                     data_dir=config.data_dir,
                     cutout_length=config.cutout_length,
                     validation=True,
                     auto_aug=config.auto_aug)

    train_loader = torch.utils.data.DataLoader(
        dataset=train_data,
        batch_size=config.batch_size,
        shuffle=True,
        num_workers=config.num_workers,
        pin_memory=True)

    valid_loader = torch.utils.data.DataLoader(
        dataset=valid_data,
        batch_size=config.batch_size,
        shuffle=False,
        num_workers=config.num_workers,
        pin_memory=True)

    logger.info("parsing genotypes...")
    genotypes = parse_genotypes()
    logger.info(genotypes)
    
    logger.info("building model...")
    model = AugmentCNN(input_size=input_size,
                       channels_in=channels_in,
                       channels_init=config.init_channels,
                       num_cells=config.num_cells,
                       num_nodes=config.num_nodes,
                       num_classes=num_classes, 
                       stem_multiplier=3,
                       auxiliary=(config.aux_weight > 0),
                       genotypes=genotypes,
                       alpha_share=config.alpha_share)
    model = model.to(device)

    model_size = utils.param_size(model)
    logger.info("model_size: {:.3f} MB".format(model_size))

    if config.label_smooth > 0:
        criterion = utils.CrossEntropyLabelSmooth(
            num_classes, config.label_smooth)
    else:
        criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(device)

    optimizer = torch.optim.SGD(params=model.parameters(),
                                lr=config.learning_rate,
                                momentum=config.momentum,
                                weight_decay=config.weight_decay)

    if config.power_lr:
        lr_scheduler = utils.CosinePowerAnnealingLR(
            optimizer=optimizer, T_max=config.epochs, p=2)
    else:
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer=optimizer, T_max=config.epochs)

    logger.info("start training...")
    history_top1 = []
    best_top1 = 0.0

    for epoch in range(config.epochs):
        lr_scheduler.step()
        lr = lr_scheduler.get_lr()[0]
        logger.info("epoch: {:d}, lr: {:e}".format(epoch, lr))

        drop_prob = config.drop_path_prob * epoch / config.epochs
        model.drop_path_prob(drop_prob)

        train(train_loader, model, criterion, optimizer, epoch)

        global_step = (epoch + 1) * len(train_loader) - 1
        valid_top1 = valid(valid_loader, model, criterion, epoch, global_step)
        history_top1.append(valid_top1)

        if epoch == 0 or best_top1 < valid_top1:
            best_top1 = valid_top1
            is_best = True
        else:
            is_best = False

        utils.save_checkpoint(model, config.model_dir, is_best=is_best)

    with open(os.path.join(config.stage_dir, "history_top1.pk"), "wb") as f:
        pickle.dump(history_top1, f)

    logger.info("Final best valid Prec@1: {:.4%}".format(best_top1))
    logger.info("*** Finish {} ***".format(config.stage))
Example #17
def main(HIDDEN_NODE_FC1, HIDDEN_NODE_FC2, HIDDEN_NODE_FC3):
    args = parse_args_for_train(HIDDEN_NODE_FC1, HIDDEN_NODE_FC2,
                                HIDDEN_NODE_FC3)

    custom_train_data_from_txt = CustomDatasetFromTxt(args.dataset, train=True)
    custom_test_data_from_txt = CustomDatasetFromTxt(args.dataset, train=False)
    if custom_train_data_from_txt.data_len > 10000:
        args.batch_size = 1000
    args.log_interval = custom_train_data_from_txt.data_len / 10
    writer = SummaryWriter(log_dir=os.path.join(args.log_path, 'tensorboard'))
    writer.add_text('config', utils.as_markdown(args), 0)

    logger = utils.get_logger(
        os.path.join(args.log_path, "{}.log".format("automl_nn_ax")))
    mlp_params = argparse.Namespace(FC1=HIDDEN_NODE_FC1,
                                    FC2=HIDDEN_NODE_FC2,
                                    FC3=HIDDEN_NODE_FC3,
                                    sync=False)
    utils.print_params(mlp_params, logger.info)

    train_loader = torch.utils.data.DataLoader(
        dataset=custom_train_data_from_txt,
        batch_size=args.batch_size,
        shuffle=True)
    validate_loader = torch.utils.data.DataLoader(
        dataset=custom_test_data_from_txt,
        batch_size=custom_test_data_from_txt.data_len,
        shuffle=False)
    input_size, out_size = custom_train_data_from_txt.input_out_size()

    torch.cuda.set_device(args.gpu)

    #set seed
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    torch.backends.cudnn.benchmark = True

    model = MLPNet(input_size, HIDDEN_NODE_FC1, HIDDEN_NODE_FC2,
                   HIDDEN_NODE_FC3, out_size)
    NN = MLP_network(input_size, HIDDEN_NODE_FC1, HIDDEN_NODE_FC2,
                     HIDDEN_NODE_FC3, out_size)
    test_mlp_data_flow = TestMLP_network(NN)
    res_map = test_mlp_data_flow.test_eyeriss_isca16()
    model = model.to(device)

    if args.multi_gpu:
        model = torch.nn.DataParallel(model)

    start_epoch = 0
    best_top1 = 0.
    if args.resume:
        logger.info("===> resume from the checkpoint")
        assert os.path.isdir(args.log_path), 'Error: no checkpoint path found!'
        checkpoint_file = best_filename = os.path.join(args.log_path,
                                                       'best.pth.tar')
        checkpoint = torch.load(checkpoint_file)
        model.load_state_dict(checkpoint['net'])
        best_top1 = checkpoint['acc']
        start_epoch = checkpoint['epoch']

    #model size
    mb_params = utils.param_size(model)
    logger.info("model size: {:.3f} KB".format(mb_params))

    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), args.w_lr)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=100,
                                                   gamma=0.2)

    for epoch in range(start_epoch, start_epoch + args.epochs):
        lr_scheduler.step()
        # training
        train(train_loader, model, optimizer, criterion, epoch, logger, args)
        top1 = validate(validate_loader, model, criterion, epoch, logger, args)
        if best_top1 < top1:
            best_top1 = top1
            state = {
                'net': model.state_dict(),
                'acc': best_top1,
                'epoch': epoch,
            }
            utils.save_checkpoint(state, args.log_path, True)
    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
    logger.info("total_cost:{%d},total_time:{%d}" %
                (int(res_map['total_cost']), int(res_map['total_time'])))
    return [best_top1, res_map['total_cost'], res_map['total_time']]
Example #18
def start_run():
    config = Config()

    if os.path.exists(config.path):
        while True:
            cont_str = input(
                "Name has been used. Continue and delete other log files? (y/n)"
            )
            if cont_str.lower() == 'n':
                exit()
            elif cont_str.lower() == 'y':
                shutil.rmtree(config.path)
                break
            else:
                print("Invalid input.")

    device = torch.device("cuda")

    # tensorboard
    writer = SummaryWriter(log_dir=os.path.join(config.path, "tb"))
    writer.add_text('config', config.as_markdown(), 0)

    logger = utils.get_logger(
        os.path.join(config.path, "{}.log".format(config.name)))
    config.print_params(logger.info)

    logger.info("Logger is set - training start")

    # set gpu device id
    logger.info("Set GPU device {}".format(config.gpu))
    torch.cuda.set_device(config.gpu)

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True

    #TODO: fix folds/cv
    data_params, train_data, valid_data = utils.get_data(
        config.prop_mouse_data_to_use)

    model = UNet(config.total_channels_to_add, data_params['num_classes'],
                 data_params['input_channels'], config.shake_drop,
                 not config.no_scse, config.num_downsamples,
                 config.num_blocks_per_downsample)

    model = model.to(device)

    logger.info("Model Size (MB): {}".format(utils.param_size(model)))

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               shuffle=True,
                                               num_workers=config.workers,
                                               pin_memory=True)

    valid_loader = torch.utils.data.DataLoader(valid_data,
                                               batch_size=config.batch_size,
                                               shuffle=False,
                                               num_workers=config.workers,
                                               pin_memory=True)

    nb_iters_train = config.epochs * len(train_loader)

    if config.lr_finder:
        w_sched_lr = utils.ExpFinderSchedule(config.w_lr_start,
                                             config.w_lr_end, nb_iters_train)
    else:
        w_sched_lr = utils.PiecewiseLinearOrCos(
            [0.0, config.first_prop * nb_iters_train, nb_iters_train],
            np.array([config.w_lr_start, config.w_lr_middle,
                      config.w_lr_end]), [False, True])

    if config.wd_finder:
        weight_decay = utils.ExpFinderSchedule(config.w_weight_decay,
                                               config.w_weight_decay_end,
                                               nb_iters_train)
    else:
        weight_decay = config.w_weight_decay

    w_optim = Adam(model.parameters(),
                   lr=w_sched_lr,
                   weight_decay=weight_decay)

    cur_step = 0
    best_iou = 0.

    # training loop
    for epoch in range(config.epochs):

        cur_step = train(train_loader, model, w_optim, epoch, writer, device,
                         config, logger, cur_step)

        if (epoch + 1) % config.val_freq == 0:
            # validation
            total_iou = validate(valid_loader, model, epoch, cur_step, writer,
                                 device, config, logger)

            saves = ['checkpoint']
            is_best = best_iou < total_iou
            # save
            if is_best:
                best_iou = total_iou
                saves.append('best')
            utils.save_item(model, config.path, saves)

        print("")

    logger.info("Final best iou = {:.4%}".format(best_iou))
Example #19
def main():
    logger.info("Logger is set - training start")

    # set gpu device id
    logger.info("Set GPU device {}".format(config.gpu))
    torch.cuda.set_device(config.gpu)

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True

    # get dataset
    train_data, valid_data, data_shape = get_dataset(config.data,
                                                     config.data_path,
                                                     config.aug_lv)

    # build model
    criterion = nn.CrossEntropyLoss().to(device)
    model = FractalNet(data_shape,
                       config.columns,
                       config.init_channels,
                       p_ldrop=config.p_ldrop,
                       dropout_probs=config.dropout_probs,
                       gdrop_ratio=config.gdrop_ratio,
                       gap=config.gap,
                       init=config.init,
                       pad_type=config.pad,
                       doubling=config.doubling,
                       dropout_pos=config.dropout_pos,
                       consist_gdrop=config.consist_gdrop)
    model = model.to(device)

    # model size
    m_params = utils.param_size(model)
    logger.info("Models:\n{}".format(model))
    logger.info("Model size (# of params) = {:.3f} M".format(m_params))

    # weights optimizer
    optimizer = torch.optim.SGD(model.parameters(),
                                config.lr,
                                momentum=config.momentum)

    # setup data loader
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               shuffle=True,
                                               num_workers=config.workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(valid_data,
                                               batch_size=config.batch_size,
                                               shuffle=False,
                                               num_workers=config.workers,
                                               pin_memory=True)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, config.lr_milestone)

    best_top1 = 0.
    # training loop
    for epoch in range(config.epochs):
        # training
        train(train_loader, model, optimizer, criterion, epoch)

        # step the LR scheduler after the training pass (PyTorch >= 1.1 ordering)
        lr_scheduler.step()

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        top1 = validate(valid_loader, model, criterion, epoch, cur_step)

        # save
        if best_top1 < top1:
            best_top1 = top1
            is_best = True
        else:
            is_best = False
        utils.save_checkpoint(model.state_dict(), config.path, is_best)

        print("")

    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
Exemplo n.º 20
0
def main(config, writer, logger):
    logger.info("Logger is set - training augment start")

    # get data with meta info
    input_size, input_channels, n_classes, train_data, valid_data = utils.get_data(
        config.dataset,
        config.data_path,
        config.cutout_length,
        validation=True)

    criterion = nn.CrossEntropyLoss().cuda()
    use_aux = config.aux_weight > 0.
    model = AugmentCNN(input_size, input_channels, config.init_channels,
                       n_classes, config.layers, use_aux,
                       config.genotype).cuda()

    # model size
    mb_params = utils.param_size(model)
    logger.info("Model size = {:.3f} MB".format(mb_params))

    # weights optimizer
    optimizer = torch.optim.SGD(model.parameters(),
                                config.lr,
                                momentum=config.momentum,
                                weight_decay=config.weight_decay)

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               shuffle=True,
                                               num_workers=config.workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(valid_data,
                                               batch_size=config.batch_size,
                                               shuffle=False,
                                               num_workers=config.workers,
                                               pin_memory=True)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, config.epochs)

    best_top1 = 0.
    # training loop
    for epoch in range(config.epochs):
        # linearly ramp the drop-path probability over training
        drop_prob = config.drop_path_prob * epoch / config.epochs
        model.drop_path_prob(drop_prob)

        # training
        train(train_loader, model, optimizer, criterion, epoch, config, writer,
              logger)

        # step the LR scheduler after the training pass (PyTorch >= 1.1 ordering)
        lr_scheduler.step()

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        top1 = validate(valid_loader, model, criterion, epoch, cur_step,
                        config, writer, logger)

        # save
        if best_top1 < top1:
            best_top1 = top1
            is_best = True
        else:
            is_best = False
        utils.save_checkpoint(model, config.path, is_best)

        print("")

    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
Exemplo n.º 21
0
def main():
    config = RetrainConfig()
    main_proc = not config.distributed or config.local_rank == 0
    if config.distributed:
        torch.cuda.set_device(config.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method=config.dist_url,
                                             rank=config.local_rank,
                                             world_size=config.world_size)
    if main_proc:
        os.makedirs(config.output_path, exist_ok=True)
    if config.distributed:
        torch.distributed.barrier()
    logger = utils.get_logger(os.path.join(config.output_path, 'search.log'))
    if main_proc:
        config.print_params(logger.info)
    utils.reset_seed(config.seed)

    loaders, samplers = get_augment_datasets(config)
    train_loader, valid_loader = loaders
    train_sampler, valid_sampler = samplers

    model = Model(config.dataset,
                  config.layers,
                  in_channels=config.input_channels,
                  channels=config.init_channels,
                  retrain=True).cuda()
    if config.label_smooth > 0:
        criterion = utils.CrossEntropyLabelSmooth(config.n_classes,
                                                  config.label_smooth)
    else:
        criterion = nn.CrossEntropyLoss()

    fixed_arc_path = os.path.join(config.output_path, config.arc_checkpoint)
    with open(fixed_arc_path, "r") as f:
        fixed_arc = json.load(f)
    fixed_arc = utils.encode_tensor(fixed_arc, torch.device("cuda"))
    genotypes = utils.parse_results(fixed_arc, n_nodes=4)
    genotypes_dict = {i: genotypes for i in range(3)}
    apply_fixed_architecture(model, fixed_arc_path)
    param_size = utils.param_size(
        model, criterion,
        [3, 32, 32] if 'cifar' in config.dataset else [3, 224, 224])

    if main_proc:
        logger.info("Param size: %.6f", param_size)
        logger.info("Genotype: %s", genotypes)

    # change training hyper parameters according to cell type
    if 'cifar' in config.dataset:
        if param_size < 3.0:
            config.weight_decay = 3e-4
            config.drop_path_prob = 0.2
        elif 3.0 < param_size < 3.5:
            config.weight_decay = 3e-4
            config.drop_path_prob = 0.3
        else:
            config.weight_decay = 5e-4
            config.drop_path_prob = 0.3

    if config.distributed:
        model = apex.parallel.convert_syncbn_model(model)
        model = DistributedDataParallel(model, delay_allreduce=True)

    optimizer = torch.optim.SGD(model.parameters(),
                                config.lr,
                                momentum=config.momentum,
                                weight_decay=config.weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                              config.epochs,
                                                              eta_min=1E-6)

    best_top1 = best_top5 = 0.
    for epoch in range(config.epochs):
        drop_prob = config.drop_path_prob * epoch / config.epochs
        if config.distributed:
            model.module.drop_path_prob(drop_prob)
        else:
            model.drop_path_prob(drop_prob)
        # training
        if config.distributed:
            train_sampler.set_epoch(epoch)
        train(logger, config, train_loader, model, optimizer, criterion, epoch,
              main_proc)

        # validation
        top1, top5 = validate(logger, config, valid_loader, model, criterion,
                              epoch, main_proc)
        best_top1 = max(best_top1, top1)
        best_top5 = max(best_top5, top5)
        lr_scheduler.step()

    logger.info("Final best Prec@1 = %.4f Prec@5 = %.4f", best_top1, best_top5)
Exemplo n.º 22
0
def main():
    logger.info("Logger is set - training start")

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True

    # get data with meta info
    #input_size, input_channels, n_classes, train_data, valid_data = utils.get_data(
    #    config.dataset, config.data_path, config.cutout_length, validation=True)
    input_size, input_channels, n_classes, train_data, test_dat, val_dat = utils.get_data(
        config.dataset,
        config.data_path,
        cutout_length=0,
        validation=True,
        validation2=True)
    logger.info("input_size: {}".format(input_size))
    criterion = nn.CrossEntropyLoss().to(device)
    use_aux = config.aux_weight > 0.
    #from evaluate
    #model = SearchCNNController(input_channels, config.init_channels, n_classes, config.layers,
    #                            net_crit, device_ids=config.gpus)

    model = AugmentCNN(input_size, input_channels, config.init_channels,
                       n_classes, config.layers, use_aux, config.genotype)
    model = nn.DataParallel(model, device_ids=config.gpus).to(device)

    # model size
    mb_params = utils.param_size(model)
    logger.info("Model size = {:.3f} MB".format(mb_params))

    # weights optimizer
    optimizer = torch.optim.SGD(model.parameters(),
                                config.lr,
                                momentum=config.momentum,
                                weight_decay=config.weight_decay)

    # build samplers over the train / validation / test splits
    n_train = len(train_data)
    n_val = len(val_dat)
    n_test = len(test_dat)
    indices1 = list(range(n_train))
    indices2 = list(range(n_val))
    indices3 = list(range(n_test))
    train_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices1)
    valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices2)
    test_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices3)

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               sampler=train_sampler,
                                               num_workers=config.workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(val_dat,
                                               batch_size=config.batch_size,
                                               sampler=valid_sampler,
                                               num_workers=config.workers,
                                               pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_dat,
                                              batch_size=config.batch_size,
                                              sampler=test_sampler,
                                              num_workers=config.workers,
                                              pin_memory=True)
    """
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               shuffle=True,
                                               num_workers=config.workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(valid_data,
                                               batch_size=config.batch_size,
                                               shuffle=False,
                                               num_workers=config.workers,
                                               pin_memory=True)
    """
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, config.epochs)
    #lambda1 = lambda epoch: 0.95 ** epoch
    #lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=[lambda1])

    best_top1 = 0.
    # training loop
    for epoch in range(config.epochs):
        # linearly ramp the drop-path probability over training
        drop_prob = config.drop_path_prob * epoch / config.epochs
        model.module.drop_path_prob(drop_prob)

        # training
        train(train_loader, model, optimizer, criterion, epoch)

        # step the LR scheduler after the training pass (PyTorch >= 1.1 ordering)
        lr_scheduler.step()

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        top1 = validate(valid_loader, model, criterion, epoch, cur_step)

        # save
        if best_top1 < top1:
            best_top1 = top1
            is_best = True
        else:
            is_best = False
        #utils.save_checkpoint(model, config.path, is_best)
        utils.save_checkpoint2(model, epoch, optimizer, criterion, config.path,
                               is_best)
        print("")

    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))