def load_model(model, optim, path):
    checkpoint = torch.load(path)
    model.load_state_dict(checkpoint['model_state_dict'])
    optim.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']
    loss = checkpoint['loss']
    return model, optim, epoch, loss
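For reference, a minimal sketch of the save side that the loader above assumes; save_model here is a hypothetical helper (not from the original snippet), and only the dictionary keys need to match:

def save_model(model, optim, epoch, loss, path):
    # Persist exactly the keys that load_model above reads back.
    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optim.state_dict(),
        'epoch': epoch,
        'loss': loss,
    }, path)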
Example #2
File: utils.py  Project: CEA-LIST/adv-sat
def load_model(net, optim, save_path, filename, device):
    """Load a model and its optimizer

    Args:
        net (nn.Module): architecture of the saved model
        optim (torch.optim): optimizer to load
        save_path (str): path where the file is stored
        filename (str): filename to open
        device (torch.device): device to load the model and optimizer to

    Returns:
        net (nn.Module): loaded model
        optim (torch.optim): optimizer of the loaded model
        best_acc (int): performance of the model
        epoch (int): number of epochs the model was trained for
    """
    state = torch.load(os.path.join(save_path, filename), map_location=device)
    net.load_state_dict(state['net'])
    best_acc = state['acc']
    epoch = state['epoch']
    try:
        optim_state = state['optim']
    except KeyError:
        optim_state = None

    if optim_state and optim:
        optim.load_state_dict(optim_state)

    return net, optim, best_acc, epoch
Example #3
    def load_checkpoint(self, checkpoint_path, optim=None, only_model=False):
        """ Write docstring
        """
        if os.path.isfile(checkpoint_path):
            print('[PROGRESS] Loading checkpoint: {}'.format(checkpoint_path), end="", flush=True)

            # Load the checkpoint
            checkpoint = torch.load(checkpoint_path, map_location='cpu')

            # Load the model state dictionary from the checkpoint
            self.model.load_state_dict(checkpoint['state_dict'])
            print('\r[INFO] Checkpoint has been loaded: {}'.format(checkpoint_path))

            if not only_model:
                # Load optimization method parameters from the checkpoint
                optim.load_state_dict(checkpoint['optimizer'])
                # Load the necessary checkpoint key values to the states dictionary which contains loss and history values/lists
                self.states.update({key: value for key, value in checkpoint.items() if key not in ['optimizer', 'state_dict']})

                print('[INFO] History lists have been loaded')
                print('[INFO] Resuming from epoch {}'.format(checkpoint['epoch']+1))

                # the checkpoint contents are now held by the instance, so delete the checkpoint variable to free GPU memory
                del checkpoint
                torch.cuda.empty_cache()

                return optim
        else:
            raise FileNotFoundError('Checkpoint file not found: %s' % checkpoint_path)
Example #4
def load(name):
    state_dicts = torch.load(name)
    model.load_state_dict(state_dicts['net'])
    try:
        optim.load_state_dict(state_dicts['opt'])
    except ValueError:
        print('Cannot load optimizer for some reason or other')
Example #5
def load_model(model_path, model, optim):
    ckpt = torch.load(model_path)
    model.load_state_dict(ckpt['state_dict'])
    optim.load_state_dict(ckpt['optimizer'])
    epoch = ckpt['epoch']

    return model, epoch, optim
Example #6
def load_model(net, optim, path):
    print ("==> restoring checkpoint")
    ckpt = torch.load(path)
    epoch = ckpt['epoch']
    net.load_state_dict(ckpt['state_dict'])
    optim.load_state_dict(ckpt['optimizer'])
    print ("==> loaded checkpoint '{}' (epoch {})".format(path, epoch))
    return net, optim, epoch
Example #7
def load(name):
    state_dicts = torch.load(name)
    network_state_dict = {k:v for k,v in state_dicts['net'].items() if 'tmp_var' not in k}
    combined_model.load_state_dict(network_state_dict)
    try:
        optim.load_state_dict(state_dicts['opt'])
        feature_optim.load_state_dict(state_dicts['opt_f'])
    except Exception:
        print('Cannot load optimizer for some reason or other')
Example #8
def load(name, load_opt=False, model=model):
    print("loads fn_model from ", name)
    state_dicts = torch.load(name, map_location='cpu')
    #print(state_dicts)
    model.load_state_dict(state_dicts['net'])
    model.to(c.device)
    if load_opt:
        try:
            optim.load_state_dict(state_dicts['opt'])
        except ValueError:
            print('Cannot load optimizer for some reason or other')
Example #9
def loadModel(conf, device):
    if conf.modelSave == "best":
        fileToLoad = conf.modelFile
    else:
        fileToLoad = conf.modelFileLoad

    print("Loading {}".format(fileToLoad), flush=True)

    model, optim = makeModel(conf, device)

    checkpoint = torch.load(fileToLoad)
    model.load_state_dict(checkpoint['model_state_dict'])
    optim.load_state_dict(checkpoint['optim_state_dict'])
    return model, optim
Example #10
def load_checkpoint(checkpoint_path: str, model: nn.Module, optim: optimizer.Optimizer) -> Tuple[int, int, float]:
    """Loads training checkpoint.

    :param checkpoint_path: path to checkpoint
    :param model: model to update state
    :param optim: optimizer to  update state
    :return tuple of starting epoch id, starting step id, best checkpoint score
    """
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint[_MODEL_STATE_DICT])
    optim.load_state_dict(checkpoint[_OPTIMIZER_STATE_DICT])
    start_epoch_id = checkpoint[_EPOCH] + 1
    step = checkpoint[_STEP] + 1
    best_score = checkpoint[_BEST_SCORE]
    return start_epoch_id, step, best_score
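# Note: the _MODEL_STATE_DICT, _OPTIMIZER_STATE_DICT, _EPOCH, _STEP and _BEST_SCORE
# key constants are defined elsewhere in that project's module. A plausible sketch of
# what this loader assumes (the string values here are an assumption, not necessarily
# the project's actual ones):
_MODEL_STATE_DICT = 'model_state_dict'
_OPTIMIZER_STATE_DICT = 'optimizer_state_dict'
_EPOCH = 'epoch'
_STEP = 'step'
_BEST_SCORE = 'best_score'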
Example #11
def build_optim(_model, train_args, checkpoint=None):
    saved_optimizer_state_dict = None
    if checkpoint:
        optim = checkpoint['optim']
        saved_optimizer_state_dict = optim.state_dict()
    else:
        optim = AdamW(_model.parameters(), lr=train_args.lr, eps=1e-8)

    if train_args.train_from is not None:
        optim.load_state_dict(saved_optimizer_state_dict)
        if train_args.device != 'cpu':
            for state in optim.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = v.cuda(device=train_args.device)
    return optim
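The GPU-moving loop inside build_optim above can be factored into a small device-agnostic helper. A minimal sketch (not part of the original snippet), using only standard torch APIs:

import torch

def optimizer_state_to_device(optim, device):
    # After optim.load_state_dict(...), state tensors keep the device they were
    # saved on; move each tensor in the optimizer state to the target device.
    for state in optim.state.values():
        for k, v in state.items():
            if torch.is_tensor(v):
                state[k] = v.to(device)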
Example #12
def load_checkpoint(fname, model, optim):
    if os.path.isfile(fname):
        print("\nCheckpoint file found. Resuming from checkpoint.\n")
        checkpoint = torch.load(fname)
        if 'model_state_dict' in checkpoint:
            model.load_state_dict(checkpoint['model_state_dict'])
            print("Model parameters loaded from checkpoint.")
        if 'optimizer_state_dict' in checkpoint:
            optim.load_state_dict(checkpoint['optimizer_state_dict'])
            print("Optimizer parameters loaded from checkpoint.")
        if 'loss' in checkpoint:
            min_loss = checkpoint['loss']
            print("Previous validation loss loaded.")
        if 'epoch' in checkpoint:
            prev_epochs = checkpoint['epoch']
            print("Continuing training from epoch: {}".format(prev_epochs))

        return model, optim, min_loss, prev_epochs
Example #13
def load_model(output,
               epoch,
               model,
               model_name,
               optim=None,
               scheduler=None,
               csv_path=None):

    checkpoint_name = "{}.pth".format(model_name)
    try:
        print("checkpoint: ", os.path.join(output, checkpoint_name))
        checkpoint = torch.load(os.path.join(output, checkpoint_name))
        try:
            model.load_state_dict(checkpoint['state_dict'], strict=False)
            if optim != None:
                optim.load_state_dict(checkpoint['opt_dict'])
            if scheduler != None:
                scheduler.load_state_dict(checkpoint['scheduler_dict'])
            epoch_resume = checkpoint["epoch"] + 1
            bestLoss = checkpoint["best_loss"]
        except:
            model.load_state_dict(checkpoint)
            epoch_resume = 0

        if csv_path != None:
            stats = dict(
                epoch_resume="Resuming from epoch {}\n".format(epoch_resume))
            write_csv_stats(csv_path, stats)
        return epoch_resume

    except FileNotFoundError:
        print("No checkpoint found\n")

    except:  # saved model in nn.DataParallel
        # create new OrderedDict that does not contain `module.`
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in checkpoint.items():
            name = k[7:]  # remove `module.`
            new_state_dict[name] = v
        model.load_state_dict(new_state_dict)

    return 0
Example #14
def load_model(model_path):
    
    if "vgg16" in model_path:
        model = models.vgg16(pretrained=True)
    elif "vgg19" in model_path:
        model = models.vgg19(pretrained=True)
    elif "densenet121" in model_path:
        model = models.densenet121(pretrained=True)        
    elif "densenet161" in model_path:
        model = models.densenet161(pretrained=True)        
            
    state = torch.load(model_path)
    
    model.classifier = state['classifier']
    optim = state['optimizer']
    
    model.load_state_dict(state['state_dict'])
    optim.load_state_dict(state['optimizer_state_dict'])
    model.class_to_idx = state['class_to_idx']
    
    return model, optim
Example #15
def train_lstm(x, y, training, lr=0.003):
    Epochs = 120
    policy_model = PreAccLstm(D=args.hidden, layers=args.layers)
    print(policy_model)
    checkpoint = load_checkpoint(args)
    policy_model = torch.nn.DataParallel(policy_model).cuda()
    loss_func = nn.L1Loss()
    optim = torch.optim.Adam(policy_model.parameters(), lr=lr)
    if checkpoint is not None:
        policy_model.load_state_dict(checkpoint['state_dict'])
        optim.load_state_dict(checkpoint['optimizer'])

    torch_trainset = Data.TensorDataset(x, y)
    # print (torch_trainset)
    train_loader = Data.DataLoader(dataset=torch_trainset,
                                   batch_size=4,
                                   shuffle=True,
                                   num_workers=4)
    train = [train_loader]
    # print ("debug 0 ", training)
    for tr in training:
        # print ("debug 4", tr)
        idx = np.random.choice(tr[0].size(0), int(tr[0].size(0) * 0.95))
        # print(idx)
        # print(tr[1][idx])
        torch_trainset = Data.TensorDataset(tr[0][idx], tr[1][idx])
        train_loader = Data.DataLoader(dataset=torch_trainset,
                                       batch_size=2,
                                       shuffle=True,
                                       num_workers=2)
        train.append(train_loader)
    # print("debug1", train)
    for epoch in range(Epochs):
        ### Train for one epoch
        loss = LSTM_train(policy_model, train, loss_func, optim, epoch)
    return policy_model, loss
Example #16
def main():
    args = get_args()
    device, dtype = args.device, args.dtype

    train_loader, val_loader = get_loaders(args.dataroot, args.batch_size,
                                           args.batch_size, args.input_size,
                                           args.workers, args.world_size,
                                           args.local_rank)

    model = MnasNet(n_class=args.num_classes,
                    width_mult=args.scaling,
                    drop_prob=0.0,
                    num_steps=len(train_loader) * args.epochs)
    num_parameters = sum([l.nelement() for l in model.parameters()])
    flops = flops_benchmark.count_flops(MnasNet,
                                        1,
                                        device,
                                        dtype,
                                        args.input_size,
                                        3,
                                        width_mult=args.scaling)
    if not args.child:
        print(model)
        print('number of parameters: {}'.format(num_parameters))
        print('FLOPs: {}'.format(flops))

    # define loss function (criterion) and optimizer
    criterion = CrossEntropyLoss()
    mixup = Mixup(args.num_classes, args.mixup, args.smooth_eps)

    model, criterion = model.to(device=device,
                                dtype=dtype), criterion.to(device=device,
                                                           dtype=dtype)
    if args.dtype == torch.float16:
        for module in model.modules():  # FP batchnorm
            if is_bn(module):
                module.to(dtype=torch.float32)

    if args.distributed:
        args.device_ids = [args.local_rank]
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_init,
                                world_size=args.world_size,
                                rank=args.local_rank)
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[args.local_rank], output_device=args.local_rank)
        print('Node #{}'.format(args.local_rank))
    else:
        model = torch.nn.parallel.DataParallel(model,
                                               device_ids=[args.local_rank],
                                               output_device=args.local_rank)

    optimizer_class = torch.optim.SGD
    optimizer_params = {
        "lr": args.learning_rate,
        "momentum": args.momentum,
        "weight_decay": args.decay,
        "nesterov": True
    }
    if args.find_clr:
        optimizer = torch.optim.SGD(model.parameters(),
                                    args.learning_rate,
                                    momentum=args.momentum,
                                    weight_decay=args.decay,
                                    nesterov=True)
        find_bounds_clr(model,
                        train_loader,
                        optimizer,
                        criterion,
                        device,
                        dtype,
                        min_lr=args.min_lr,
                        max_lr=args.max_lr,
                        step_size=args.epochs_per_step * len(train_loader),
                        mode=args.mode,
                        save_path=args.save_path)
        return

    if args.sched == 'clr':
        scheduler_class = CyclicLR
        scheduler_params = {
            "base_lr": args.min_lr,
            "max_lr": args.max_lr,
            "step_size": args.epochs_per_step * len(train_loader),
            "mode": args.mode
        }
    elif args.sched == 'multistep':
        scheduler_class = MultiStepLR
        scheduler_params = {"milestones": args.schedule, "gamma": args.gamma}
    elif args.sched == 'cosine':
        scheduler_class = CosineLR
        scheduler_params = {
            "max_epochs": args.epochs,
            "warmup_epochs": args.warmup,
            "iter_in_epoch": len(train_loader)
        }
    elif args.sched == 'gamma':
        scheduler_class = StepLR
        scheduler_params = {"step_size": 30, "gamma": args.gamma}
    else:
        raise ValueError('Wrong scheduler!')

    optim = OptimizerWrapper(model,
                             optimizer_class=optimizer_class,
                             optimizer_params=optimizer_params,
                             scheduler_class=scheduler_class,
                             scheduler_params=scheduler_params,
                             use_shadow_weights=args.dtype == torch.float16)
    best_test = 0

    # optionally resume from a checkpoint
    data = None
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optim.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        elif os.path.isdir(args.resume):
            checkpoint_path = os.path.join(
                args.resume, 'checkpoint{}.pth.tar'.format(args.local_rank))
            csv_path = os.path.join(args.resume,
                                    'results{}.csv'.format(args.local_rank))
            print("=> loading checkpoint '{}'".format(checkpoint_path))
            checkpoint = torch.load(checkpoint_path, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optim.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                checkpoint_path, checkpoint['epoch']))
            data = []
            with open(csv_path) as csvfile:
                reader = csv.DictReader(csvfile)
                for row in reader:
                    data.append(row)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if args.evaluate:
        loss, top1, top5 = test(model, val_loader, criterion, device, dtype,
                                args.child)  # TODO
        return

    csv_logger = CsvLogger(filepath=args.save_path,
                           data=data,
                           local_rank=args.local_rank)
    csv_logger.save_params(sys.argv, args)

    claimed_acc1 = None
    claimed_acc5 = None
    if args.input_size in claimed_acc_top1:
        if args.scaling in claimed_acc_top1[args.input_size]:
            claimed_acc1 = claimed_acc_top1[args.input_size][args.scaling]
            if not args.child:
                csv_logger.write_text(
                    'Claimed accuracy is {:.2f}% top-1'.format(claimed_acc1 *
                                                               100.))
    train_network(args.start_epoch, args.epochs, optim, model, train_loader,
                  val_loader, criterion, mixup, device, dtype, args.batch_size,
                  args.log_interval, csv_logger, args.save_path, claimed_acc1,
                  claimed_acc5, best_test, args.local_rank, args.child)
Example #17
def continuous_optim(tensor_list,
                     train_data,
                     loss_fun,
                     epochs=10,
                     val_data=None,
                     other_args=dict()):
    """
    Train a tensor network using gradient descent on input dataset

    Args:
        tensor_list: List of tensors encoding the network being trained
        train_data:  The data used to train the network
        loss_fun:    Scalar-valued loss function of the type 
                        tens_list, data -> scalar_loss
                     (This depends on the task being learned)
        epochs:      Number of epochs to train for. When val_data is given,
                     setting epochs=None implements early stopping
        val_data:    The data used for validation
        other_args:  Dictionary of other arguments for the optimization, 
                     with some options below (feel free to add more)

                        optim: Choice of Pytorch optimizer (default='SGD')
                        lr:    Learning rate for optimizer (default=1e-3)
                        bsize: Minibatch size for training (default=100)
                        reps:  Number of times to repeat 
                               training data per epoch     (default=1)
                        print: Whether to print info       (default=True)
                        dyn_print: use dynamic printing    (default=False)
                        hist:  Whether to return losses
                               from train and val sets     (default=False)
                        momentum: Momentum value for 
                                  continuous optimization  (default=0)
                        cvg_threshold: threshold to test convergence of 
                            optimization (optimization is stopped if 
                            |(prev_loss - cur_loss)/prev_loss| <  cvg_threshold
                            If None, convergence is not checked. If epochs is
                            set as well, then optimization is stopped either when the
                            convergence criterion is met or when epochs is reached
                                                            (default:None)
                        lr_scheduler: a function taking an optimizer as input
                        and returning a learning rate scheduler for this optimizer
                                                            (default:None)
                        save_optimizer_state: if True, other_args should have an empty
                            dict for the key optimizer_state. This dict will contain 
                              {optimizer_state: optimizer state_dict,
                              lr_scheduler_state: scheduler state_dict (if any)}
                            after the function returns.     (default:False)
                        load_optimizer_state: a dictionary that will be used to
                            initialize the optimizer (and scheduler if any) from a
                            previously saved optimizer state.
                                                            (default: None)
                        grad_masking_function: a function taking the list of tensor
                            parameters between the backward pass and the optimizer step
                            (can be used to e.g. zero out parts of the gradient)
                                                            (default: None)
                        stop_condition: a function taking the training and validation loss
                            as input after each epoch and returning True if optimization 
                            should be stopped               (default: None)

    
    Returns:
        better_list: List of tensors with same shape as tensor_list, but
                     having been optimized using the appropriate optimizer.
                     When validation data is given, the model with the 
                     lowest validation loss is output, otherwise the model
                     with lowest training loss
        first_loss:  Initial loss of the model on the validation set, 
                     before any training. If no val set is provided, the
                     first training loss is instead returned
        best_loss:   The value of the validation/training loss for the
                     model output as better_list
        best_epoch:  epoch at which best_model was found
        loss_record: If hist=True in other_args, history of all validation
                     and training losses is returned as a tuple of Pytorch
                     vectors (train_loss, val_loss), with each vector
                     having length equal to number of epochs of training.
                     When no validation loss is provided, the second item
                     (val_loss) is an empty tensor.
    """
    # Check input and initialize local record variables
    early_stop = epochs is None
    has_val = val_data is not None
    optim = other_args['optim'] if 'optim' in other_args else 'SGD'
    lr = other_args['lr'] if 'lr' in other_args else 1e-3
    bsize = other_args['bsize'] if 'bsize' in other_args else 100
    reps = other_args['reps'] if 'reps' in other_args else 1
    prnt = other_args['print'] if 'print' in other_args else True
    hist = other_args['hist'] if 'hist' in other_args else False
    dyn_print = other_args['dyn_print'] if 'dyn_print' in other_args else False
    lr_scheduler = other_args[
        'lr_scheduler'] if 'lr_scheduler' in other_args else None
    cvg_threshold = other_args[
        'cvg_threshold'] if 'cvg_threshold' in other_args else None
    save_optimizer_state = other_args[
        'save_optimizer_state'] if 'save_optimizer_state' in other_args else None
    load_optimizer_state = other_args[
        'load_optimizer_state'] if 'load_optimizer_state' in other_args else None
    grad_masking_function = other_args[
        'grad_masking_function'] if 'grad_masking_function' in other_args else None
    momentum = other_args['momentum'] if 'momentum' in other_args else 0

    stop_condition = other_args[
        'stop_condition'] if 'stop_condition' in other_args else None

    if save_optimizer_state and 'optimizer_state' not in other_args:
        raise ValueError(
            "an empty dictionary should be passed as the optimizer_state argument to store the"
            " optimizer state.")
    if early_stop and not has_val:
        raise ValueError("Early stopping (epochs=None) requires val_data "
                         "to be input")
    loss_rec, first_loss, best_loss, best_network, best_epoch = [], None, np.inf, tensor_list, 0
    if hist: loss_record = ([], [])  # (train_record, val_record)

    # Function to maybe print, conditioned on `prnt`
    m_print = lambda s: print(s, end='\r'
                              if dyn_print else '\n') if prnt else None

    # Function to record loss information and return whether to stop
    def record_loss(new_loss, new_network, epoch_num):
        # Load record variables from outer scope
        nonlocal loss_rec, first_loss, best_loss, best_network, best_epoch

        # Check for first and best loss
        if best_loss is None or new_loss < best_loss:
            best_loss, best_network, best_epoch = new_loss, new_network, epoch_num
        if first_loss is None:
            first_loss = new_loss

        # Update loss record and check for early stopping. If you want to
        # change early stopping criteria, this is the place to do it.
        window = 2  # Number of epochs kept for checking early stopping
        warmup = 1  # Number of epochs before early stopping is checked
        if len(loss_rec) < window:
            stop, loss_rec = False, loss_rec + [new_loss]
        else:
            # stop = new_loss > sum(loss_rec)/len(loss_rec)
            stop = (new_loss > max(loss_rec)) and (epoch_num >= warmup)
            loss_rec = loss_rec[1:] + [new_loss]

        return stop

    # Another loss logging function, but for recording *all* loss history
    @torch.no_grad()
    def loss_history(new_loss, is_val):
        if not hist: return
        nonlocal loss_record
        loss_record[int(is_val)].append(new_loss)

    # Function to run TN on validation data
    @torch.no_grad()
    def run_val(t_list):
        val_loss = []

        # Note that `batchify` uses different logic for different types
        # of input, so update batchify when you work on tensor completion
        for batch in batchify(val_data):
            val_loss.append(loss_fun(t_list, batch))
        if has_val:
            val_loss = torch.mean(torch.tensor(val_loss))

        return val_loss

    # Copy tensor_list so the original is unchanged
    tensor_list = copy_network(tensor_list)

    # Record the initial validation loss (if we have a validation dataset)
    if has_val: record_loss(run_val(tensor_list), tensor_list, 0)

    # Initialize optimizer, using only the keyword args accepted by the chosen optimizer
    optim = getattr(torch.optim, optim)
    opt_args = signature(optim).parameters.keys()
    kwargs = {'lr': lr, 'momentum': momentum}  # <- Add new options here
    kwargs = {k: v for (k, v) in kwargs.items() if k in opt_args}
    optim = optim(tensor_list, **kwargs)  # Initialize the optimizer
    if lr_scheduler:  # instantiate learning rate scheduler
        scheduler = lr_scheduler(optim)

    if load_optimizer_state:
        optim.load_state_dict(
            other_args["load_optimizer_state"]["optimizer_state"])
        if lr_scheduler:
            scheduler.load_state_dict(
                other_args["load_optimizer_state"]["lr_scheduler_state"])

    # Loop over validation and training for given number of epochs
    ep = 1
    prev_loss = np.inf

    while epochs is None or ep <= epochs:

        # Train network on all the training data
        #from copy import deepcopy
        prev_tensor_list = copy_network(tensor_list)
        #prev_tensor_list = tensor_list
        train_loss, num_train = 0., 0
        for batch in batchify(train_data, batch_size=bsize, reps=reps):
            loss = loss_fun(tensor_list, batch)
            optim.zero_grad()
            loss.backward()
            if grad_masking_function:
                grad_masking_function(tensor_list)
            optim.step()

            with torch.no_grad():
                num_train += 1
                train_loss += loss

        train_loss /= num_train

        if lr_scheduler:
            scheduler.step(train_loss)

        loss_history(train_loss, is_val=False)

        val_loss = run_val(tensor_list) if has_val else None

        val_loss_str = f"Val. loss:  {val_loss.data:.10f}" if has_val else ""
        m_print(
            f"EPOCH {ep} {'('+str(reps)+' reps)' if reps > 1 else ''}\t\t{val_loss_str}\t\t Train loss: {train_loss.data:.10f}\t\t Convergence: {np.abs(train_loss-prev_loss)/prev_loss:.10f}"
        )

        # Get validation loss if we have it, otherwise record training loss
        if has_val:
            # Get and record validation loss, check early stopping condition
            loss_history(val_loss, is_val=True)
            if record_loss(
                    val_loss,
                    copy_network(tensor_list) if has_val else prev_tensor_list,
                    ep) and early_stop:
                print(f"\nEarly stopping condition reached")
                break
        else:
            record_loss(
                train_loss,
                copy_network(tensor_list) if has_val else prev_tensor_list, ep)

        if cvg_threshold and np.abs(train_loss -
                                    prev_loss) / prev_loss < cvg_threshold:
            print(f"\nConvergence criteria reached")
            break
        if stop_condition and stop_condition(train_loss=train_loss,
                                             val_loss=val_loss):
            print(f"\nStopping condition reached")
            break

        prev_loss = train_loss

        ep += 1
    m_print("")

    # Save the optimizer state if needed
    if save_optimizer_state:
        other_args["optimizer_state"]["optimizer_state"] = optim.state_dict()
        if lr_scheduler:
            other_args["optimizer_state"][
                "lr_scheduler_state"] = scheduler.state_dict()

    if hist:
        loss_record = tuple(torch.tensor(fr) for fr in loss_record)
        return best_network, first_loss, best_loss, best_epoch, loss_record
    else:
        return best_network, first_loss, best_loss
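As a usage illustration of the other_args protocol documented above, a hypothetical sketch of warm-starting a second call from the optimizer state saved by a first call; my_tensors, my_train_data and my_loss_fun are placeholder names, not part of the original snippet:

args = {'optim': 'Adam', 'lr': 1e-3, 'save_optimizer_state': True, 'optimizer_state': {}}
net, first_loss, best_loss = continuous_optim(my_tensors, my_train_data, my_loss_fun,
                                              epochs=5, other_args=args)
resume_args = {'optim': 'Adam', 'lr': 1e-3, 'load_optimizer_state': args['optimizer_state']}
net, first_loss, best_loss = continuous_optim(net, my_train_data, my_loss_fun,
                                              epochs=5, other_args=resume_args)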
Example #18
if args.task == 's2m':
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=True)
else:
    optimizer = torch.optim.Adam(model.parameters(), args.lr)

if args.resume:
    print("=> loading checkpoint '{}'".format(args.resume))
    checkpoint = torch.load(args.resume)
    args.start_epoch = checkpoint['epoch']
    best_prec1 = checkpoint['best_prec1']
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    print("=> loaded checkpoint '{}' (epoch {})".format(
        args.resume, checkpoint['epoch']))

criterion_bce = nn.BCELoss()
criterion_cel = nn.CrossEntropyLoss()

best_prec1 = 0
best_pred_y = []
best_gt_y = []
global_step = 0
total_steps = args.grl_rampup_epochs * len(source_loader)


def train(epoch):
    model.train()
Example #19
def main(run_id, pretrained, data_files, model_params, training_params,
         device):
    best_acc1 = 0
    batch_size = training_params['batch_size']
    test_batch_size = training_params['test_batch_size']
    epochs = training_params['epochs']
    start_epoch = training_params['start_epoch']
    n_warmup_steps = training_params['n_warmup_steps']
    log_interval = training_params['log_interval']

    # model is trained for binary classification (for dataloader)
    if model_params['NUM_SPOOF_CLASS'] == 2:
        binary_class = True
    else:
        binary_class = False

    kwargs = {
        'num_workers': 2,
        'pin_memory': True
    } if device == torch.device('cuda') else {}

    # create model
    model = Detector(**model_params).to(device)
    num_model_params = sum(p.numel() for p in model.parameters()
                           if p.requires_grad)
    print('===> Model total parameter: {}'.format(num_model_params))

    # Wrap model for multi-GPUs, if necessary
    if device == torch.device('cuda') and torch.cuda.device_count() > 1:
        print('multi-gpu')
        model = nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    optim = optimizer.ScheduledOptim(
        torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                         betas=(0.9, 0.98),
                         eps=1e-09,
                         weight_decay=1e-4,
                         lr=3e-4,
                         amsgrad=True), training_params['n_warmup_steps'])

    # optionally resume from a checkpoint
    if pretrained:
        if os.path.isfile(pretrained):
            print("===> loading checkpoint '{}'".format(pretrained))
            checkpoint = torch.load(pretrained)
            start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            model.load_state_dict(checkpoint['state_dict'])
            optim.load_state_dict(checkpoint['optimizer'])
            print("===> loaded checkpoint '{}' (epoch {})".format(
                pretrained, checkpoint['epoch']))
        else:
            print("===> no checkpoint found at '{}'".format(pretrained))

    # Data loading code
    train_data = SpoofDatsetSystemID(data_files['train_scp'],
                                     data_files['train_utt2index'],
                                     binary_class)
    val_data = SpoofDatsetSystemID(data_files['dev_scp'],
                                   data_files['dev_utt2index'], binary_class)

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               **kwargs)
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=test_batch_size,
                                             shuffle=True,
                                             **kwargs)

    best_epoch = 0
    early_stopping, max_patience = 0, 100  # for early stopping
    os.makedirs("model_snapshots/" + run_id, exist_ok=True)
    for epoch in range(start_epoch, start_epoch + epochs):

        trainer.train(train_loader, model, optim, epoch, device, log_interval)
        acc1 = validate.validate(val_loader, data_files['dev_utt2systemID'],
                                 model, device, log_interval)

        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        # adjust learning rate + early stopping
        if is_best:
            early_stopping = 0
            best_epoch = epoch + 1
        else:
            early_stopping += 1
            if epoch - best_epoch > 2:
                optim.increase_delta()
                best_epoch = epoch + 1
        if early_stopping == max_patience:
            break

        # save model
        optimizer.save_checkpoint(
            {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'best_acc1': best_acc1,
                'optimizer': optim.state_dict(),
            }, is_best, "model_snapshots/" + str(run_id),
            str(epoch) + ('_%.3f' % acc1) + ".pth.tar")
Example #20
def main(config):
    device = torch.device("cuda")

    generator_config = config['generator']  # Model experiments total epochs and beginning epoch
    initial_epoch = generator_config['initial_epoch']  # 0 by default, or N if resuming from a saved epoch
    num_epochs = generator_config['num_epochs']  # Total Number of Epochs
    plt_sep = generator_config['plot_separate']  # Plot the train, valid and test separately: 0 or 1
    lamb = generator_config['set_lamda']  # Weight on the regularization term in the loss
    loss_up = generator_config['loss_up']  # Scaling factor applied to the classification loss
    bsz = generator_config['batch_size']  # Batch size for training/testing loaders

    # Get the model architecture
    model_params = config['model_params'] 
    fin = model_params['fin'] # Input node features
    fou1 = model_params['fou1'] # Output node features for first GC block
    clus = model_params['clus'] # Number of clusters learned for first GC block
    fou2 = model_params['fou2'] # Output node features for second GC block
    hlin = model_params['hlin'] # Output of the first liner layer
    outp = model_params['outp'] # Number of output classes
    psudim = model_params['psudim'] # Dimension of the pseudo-coordinates

    optm_config = config['optimizer']
    b1 = optm_config['B1']  # B1 for Adam Optimizer: Ex. 0.9
    b2 = optm_config['B2']  # B2 for Adam Optimizer: Ex. 0.999
    lr = optm_config['LR']  # Learning Rate: Ex. 0.001

    directory_config = config['directories']
    out_dir = directory_config['out_dir']  # Path to save the outputs of the experiments
    config_name = directory_config['ConfigName']  # Configuration Name to Uniquely Identify this Experiment
    log_path = join(out_dir, config_name, 'log')  # Path to save the training log files
    main_path = directory_config['datafile']  # Full Path of the dataset. Folder contains train, valid and test
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    if not os.path.exists(join(log_path, 'weights')):  # Path to save the weights of training
        os.makedirs(join(log_path, 'weights'))

    # Initialize the model, optimizer and data loader
    model = GCNet(fin=fin, fou1=fou1, clus=clus, fou2=fou2, hlin=hlin, outp=outp, psudim=psudim)  # Create the model
    model = model.to(device)
    compute_loss = torch.nn.BCEWithLogitsLoss()  # Loss function: Binary cross-entropy with logits
    optimizer = optm.Adam(model.parameters(), lr=lr, betas=(b1, b2))
    train_set = GeometricDataset('train', main_path)
    train_loader = DataLoader(train_set,
                              batch_size=bsz,
                              num_workers=4,
                              shuffle=True)
    valid_set = GeometricDataset('valid', main_path)
    valid_loader = DataLoader(valid_set,
                              batch_size=bsz,
                              num_workers=4,
                              shuffle=False)
    test_set = GeometricDataset('test', main_path)
    test_loader = DataLoader(test_set,
                             batch_size=bsz,
                             num_workers=4,
                             shuffle=False)

    if initial_epoch > 0:
        print("===> Loading pre-trained weight {}".format(initial_epoch - 1))
        weight_path = 'weights/model-{:04d}.pt'.format(initial_epoch - 1)
        checkpoint = torch.load(join(log_path, weight_path))
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    def checkpoint(epc):
        w_path = 'weights/model-{:04d}.pt'.format(epc)
        torch.save(
            {'epoch': epc, 'model_state_dict': model.state_dict(),
             'optimizer_state_dict': optimizer.state_dict()}, join(log_path, w_path))

    # setup our callbacks used to plot curves
    my_metric = ['Accuracy']
    my_loss = ['Loss']
    logger = Logger(mylog_path=log_path, mylog_name="training.log", myloss_names=my_loss, mymetric_names=my_metric)
    ls_plt = LossPlotter(mylog_path=log_path, mylog_name="training.log",
                         myloss_names=my_loss, mymetric_names=my_metric, cmb_plot=plt_sep)

    def train(loader):
        lss_all = acc_all = 0
        model.zero_grad()
        model.train()

        for data in tqdm(loader):
            data.to(device)
            optimizer.zero_grad()

            out, reg = model(data)
            loss = (loss_up * compute_loss(out, func.one_hot(torch.LongTensor([data.sx.item()]), num_classes=2).float().cuda())) + (lamb * reg)
            acc = bin_accuracy(torch.max(out, 1)[1], data.sx)
            loss.backward()
            optimizer.step()

            lss_all += loss.item()
            acc_all += acc

        metric = np.array([lss_all / len(loader), acc_all / len(loader)])

        return metric

    def test(loader):
        lss_all = acc_all = 0
        model.eval()

        with torch.no_grad():
            for data in tqdm(loader):
                data.to(device)

                out, reg = model(data)
                loss = (loss_up * compute_loss(out, func.one_hot(torch.LongTensor([data.sx.item()]), num_classes=2).float().cuda())) + (lamb * reg)
                acc = bin_accuracy(torch.max(out, 1)[1], data.sx)

                lss_all += loss.item()
                acc_all += acc

            metric = np.array([lss_all / len(loader), acc_all / len(loader)])

        return metric

    print("===> Starting Model Training at Epoch: {}".format(initial_epoch))

    for epoch in range(initial_epoch, num_epochs):
        start = time.time()

        print("\n\n")
        print("Epoch:{}".format(epoch))

        train_metric = train(train_loader)
        print(
            "===> Training   Epoch {}: Loss = {:.4f}, Accuracy = {:.4f}".format(epoch, train_metric[0], train_metric[1]))
        val_metric = test(valid_loader)
        print("===> Validation Epoch {}: Loss = {:.4f}, Accuracy = {:.4f}".format(epoch, val_metric[0], val_metric[1]))
        test_metric = test(test_loader)
        print("===> Testing    Epoch {}: Loss = {:.4f}, Accuracy = {:.4f}".format(epoch, test_metric[0], test_metric[1]))

        logger.to_csv(np.concatenate((train_metric, val_metric, test_metric)), epoch)
        ls_plt.plotter()
        checkpoint(epoch)
        end = time.time()
        print("===> Epoch:{} Completed in {:.4f} seconds".format(epoch, end - start))

    print("===> Done Training for Total {:.4f} Epochs".format(num_epochs))
Example #21
File: train.py  Project: pgsrv/bcnn
def main(args):
    fine_tune = not args.no_finetune
    pre_train = True

    lr = args.lr
    input_size = args.input_size

    order = 2
    embedding = args.embedding_dim
    model_names_list = args.model_names_list

    args.exp_dir = os.path.join(args.dataset, args.exp_dir)

    if args.dataset in ['cars', 'aircrafts']:
        keep_aspect = False
    else:
        keep_aspect = True

    if args.dataset in ['aircrafts']:
        crop_from_size = [(x * 256) // 224 for x in input_size]
    else:
        crop_from_size = input_size

    if 'inat' in args.dataset:
        split = {'train': 'train', 'val': 'val'}
    else:
        split = {'train': 'train_val', 'val': 'test'}

    if len(input_size) > 1:
        assert order == len(input_size)

    if not keep_aspect:
        input_size = [(x, x) for x in input_size]
        crop_from_size = [(x, x) for x in crop_from_size]

    exp_root = '../exp'
    checkpoint_folder = os.path.join(exp_root, args.exp_dir, 'checkpoints')

    if not os.path.isdir(checkpoint_folder):
        os.makedirs(checkpoint_folder)

    init_checkpoint_folder = os.path.join(exp_root, args.exp_dir,
                                          'init_checkpoints')

    if not os.path.isdir(init_checkpoint_folder):
        os.makedirs(init_checkpoint_folder)

    # log the setup for the experiments
    args_dict = vars(args)
    with open(os.path.join(exp_root, args.exp_dir, 'args.txt'), 'a') as f:
        f.write(json.dumps(args_dict, sort_keys=True, indent=4))

    # make sure the dataset is ready
    if 'inat' in args.dataset:
        setup_dataset('inat')
    else:
        setup_dataset(args.dataset)

    # ==================  Create data loader ==================================
    data_transforms = {
        'train': [transforms.Compose([
            transforms.Resize(x[0]),
            # transforms.CenterCrop(x[1]),
            transforms.RandomCrop(x[1]),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) \
            for x in zip(crop_from_size, input_size)],
        'val': [transforms.Compose([
            transforms.Resize(x[0]),
            transforms.CenterCrop(x[1]),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) \
            for x in zip(crop_from_size, input_size)],
    }

    if args.dataset == 'cub':
        from CUBDataset import CUBDataset as dataset
    elif args.dataset == 'cars':
        from CarsDataset import CarsDataset as dataset
    elif args.dataset == 'aircrafts':
        from AircraftsDataset import AircraftsDataset as dataset
    elif 'inat' in args.dataset:
        from iNatDataset import iNatDataset as dataset
        if args.dataset == 'inat':
            subset = None
        else:
            subset = args.dataset[len('inat_'):]
            subset = subset[0].upper() + subset[1:]
    else:
        raise ValueError('Unknown dataset: %s' % args.dataset)

    if 'inat' in args.dataset:
        dset = {x: dataset(dset_root['inat'], split[x], subset, \
                        transform=data_transforms[x]) for x in ['train', 'val']}
        dset_test = dataset(dset_root['inat'], 'test', subset, \
                        transform=data_transforms['val'])
    else:
        dset = {x: dataset(dset_root[args.dataset], split[x], \
                        transform=data_transforms[x]) for x in ['train', 'val']}
        dset_test = dataset(dset_root[args.dataset], 'test', \
                        transform=data_transforms['val'])


    dset_loader = {x: torch.utils.data.DataLoader(dset[x],
                batch_size=args.batch_size, shuffle=True, num_workers=8,
                drop_last=drop_last) \
                for x, drop_last in zip(['train', 'val'], [True, False])}

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    #======================= Initialize the model =========================

    # The argument embedding is used only when tensor_sketch is True
    # The argument order is used only when the model parameters are shared
    # between feature extractors
    model = create_bcnn_model(model_names_list,
                              len(dset['train'].classes),
                              args.pooling_method,
                              fine_tune,
                              pre_train,
                              embedding,
                              order,
                              m_sqrt_iter=args.matrix_sqrt_iter,
                              fc_bottleneck=args.fc_bottleneck,
                              proj_dim=args.proj_dim,
                              update_sketch=args.update_sketch,
                              gamma=args.gamma)
    model = model.to(device)
    model = torch.nn.DataParallel(model)

    # Setup the loss fxn
    criterion = nn.CrossEntropyLoss()

    #====================== Initialize optimizer ==============================
    init_model_checkpoint = os.path.join(init_checkpoint_folder,
                                         'checkpoint.pth.tar')
    start_itr = 0
    optim_fc = initialize_optimizer(
        model,
        args.init_lr,
        optimizer='sgd',
        wd=args.init_wd,
        finetune_model=False,
        proj_lr=args.proj_lr,
        proj_wd=args.proj_wd,
    )

    logger_name = 'train_init_logger'
    logger = initializeLogging(
        os.path.join(exp_root, args.exp_dir, 'train_init_history.txt'),
        logger_name)

    model_train_fc = False
    fc_model_path = os.path.join(exp_root, args.exp_dir, 'fc_params.pth.tar')
    if not args.train_from_beginning:
        if os.path.isfile(fc_model_path):
            # load the fc parameters if they are already trained
            print("=> loading fc parameters'{}'".format(fc_model_path))
            checkpoint = torch.load(fc_model_path)
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded fc initialization parameters")
        else:
            if os.path.isfile(init_model_checkpoint):
                # load the checkpoint if it exists
                print(
                    "=> loading checkpoint '{}'".format(init_model_checkpoint))
                checkpoint = torch.load(init_model_checkpoint)
                start_itr = checkpoint['itr']
                model.load_state_dict(checkpoint['state_dict'])
                optim_fc.load_state_dict(checkpoint['optimizer'])
                print("=> loaded checkpoint for the fc initialization")

            # resume training
            model_train_fc = True
    else:
        # Training everything from the beginning
        model_train_fc = True
        start_itr = 0

    if model_train_fc:
        # do the training
        if not fine_tune:
            model.eval()

        model = train_model(model,
                            dset_loader,
                            criterion,
                            optim_fc,
                            batch_size_update=256,
                            epoch=args.init_epoch,
                            logger_name=logger_name,
                            start_itr=start_itr,
                            checkpoint_folder=init_checkpoint_folder,
                            fine_tune=fine_tune)
        shutil.copyfile(
            os.path.join(init_checkpoint_folder, 'model_best.pth.tar'),
            fc_model_path)

    if fine_tune:
        optim = initialize_optimizer(model,
                                     args.lr,
                                     optimizer=args.optimizer,
                                     wd=args.wd,
                                     finetune_model=fine_tune,
                                     beta1=args.beta1,
                                     beta2=args.beta2)

        # if 'inat' not in args.dataset:
        if True:
            scheduler = torch.optim.lr_scheduler.LambdaLR(
                optim, lr_lambda=lambda epoch: 0.1**(epoch // 25))
        else:
            scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                optim, 'max')

        logger_name = 'train_logger'
        logger = initializeLogging(
            os.path.join(exp_root, args.exp_dir, 'train_history.txt'),
            logger_name)

        start_itr = 0
        # load from checkpoint if exist
        if not args.train_from_beginning:
            checkpoint_filename = os.path.join(checkpoint_folder,
                                               'checkpoint.pth.tar')
            if os.path.isfile(checkpoint_filename):
                print("=> loading checkpoint '{}'".format(checkpoint_filename))
                checkpoint = torch.load(checkpoint_filename)
                start_itr = checkpoint['itr']
                model.load_state_dict(checkpoint['state_dict'])
                optim.load_state_dict(checkpoint['optimizer'])
                scheduler.load_state_dict(checkpoint['scheduler'])
                print("=> loaded checkpoint '{}' (iteration{})".format(
                    checkpoint_filename, checkpoint['itr']))

        # parallelize the model if using multiple gpus
        # if torch.cuda.device_count() > 1:

        # Train the model
        model = train_model(
            model,
            dset_loader,
            criterion,
            optim,
            batch_size_update=args.batch_size_update_model,
            # maxItr=args.iteration, logger_name=logger_name,
            epoch=args.epoch,
            logger_name=logger_name,
            checkpoint_folder=checkpoint_folder,
            start_itr=start_itr,
            scheduler=scheduler)
Example #22
    checkpoint = torch.load("fin.pth")
    print(model.state_dict().keys())
    states_to_load = {}
    for name, param in checkpoint["state_dict"].items():
        if name.startswith("conv"):
            states_to_load[name] = param
    for c in states_to_load:
        print(c)
    print("Number of parameter variables to load:", len(states_to_load))
    model_state = model.state_dict()
    print("Number of parameter variables in the model:", len(model_state))

    model_state.update(states_to_load)

    model.load_state_dict(model_state)
    optim.load_state_dict(checkpoint["optimizer"])

    train_part34(model, optim)
    #check_accuracy_part34(loader_val, model)

    input("End Part load")
########################################
model = MyModel()
optim = optim.Adam(model.parameters(), lr=0.001)

for (x, y) in loader_train:
    #print(x.shape)
    model(x)
    break

losses = train_part34(model, optim, epochs=2)
Example #23
    output_shape = v_y.shape[2]
    lstm = mdnLSTM(input_size=input_size,
                   hidden_size=hidden_size,
                   number_mixtures=number_mixtures).to(DEVICE)
    optim = torch.optim.Adam(lstm.parameters(), lr=args.learning_rate)

    model_save_name = 'model'
    if args.load:
        if not os.path.exists(args.model_loadname):
            print("load model: %s does not exist" % args.model_loadname)
            sys.exit()
        else:
            print("loading %s" % args.model_loadname)
            lstm_dict = torch.load(args.model_loadname)
            lstm.load_state_dict(lstm_dict['state_dict'])
            optim.load_state_dict(lstm_dict['optimizer'])
            train_cnts = lstm_dict['train_cnts']
            train_losses = lstm_dict['train_losses']
            test_cnts = lstm_dict['test_cnts']
            test_losses = lstm_dict['test_losses']

    loop(data_loader,
         save_every=save_every,
         num_epochs=args.num_epochs,
         train_losses=train_losses,
         test_losses=test_losses,
         train_cnts=train_cnts,
         test_cnts=test_cnts,
         dummy=args.dummy)

    embed()
Example #24
import torch
import torch.nn as nn
import torch.optim as optim
import os

os.environ['CUDA_VISIBLE_DEVICES'] = "3"


class fnn(nn.Module):
    def __init__(self):
        super(fnn, self).__init__()
        self.linear


model = nn.Sequential(nn.Linear(5, 10), nn.Linear(10, 5))
model = model.cuda()

optim = optim.Adam(model.parameters())

torch.save(optim.state_dict(), 'qq')
optim.load_state_dict(torch.load('qq'))

for state in optim.state:
    print(state)
Example #25
		test_loader = DataLoader(VQADataset(img_feats, test_qa_map, args.use_q==1), batch_size=args.batch_size, shuffle=False, collate_fn=pad_collate_fn)


	model = LSTMTextModel(visual_dim=args.feat_dim, lang_dim=args.wv_dim, hidden_dim=args.hidden_dim, out_dim=1, mlp_dims=[1024, 512, 512], embed_weights=embeds, finetune_embeds=args.finetune_embeds, n_layers=args.n_layers, bidirectional=args.bidir, img2seq=args.img2seq, dropout=args.dropout)
	if args.loss == 'BCE':
		loss_fn = torch.nn.BCEWithLogitsLoss()
	elif args.loss == 'rank':
		loss_fn = torch.nn.MarginRankingLoss(margin=args.margin)
    # only pass in parameters that require grad
	optim = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr, weight_decay=args.wd)

	if args.use_pretrain and args.pretrained_path:
		print('Using pretrained model', args.pretrained_path)
		pretrained = torch.load(args.pretrained_path)
		model.load_state_dict(pretrained['model'])
		optim.load_state_dict(pretrained['optim'])
		# set model lr to new lr
		for param_group in optim.param_groups:
			before = param_group['lr']
			param_group['lr'] = args.lr
			print('optim lr: before={} / after={}'.format(before, args.lr))
	if USE_GPU:
		print("Use GPU")
		model = model.cuda()
		loss_fn = loss_fn.cuda()
	else:
		print("Use CPU")

	if args.mode == 'train':
		best_acc = 0
		stats = {'train_loss':[], 'train_acc':[], 'val_acc':[]}
Example #26
def main():
    #######################################1.data loader###########################################
    train_transforms = transforms.Compose([
        transforms.Scale(
            256),  # rescale the image keeping the original aspect ratio
        transforms.CenterCrop(256),  # we get only the center of that rescaled
        transforms.RandomCrop(224),  # random crop within the center crop 
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    val_transforms = transforms.Compose([
        transforms.Scale(
            256),  # rescale the image keeping the original aspect ratio
        transforms.CenterCrop(224),  # we get only the center of that rescaled
        transforms.ToTensor(),
    ])
    test_transforms = transforms.Compose([
        transforms.Scale(256),
        transforms.CenterCrop(256),
        transforms.ToTensor(),
    ])

    traindata = Data(opts.train_list_str, None, None, train_transforms, None,
                     None, opts.data_path, opts.num_classes)
    valdata = Data(None, None, opts.val_list_str, None, val_transforms, None,
                   opts.data_path, opts.num_classes)
    testdata = Data(None, opts.test_list_str, None, None, None,
                    test_transforms, opts.data_path, opts.num_classes)

    train_loader = DataLoader(traindata,
                              batch_size=opts.batch_size,
                              shuffle=True,
                              num_workers=opts.workers,
                              pin_memory=True)
    print('Training loader prepared')

    val_loader = DataLoader(valdata,
                            batch_size=opts.batch_size,
                            shuffle=False,
                            num_workers=opts.workers,
                            pin_memory=True)
    print('Validation loader prepared')

    #testloader=Dataloader(testdata,testloader,)

    ##########################################2.model#################################################
    model = imvstxt()
    model.visionMLP = torch.nn.DataParallel(model.visionMLP, device_ids=[0, 1])
    if opts.cuda:
        model.cuda()

    ########################################3. train && optimizer###########################################

    # define loss function (criterion) and optimizer
    #cosine similarity between embeddings ->input1 ,input2,target
    if opts.cuda:
        cosine_crit = nn.CosineEmbeddingLoss(0.1).cuda()
    else:
        cosine_crit = nn.CosineEmbeddingLoss(0.1)

    if opts.semantic_reg:
        weights_class = torch.Tensor(opts.num_classes).fill_(1)
        weights_class[0] = 0  # the background class is set to 0, i.e. ignore
        # CrossEntropyLoss combines LogSoftMax and NLLLoss in one single class
        class_crit = nn.CrossEntropyLoss(weight=weights_class).cuda()
        # we will use two different criterion
        criterion = [cosine_crit, class_crit]
    else:
        criterion = cosine_crit

    ##creating different parameter groups
    vision_params = list(map(id, model.visionMLP.parameters()))
    base_params = filter(lambda p: id(p) not in vision_params,
                         model.parameters())

    optim = torch.optim.Adam(
        [{'params': base_params},
         {'params': model.visionMLP.parameters(),
          'lr': opts.lr * opts.freeVision}],
        lr=opts.lr * opts.freeText)

    # if a checkpoint exists
    if opts.resume:
        if os.path.isfile(opts.resume):
            print("=> loading checkpoint '{}'".format(opts.resume))
            checkpoint = torch.load(opts.resume)
            opts.start_epoch = checkpoint['epoch']
            best_val = checkpoint['best_val']
            model.load_state_dict(checkpoint['state_dict'])
            optim.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                opts.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(opts.resume))
            best_val = float('inf')
    else:
        best_val = float('inf')

    # trainer
    trainer = trainer.Trainer(cuda=opts.cuda,
                              model=model,
                              optimizer=optim,
                              criterion=criterion,
                              train_loader=train_loader,
                              val_loader=val_loader,
                              max_iter=opts.max_iter)
    trainer.train(best_val)
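For the resume branch above to find what it expects, the checkpoint on disk has to carry matching keys. A minimal sketch of the assumed saving side (the key names 'epoch', 'best_val', 'state_dict' and 'optimizer' come from the loading code; the function itself is illustrative and assumes torch is imported as in the surrounding code):

def save_checkpoint(model, optim, epoch, best_val, path):
    # Persist exactly the keys that the resume branch reads back.
    torch.save({
        'epoch': epoch,
        'best_val': best_val,
        'state_dict': model.state_dict(),
        'optimizer': optim.state_dict(),
    }, path)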
Example #27
0
    def train(self,
              model,
              dset_type,
              train_loader,
              val_loader,
              resume=False,
              num_epochs=10,
              log_nth=0):
        """
        Train a given model with the provided data.

        Inputs:
        - model: model object initialized from a torch.nn.Module
        - dset_type: data set type, string: SUN or NYU  
        - train_loader: train data in torch.utils.data.DataLoader
        - val_loader: val data in torch.utils.data.DataLoader
        - resume: bool parameter, indicating training mode
        - num_epochs: total number of training epochs
        - log_nth: log training accuracy and loss every nth iteration
        """

        optim = self.optim(model.parameters(), **self.optim_args)
        criterion = self.loss_func
        self._reset_histories()
        iter_per_epoch = len(train_loader)
        val_iter_per_epoch = len(val_loader)
        print(val_iter_per_epoch)

        if resume:
            print("[PROGRESS] Selected Training Mode: RESUME")
            if dset_type == 'NYU':
                if (not self.is_HHA):
                    model_path = '../models/nyu/checkpoint25.pth.tar'
                else:
                    model_path = '../models/nyu_hha/checkpoint25.pth.tar'
            elif dset_type == 'SUN':
                if (not self.is_HHA):
                    model_path = '../models/sun/checkpoint25.pth.tar'
                else:
                    model_path = '../models/sun_hha/checkpoint25.pth.tar'

            if os.path.isfile(model_path):
                print("[PROGRESS] Loading checkpoint: '{}'".format(model_path))
                checkpoint = torch.load(model_path)
                self.best_model = model
                self.start_epoch = checkpoint['epoch']
                self.best_val_acc = checkpoint['best_val_acc']
                model.load_state_dict(checkpoint['state_dict'])
                self.best_model.load_state_dict(checkpoint['best_state_dict'])
                self.train_loss_history = checkpoint['train_loss_hist']
                self.train_acc_history = checkpoint['train_acc_hist']
                self.val_acc_history = checkpoint['val_acc_hist']

                optim.load_state_dict(checkpoint['optimizer'])
                print("[PROGRESS] Checkpoint loaded")
                print("[PROGRESS] Resuming from epoc {}".format(
                    checkpoint['epoch']))
                print("[PROGRESS] TRAINING CONTINUES")
            else:
                print("[ERROR] No checkpoint found at '{}'".format(model_path))
        else:
            print("[PROGRESS] Selected Training Mode: NEW")
            print("[PROGRESS] TRAINING STARTS")

        #print(self.train_loss_history)
        #print(self.train_acc_history)
        #print(self.val_acc_history)

        end_epoch = self.start_epoch + num_epochs
        for epoch in range(self.start_epoch,
                           end_epoch):  # loop over the dataset multiple times
            timestep1 = time()
            self.update_learning_rate(optim, epoch)
            running_loss = 0.0

            model.train()
            for i, data in enumerate(train_loader, 0):
                timestep2 = time()
                rgb_inputs = Variable(data[0].cuda(self.gpu_device))
                d_inputs = Variable(data[1].cuda(self.gpu_device))
                labels = Variable(data[2].cuda(self.gpu_device))

                batch_size = len(rgb_inputs)
                first_it = (i == 0) and (epoch == 0)
                epoch_end = ((i + 1) % iter_per_epoch) == 0

                # zero the parameter gradients
                optim.zero_grad()

                # forward + backward + optimize
                outputs = model(rgb_inputs, d_inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optim.step()
                self.running_loss += loss.data[0]
                running_loss += loss.data[0]

                # print statistics
                if (i + 1) % log_nth == 0 or (
                        i + 1
                ) == iter_per_epoch:  # print every log_nth mini-batches
                    timestep3 = time()
                    running_loss = running_loss / log_nth
                    print(
                        "\r[EPOCH: %d/%d Iter: %d/%d ] Loss: %.3f Best Acc: %.3f LR: %.2e Time: %.2f seconds"
                        % (epoch + 1, end_epoch, i + 1, iter_per_epoch,
                           running_loss, self.best_val_acc,
                           optim.param_groups[0]['lr'],
                           (timestep3 - timestep2)))

                # log and save the accuracies
                if epoch_end:
                    train_scores = []
                    val_scores = []

                    self.running_loss /= (i + 1)
                    # print(self.running_loss)
                    # print(self.running_loss, i+1)
                    self.train_loss_history.append(self.running_loss)

                    _, train_preds = torch.max(outputs, 1)

                    labels_mask = labels > 0
                    labels = labels - 1
                    train_scores.append(
                        np.mean((train_preds == labels
                                 )[labels_mask].data.cpu().numpy()))

                    model.eval()
                    for batch in val_loader:
                        val_rgb_inputs = Variable(batch[0].cuda(
                            self.gpu_device))
                        val_d_inputs = Variable(batch[1].cuda(self.gpu_device))
                        val_labels = Variable(batch[2].cuda(self.gpu_device))
                        val_outputs = model(val_rgb_inputs, val_d_inputs)
                        _, val_preds = torch.max(val_outputs, 1)

                        val_labels_mask = val_labels > 0
                        val_labels = val_labels - 1
                        val_scores.append(
                            np.mean((val_preds == val_labels
                                     )[val_labels_mask].data.cpu().numpy()))

                    train_acc = np.mean(train_scores)
                    val_acc = np.mean(val_scores)

                    self.train_acc_history.append(train_acc)
                    self.val_acc_history.append(val_acc)

                    print(
                        "[EPOCH: %d/%d] TRAIN Acc/Loss: %.3f/%.3f VALIDATION Acc: %.3f "
                        % (epoch + 1, end_epoch, train_acc, self.running_loss,
                           val_acc))
                    self.running_loss = 0.0
                    # Save the checkpoint and update the model
                    is_best = val_acc > self.best_val_acc

                    if is_best:
                        self.best_model = model
                        self.best_val_acc = max(val_acc, self.best_val_acc)

                    if is_best or (epoch + 1) % 10 == 0:
                        self.save_checkpoint(
                            {
                                'epoch': epoch + 1,
                                'state_dict': model.state_dict(),
                                'best_state_dict':
                                self.best_model.state_dict(),
                                'best_val_acc': self.best_val_acc,
                                'train_loss_hist': self.train_loss_history,
                                'train_acc_hist': self.train_acc_history,
                                'val_acc_hist': self.val_acc_history,
                                'optimizer': optim.state_dict()
                            }, is_best, dset_type)
                timestep4 = time()
            #print('Epoch %i took %.2f seconds' %(epoch + 1,timestep4 - timestep1))

        # Calculate IoU and Mean accuracies
        num_classes = val_outputs.size(1)
        print(num_classes)
        val_confusion = np.zeros((num_classes, 3))
        IoU = 0
        mean_acc = 0

        for batch in val_loader:
            val_rgb_inputs = Variable(batch[0].cuda(self.gpu_device))
            val_d_inputs = Variable(batch[1].cuda(self.gpu_device))
            val_labels = Variable(batch[2].cuda(self.gpu_device))
            val_outputs = self.best_model(val_rgb_inputs, val_d_inputs)
            _, val_preds = torch.max(val_outputs, 1)

            val_labels = val_labels - 1

            for i in range(num_classes):
                val_labels_mask = val_labels == i
                val_preds_mask = val_preds == i
                TP = np.sum((val_preds == val_labels
                             )[val_labels_mask].data.cpu().numpy())
                #print TP
                val_confusion[i, 0] += TP
                val_confusion[i, 1] += np.sum(
                    (val_labels
                     == val_labels)[val_labels_mask].data.cpu().numpy()) - TP
                val_confusion[i, 2] += np.sum(
                    (val_preds
                     == val_preds)[val_preds_mask].data.cpu().numpy()) - TP

        for i in range(num_classes):
            # Columns were filled as [TP, FN, FP] above.
            TP, FN, FP = val_confusion[i]
            IoU += TP / (TP + FP + FN)
            mean_acc += TP / (TP + FN)
        IoU /= num_classes
        mean_acc /= num_classes

        print("[FINAL] TRAINING COMPLETED")
        print(
            "        Best VALIDATION Accuracy: %.3f IoU: %.3f Mean Accuracy: %.3f"
            % (self.best_val_acc, IoU, mean_acc))
        print(
            "        Orgnal. FuseNet Accuracy: 0.66  IoU: 0.327 Mean Accuracy: 0.434"
        )
Example #28
0
File: finetune.py Project: pgsrv/bcnn
def main(args):

    log_dir = args.exp_dir+'/log'
    if os.path.exists(log_dir):
        shutil.rmtree(log_dir)
    writer = SummaryWriter(log_dir)

    batch_size = 32
    maxIter = 10000
    split = 'val'
    input_size = 224

    if not os.path.isdir(args.exp_dir):
        os.makedirs(args.exp_dir)
    if not os.path.isdir(os.path.join(args.exp_dir, args.task)):
        os.makedirs(os.path.join(args.exp_dir, args.task))
    checkpoint_folder = os.path.join(args.exp_dir, args.task, 'checkpoints')
    if not os.path.isdir(checkpoint_folder):
        os.makedirs(checkpoint_folder)

    logger_name = 'train_logger'
    logger = initializeLogging(os.path.join(args.exp_dir, args.task, 
            'train_history.txt'), logger_name)

    # ==================  Create data loader ==================================
    data_transforms = {
        'train': transforms.Compose([
            transforms.RandomResizedCrop(input_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'val': transforms.Compose([
            transforms.Resize(input_size), 
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    }

    if args.task == 'cub':
        from CUBDataset import CUBDataset
        image_datasets = {split: CUBDataset(dset_root['cub'], split,
            create_val=True, transform=data_transforms[split]) \
            for split in ['train', 'val']}
    elif args.task == 'cars':
        from CarsDataset import CarsDataset
        image_datasets = {split: CarsDataset(dset_root['cars'], split,
            create_val=True, transform=data_transforms[split]) \
            for split in ['train', 'val']}
    elif args.task == 'aircrafts':
        from AircraftsDataset import AircraftsDataset
        image_datasets = {split: AircraftsDataset(dset_root['aircrafts'], split,
            transform=data_transforms[split]) \
            for split in ['train', 'val']}
    elif args.task[:len('inat_')] == 'inat_':
        from iNatDataset import iNatDataset
        task = args.task
        subtask = task[len('inat_'):]
        subtask = subtask[0].upper() + subtask[1:]
        image_datasets = {split: iNatDataset(dset_root['inat'], split, subtask,
            transform=data_transforms[split]) \
            for split in ['train', 'val']}
    else:
        raise ValueError('Unknown dataset: %s' % args.task)


    num_classes = image_datasets['train'].get_num_classes()

    dataloaders_dict = {x: torch.utils.data.DataLoader(image_datasets[x],
                batch_size=args.batch_size, shuffle=True, num_workers=4) \
                for x in ['train', 'val']}

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    #======================= Initialize the model==============================
    model_ft, input_size = initialize_model(args.model, num_classes, 
                    feature_extract=False, use_pretrained=True)
    if args.stn:
        model_ft = STNet(model_ft)
    model_ft = model_ft.to(device)

    #====================== Initialize optimizer ==============================
    optim = initialize_optimizer(model_ft, feature_extract=False, stn=args.stn)

    # Setup the loss fxn
    criterion = nn.CrossEntropyLoss()

    start_epoch = 0
    # load from checkpoint if exist
    if not args.train_from_beginning:
        checkpoint_filename = os.path.join(checkpoint_folder,
                    'checkpoint.pth.tar')
        if os.path.isfile(checkpoint_filename):
            print("=> loading checkpoint '{}'".format(checkpoint_filename))
            checkpoint = torch.load(checkpoint_filename)
            start_epoch = checkpoint['epoch']
            best_acc= checkpoint['best_acc']
            model_ft.load_state_dict(checkpoint['state_dict'])
            optim.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(checkpoint_filename, checkpoint['epoch']))

    # parallelize the model if using multiple gpus
    if torch.cuda.device_count() > 1:
        model_ft = torch.nn.DataParallel(model_ft)
        
    # Train the model
    model_ft = train_model(model_ft, dataloaders_dict, criterion, optim,
            num_epochs=args.num_epochs, is_inception=(args.model=="inception"),
            logger_name=logger_name, checkpoint_folder=checkpoint_folder,
            start_epoch=start_epoch, writer=writer)

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-e', '--exp_name', default='resnet50_vggface')
    parser.add_argument('-c', '--config', type=int, default=1,
                        choices=configurations.keys())
    parser.add_argument('-d', '--dataset_path', 
                        default='/srv/data1/arunirc/datasets/vggface2')
    parser.add_argument('-m', '--model_path', default=None, 
                        help='Initialize from pre-trained model')
    parser.add_argument('--resume', help='Checkpoint path')
    parser.add_argument('--bottleneck', action='store_true', default=False,
                        help='Add a 512-dim bottleneck layer with L2 normalization')
    args = parser.parse_args()

    # gpu = args.gpu
    cfg = configurations[args.config]
    out = get_log_dir(args.exp_name, args.config, cfg, verbose=False)
    resume = args.resume

    # os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)
    cuda = torch.cuda.is_available()

    torch.manual_seed(1337)
    if cuda:
        torch.cuda.manual_seed(1337)
        torch.backends.cudnn.enabled = True
        torch.backends.cudnn.benchmark = True # enable if all images are same size    



    # -----------------------------------------------------------------------------
    # 1. Dataset
    # -----------------------------------------------------------------------------
    #  Images should be arranged like this:
    #   data_root/
    #       class_1/....jpg..
    #       class_2/....jpg.. 
    #       ......./....jpg.. 
    data_root = args.dataset_path
    kwargs = {'num_workers': 4, 'pin_memory': True} if cuda else {}
    RGB_MEAN = [ 0.485, 0.456, 0.406 ]
    RGB_STD = [ 0.229, 0.224, 0.225 ]
    
    # Data transforms
    # http://pytorch.org/docs/master/torchvision/transforms.html
    train_transform = transforms.Compose([
        transforms.Scale(256),  # smaller side resized
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean = RGB_MEAN,
                             std = RGB_STD),
    ])
    val_transform = transforms.Compose([
        transforms.Scale(256), 
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean = RGB_MEAN,
                             std = RGB_STD),
    ])

    # Data loaders - using PyTorch built-in objects
    #   loader = DataLoaderClass(DatasetClass)
    #   * `DataLoaderClass` is PyTorch provided torch.utils.data.DataLoader
    #   * `DatasetClass` loads samples from a dataset; can be a standard class 
    #     provided by PyTorch (datasets.ImageFolder) or a custom-made class.
    #      - More info: http://pytorch.org/docs/master/torchvision/datasets.html#imagefolder
    traindir = osp.join(data_root, 'train')
    dataset_train = datasets.ImageFolder(traindir, train_transform)
    
    # For unbalanced dataset we create a weighted sampler
    #   *  Balanced class sampling: https://discuss.pytorch.org/t/balanced-sampling-between-classes-with-torchvision-dataloader/2703/3                     
    weights = utils.make_weights_for_balanced_classes(
                dataset_train.imgs, len(dataset_train.classes))                                                                
    weights = torch.DoubleTensor(weights)
    sampler = torch.utils.data.sampler.WeightedRandomSampler(weights, len(weights))

    train_loader = torch.utils.data.DataLoader(
                    dataset_train, batch_size=cfg['batch_size'], 
                    sampler = sampler, **kwargs)

    valdir = osp.join(data_root, 'val-crop')
    val_loader = torch.utils.data.DataLoader(
                    datasets.ImageFolder(valdir, val_transform), 
                    batch_size=cfg['batch_size'], shuffle=False, **kwargs) 

    # print 'dataset classes:' + str(train_loader.dataset.classes)
    num_class = len(train_loader.dataset.classes)
    print('Number of classes: %d' % num_class)



    # -----------------------------------------------------------------------------
    # 2. Model
    # -----------------------------------------------------------------------------
    model = torchvision.models.resnet50(pretrained=False)

    if type(model.fc) == torch.nn.modules.linear.Linear:
        # Check if final fc layer sizes match num_class
        if not model.fc.weight.size()[0] == num_class:
            # Replace last layer
            print(model.fc)
            model.fc = torch.nn.Linear(2048, num_class)
            print(model.fc)
        else:
            pass
    else:
        pass    


    if args.model_path:
        # If existing model is to be loaded from a file
        checkpoint = torch.load(args.model_path) 

        if checkpoint['arch'] == 'DataParallel':
            # if we trained and saved our model using DataParallel
            model = torch.nn.DataParallel(model, device_ids=[0, 1, 2, 3, 4, 5, 6, 7])
            model.load_state_dict(checkpoint['model_state_dict'])
            model = model.module # get network module from inside its DataParallel wrapper
        else:
            model.load_state_dict(checkpoint['model_state_dict'])
    
    # Optionally add a "bottleneck + L2-norm" layer after GAP-layer
    # TODO -- loading a bottleneck model might be a problem .... do some unit-tests
    if args.bottleneck:
        layers = []
        layers.append(torch.nn.Linear(2048, 512))
        layers.append(nn.BatchNorm1d(512))  # 1d: input here is (batch, 512) from the Linear layer
        layers.append(torch.nn.ReLU(inplace=True))
        layers.append(models.NormFeat()) # L2-normalization layer
        layers.append(torch.nn.Linear(512, num_class))
        model.fc = torch.nn.Sequential(*layers)

    # TODO - config options for DataParallel and device_ids
    model = torch.nn.DataParallel(model, device_ids=[0, 1, 2, 3, 4, 5, 6, 7])

    if cuda:
        model.cuda()  

    start_epoch = 0
    start_iteration = 0

    # Loss - cross entropy between predicted scores (unnormalized) and class labels (integers)
    criterion = nn.CrossEntropyLoss()
    if cuda:
        criterion = criterion.cuda()

    if resume:
        # Resume training from last saved checkpoint
        checkpoint = torch.load(resume)
        model.load_state_dict(checkpoint['model_state_dict'])
        start_epoch = checkpoint['epoch']
        start_iteration = checkpoint['iteration']
    else:
        pass


    # -----------------------------------------------------------------------------
    # 3. Optimizer
    # -----------------------------------------------------------------------------
    params = filter(lambda p: p.requires_grad, model.parameters()) 
    # Parameters with p.requires_grad=False are not updated during training.
    # This can be specified when defining the nn.Modules during model creation

    if 'optim' in cfg.keys():
        if cfg['optim'].lower()=='sgd':
            optim = torch.optim.SGD(params,
                        lr=cfg['lr'],
                        momentum=cfg['momentum'],
                        weight_decay=cfg['weight_decay'])

        elif cfg['optim'].lower()=='adam':
            optim = torch.optim.Adam(params,
                        lr=cfg['lr'], weight_decay=cfg['weight_decay'])

        else:
            raise NotImplementedError('Optimizers: SGD or Adam')
    else:
        optim = torch.optim.SGD(params,
                    lr=cfg['lr'],
                    momentum=cfg['momentum'],
                    weight_decay=cfg['weight_decay'])

    if resume:
        optim.load_state_dict(checkpoint['optim_state_dict'])


    # -----------------------------------------------------------------------------
    # [optional] Sanity-check: forward pass with a single batch
    # -----------------------------------------------------------------------------
    DEBUG = False
    if DEBUG:   
        # model = model.cpu()
        dataiter = iter(val_loader)
        img, label = next(dataiter)

        print('Labels: ' + str(label.size()))  # batchSize (class indices)
        print('Input: ' + str(img.size()))     # batchSize x 3 x 224 x 224

        im = img.squeeze().numpy()
        im = im[0,:,:,:]    # get first image in the batch
        im = im.transpose((1,2,0)) # permute to 224x224x3
        im = im * [ 0.229, 0.224, 0.225 ] # unnormalize
        im = im + [ 0.485, 0.456, 0.406 ]
        im[im<0] = 0

        f = plt.figure()
        plt.imshow(im)
        plt.savefig('sanity-check-im.jpg')  # save transformed image in current folder
        inputs = Variable(img)
        if cuda:
            inputs = inputs.cuda()

        model.eval()
        outputs = model(inputs)
        print('Network output: ' + str(outputs.size()))
        model.train()

    else:
        pass


    # -----------------------------------------------------------------------------
    # 4. Training
    # -----------------------------------------------------------------------------
    trainer = train.Trainer(
        cuda=cuda,
        model=model,
        criterion=criterion,
        optimizer=optim,
        init_lr=cfg['lr'],
        lr_decay_epoch = cfg['lr_decay_epoch'],
        train_loader=train_loader,
        val_loader=val_loader,
        out=out,
        max_iter=cfg['max_iteration'],
        interval_validate=cfg.get('interval_validate', len(train_loader)),
    )

    trainer.epoch = start_epoch
    trainer.iteration = start_iteration
    trainer.train()
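A note on the optimizer setup above: only parameters with requires_grad=True are handed to SGD/Adam, so any layer freezing has to happen before the filter(...) line. A small illustrative sketch of that ordering (the 'fc' head prefix is a hypothetical example, not something this script does):

def freeze_all_but_head(model, head_prefix='fc'):
    # Parameters set to requires_grad=False here are skipped by
    # filter(lambda p: p.requires_grad, model.parameters()) and therefore
    # never receive gradient updates.
    for name, param in model.named_parameters():
        param.requires_grad = name.startswith(head_prefix)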
Example #30
0
    input_size = 50
    lr = 1e-4
    rnn = RNN(input_size, hidden_size)
    optim = torch.optim.Adam(rnn.parameters(), lr=lr, weight_decay=1e-6)
    if use_cuda:
        rnn.cuda()
    rnn_epoch = 0
    total_passes = 0

    train_loss = []
    test_loss = []
    if args.rnn_model_loadpath is not None:
        if os.path.exists(args.rnn_model_loadpath):
            rnn_model_dict = torch.load(args.rnn_model_loadpath)
            rnn.load_state_dict(rnn_model_dict['state_dict'])
            optim.load_state_dict(rnn_model_dict['optimizer'])
            rnn_epoch = rnn_model_dict['epoch']
            try:
                total_passes = rnn_model_dict['total_passes']
                train_loss = rnn_model_dict['train_loss']
                test_loss = rnn_model_dict['test_loss']
            except KeyError:
                print("could not load pass counts or loss histories from checkpoint")
            print("loaded rnn from %s at epoch %s" %
                  (args.rnn_model_loadpath, rnn_epoch))
        else:
            print("could not find model at %s" % args.rnn_model_loadpath)
            sys.exit()
    else:
        print("creating new model")
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-e', '--exp_name', default='resnet50_vggface')
    parser.add_argument('-c',
                        '--config',
                        type=int,
                        default=1,
                        choices=configurations.keys())
    parser.add_argument('-d',
                        '--dataset_path',
                        default='/srv/data1/arunirc/datasets/vggface2')
    parser.add_argument('-m',
                        '--model_path',
                        default=None,
                        help='Initialize from pre-trained model')
    parser.add_argument('--resume', help='Checkpoint path')
    parser.add_argument(
        '--bottleneck',
        action='store_true',
        default=False,
        help='Add a 512-dim bottleneck layer with L2 normalization')
    args = parser.parse_args()

    # gpu = args.gpu
    cfg = configurations[args.config]
    out = get_log_dir(args.exp_name, args.config, cfg, verbose=False)
    resume = args.resume

    # os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)
    cuda = torch.cuda.is_available()

    torch.manual_seed(1337)
    if cuda:
        torch.cuda.manual_seed(1337)
        torch.backends.cudnn.enabled = True
        torch.backends.cudnn.benchmark = True  # enable if all images are same size

    # -----------------------------------------------------------------------------
    # 1. Dataset
    # -----------------------------------------------------------------------------
    #  Images should be arranged like this:
    #   data_root/
    #       class_1/....jpg..
    #       class_2/....jpg..
    #       ......./....jpg..
    data_root = args.dataset_path
    kwargs = {'num_workers': 4, 'pin_memory': True} if cuda else {}
    RGB_MEAN = [0.485, 0.456, 0.406]
    RGB_STD = [0.229, 0.224, 0.225]

    # Data transforms
    # http://pytorch.org/docs/master/torchvision/transforms.html
    train_transform = transforms.Compose([
        transforms.Scale(256),  # smaller side resized
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=RGB_MEAN, std=RGB_STD),
    ])
    val_transform = transforms.Compose([
        transforms.Scale(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=RGB_MEAN, std=RGB_STD),
    ])

    # Data loaders - using PyTorch built-in objects
    #   loader = DataLoaderClass(DatasetClass)
    #   * `DataLoaderClass` is PyTorch provided torch.utils.data.DataLoader
    #   * `DatasetClass` loads samples from a dataset; can be a standard class
    #     provided by PyTorch (datasets.ImageFolder) or a custom-made class.
    #      - More info: http://pytorch.org/docs/master/torchvision/datasets.html#imagefolder
    traindir = osp.join(data_root, 'train')
    dataset_train = datasets.ImageFolder(traindir, train_transform)

    # For unbalanced dataset we create a weighted sampler
    #   *  Balanced class sampling: https://discuss.pytorch.org/t/balanced-sampling-between-classes-with-torchvision-dataloader/2703/3
    weights = utils.make_weights_for_balanced_classes(
        dataset_train.imgs, len(dataset_train.classes))
    weights = torch.DoubleTensor(weights)
    sampler = torch.utils.data.sampler.WeightedRandomSampler(
        weights, len(weights))

    train_loader = torch.utils.data.DataLoader(dataset_train,
                                               batch_size=cfg['batch_size'],
                                               sampler=sampler,
                                               **kwargs)

    valdir = osp.join(data_root, 'val-crop')
    val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
        valdir, val_transform),
                                             batch_size=cfg['batch_size'],
                                             shuffle=False,
                                             **kwargs)

    # print 'dataset classes:' + str(train_loader.dataset.classes)
    num_class = len(train_loader.dataset.classes)
    print('Number of classes: %d' % num_class)

    # -----------------------------------------------------------------------------
    # 2. Model
    # -----------------------------------------------------------------------------
    model = torchvision.models.resnet50(pretrained=False)

    if type(model.fc) == torch.nn.modules.linear.Linear:
        # Check if final fc layer sizes match num_class
        if not model.fc.weight.size()[0] == num_class:
            # Replace last layer
            print(model.fc)
            model.fc = torch.nn.Linear(2048, num_class)
            print(model.fc)
        else:
            pass
    else:
        pass

    if args.model_path:
        # If existing model is to be loaded from a file
        checkpoint = torch.load(args.model_path)

        if checkpoint['arch'] == 'DataParallel':
            # if we trained and saved our model using DataParallel
            model = torch.nn.DataParallel(model,
                                          device_ids=[0, 1, 2, 3, 4, 5, 6, 7])
            model.load_state_dict(checkpoint['model_state_dict'])
            model = model.module  # get network module from inside its DataParallel wrapper
        else:
            model.load_state_dict(checkpoint['model_state_dict'])

    # Optionally add a "bottleneck + L2-norm" layer after GAP-layer
    # TODO -- loading a bottleneck model might be a problem .... do some unit-tests
    if args.bottleneck:
        layers = []
        layers.append(torch.nn.Linear(2048, 512))
        layers.append(nn.BatchNorm1d(512))  # 1d: input here is (batch, 512) from the Linear layer
        layers.append(torch.nn.ReLU(inplace=True))
        layers.append(models.NormFeat())  # L2-normalization layer
        layers.append(torch.nn.Linear(512, num_class))
        model.fc = torch.nn.Sequential(*layers)

    # TODO - config options for DataParallel and device_ids
    model = torch.nn.DataParallel(model, device_ids=[0, 1, 2, 3, 4, 5, 6, 7])

    if cuda:
        model.cuda()

    start_epoch = 0
    start_iteration = 0

    # Loss - cross entropy between predicted scores (unnormalized) and class labels (integers)
    criterion = nn.CrossEntropyLoss()
    if cuda:
        criterion = criterion.cuda()

    if resume:
        # Resume training from last saved checkpoint
        checkpoint = torch.load(resume)
        model.load_state_dict(checkpoint['model_state_dict'])
        start_epoch = checkpoint['epoch']
        start_iteration = checkpoint['iteration']
    else:
        pass

    # -----------------------------------------------------------------------------
    # 3. Optimizer
    # -----------------------------------------------------------------------------
    params = filter(lambda p: p.requires_grad, model.parameters())
    # Parameters with p.requires_grad=False are not updated during training.
    # This can be specified when defining the nn.Modules during model creation

    if 'optim' in cfg.keys():
        if cfg['optim'].lower() == 'sgd':
            optim = torch.optim.SGD(params,
                                    lr=cfg['lr'],
                                    momentum=cfg['momentum'],
                                    weight_decay=cfg['weight_decay'])

        elif cfg['optim'].lower() == 'adam':
            optim = torch.optim.Adam(params,
                                     lr=cfg['lr'],
                                     weight_decay=cfg['weight_decay'])

        else:
            raise NotImplementedError('Optimizers: SGD or Adam')
    else:
        optim = torch.optim.SGD(params,
                                lr=cfg['lr'],
                                momentum=cfg['momentum'],
                                weight_decay=cfg['weight_decay'])

    if resume:
        optim.load_state_dict(checkpoint['optim_state_dict'])

    # -----------------------------------------------------------------------------
    # [optional] Sanity-check: forward pass with a single batch
    # -----------------------------------------------------------------------------
    DEBUG = False
    if DEBUG:
        # model = model.cpu()
        dataiter = iter(val_loader)
        img, label = next(dataiter)

        print('Labels: ' + str(label.size()))  # batchSize (class indices)
        print('Input: ' + str(img.size()))  # batchSize x 3 x 224 x 224

        im = img.squeeze().numpy()
        im = im[0, :, :, :]  # get first image in the batch
        im = im.transpose((1, 2, 0))  # permute to 224x224x3
        im = im * [0.229, 0.224, 0.225]  # unnormalize
        im = im + [0.485, 0.456, 0.406]
        im[im < 0] = 0

        f = plt.figure()
        plt.imshow(im)
        plt.savefig(
            'sanity-check-im.jpg')  # save transformed image in current folder
        inputs = Variable(img)
        if cuda:
            inputs = inputs.cuda()

        model.eval()
        outputs = model(inputs)
        print('Network output: ' + str(outputs.size()))
        model.train()

    else:
        pass

    # -----------------------------------------------------------------------------
    # 4. Training
    # -----------------------------------------------------------------------------
    trainer = train.Trainer(
        cuda=cuda,
        model=model,
        criterion=criterion,
        optimizer=optim,
        init_lr=cfg['lr'],
        lr_decay_epoch=cfg['lr_decay_epoch'],
        train_loader=train_loader,
        val_loader=val_loader,
        out=out,
        max_iter=cfg['max_iteration'],
        interval_validate=cfg.get('interval_validate', len(train_loader)),
    )

    trainer.epoch = start_epoch
    trainer.iteration = start_iteration
    trainer.train()