Example #1
def build_optim_bert(args, model, checkpoint):
    """ Build optimizer """

    if checkpoint is not None:
        optim = checkpoint['optims'][0]
        saved_optimizer_state_dict = optim.optimizer.state_dict()
        optim.optimizer.load_state_dict(saved_optimizer_state_dict)
        if args.visible_gpus != '-1':
            for state in optim.optimizer.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = v.cuda()

        if (optim.method == 'adam') and (len(optim.optimizer.state) < 1):
            raise RuntimeError(
                "Error: loaded Adam optimizer from existing model" +
                " but optimizer state is empty")

    else:
        optim = Optimizer(args.optim,
                          args.lr_bert,
                          args.max_grad_norm,
                          beta1=args.beta1,
                          beta2=args.beta2,
                          decay_method='noam',
                          warmup_steps=args.warmup_steps_bert)

    params = [(n, p) for n, p in list(model.named_parameters())
              if n.startswith('encoder.model')]
    optim.set_parameters(params)

    return optim
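What sets this BERT-specific builder apart from the generic build_optim variants below is the name filter at the end: only parameters whose names start with 'encoder.model' are handed to this optimizer, so the pretrained encoder can get its own learning rate (args.lr_bert) and warmup schedule. A minimal, self-contained sketch of that prefix filtering, using a hypothetical toy model (TinySummarizer and TinyEncoder are illustrative names, not part of the example above):

import torch.nn as nn

class TinyEncoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Linear(4, 4)   # stands in for the pretrained BERT weights

class TinySummarizer(nn.Module):
    def __init__(self):
        super().__init__()
        self.encoder = TinyEncoder()
        self.decoder = nn.Linear(4, 2)

toy = TinySummarizer()
# Parameters that would go to the BERT optimizer built above
bert_params = [(n, p) for n, p in toy.named_parameters()
               if n.startswith('encoder.model')]
# Everything else (e.g. the decoder) would go to a separate optimizer
rest_params = [(n, p) for n, p in toy.named_parameters()
               if not n.startswith('encoder.model')]
print([n for n, _ in bert_params])   # ['encoder.model.weight', 'encoder.model.bias']
print([n for n, _ in rest_params])   # ['decoder.weight', 'decoder.bias']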
Example #2
def build_optim(model, opt, checkpoint):
    """ Build optimizer """
    saved_optimizer_state_dict = None

    if opt.train_from:
        optim = checkpoint['optim']
        # We need to save a copy of optim.optimizer.state_dict() so that we can
        # restore the optimizer state later on, in Stage 2 of this method, because
        # optim.set_parameters(model.parameters()) will overwrite optim.optimizer,
        # and with it the values stored in optim.optimizer.state_dict().
        saved_optimizer_state_dict = optim.optimizer.state_dict()
    else:
        optim = Optimizer(opt.optim,
                          opt.learning_rate,
                          opt.max_grad_norm,
                          lr_decay=opt.learning_rate_decay,
                          start_decay_steps=opt.start_decay_steps,
                          decay_steps=opt.decay_steps,
                          beta1=opt.adam_beta1,
                          beta2=opt.adam_beta2,
                          adagrad_accum=opt.adagrad_accumulator_init,
                          decay_method=opt.decay_method,
                          warmup_steps=opt.warmup_steps)

    # Stage 1:
    # Essentially, optim.set_parameters (re-)creates an optimizer using
    # model.parameters() as the parameters that will be stored in the
    # optim.optimizer.param_groups field of the torch optimizer class.
    # Importantly, this method does not yet load the optimizer state:
    # it builds a new optimizer with an empty optimizer state and with
    # parameters taken from the model.
    optim.set_parameters(model.named_parameters())

    if opt.train_from:
        # Stage 2: In this stage, which is only performed when loading an
        # optimizer from a checkpoint, we load the saved_optimizer_state_dict
        # into the re-created optimizer, to set the optim.optimizer.state
        # field, which was previously empty, using the optimizer state saved
        # in the "saved_optimizer_state_dict" variable above.
        # See also: https://github.com/pytorch/pytorch/issues/2830
        optim.optimizer.load_state_dict(saved_optimizer_state_dict)
        # Convert back the state values to cuda type if applicable
        if use_gpu(opt):
            for state in optim.optimizer.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = v.cuda()

        # We want to make sure that we indeed have a non-empty optimizer state
        # when we load an existing model. This should at least be the case
        # for Adam, which saves "exp_avg" and "exp_avg_sq" state
        # (exponential moving averages of the gradient and squared gradient values).
        if (optim.method == 'adam') and (len(optim.optimizer.state) < 1):
            raise RuntimeError(
                "Error: loaded Adam optimizer from existing model" +
                " but optimizer state is empty")

    return optim
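The Stage 1 / Stage 2 comments describe a general PyTorch restore pattern rather than anything specific to this Optimizer wrapper: first rebuild the optimizer around the model's parameters (its state is empty at that point), then load the saved state dict into it, and finally move any state tensors back to the GPU. A self-contained sketch of the same pattern using plain torch.optim.Adam (the toy model is illustrative only):

import torch
import torch.nn as nn

model = nn.Linear(4, 2)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
model(torch.randn(8, 4)).sum().backward()
optimizer.step()                                  # Adam now has "exp_avg"/"exp_avg_sq" state
saved_state = optimizer.state_dict()              # what a checkpoint would hold

# Stage 1: re-create a fresh optimizer over the model's parameters; state is empty.
restored = torch.optim.Adam(model.parameters(), lr=1e-3)
assert len(restored.state) == 0

# Stage 2: load the saved state into the re-created optimizer,
# then push any state tensors to the GPU if one is in use.
restored.load_state_dict(saved_state)
if torch.cuda.is_available():
    for state in restored.state.values():
        for k, v in state.items():
            if torch.is_tensor(v):
                state[k] = v.cuda()
assert len(restored.state) > 0                    # the same guard the example enforces for Adam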
Example #3
def build_optim(model, opt, checkpoint):
    """ Build optimizer """
    saved_optimizer_state_dict = None

    if opt.train_from:
        optim = checkpoint['optim']
        saved_optimizer_state_dict = optim.optimizer.state_dict()
    else:
        optim = Optimizer(
            opt.optim, opt.learning_rate, opt.max_grad_norm,
            lr_decay=opt.learning_rate_decay,
            start_decay_steps=opt.start_decay_steps,
            decay_steps=opt.decay_steps,
            beta1=opt.adam_beta1,
            beta2=opt.adam_beta2,
            adagrad_accum=opt.adagrad_accumulator_init,
            decay_method=opt.decay_method,
            warmup_steps=opt.warmup_steps)

    optim.set_parameters(model.named_parameters())

    if opt.train_from:
        optim.optimizer.load_state_dict(saved_optimizer_state_dict)
        if use_gpu(opt):
            for state in optim.optimizer.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = v.cuda()

        if (optim.method == 'adam') and (len(optim.optimizer.state) < 1):
            raise RuntimeError(
                "Error: loaded Adam optimizer from existing model" +
                " but optimizer state is empty")

    return optim
Example #4
def build_optim(args, model, checkpoint):
    """ Build optimizer """
    optim = Optimizer(args.optim,
                      args.lr,
                      args.max_grad_norm,
                      beta1=args.beta1,
                      beta2=args.beta2,
                      decay_method=args.decay_method,
                      warmup_steps=args.warmup_steps,
                      model_size=args.enc_hidden_size)

    optim.set_parameters(list(model.named_parameters()))

    if args.train_from != '':
        optim.optimizer.load_state_dict(checkpoint['optim'])
        if args.visible_gpu != '-1':
            for state in optim.optimizer.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = v.cuda()

        if (optim.method == 'adam') and (len(optim.optimizer.state) < 1):
            raise RuntimeError(
                "Error: loaded Adam optimizer from existing model" +
                " but optimizer state is empty")

    return optim
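In this variant the schedule is controlled by decay_method, warmup_steps and model_size, which in these codebases usually means the "noam" schedule from the Transformer paper: the rate grows linearly during warmup and then decays with the inverse square root of the step, scaled by model_size ** -0.5. The sketch below shows that usual formula; the exact scaling applied by this particular Optimizer class is an assumption here:

def noam_lr(step, base_lr, model_size, warmup_steps):
    # Typical "noam" rate: linear warmup, then inverse-square-root decay.
    return base_lr * (model_size ** -0.5) * min(step ** -0.5,
                                                step * warmup_steps ** -1.5)

for step in (1, 1000, 8000, 16000, 64000):
    print(step, noam_lr(step, base_lr=2.0, model_size=768, warmup_steps=8000))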
Example #5
def build_optim(model, opt, checkpoint):
    """ Build optimizer """
    saved_optimizer_state_dict = None

    if opt.train_from:
        optim = checkpoint["optim"]
        # We need to save a copy of optim.optimizer.state_dict() so that we can
        # restore the optimizer state later on, in Stage 2 of this method, because
        # optim.set_parameters(model.parameters()) will overwrite optim.optimizer,
        # and with it the values stored in optim.optimizer.state_dict().
        saved_optimizer_state_dict = optim.optimizer.state_dict()
    else:
        optim = Optimizer(
            opt.optim,
            opt.learning_rate,
            opt.max_grad_norm,
            lr_decay=opt.learning_rate_decay,
            start_decay_steps=opt.start_decay_steps,
            decay_steps=opt.decay_steps,
            beta1=opt.adam_beta1,
            beta2=opt.adam_beta2,
            adagrad_accum=opt.adagrad_accumulator_init,
            decay_method=opt.decay_method,
            warmup_steps=opt.warmup_steps,
        )

    optim.set_parameters(model.named_parameters())

    if opt.train_from:
        optim.optimizer.load_state_dict(saved_optimizer_state_dict)
        if use_gpu(opt):
            for state in optim.optimizer.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = v.cuda()

        if (optim.method == "adam") and (len(optim.optimizer.state) < 1):
            raise RuntimeError(
                "Error: loaded Adam optimizer from existing model"
                + " but optimizer state is empty"
            )

    return optim
Example #6
def create_optimizer(model_or_iterable, options=None):
    if options is None:
        options = copy.deepcopy(onmt.standard_options.stdOptions)
    if not isinstance(options, dict):
        options = mhf.convertToDictionary(options)
    options = handle_options(options)
    options = mhf.convertToNamedTuple(options)
    optim = onmt.Optim(options.optim,
                       options.learning_rate,
                       options.max_grad_norm,
                       lr_decay=options.learning_rate_decay,
                       start_decay_at=options.start_decay_at,
                       opt=options)

    try:
        optim.set_parameters(model_or_iterable.parameters())
    except AttributeError:
        optim.set_parameters(model_or_iterable)
    return optim
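The try/except here is simple duck-typing: if the argument exposes a .parameters() method it is treated as a model, otherwise it is assumed to already be an iterable of parameters. A simplified stand-alone sketch of the same idea (collect_parameters is a hypothetical helper, not part of onmt):

import torch.nn as nn

def collect_parameters(model_or_iterable):
    try:
        # A module: ask it for its parameters.
        return list(model_or_iterable.parameters())
    except AttributeError:
        # Already an iterable of parameters.
        return list(model_or_iterable)

net = nn.Linear(3, 3)
print(len(collect_parameters(net)))               # called with a module -> 2
print(len(collect_parameters(net.parameters())))  # called with an iterable -> 2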
Example #7
def build_optim(model, args):
    """ Build optimizer """

    optim = Optimizer(args.optim,
                      args.learning_rate,
                      args.max_grad_norm,
                      lr_decay=args.learning_rate_decay,
                      start_decay_steps=args.start_decay_steps,
                      decay_steps=args.decay_steps,
                      beta1=args.adam_beta1,
                      beta2=args.adam_beta2,
                      adagrad_accum=args.adagrad_accumulator_init,
                      decay_method=args.decay_method,
                      warmup_steps=args.warmup_steps,
                      model_size=args.rnn_size)

    optim.set_parameters(model)

    return optim
Example #8
def build_optim(opt, model):
    """ Build optimizer """
    optim = Optimizer(opt.optim,
                      opt.learning_rate,
                      opt.max_grad_norm,
                      lr_decay=opt.learning_rate_decay,
                      start_decay_steps=opt.start_decay_steps,
                      decay_steps=opt.decay_steps,
                      beta1=opt.adam_beta1,
                      beta2=opt.adam_beta2,
                      adagrad_accum=opt.adagrad_accumulator_init,
                      decay_method=opt.decay_method,
                      warmup_steps=opt.warmup_steps,
                      model_size=opt.encoder_size)

    parameters = [[n, p] for n, p in model.named_parameters()
                  if p.requires_grad]
    optim.set_parameters(parameters)

    return optim
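Unlike the earlier variants, this one filters out frozen parameters before handing them to the optimizer. A minimal sketch of that requires_grad filtering on a hypothetical toy model:

import torch.nn as nn

toy = nn.Sequential(nn.Linear(4, 4), nn.Linear(4, 2))
for p in toy[0].parameters():        # freeze the first layer
    p.requires_grad = False

trainable = [[n, p] for n, p in toy.named_parameters() if p.requires_grad]
print([n for n, _ in trainable])     # ['1.weight', '1.bias']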
Example #9
def build_optim(args, model, checkpoint, generation=False):
    """ Build optimizer """

    if checkpoint is not None:
        optim = checkpoint['optims'][0]
        saved_optimizer_state_dict = optim.optimizer.state_dict()
        optim.optimizer.load_state_dict(saved_optimizer_state_dict)
        if args.visible_gpus != '-1':
            for state in optim.optimizer.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = v.cuda()

        if (optim.method == 'adam') and (len(optim.optimizer.state) < 1):
            raise RuntimeError(
                "Error: loaded Adam optimizer from existing model" +
                " but optimizer state is empty")

    else:
        if generation:
            optim = Optimizer(args.optim,
                              args.lr,
                              args.max_grad_norm,
                              beta1=args.beta1,
                              beta2=args.beta2,
                              decay_method='noam',
                              warmup_steps=args.warmup_steps)
        else:
            optim = Optimizer(args.optim,
                              args.lr,
                              args.max_grad_norm,
                              beta1=args.beta1,
                              beta2=args.beta2,
                              start_decay_steps=1,
                              decay_steps=10,
                              lr_decay=0.9999)

    optim.set_parameters(list(model.named_parameters()))

    return optim
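In the non-generation branch the schedule is driven by lr_decay, start_decay_steps and decay_steps instead of a warmup. The usual convention for these arguments, assumed in the sketch below (the exact rule is defined by the Optimizer class), is to multiply the learning rate by lr_decay every decay_steps steps once start_decay_steps has been reached:

def decayed_lr(step, base_lr, lr_decay, start_decay_steps, decay_steps):
    # Assumed rule: no decay before start_decay_steps, then one lr_decay
    # multiplication for every decay_steps steps elapsed since then.
    if step < start_decay_steps:
        return base_lr
    n_decays = (step - start_decay_steps) // decay_steps + 1
    return base_lr * (lr_decay ** n_decays)

for step in (1, 100, 10000, 100000):
    print(step, decayed_lr(step, base_lr=0.05, lr_decay=0.9999,
                           start_decay_steps=1, decay_steps=10))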