def addParameterUpdateOps(self, model):
    """Attach the parameter-update (optimizer) operators to *model*.

    The optimizer is selected by ``self.optimizer`` via ``OPTIMIZER_DICT``;
    unknown names raise, and unmapped rules fall back to plain SGD.
    """
    # Guard clause: reject optimizer names that are not in the registry.
    if self.optimizer not in OPTIMIZER_DICT:
        raise Exception(
            "Optimizer {} unknown. Valid choices are {}".format(
                self.optimizer, ", ".join(OPTIMIZER_DICT.keys())
            )
        )
    rule = OPTIMIZER_DICT[self.optimizer]

    # These builders all share the same (model, lr) call shape, so a
    # dispatch table keeps the branching compact.
    simple_builders = {
        GRAD_OPTIMIZER.ADAGRAD: build_adagrad,
        GRAD_OPTIMIZER.ADAM: build_adam,
        GRAD_OPTIMIZER.FTRL: build_ftrl,
    }

    if rule == GRAD_OPTIMIZER.SGD:
        # SGD additionally consumes the LR-schedule settings on self.
        build_sgd(
            model,
            self.learning_rate,
            gamma=self.lr_decay,
            policy=self.lr_policy,
            stepsize=1,
        )
    elif rule in simple_builders:
        simple_builders[rule](model, self.learning_rate)
    else:
        # Registry contained a rule we have no builder for: fall back to SGD.
        print("Unrecognized in caffe2 setting, using default SGD", rule)
        build_sgd(model, self.learning_rate)
def AddParameterUpdateOps(
    model, optimizer_input="SGD", base_learning_rate=0.01, *args, **kwargs
):
    """Attach parameter-update (optimizer) operators to *model*.

    Args:
        model: the caffe2 model helper to add update ops to.
        optimizer_input: optimizer name; must be a key of ``OPTIMIZER_DICT``.
        base_learning_rate: base LR passed to the selected builder.
        **kwargs: optional SGD schedule settings ``gamma`` and ``policy``.

    Raises:
        Exception: if ``optimizer_input`` is not a known optimizer name.
    """
    if optimizer_input not in OPTIMIZER_DICT:
        raise Exception(
            "Optimizer {} unknown. Valid choices are {}"
            .format(optimizer_input, ', '.join(OPTIMIZER_DICT.keys()))
        )
    optimizer_rule = OPTIMIZER_DICT[optimizer_input]
    if optimizer_rule == GRAD_OPTIMIZER.SGD:
        # Fix: previously this read kwargs['gamma'] / kwargs['policy']
        # unconditionally, raising KeyError for callers that did not pass
        # them even though the signature advertises them as optional.
        # Fall back to a fixed-LR schedule (gamma is then unused by the
        # LearningRate op — TODO confirm against caffe2 build_sgd defaults).
        build_sgd(
            model,
            base_learning_rate,
            gamma=kwargs.get('gamma', 1.0),
            policy=kwargs.get('policy', 'fixed'),
            stepsize=1
        )
    elif optimizer_rule == GRAD_OPTIMIZER.ADAGRAD:
        build_adagrad(model, base_learning_rate)
    elif optimizer_rule == GRAD_OPTIMIZER.ADAM:
        build_adam(model, base_learning_rate)
    elif optimizer_rule == GRAD_OPTIMIZER.FTRL:
        build_ftrl(model, base_learning_rate)
    else:
        # Registry maps to a rule without a dedicated builder: default to SGD.
        print(
            "Unrecognized in caffe2 setting, using default SGD",
            optimizer_rule
        )
        build_sgd(model, base_learning_rate)
def AddTrainingOperators(model, softmax, label):
    """Adds training operators to the model.

    Builds cross-entropy loss from *softmax* vs *label*, records accuracy,
    adds gradient operators, and attaches a step-decay Adagrad optimizer.
    """
    cross_entropy = model.LabelCrossEntropy([softmax, label], 'xent')
    avg_loss = model.AveragedLoss(cross_entropy, "loss")
    # Track accuracy alongside the loss for monitoring.
    AddAccuracy(model, softmax, label)
    model.AddGradientOperators([avg_loss])
    # Adagrad with a stepwise LR decay (gamma applied every step).
    optimizer.build_adagrad(
        model,
        base_learning_rate=1e-1,
        policy="step",
        stepsize=1,
        gamma=0.9999,
    )
def build_optimizer(self, model, **kwargs):
    """Build a row-wise Adagrad optimizer with LARS enabled.

    Sets ``self._skip_gpu`` so the GPU variant of this test path is skipped
    (presumably unsupported for rowWise — TODO confirm).
    """
    self._skip_gpu = True
    return build_adagrad(
        model,
        base_learning_rate=1.0,
        lars=0.5,
        rowWise=True,
        **kwargs
    )
def main(opt_name):
    """Build and run one step of a tiny 2-layer MLP with the chosen optimizer.

    Args:
        opt_name: one of "manual", "sgd", "adagrad", "adam".

    Raises:
        ValueError: if *opt_name* is not a supported optimizer name.
    """
    workspace.FeedBlob('input', np.random.randn(2, 16).astype(np.float32))
    workspace.FeedBlob('label', np.array([0, 1]).astype(np.float32))
    helper = ModelHelper("sample_model")
    fc = brew.fc(helper, "input", "fc", dim_in=16, dim_out=8)
    relu = helper.Relu(fc, 'relu')
    fc2 = brew.fc(helper, relu, "fc2", dim_in=8, dim_out=1)
    # Labels need a trailing singleton dim to match fc2's output shape.
    label_ex = helper.ExpandDims("label", "label_ex", dims=[1])
    xent = helper.SigmoidCrossEntropyWithLogits([fc2, label_ex], 'xent')
    loss = helper.AveragedLoss(xent, 'loss')
    helper.AddGradientOperators([loss])
    if opt_name == "manual":
        # Hand-rolled SGD: param += LR * grad (LR is negative).
        ONE = helper.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)
        LR = helper.param_init_net.ConstantFill([], "LR", shape=[1], value=-0.03)
        for param in helper.params:
            param_grad = helper.param_to_grad[param]
            helper.WeightedSum([param, ONE, param_grad, LR], param)
    elif opt_name == "sgd":
        optimizer.build_sgd(helper, 0.03)
    elif opt_name == "adagrad":
        optimizer.build_adagrad(helper, 0.03)
    # caffe2 does not support rowwise adagrad for dense parameters
    # caffe2 seems not have lamb support yet
    elif opt_name == "adam":
        optimizer.build_adam(helper, 0.03)
    else:
        # Fix: was `assert False, ...` — asserts are stripped under `python -O`,
        # so invalid names would silently fall through. Raise explicitly instead.
        raise ValueError(f"Unsupported optimizer {opt_name}")
    workspace.RunNetOnce(helper.param_init_net)
    workspace.RunNetOnce(helper.net)
    # NOTE(review): deliberate breakpoint for interactively inspecting blobs
    # after the run — remove if this script is ever used non-interactively.
    import pdb
    pdb.set_trace()
def build_optimizer(self, model, **kwargs):
    """Build a plain Adagrad optimizer with unit base learning rate.

    GPU runs are not skipped for this configuration.
    """
    self._skip_gpu = False
    return build_adagrad(model, base_learning_rate=1.0, **kwargs)
def add_training_operators(self, model, output, label, device_opts, loss,
                           opt_type, base_learning_rate, policy, stepsize,
                           epsilon, beta1, beta2, gamma, momentum):
    """Add loss, gradient, and optimizer operators to *model*.

    Args:
        model: caffe2 model helper to extend.
        output: network output blob (logits/predictions).
        label: ground-truth label blob.
        device_opts: device options applied via a DeviceScope.
        loss: loss type as a string, 'cross_entropy' or 'euclidean'.
            NOTE: this parameter is rebound below to the resulting loss blob.
        opt_type: 'adam', 'sgd', 'rmsprop', or 'adagrad'.
        base_learning_rate: base LR for the optimizer builder.
        policy: LR schedule, 'step', 'fixed', or 'inv'.
        stepsize: decay step interval (used only when policy == 'step').
        epsilon, beta1, beta2: Adam/RMSProp/Adagrad numerical parameters.
        gamma: LR decay factor (passed as `decay` for rmsprop/adagrad).
        momentum: momentum for sgd/rmsprop.
    """
    with core.DeviceScope(device_opts):
        if loss == 'cross_entropy':
            xent = model.LabelCrossEntropy([output, label], 'xent')
            loss = model.AveragedLoss(xent, "loss")
        elif loss == 'euclidean':
            dist = model.net.SquaredL2Distance([label, output], 'dist')
            loss = dist.AveragedLoss([], ['loss'])
        # `loss` is now the loss blob (no longer the selector string).
        model.AddGradientOperators([loss])
        if opt_type == 'adam':
            # 'step' needs an explicit stepsize; 'fixed'/'inv' do not.
            if policy == 'step':
                opt = optimizer.build_adam(
                    model, base_learning_rate=base_learning_rate,
                    policy=policy, stepsize=stepsize, beta1=beta1,
                    beta2=beta2, epsilon=epsilon)
            elif policy == 'fixed' or policy == 'inv':
                opt = optimizer.build_adam(
                    model, base_learning_rate=base_learning_rate,
                    policy=policy, beta1=beta1, beta2=beta2,
                    epsilon=epsilon)
            print("adam optimizer selected")
        elif opt_type == 'sgd':
            if policy == 'step':
                opt = optimizer.build_sgd(
                    model, base_learning_rate=base_learning_rate,
                    policy=policy, stepsize=stepsize, gamma=gamma,
                    momentum=momentum)
            elif policy == 'fixed' or policy == 'inv':
                opt = optimizer.build_sgd(
                    model, base_learning_rate=base_learning_rate,
                    policy=policy, gamma=gamma, momentum=momentum)
            print("sgd optimizer selected")
        elif opt_type == 'rmsprop':
            if policy == 'step':
                opt = optimizer.build_rms_prop(
                    model, base_learning_rate=base_learning_rate,
                    policy=policy, stepsize=stepsize, decay=gamma,
                    momentum=momentum, epsilon=epsilon)
            elif policy == 'fixed' or policy == 'inv':
                opt = optimizer.build_rms_prop(
                    model, base_learning_rate=base_learning_rate,
                    policy=policy, decay=gamma, momentum=momentum,
                    epsilon=epsilon)
            print("rmsprop optimizer selected")
        elif opt_type == 'adagrad':
            if policy == 'step':
                opt = optimizer.build_adagrad(
                    model, base_learning_rate=base_learning_rate,
                    policy=policy, stepsize=stepsize, decay=gamma,
                    epsilon=epsilon)
            elif policy == 'fixed' or policy == 'inv':
                opt = optimizer.build_adagrad(
                    model,
                    base_learning_rate=base_learning_rate, policy=policy,
                    decay=gamma, epsilon=epsilon)
            print("adagrad optimizer selected")
        # NOTE(review): if `policy` is none of 'step'/'fixed'/'inv', no
        # optimizer is built and the "... selected" message still prints;
        # likewise an unknown `opt_type` silently adds no update ops.
        # `opt` is assigned but unused — confirm whether callers expect
        # a return value.
def build_optimizer(self, model, **kwargs):
    """Create an Adagrad optimizer (base LR 1.0) for *model*.

    This configuration runs on GPU as well, so the skip flag is cleared.
    """
    self._skip_gpu = False
    base_lr = 1.0
    return build_adagrad(model, base_learning_rate=base_lr, **kwargs)
def build_optimizer(self, model):
    """Attach a plain Adagrad optimizer with base learning rate 1.0."""
    base_lr = 1.0
    build_adagrad(model, base_learning_rate=base_lr)