Example #1
def AddParameterUpdateOps(
    model, optimizer_input="SGD", base_learning_rate=0.01, *args, **kwargs
):
    if optimizer_input not in OPTIMIZER_DICT:
        raise Exception(
            "Optimizer {} unknown. Valid choices are {}"
            .format(optimizer_input, ', '.join(OPTIMIZER_DICT.keys()))
        )
    optimizer_rule = OPTIMIZER_DICT[optimizer_input]

    if optimizer_rule == GRAD_OPTIMIZER.SGD:
        build_sgd(
            model,
            base_learning_rate,
            gamma=kwargs['gamma'],
            policy=kwargs['policy'],
            stepsize=1
        )
    elif optimizer_rule == GRAD_OPTIMIZER.ADAGRAD:
        build_adagrad(model, base_learning_rate)
    elif optimizer_rule == GRAD_OPTIMIZER.ADAM:
        build_adam(model, base_learning_rate)
    elif optimizer_rule == GRAD_OPTIMIZER.FTRL:
        build_ftrl(model, base_learning_rate)
    else:
        print(
            "Unrecognized in caffe2 setting, using default SGD", optimizer_rule
        )
        build_sgd(model, base_learning_rate)
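
A minimal usage sketch for the helper above (hypothetical call; it assumes OPTIMIZER_DICT maps "SGD" to GRAD_OPTIMIZER.SGD and that model already has its gradient operators added). Note that the SGD branch reads gamma and policy straight out of **kwargs, so both must be supplied:

# Hypothetical call of AddParameterUpdateOps; gamma and policy are required
# because the SGD branch indexes kwargs['gamma'] and kwargs['policy'] directly.
AddParameterUpdateOps(
    model,
    optimizer_input="SGD",
    base_learning_rate=0.01,
    gamma=0.9999,    # LR decay factor used by the 'step' policy
    policy="step",
)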
Example #2
    def addParameterUpdateOps(self, model):
        if self.optimizer not in OPTIMIZER_DICT:
            raise Exception(
                "Optimizer {} unknown. Valid choices are {}".format(
                    self.optimizer, ", ".join(OPTIMIZER_DICT.keys())))
        optimizer_rule = OPTIMIZER_DICT[self.optimizer]

        if optimizer_rule == GRAD_OPTIMIZER.SGD:
            build_sgd(
                model,
                self.learning_rate,
                gamma=self.lr_decay,
                policy=self.lr_policy,
                stepsize=1,
            )
        elif optimizer_rule == GRAD_OPTIMIZER.ADAGRAD:
            build_adagrad(model, self.learning_rate)
        elif optimizer_rule == GRAD_OPTIMIZER.ADAM:
            build_adam(model, self.learning_rate)
        elif optimizer_rule == GRAD_OPTIMIZER.FTRL:
            build_ftrl(model, self.learning_rate)
        else:
            print("Unrecognized in caffe2 setting, using default SGD",
                  optimizer_rule)
            build_sgd(model, self.learning_rate)
Example #3
def AddOptimizerOps_adam(model):
    # Use Adam as the optimization function.
    # base_learning_rate is assumed to be defined at module level in the original source.
    optimizer.build_adam(
        model,
        base_learning_rate=base_learning_rate,
        # policy="step",
        # momentum=0.9,
        # weight_decay=0.004
    )
Example #4
    def build_optimizer(self, model, **kwargs):
        self._skip_gpu = False
        kwargs['beta1'] = 0.0
        return build_adam(model,
                          base_learning_rate=0.1,
                          use_smart_decay=True,
                          **kwargs)
Example #5
def add_training_operators(output_segmentation, model, device_opts):

    with core.DeviceScope(device_opts):
        loss = model.SigmoidCrossEntropyWithLogits(
            [output_segmentation, "gt_segmentation"], 'loss')
        avg_loss = model.AveragedLoss(loss, "avg_loss")
        model.AddGradientOperators([loss])
        opt = optimizer.build_adam(model, base_learning_rate=0.01)
Example #6
def main(opt_name):
    workspace.FeedBlob('input', np.random.randn(2, 16).astype(np.float32))
    workspace.FeedBlob('label', np.array([0, 1]).astype(np.float32))

    helper = ModelHelper("sample_model")
    fc = brew.fc(helper, "input", "fc", dim_in=16, dim_out=8)
    relu = helper.Relu(fc, 'relu')
    fc2 = brew.fc(helper, relu, "fc2", dim_in=8, dim_out=1)
    label_ex = helper.ExpandDims("label", "label_ex", dims=[1])
    xent = helper.SigmoidCrossEntropyWithLogits([fc2, label_ex], 'xent')
    loss = helper.AveragedLoss(xent, 'loss')
    helper.AddGradientOperators([loss])

    if opt_name == "manual":
        ONE = helper.param_init_net.ConstantFill([],
                                                 "ONE",
                                                 shape=[1],
                                                 value=1.0)
        LR = helper.param_init_net.ConstantFill([],
                                                "LR",
                                                shape=[1],
                                                value=-0.03)

        for param in helper.params:
            param_grad = helper.param_to_grad[param]
            helper.WeightedSum([param, ONE, param_grad, LR], param)
    elif opt_name == "sgd":
        optimizer.build_sgd(helper, 0.03)
    elif opt_name == "adagrad":
        optimizer.build_adagrad(helper, 0.03)
    # caffe2 does not support rowwise adagrad for dense parameters
    # caffe2 does not seem to have lamb support yet
    elif opt_name == "adam":
        optimizer.build_adam(helper, 0.03)
    else:
        assert False, f"Unsupported optimizer {opt_name}"

    workspace.RunNetOnce(helper.param_init_net)
    workspace.RunNetOnce(helper.net)

    import pdb
    pdb.set_trace()
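
As a follow-up to the example above, a small sketch of how the run could be inspected without the debugger (assuming the same blob names as above; workspace.FetchBlob returns a blob as a NumPy array):

# Sketch: inspect results instead of dropping into pdb (blob names as above).
loss_val = workspace.FetchBlob('loss')   # scalar loss produced by AveragedLoss
fc2_out = workspace.FetchBlob('fc2')     # logits from the second FC layer
print(f"loss={float(loss_val)}, logits shape={fc2_out.shape}")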
Example #7
    def build_optimizer(self, model, **kwargs):
        self._skip_gpu = False
        return build_adam(model, base_learning_rate=0.1, **kwargs)
Example #8
    def build_net(
            self,
            base_learning_rate=0.1  # base_learning_rate * seq_size
    ):
        log.debug('>>> Building Mask-RNN')
        model = model_helper.ModelHelper(name="mask_rnn")

        hidden_init = model.net.AddExternalInputs('hidden_init', )
        # TODO: do I still need this?
        model.net.AddExternalInputs(
            'input_blob',
            'seq_lengths',
            'target',
        )
        # Add external inputs (read directly from the database)
        # the dimension of class_target_mask: [BATCH_SIZE, SEQ_LEN, 1]
        # the dimension of regre_target_mask: [BATCH_SIZE, SEQ_LEN, regre_output_dim]
        (seq_lengths, _input_blob, _class_target, _regre_target,
         _class_target_mask, _regre_target_mask) = build_input_reader(
             model,
             self.db_name,
             'minidb', [
                 'seq_lengths', 'input_blob_batch_first',
                 'class_target_batch_first', 'regre_target_batch_first',
                 'class_target_mask_batch_first',
                 'regre_target_mask_batch_first'
             ],
             batch_size=self.batch_size,
             data_type='train')

        # In order to put into batches, the input_blob is
        # [BATCH_SIZE, SEQ_LEN, INPUT_DIM]
        # i.e. the first dim is the batch size
        # However the required input dim is:
        # [SEQ_LEN, BATCH_SIZE, INPUT_DIM]
        input_blob = model.net.Transpose([_input_blob],
                                         'input_blob',
                                         axes=[1, 0, 2])
        class_target = model.net.Transpose([_class_target],
                                           'class_target',
                                           axes=[1, 0, 2])
        regre_target = model.net.Transpose([_regre_target],
                                           'regre_target',
                                           axes=[1, 0, 2])
        class_target_mask = model.net.Transpose([_class_target_mask],
                                                'class_target_mask',
                                                axes=[1, 0, 2])
        regre_target_mask = model.net.Transpose([_regre_target_mask],
                                                'regre_target_mask',
                                                axes=[1, 0, 2])

        hidden_output_all, self.hidden_output = MaskGRU(model,
                                                        input_blob,
                                                        seq_lengths,
                                                        (hidden_init, ),
                                                        self.input_dim,
                                                        self.hidden_size,
                                                        scope="MaskRNN")

        # axis is 2 as first two are T (time) and N (batch size)
        # multi-task learning: regression
        regre_output = brew.fc(model,
                               hidden_output_all,
                               None,
                               dim_in=self.hidden_size,
                               dim_out=self.regre_output_dim,
                               axis=2)
        # multi-task learning: classification
        class_output = brew.fc(model,
                               hidden_output_all,
                               None,
                               dim_in=self.hidden_size,
                               dim_out=self.class_output_dim,
                               axis=2)
        # softmax head for testing only
        class_softmax_output = model.net.Softmax(class_output,
                                                 'class_softmax_output',
                                                 axis=2)

        # Get the predict net
        (self.net_store['predict'],
         self.external_inputs) = model_helper.ExtractPredictorNet(
             model.net.Proto(),
             [input_blob, seq_lengths, hidden_init],
             [class_softmax_output, regre_output],
         )

        # Then, we add loss and gradient ops
        # We treat them as one big batch of size T * N
        # we use the logit of classification head
        # class_output_reshaped, _ = model.net.Reshape(
        #     class_output, ['class_output_reshaped', '_class_output_shape'],
        #     shape=[-1, self.class_output_dim])
        class_softmax_output_reshaped, _ = model.net.Reshape(
            class_softmax_output,
            ['class_softmax_output_reshaped', '_class_output_shape'],
            shape=[-1, self.class_output_dim])

        regre_output_reshaped, _ = model.net.Reshape(
            regre_output, ['regre_output_reshaped', '_regre_output_shape'],
            shape=[-1, self.regre_output_dim])

        class_target_reshaped, _ = model.net.Reshape(
            class_target, ['class_target_reshaped', '_class_target_shape'],
            shape=[-1, self.class_output_dim])
        regre_target_reshaped, _ = model.net.Reshape(
            regre_target, ['regre_target_reshaped', '_regre_target_shape'],
            shape=[-1, self.regre_output_dim])

        class_target_mask_reshaped, _ = model.net.Reshape(
            class_target_mask,
            ['class_target_mask_reshaped', '_class_target_mask_shape'],
            shape=[-1, 1])
        regre_target_mask_reshaped, _ = model.net.Reshape(
            regre_target_mask,
            ['regre_target_mask_reshaped', '_regre_target_mask_shape'],
            shape=[-1, self.regre_output_dim])

        # stop gradient to label and mask
        class_target_reshaped = model.net.StopGradient(
            class_target_reshaped, 'stopped_class_target_reshaped')
        regre_target_reshaped = model.net.StopGradient(
            regre_target_reshaped, 'stopped_regre_target_reshaped')
        class_target_mask_reshaped = model.net.StopGradient(
            class_target_mask_reshaped, 'stopped_class_target_mask_reshaped')
        regre_target_mask_reshaped = model.net.StopGradient(
            regre_target_mask_reshaped, 'stopped_regre_target_mask_reshaped')

        # model.net.Print([class_output_reshaped], 'print', to_file=0)
        # classification error
        # combined softmax and log likelihood for numerical stability
        # weighted by class_target_mask_reshaped
        #
        # _, class_average_loss = model.net.SoftmaxWithLoss(
        #     [class_output_reshaped, class_target_reshaped, class_target_mask_reshaped],
        #     ['_train_softmax_ouput', 'class_average_loss'], label_prob=1
        # )
        #
        class_l2_dist = model.net.SquaredL2Distance(
            [class_softmax_output_reshaped, class_target_reshaped],
            'class_l2_dist')
        class_target_mask_reshaped = model.net.Squeeze(
            class_target_mask_reshaped, 'squeezed_class_target_mask', dims=[1])
        masked_class_l2_dist = model.net.Mul(
            [class_target_mask_reshaped, class_l2_dist],
            'masked_class_l2_dist')
        class_average_loss = model.net.AveragedLoss(masked_class_l2_dist,
                                                    'class_average_loss')

        # regression error
        # the mask needs to be applied to *each* individual dimension of the output vector
        regre_output_reshaped_list = model.net.Split(
            [regre_output_reshaped],
            [
                'regre_output_reshaped_' + str(i)
                for i in range(self.regre_output_dim)
            ],
            axis=1,  # has been reshaped to 2D tensor
        )
        regre_target_reshaped_list = model.net.Split(
            [regre_target_reshaped],
            [
                'regre_target_reshaped_' + str(i)
                for i in range(self.regre_output_dim)
            ],
            axis=1,  # has been reshaped to 2D tensor
        )
        regre_target_mask_reshaped_list = model.net.Split(
            [regre_target_mask_reshaped],
            [
                'regre_target_mask_reshaped_' + str(i)
                for i in range(self.regre_output_dim)
            ],
            axis=1,  # has been reshaped to 2D tensor
        )
        regre_average_loss_lst = []
        i = 0
        for o, t, m in zip(regre_output_reshaped_list,
                           regre_target_reshaped_list,
                           regre_target_mask_reshaped_list):
            l2_dist = model.net.SquaredL2Distance([o, t], 'l2_dist_' + str(i))
            m = model.net.Squeeze(m,
                                  'squeezed_regre_target_mask_' + str(i),
                                  dims=[1])
            masked_l2_dist = model.net.Mul([m, l2_dist],
                                           'masked_l2_dist_' + str(i))
            # masked_l2_dist = l2_dist
            regre_average_loss_lst.append(
                model.net.AveragedLoss(masked_l2_dist,
                                       'regre_average_loss_' + str(i)))
            i += 1

        assert i == self.regre_output_dim, 'output dim != # of loss split'

        # Training net
        model.AddGradientOperators([class_average_loss] +
                                   regre_average_loss_lst)
        build_adam(
            model,
            base_learning_rate=base_learning_rate * self.seq_size,
        )

        self.model = model
        self.predictions = [class_softmax_output, regre_output]
        self.loss = [class_average_loss] + regre_average_loss_lst
        for loss in self.loss:
            loss = str(loss)
            self.reports[loss] = []

        # Create a net to copy hidden_output to hidden_init
        prepare_state = core.Net("prepare_state")
        prepare_state.Copy(self.hidden_output, hidden_init)
        self.net_store['prepare'] = prepare_state
        self.net_store['train'] = core.Net(model.net.Proto())
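
A rough driver sketch for the nets stored above (hypothetical: the wrapper class name MaskRNN, its constructor, and the iteration count are placeholders not shown in the snippet). The 'prepare' net copies the last hidden state back into hidden_init between minibatches:

# Hypothetical training driver for the nets built by build_net() above.
import numpy as np
from caffe2.python import workspace

rnn = MaskRNN()                 # placeholder wrapper class; constructor args omitted
rnn.build_net(base_learning_rate=0.1)

# The recurrent state must be fed once before the first run.
workspace.FeedBlob(
    'hidden_init',
    np.zeros((1, rnn.batch_size, rnn.hidden_size), dtype=np.float32))

workspace.RunNetOnce(rnn.model.param_init_net)
workspace.CreateNet(rnn.net_store['train'])
workspace.CreateNet(rnn.net_store['prepare'])

for _ in range(100):
    workspace.RunNet(rnn.net_store['train'])    # forward/backward + Adam update
    workspace.RunNet(rnn.net_store['prepare'])  # copy hidden_output -> hidden_init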
Example #9
    def build_optimizer(self, model, **kwargs):
        self._skip_gpu = True
        return build_adam(model,
                          base_learning_rate=0.1,
                          enableRAdam=True,
                          **kwargs)
Example #10
    def add_training_operators(self, model, output, label, device_opts, loss,
                               opt_type, base_learning_rate, policy, stepsize,
                               epsilon, beta1, beta2, gamma, momentum):
        with core.DeviceScope(device_opts):
            if loss == 'cross_entropy':
                xent = model.LabelCrossEntropy([output, label], 'xent')
                loss = model.AveragedLoss(xent, "loss")
            elif loss == 'euclidean':
                dist = model.net.SquaredL2Distance([label, output], 'dist')
                loss = dist.AveragedLoss([], ['loss'])

            model.AddGradientOperators([loss])

            if opt_type == 'adam':
                if policy == 'step':
                    opt = optimizer.build_adam(
                        model,
                        base_learning_rate=base_learning_rate,
                        policy=policy,
                        stepsize=stepsize,
                        beta1=beta1,
                        beta2=beta2,
                        epsilon=epsilon)
                elif policy == 'fixed' or policy == 'inv':
                    opt = optimizer.build_adam(
                        model,
                        base_learning_rate=base_learning_rate,
                        policy=policy,
                        beta1=beta1,
                        beta2=beta2,
                        epsilon=epsilon)
                print("adam optimizer selected")
            elif opt_type == 'sgd':
                if policy == 'step':
                    opt = optimizer.build_sgd(
                        model,
                        base_learning_rate=base_learning_rate,
                        policy=policy,
                        stepsize=stepsize,
                        gamma=gamma,
                        momentum=momentum)
                elif policy == 'fixed' or policy == 'inv':
                    opt = optimizer.build_sgd(
                        model,
                        base_learning_rate=base_learning_rate,
                        policy=policy,
                        gamma=gamma,
                        momentum=momentum)
                print("sgd optimizer selected")
            elif opt_type == 'rmsprop':
                if policy == 'step':
                    opt = optimizer.build_rms_prop(
                        model,
                        base_learning_rate=base_learning_rate,
                        policy=policy,
                        stepsize=stepsize,
                        decay=gamma,
                        momentum=momentum,
                        epsilon=epsilon)
                elif policy == 'fixed' or policy == 'inv':
                    opt = optimizer.build_rms_prop(
                        model,
                        base_learning_rate=base_learning_rate,
                        policy=policy,
                        decay=gamma,
                        momentum=momentum,
                        epsilon=epsilon)
                print("rmsprop optimizer selected")
            elif opt_type == 'adagrad':
                if policy == 'step':
                    opt = optimizer.build_adagrad(
                        model,
                        base_learning_rate=base_learning_rate,
                        policy=policy,
                        stepsize=stepsize,
                        decay=gamma,
                        epsilon=epsilon)
                elif policy == 'fixed' or policy == 'inv':
                    opt = optimizer.build_adagrad(
                        model,
                        base_learning_rate=base_learning_rate,
                        policy=policy,
                        decay=gamma,
                        epsilon=epsilon)
                print("adagrad optimizer selected")
Example #11
    def build_optimizer(self, model, **kwargs):
        self._skip_gpu = False
        return build_adam(model, base_learning_rate=0.1, **kwargs)
Example #12
    def build_optimizer(self, model):
        build_adam(model, base_learning_rate=0.1)