Example #1
    def step(self, gradients: List[Optional[Tensor]]):
        params = self.param_group['params']
        params_with_grad = []
        grads = []
        state_sums = []
        state_steps: List[Tensor] = []

        if len(params) != len(gradients):
            raise ValueError(
                "The number of gradients passed in does not match the number of parameters! "
                + f"Params length: {len(params)}. "
                + f"Gradients length: {len(gradients)}")

        # Collect the parameters that actually received a gradient, together with
        # their per-parameter Adagrad state (sum of squared gradients, step count).
        has_sparse_grad = False
        for param, gradient in zip(self.param_group['params'], gradients):
            if gradient is not None:
                if gradient.is_sparse:
                    has_sparse_grad = True
                params_with_grad.append(param)
                grads.append(gradient)
                state = self.state[param]
                state_sums.append(state['sum'])
                state_steps.append(state['step'])

        # Delegate the actual parameter update to the functional Adagrad kernel.
        with torch.no_grad():
            F.adagrad(params,
                      grads,
                      state_sums,
                      state_steps,
                      lr=self.defaults['lr'],
                      weight_decay=self.defaults['weight_decay'],
                      lr_decay=self.defaults['lr_decay'],
                      eps=self.defaults['eps'],
                      has_sparse_grad=has_sparse_grad,
                      foreach=self.foreach)
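
For context, here is a minimal, self-contained sketch of the call that the step() above delegates to. It is not part of the example: the tensors and hyperparameter values are made up, and it assumes a recent PyTorch where torch.optim._functional.adagrad takes the per-parameter step counters as tensors (matching the List[Tensor] annotation above).

import torch
from torch.optim import _functional as F

param = torch.nn.Parameter(torch.randn(3))
grad = torch.randn(3)           # gradient supplied explicitly rather than read from param.grad
state_sum = torch.zeros(3)      # running sum of squared gradients
state_step = torch.tensor(0.0)  # step counter kept as a tensor, as in Example #1

with torch.no_grad():
    F.adagrad([param], [grad], [state_sum], [state_step],
              lr=1e-2, weight_decay=0.0, lr_decay=0.0, eps=1e-10,
              has_sparse_grad=False, foreach=False)

The step() above performs essentially this call, with the lists gathered from the optimizer's own param_group and state.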
Example #2
    def step(self, gradients: List[Optional[Tensor]]):
        params = self.param_group['params']
        params_with_grad = []
        grads = []
        state_sums = []
        state_steps: List[int] = []

        if len(params) != len(gradients):
            raise ValueError(
                "The number of gradients passed in does not match the number of parameters! "
                + f"Params length: {len(params)}. "
                + f"Gradients length: {len(gradients)}")

        for param, gradient in zip(self.param_group['params'], gradients):
            if gradient is not None:
                params_with_grad.append(param)
                grads.append(gradient)
                state = self.state[param]
                state_sums.append(state['sum'])
                # increment this parameter's step counter before the update
                state['step'] += 1
                # this version of the functional API expects steps as plain Python ints
                state_steps.append(state['step'].item())

        with torch.no_grad():
            F.adagrad(params,
                      grads,
                      state_sums,
                      state_steps,
                      lr=self.defaults['lr'],
                      weight_decay=self.defaults['weight_decay'],
                      lr_decay=self.defaults['lr_decay'],
                      eps=self.defaults['eps'])
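
A note on the difference between the two examples: Example #1 keeps the step counters as tensors and passes them through unchanged, which suggests that F.adagrad increments them internally in that version, and it also forwards the sparse-gradient and foreach options. Example #2 targets an older signature in which state_steps is a List[int]: the step tensor is incremented in the optimizer state first and then converted with .item() before being handed to F.adagrad.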