from typing import List, Optional, Tuple

import torch
import torch.nn.functional as F
from torch import Tensor


def hardsquaremax(input: Tensor) -> Tensor:
    r"""hardsquaremax(input) -> Tensor

    Like hardmax, but scales each row by its L2 norm instead of the sum of
    absolute values.
    """
    x = input.clone()
    # Row-wise L2 norm used as the scaling factor.
    m = torch.sqrt(torch.sum(torch.pow(input, 2), dim=1, keepdim=True))
    for idx, sum_i in enumerate(m):
        if sum_i == 0:
            # Avoid division by zero: an all-zero row maps to a uniform distribution.
            m[idx] = 1
            x[idx][x[idx] == 0] = 1
    s = x.add(m) / (2 * m)
    output = s / torch.sum(s, dim=1, keepdim=True)
    return output
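
# Usage sketch (assumption, not part of the original module): each row of a 2-D
# batch becomes a probability distribution; the all-zero row exercises the
# zero-norm branch and comes out uniform.
def _demo_hardsquaremax() -> None:
    batch = torch.tensor([[1.0, -2.0, 3.0],
                          [0.0, 0.0, 0.0]])
    probs = hardsquaremax(batch)
    assert torch.allclose(probs.sum(dim=1), torch.ones(2))
    print(probs)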
def forward(self, input: Tensor) -> Tuple[Tensor, Optional[List[Tensor]]]:
    # TEST: Would not multiplying by omega_0 during initialization help performance,
    # or multiplying by omega_0 for more layers?
    # Work on a detached copy of the coordinates so gradients can be taken
    # with respect to them.
    input: Tensor = input.clone().detach().requires_grad_(True)
    if self.intermediate_output:
        intermediate_output: List[Tensor] = []
        # Hidden layers: affine map followed by a sine activation; keep a copy
        # of every hidden activation.
        for weight, bias in self.layer_parameters[:-1]:
            input = F.linear(input, weight, bias).sin()
            intermediate_output.append(input.clone())
        input = F.linear(input, self.layer_parameters[-1][0], self.layer_parameters[-1][1])
        if self.linear_output:
            return input, intermediate_output
        return input.sin(), intermediate_output
    # Same pass without collecting intermediate activations.
    for weight, bias in self.layer_parameters[:-1]:
        input = F.linear(input, weight, bias).sin()
    if self.linear_output:
        return F.linear(input, self.layer_parameters[-1][0], self.layer_parameters[-1][1]), None
    return F.linear(input, self.layer_parameters[-1][0], self.layer_parameters[-1][1]).sin(), None
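
# Call sketch (assumption): this `forward` belongs to a SIREN-style module whose
# `layer_parameters` is a list of (weight, bias) pairs and whose
# `intermediate_output` / `linear_output` flags select the return shape.
# The helper below is illustrative only and not part of the original code.
def _demo_siren_forward(model: torch.nn.Module) -> None:
    coords = torch.linspace(-1.0, 1.0, steps=64).unsqueeze(1)  # 64 one-dimensional coordinates
    output, hidden = model(coords)
    # `hidden` is a list of per-layer sine activations when `intermediate_output`
    # is enabled, otherwise None.
    print(output.shape, None if hidden is None else len(hidden))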
def hardmax(input: Tensor) -> Tensor:
    r"""hardmax(input) -> Tensor

    Applies the hardmax function along dim 1. Each row of the output is a
    probability distribution that sums to 1, computed so as to avoid the
    unnecessary non-linearity typical of the softmax procedure.

    Args:
        input (Tensor): the input tensor.

    Returns:
        A tensor of the same shape as input.
    """
    x = input.clone()
    # Row-wise sum of absolute values (L1 norm) used as the scaling factor.
    m = torch.sum(torch.abs(input), dim=1, keepdim=True)
    for idx, sum_i in enumerate(m):
        if sum_i == 0:
            # Avoid division by zero: an all-zero row maps to a uniform distribution.
            m[idx] = 1
            x[idx][x[idx] == 0] = 1
    s = x.add(m) / (2 * m)
    output = s / torch.sum(s, dim=1, keepdim=True)
    return output
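
# Usage sketch (assumption, not part of the original module): same batch as the
# hardsquaremax demo, normalised with the L1 norm instead of the L2 norm.
def _demo_hardmax() -> None:
    batch = torch.tensor([[1.0, -2.0, 3.0],
                          [0.0, 0.0, 0.0]])
    probs = hardmax(batch)
    assert torch.allclose(probs.sum(dim=1), torch.ones(2))
    print(probs)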