def __init__(self, num_groups, num_channels, eps=1e-05, affine=None, is_train=True):
    assert affine == None
    self.num_groups = num_groups
    self.num_channels = num_channels
    self.eps = eps
    self.weight = init.constant((num_channels,), "float32", 1.0)
    self.bias = init.constant((num_channels,), "float32", 0.0)
def test_constant(self):
    a = init.constant(2, "float32")
    np.testing.assert_allclose(a.data, [0, 0])
    a = init.constant((2, 3), value=1.)
    np.testing.assert_allclose(a.data, [[1, 1, 1], [1, 1, 1]])
    linear = nn.Linear(2, 2)
    init.constant_(linear.weight)
    np.testing.assert_allclose(linear.weight.data, [[0, 0], [0, 0]])
def __init__(self, num_features, eps=1e-05, momentum=0.1, affine=None, is_train=True, sync=True):
    assert affine == None
    self.sync = sync
    self.num_features = num_features
    self.is_train = is_train
    self.eps = eps
    self.momentum = momentum
    self.weight = init.constant((num_features,), "float32", 1.0)
    self.bias = init.constant((num_features,), "float32", 0.0)
def __init__(self, normalized_shape, eps: float = 1e-5, elementwise_affine: bool = True) -> None:
    super(LayerNorm, self).__init__()
    if isinstance(normalized_shape, int):
        normalized_shape = (normalized_shape,)
    self.normalized_shape = tuple(normalized_shape)
    self.eps = eps
    self.elementwise_affine = elementwise_affine
    if self.elementwise_affine:
        self.weight = init.constant(normalized_shape, "float32", 1.0)
        self.bias = init.constant(normalized_shape, "float32", 0.0)
def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=None, is_train=True):
    assert affine == None
    self.num_features = num_features
    self.is_train = is_train
    self.eps = eps
    self.momentum = momentum
    self.weight = init.constant((num_features,), "float32", 1.0)
    self.bias = init.constant((num_features,), "float32", 0.0)
    self.running_mean = init.constant((num_features,), "float32", 0.0).stop_grad()
    self.running_var = init.constant((num_features,), "float32", 1.0).stop_grad()
def add_self_loops(edge_index, edge_weight: Optional[Var] = None,
                   fill_value: float = 1., num_nodes: Optional[int] = None):
    r"""Adds a self-loop :math:`(i,i) \in \mathcal{E}` to every node
    :math:`i \in \mathcal{V}` in the graph given by :attr:`edge_index`.
    In case the graph is weighted, self-loops will be added with edge weights
    denoted by :obj:`fill_value`.

    Args:
        edge_index (Var int32): The edge indices.
        edge_weight (Var, optional): One-dimensional edge weights.
            (default: :obj:`None`)
        fill_value (float, optional): If :obj:`edge_weight` is not :obj:`None`,
            will add self-loops with edge weights of :obj:`fill_value` to the
            graph. (default: :obj:`1.`)
        num_nodes (int, optional): The number of nodes, *i.e.*
            :obj:`max_val + 1` of :attr:`edge_index`. (default: :obj:`None`)

    :rtype: (:class:`Var int32`, :class:`Var`)
    """
    N = maybe_num_nodes(edge_index, num_nodes)

    loop_index = jt.arange(0, N, dtype=Var.int32)
    loop_index = loop_index.unsqueeze(0).repeat(2, 1)

    if edge_weight is not None:
        assert edge_weight.numel() == edge_index.size(1)
        loop_weight = init.constant((N, ), edge_weight.dtype, fill_value)
        edge_weight = jt.concat([edge_weight, loop_weight], dim=0)

    edge_index = jt.concat([edge_index, loop_index], dim=1)
    return edge_index, edge_weight
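A minimal usage sketch for add_self_loops (not part of the original snippet; it assumes jittor is importable as jt and that add_self_loops and its imports are already in scope):

import jittor as jt

# two directed edges 0->1 and 1->2 on a 3-node graph
edge_index = jt.array([[0, 1], [1, 2]])
edge_weight = jt.array([0.5, 0.5])
edge_index, edge_weight = add_self_loops(edge_index, edge_weight,
                                         fill_value=1., num_nodes=3)
# edge_index now additionally holds (0,0), (1,1), (2,2);
# each new self-loop receives the weight fill_value = 1.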
def __init__(self, input_size, output_size, gain=2**0.5, use_wscale=False, lrmul=1, bias=True):
    super().__init__()
    he_std = gain * input_size**(-0.5)  # He init
    # Equalized learning rate and custom learning rate multiplier.
    if use_wscale:
        init_std = 1.0 / lrmul
        self.w_mul = he_std * lrmul
    else:
        init_std = he_std / lrmul
        self.w_mul = lrmul
    # self.weight = torch.nn.Parameter(torch.randn(output_size, input_size) * init_std)
    # self.weight = jt.random([output_size, input_size], 'float', 'normal') * init_std
    self.weight = init.gauss([output_size, input_size], 'float32') * init_std
    if bias:
        # self.bias = torch.nn.Parameter(torch.zeros(output_size))
        # self.bias = jt.zeros(output_size)
        self.bias = init.constant([output_size], 'float32', 0.0)
        self.b_mul = lrmul
    else:
        self.bias = None
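A quick numeric check of the He-init scale above (illustrative only, not from the source): with the default gain = 2**0.5, input_size = 512 and lrmul = 1, he_std = sqrt(2/512) = 1/16 = 0.0625. With use_wscale=True the weights are drawn with std init_std = 1.0 and w_mul = 0.0625, whereas without it they are drawn directly with std 0.0625 and w_mul = 1, the usual equalized-learning-rate trade-off.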
def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, is_train=True, sync=True):
    super(RepresentativeBatchNorm2d, self).__init__(
        num_features, eps, momentum, affine, is_train, sync)
    self.sync = sync
    self.num_features = num_features
    self.is_train = is_train
    self.eps = eps
    self.momentum = momentum
    self.affine = affine
    self.weight = init.constant(
        (1, num_features, 1, 1), "float32", 1.0) if affine else 1.0
    self.bias = init.constant(
        (1, num_features, 1, 1), "float32", 0.0) if affine else 0.0
    self.running_mean = init.constant((num_features, ), "float32", 0.0).stop_grad()
    self.running_var = init.constant((num_features, ), "float32", 1.0).stop_grad()
    ### weights for centering calibration ###
    self.center_weight = init.constant((1, num_features, 1, 1), "float32", 0.0)
    ### weights for scaling calibration ###
    self.scale_weight = init.constant((1, num_features, 1, 1), "float32", 1.0)
    self.scale_bias = init.constant((1, num_features, 1, 1), "float32", 0.0)
    ### calculate statistics ###
    self.stas = nn.AdaptiveAvgPool2d((1, 1))
def batch_norm(x, is_train, eps=1e-5, momentum=0.1):
    w = jt.make_var([x.shape[1]], init=lambda *a: init.constant(*a, 1.0))
    b = jt.make_var([x.shape[1]], init=lambda *a: init.constant(*a, 0.0))
    running_mean = jt.make_var([x.shape[1]], init=lambda *a: init.constant(*a, 0.0))
    running_var = jt.make_var([x.shape[1]], init=lambda *a: init.constant(*a, 1.0))

    w = w.broadcast(x, [0,2,3])
    b = b.broadcast(x, [0,2,3])
    if is_train:
        xmean = jt.mean(x, dims=[0,2,3], keepdims=1)
        x2mean = jt.mean(x*x, dims=[0,2,3], keepdims=1)
        xvar = x2mean - xmean*xmean
        norm_x = (x - xmean) / jt.sqrt(xvar + eps)

        running_mean += (xmean.sum([0,2,3]) - running_mean) * momentum
        running_var += (xvar.sum([0,2,3]) - running_var) * momentum
    else:
        running_mean = running_mean.broadcast(x, [0,2,3])
        running_var = running_var.broadcast(x, [0,2,3])
        norm_x = (x - running_mean) / jt.sqrt(running_var + eps)
    return norm_x * w + b
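A minimal sketch of calling the functional batch_norm above (assumed setup, not from the original source; note that jt.make_var is an older jittor API, so this only runs against the same jittor version as the snippet itself):

import jittor as jt

x = jt.random([4, 16, 8, 8])             # dummy (N, C, H, W) activations
y_train = batch_norm(x, is_train=True)   # normalize with batch statistics
y_eval = batch_norm(x, is_train=False)   # normalize with running estimates
assert y_train.shape == x.shape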
def __init__(self, input_channels, output_channels, kernel_size, stride=1,
             gain=2**0.5, use_wscale=False, lrmul=1, bias=True,
             intermediate=None, upscale=False, downscale=False):
    super().__init__()
    if upscale:
        self.upscale = Upscale2d()
    else:
        self.upscale = None
    if downscale:
        self.downscale = Downscale2d()
    else:
        self.downscale = None
    he_std = gain * (input_channels * kernel_size**2)**(-0.5)  # He init
    self.kernel_size = kernel_size
    if use_wscale:
        init_std = 1.0 / lrmul
        self.w_mul = he_std * lrmul
    else:
        init_std = he_std / lrmul
        self.w_mul = lrmul
    # self.weight = torch.nn.Parameter(
    #     torch.randn(output_channels, input_channels, kernel_size, kernel_size) * init_std)
    # self.weight = jt.random([output_channels, input_channels, kernel_size, kernel_size], 'float', 'normal') * init_std
    self.weight = init.gauss(
        [output_channels, input_channels, kernel_size, kernel_size], 'float32') * init_std
    if bias:
        # self.bias = torch.nn.Parameter(torch.zeros(output_channels))
        # self.bias = jt.zeros(output_channels)
        self.bias = init.constant([output_channels], 'float32', 0.0)
        self.b_mul = lrmul
    else:
        self.bias = None
    self.intermediate = intermediate
def add_remaining_self_loops(edge_index, edge_weight: Optional[Var] = None,
                             fill_value: float = 1., num_nodes: Optional[int] = None):
    r"""Adds remaining self-loop :math:`(i,i) \in \mathcal{E}` to every node
    :math:`i \in \mathcal{V}` in the graph given by :attr:`edge_index`.
    In case the graph is weighted and already contains a few self-loops, only
    non-existent self-loops will be added with edge weights denoted by
    :obj:`fill_value`.

    Args:
        edge_index (Var int32): The edge indices.
        edge_weight (Var, optional): One-dimensional edge weights.
            (default: :obj:`None`)
        fill_value (float, optional): If :obj:`edge_weight` is not :obj:`None`,
            will add self-loops with edge weights of :obj:`fill_value` to the
            graph. (default: :obj:`1.`)
        num_nodes (int, optional): The number of nodes, *i.e.*
            :obj:`max_val + 1` of :attr:`edge_index`. (default: :obj:`None`)

    :rtype: (:class:`Var int32`, :class:`Var`)
    """
    N = maybe_num_nodes(edge_index, num_nodes)
    row, col = edge_index[0], edge_index[1]
    mask = row != col

    loop_index = jt.arange(0, N, dtype=row.dtype)
    loop_index = loop_index.unsqueeze(0).repeat(2, 1)
    edge_index = jt.concat([edge_index[:, mask], loop_index], dim=1)

    if edge_weight is not None:
        inv_mask = jt.logical_not(mask)
        loop_weight = init.constant((N, ), edge_weight.dtype, fill_value)
        remaining_edge_weight = edge_weight[inv_mask]
        if remaining_edge_weight.numel() > 0:
            loop_weight[row[inv_mask]] = remaining_edge_weight
        edge_weight = jt.concat([edge_weight[mask], loop_weight], dim=0)

    return edge_index, edge_weight
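As a hedged illustration of how this variant differs from add_self_loops (assumed setup, not from the original source): an existing self-loop keeps its weight, and only the missing loops are filled in with fill_value.

import jittor as jt

# edges 0->0 (an existing self-loop with weight 2.0) and 0->1
edge_index = jt.array([[0, 0], [0, 1]])
edge_weight = jt.array([2.0, 0.5])
edge_index, edge_weight = add_remaining_self_loops(edge_index, edge_weight,
                                                   fill_value=1., num_nodes=2)
# node 0 keeps its weight-2.0 loop; node 1 gets a new loop with weight 1.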
def R1Penalty(self, real_img, height, alpha):
    # TODO: use_loss_scaling, for fp16
    # apply_loss_scaling = lambda x: x * torch.exp(x * torch.Tensor([np.float32(np.log(2.0))]).to(real_img.device))
    apply_loss_scaling = lambda x: x * jt.exp(x * jt.array(
        [np.float32(np.log(2.0))]))
    # undo_loss_scaling = lambda x: x * torch.exp(-x * torch.Tensor([np.float32(np.log(2.0))]).to(real_img.device))
    undo_loss_scaling = lambda x: x * jt.exp(-x * jt.array(
        [np.float32(np.log(2.0))]))

    # real_img = torch.autograd.Variable(real_img, requires_grad=True)
    real_img = init.constant(real_img.shape, 'float32', real_img)
    assert not real_img.is_stop_grad()
    real_logit = self.dis(real_img, height, alpha)

    # real_logit = apply_loss_scaling(torch.sum(real_logit))
    # real_grads = torch.autograd.grad(outputs=real_logit, inputs=real_img,
    #     grad_outputs=torch.ones(real_logit.size()).to(real_img.device),
    #     create_graph=True, retain_graph=True)[0].view(real_img.size(0), -1)
    real_grads = jt.grad(real_logit, real_img).view(real_img.size(0), -1)
    # real_grads = undo_loss_scaling(real_grads)
    # r1_penalty = torch.sum(torch.mul(real_grads, real_grads))
    r1_penalty = jt.sum(jt.multiply(real_grads, real_grads))
    return r1_penalty
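In symbols, the quantity returned above is the standard R1 gradient penalty on real images, r1_penalty = sum_i || d D(x_i) / d x_i ||^2, i.e. the summed squared gradients of the discriminator's real logits with respect to the real inputs; the loss-scaling lambdas are defined but their call sites remain commented out as fp16 groundwork, per the TODO.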
def __init__(self, num_parameters=1, init_=0.25):
    self.num_parameters = num_parameters
    self.a = init.constant((num_parameters, ), "float32", init_)
def _zero_init_conv(self):
    self.scale_conv.weight = init.constant(
        [self.in_channels, self.in_channels, 1, 1], 'float', value=0.0)
def __init__(self, channels):
    super().__init__()
    # self.weight = nn.Parameter(torch.zeros(channels))
    # self.weight = jt.zeros(channels)
    self.weight = init.constant([channels], 'float32', 0.0)
    self.noise = None