def conv(inp, outmaps, kernel,
         pad=None, stride=None, dilation=None, group=1,
         w_init=None, b_init=None,
         base_axis=1, fix_parameters=False, rng=None, with_bias=True,
         use_wscale=True, use_he_backward=False):
    """Convolution layer with He initialization and optional
    equalized learning rate (wscale).
    """
    # Choose the He std w.r.t. fan-out (backward) or fan-in (forward).
    if use_he_backward:
        std = calc_normal_std_he_backward(
            inp.shape[base_axis], outmaps, kernel=kernel)
    else:
        std = calc_normal_std_he_forward(
            inp.shape[base_axis], outmaps, kernel=kernel)

    # Weight initialization. Note the integer division for grouped convolution.
    w_shape = (outmaps, inp.shape[base_axis] // group) + tuple(kernel)
    if w_init is None and use_wscale:
        # Equalized learning rate: draw from N(0, 1), scale by std at runtime.
        w_init = NormalInitializer(1.)
        w = get_parameter_or_create("W", w_shape, w_init, not fix_parameters)
        w *= std
    elif w_init is None:
        w_init = NormalInitializer(std)
        w = get_parameter_or_create("W", w_shape, w_init, not fix_parameters)
    else:
        # A user-supplied initializer takes precedence.
        w = get_parameter_or_create("W", w_shape, w_init, not fix_parameters)

    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    b = None
    if with_bias:
        b = get_parameter_or_create("b", (outmaps,), b_init, not fix_parameters)

    return F.convolution(inp, w, b, base_axis, pad, stride, dilation, group)
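# A minimal usage sketch of conv (hedged: the input shape, scope name, and
# hyperparameters below are illustrative, not from the source):
import nnabla as nn

x = nn.Variable((8, 3, 32, 32))  # batch of 8 RGB images, 32x32
with nn.parameter_scope("conv1"):
    # 16 output maps, 3x3 kernel, equalized learning rate enabled.
    h = conv(x, 16, (3, 3), pad=(1, 1), use_wscale=True)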
def affine(inp, n_outmaps, base_axis=1, w_init=None, b_init=None,
           fix_parameters=False, rng=None, with_bias=True,
           use_wscale=True, use_he_backward=False):
    """Affine (fully connected) layer with He initialization and optional
    equalized learning rate (wscale).
    """
    if not hasattr(n_outmaps, '__iter__'):
        n_outmaps = [n_outmaps]
    n_outmaps = list(n_outmaps)
    n_outmap = int(np.prod(n_outmaps))

    # Choose the He std w.r.t. fan-out (backward) or fan-in (forward).
    if use_he_backward:
        std = calc_normal_std_he_backward(inp.shape[base_axis], n_outmap)
    else:
        std = calc_normal_std_he_forward(inp.shape[base_axis], n_outmap)

    # Weight initialization.
    w_shape = [int(np.prod(inp.shape[base_axis:]))] + n_outmaps
    if w_init is None and use_wscale:
        # Equalized learning rate: draw from N(0, 1), scale by std at runtime.
        w_init = NormalInitializer(1.)
        w = get_parameter_or_create("W", w_shape, w_init, not fix_parameters)
        w *= std
    elif w_init is None:
        w_init = NormalInitializer(std)
        w = get_parameter_or_create("W", w_shape, w_init, not fix_parameters)
    else:
        # A user-supplied initializer takes precedence.
        w = get_parameter_or_create("W", w_shape, w_init, not fix_parameters)

    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    b = None
    if with_bias:
        b = get_parameter_or_create("b", n_outmaps, b_init, not fix_parameters)

    return F.affine(inp, w, b, base_axis)
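# A minimal usage sketch of affine (hedged: shapes and names are illustrative):
import nnabla as nn

x = nn.Variable((8, 512))  # hypothetical flattened feature input
with nn.parameter_scope("fc1"):
    y = affine(x, 10, use_wscale=True)  # -> logits of shape (8, 10)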
def pf_affine(r, num_classes=1000, channel_last=False):
    # Final classifier implemented as a 1x1 convolution, then flattened.
    r = PF.convolution(r, num_classes, (1, 1), channel_last=channel_last,
                       w_init=NormalInitializer(sigma=0.01, rng=RNG),
                       name='fc')
    return F.reshape(r, (r.shape[0], -1), inplace=False)
def call(self, inputs):
    r"""
    Args:
        inputs (nn.Variable): An input variable of shape (B, T).

    Returns:
        nn.Variable: Output variable of shape (T, B, C).
    """
    hparams = self._hparams
    # inputs of shape (B, T)
    embedded_inputs = PF.embed(inputs,
                               n_inputs=len(hparams.vocab),
                               n_features=hparams.symbols_embedding_dim,
                               initializer=NormalInitializer(0.3),
                               name='embedding')  # (B, T, C)

    prenet_outputs = prenet(embedded_inputs,
                            layer_sizes=hparams.prenet_channels,
                            is_training=self.training,
                            scope='prenet_encoder')  # (B, T, C)

    encoder_outputs = encoder_cbhg(F.transpose(prenet_outputs, (0, 2, 1)),
                                   depth=hparams.encoder_embedding_dim,
                                   is_training=self.training)  # (T, B, C)

    return encoder_outputs
def spectral_normalization_for_conv(w, itr=1, eps=1e-12, test=False):
    # Normalize over the flattened (Out, In*Kh*Kw) weight; the running
    # singular vector "u" is stored as an unlearned parameter.
    d0 = w.shape[0]  # Out
    u0 = get_parameter_or_create(
        "singular-vector", [d0], NormalInitializer(), False)
    return F.spectral_norm(w, u0, dim=0, itr=itr, eps=eps, test=test)
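# A hedged usage sketch of the F.spectral_norm wrapper above (the weight
# shape and scope name are illustrative):
import nnabla as nn
from nnabla.initializer import NormalInitializer
from nnabla.parameter import get_parameter_or_create

with nn.parameter_scope("disc/conv1"):
    w = get_parameter_or_create("W", (64, 32, 3, 3), NormalInitializer(0.02))
    w_sn = spectral_normalization_for_conv(w, itr=1)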
def conv_block(x, out_ch, kernel, stride, pad, test, scope):
    with nn.parameter_scope(scope):
        h = PF.convolution(x, out_ch, (kernel, kernel),
                           stride=(stride, stride), pad=(pad, pad),
                           w_init=NormalInitializer(0.02), name='conv')
        h = PF.batch_normalization(h, batch_stat=not test, name='bn')
        h = F.leaky_relu(h, alpha=0.2)
    return h
def discriminator(x, num_layer, fs, min_fs, kernel, pad, scope, test=False):
    with nn.parameter_scope(scope):
        h = conv_block(x, fs, kernel, 1, pad, test, 'head')
        h = middle_blocks(h, num_layer, fs, min_fs, kernel, pad, test)
        h = PF.convolution(h, 1, (kernel, kernel), stride=(1, 1),
                           pad=(pad, pad), w_init=NormalInitializer(0.02),
                           name='tail')
    return h
def _get_generator(proto):
    if proto.type == 'Normal':
        return NormalInitializer(sigma=proto.multiplier)
    elif proto.type == 'Uniform':
        return UniformInitializer(lim=(-proto.multiplier, proto.multiplier))
    elif proto.type == 'Constant':
        return ConstantInitializer(value=proto.multiplier)
    else:
        raise ValueError(
            'Generator type "{}" is not supported.'.format(proto.type))
def generator(x, y, num_layer, fs, min_fs, kernel, pad, scope, test=False):
    with nn.parameter_scope(scope):
        h = conv_block(x, fs, kernel, 1, pad, test, 'head')
        h = middle_blocks(h, num_layer, fs, min_fs, kernel, pad, test)
        h = PF.convolution(h, x.shape[1], (kernel, kernel), stride=(1, 1),
                           pad=(pad, pad), w_init=NormalInitializer(0.02),
                           name='tail')
        h = F.tanh(h)
    # Center-crop y so the residual connection matches the shape of h.
    ind = (y.shape[2] - h.shape[2]) // 2
    y = y[:, :, ind:(y.shape[2] - ind), ind:(y.shape[3] - ind)]
    return y + h
def _create_variable(v, name, shape):
    # Create and initialize variables. `Variable` here is a plain container,
    # not nn.Variable.
    class Variable:
        pass

    parameter = v.type == "Parameter"
    variable_instance = None
    if parameter:
        if v.initializer.type == 'Normal':
            initializer = NormalInitializer(v.initializer.multiplier)
        elif v.initializer.type in ('NormalAffineHe', 'NormalAffineHeForward'):
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_forward(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalAffineHeBackward':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_backward(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalAffineGlorot':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_glorot(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type in ('NormalConvolutionHe', 'NormalConvolutionHeForward'):
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_forward(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalConvolutionHeBackward':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_backward(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalConvolutionGlorot':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_glorot(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'Uniform':
            initializer = UniformInitializer(
                lim=[-v.initializer.multiplier, v.initializer.multiplier])
        elif v.initializer.type == 'UniformAffineGlorot':
            initializer = (lambda shape: UniformInitializer(calc_uniform_lim_glorot(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'UniformConvolutionGlorot':
            initializer = (lambda shape: UniformInitializer(calc_uniform_lim_glorot(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'Constant':
            initializer = ConstantInitializer(value=v.initializer.multiplier)
        else:
            initializer = None
        variable_instance = get_parameter_or_create(name, shape, initializer)
    else:
        # Create an empty variable; memory will be allocated in
        # network.setup() after network optimization.
        variable_instance = nn.Variable()

    variable = Variable()
    variable.name = name
    variable.parameter = parameter
    variable.shape = shape
    variable.variable_instance = variable_instance
    return variable
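# The lambda-wrapped branches above rely on nnabla initializers being
# callable with a shape, returning an ndarray that the lambda then scales
# by the multiplier. A small illustrative check (shape and multiplier are
# made up):
import numpy
from nnabla.initializer import NormalInitializer, calc_normal_std_he_forward

shape = (256, 128)  # hypothetical affine weight (in, out)
init = (lambda shape: NormalInitializer(calc_normal_std_he_forward(
    shape[0], numpy.prod(shape[1:])))(shape) * 0.5)
values = init(shape)  # ndarray of shape (256, 128)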
def spectral_normalization_for_affine(w, itr=1, eps=1e-12, input_axis=1, test=False):
    # For an affine weight, the last axis is the output; the running
    # singular vector "u" is stored as an unlearned parameter.
    d1 = w.shape[-1]  # Out
    u0 = get_parameter_or_create(
        "singular-vector", [d1], NormalInitializer(), False)
    return F.spectral_norm(w, u0, dim=len(w.shape) - 1, itr=itr, eps=eps, test=test)
def __init__(self, num_layers, heads, head_conv, training=True,
             channel_last=False, **kwargs):
    self.num_layers = num_layers
    self.training = training
    self.heads = heads
    self.head_conv = head_conv
    self.backbone_model = resnet_imagenet
    self.n_init = NormalInitializer(0.001)
    self.channel_last = channel_last
    # Output channels and kernel sizes for the deconvolution layers.
    self.ochannels = [256, 256, 256]
    self.kernels_size = [4, 4, 4]
def __init__(self, num_layers, heads, head_conv, training=True,
             channel_last=False, **kwargs):
    self.n_init = NormalInitializer(0.001)
    self.backbone_model = DLAUp
    self.num_layers = num_layers
    self.training = training
    self.heads = heads
    self.head_conv = head_conv
    self.channel_last = channel_last
    # TODO: add DLA variations
    self.axes = 3 if self.channel_last else 1
def spectral_normalization_for_affine(w, itr=1, eps=1e-12, input_axis=1, test=False):
    W_sn = get_parameter_or_create("W_sn", w.shape, ConstantInitializer(0), False)
    if test:
        # At test time, return the cached normalized weight.
        return W_sn

    d0 = np.prod(w.shape[0:-1])  # In
    d1 = np.prod(w.shape[-1])    # Out
    u0 = get_parameter_or_create(
        "singular-vector", [d1], NormalInitializer(), False)
    u = F.reshape(u0, [d1, 1])
    # Power method
    for _ in range(itr):
        # v
        v = F.affine(w, u)
        v = F.div2(v, F.pow_scalar(
            F.sum(F.pow_scalar(v, 2.), keepdims=True) + eps, 0.5))
        v = F.reshape(v, [1, d0])
        # u
        u = F.affine(v, w)
        u = F.div2(u, F.pow_scalar(
            F.sum(F.pow_scalar(u, 2.), keepdims=True) + eps, 0.5))
        u = F.reshape(u, [d1, 1])
    # Iterate: write the updated singular vector back into u0
    u = F.identity(u, outputs=[u0.data])
    u.persistent = True
    # No grad through the power iteration
    u.need_grad = False
    v.need_grad = False
    # Spectral normalization
    wv = F.affine(v, w)
    sigma = F.affine(wv, u)
    sigma = F.broadcast(
        F.reshape(sigma, [1 for _ in range(len(w.shape))]), w.shape)
    w_sn = F.div2(w, sigma, outputs=[W_sn.data])
    w_sn.persistent = True
    return w_sn
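# Note (hedged): both manual power-iteration variants in this section
# estimate the largest singular value of the reshaped weight matrix W.
# One iteration is
#     v <- W^T u / ||W^T u||_2,   u <- W v / ||W v||_2
# (up to the orientation of W), after which sigma ~= u^T W v and the weight
# is rescaled as W_sn = W / sigma, so its spectral norm is approximately 1.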
def spectral_normalization_for_conv(w, itr=1, eps=1e-12, test=False):
    w_shape = w.shape
    W_sn = get_parameter_or_create("W_sn", w_shape, ConstantInitializer(0), False)
    if test:
        # At test time, return the cached normalized weight.
        return W_sn

    d0 = w.shape[0]            # Out
    d1 = np.prod(w.shape[1:])  # In
    w = F.reshape(w, [d0, d1], inplace=False)
    u0 = get_parameter_or_create(
        "singular-vector", [d0], NormalInitializer(), False)
    u = F.reshape(u0, [1, d0])
    # Power method
    for _ in range(itr):
        # v
        v = F.affine(u, w)
        v = F.div2(v, F.pow_scalar(
            F.sum(F.pow_scalar(v, 2.), keepdims=True) + eps, 0.5))
        v = F.reshape(v, [d1, 1])
        # u
        u = F.affine(w, v)
        u = F.div2(u, F.pow_scalar(
            F.sum(F.pow_scalar(u, 2.), keepdims=True) + eps, 0.5))
        u = F.reshape(u, [1, d0])
    # Iterate: write the updated singular vector back into u0
    u = F.identity(u, outputs=[u0.data])
    u.persistent = True
    # No grad through the power iteration
    u.need_grad = False
    v.need_grad = False
    # Spectral normalization
    wv = F.affine(w, v)
    sigma = F.affine(u, wv)
    w_sn = F.div2(w, sigma)
    w_sn = F.reshape(w_sn, w_shape)
    w_sn = F.identity(w_sn, outputs=[W_sn.data])
    w_sn.persistent = True
    return w_sn
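# A hedged usage sketch of the manual variant (names and shapes are
# illustrative); with test=True the cached "W_sn" parameter is returned
# instead of rebuilding the power-iteration graph:
import nnabla as nn
from nnabla.initializer import NormalInitializer
from nnabla.parameter import get_parameter_or_create

with nn.parameter_scope("disc/conv2"):
    w = get_parameter_or_create("W", (64, 32, 3, 3), NormalInitializer(0.02))
    w_sn = spectral_normalization_for_conv(w, itr=1, test=False)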
def he_initializer(ochan, kernel, rng):
    # He-style std: sqrt(2 / fan), with fan = kernel * kernel * ochan.
    return NormalInitializer(sigma=np.sqrt(2 / (kernel * kernel * ochan)),
                             rng=rng)
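# A hedged usage sketch: passing the initializer to PF.convolution
# (layer name, seed, and shapes are illustrative):
import numpy as np
import nnabla as nn
import nnabla.parametric_functions as PF

rng = np.random.RandomState(313)
x = nn.Variable((8, 3, 32, 32))
h = PF.convolution(x, 64, (3, 3), pad=(1, 1),
                   w_init=he_initializer(64, 3, rng), name='conv1')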
def call(self, audio, spec):
    r"""Return a variable.

    Args:
        audio (nn.Variable): A variable representing audio of shape
            (B, n_groups, L).
        spec (nn.Variable): A variable representing a spectrogram of shape
            (B, n_mels, L).

    Returns:
        nn.Variable: An output variable.
    """
    hp = self.hparams
    in_channels = audio.shape[1]
    n_channels = hp.wn_n_channels
    n_layers = hp.wn_n_layers
    kernel_size = hp.wn_kernel_size

    with nn.parameter_scope('start'):
        audio = PF.convolution(audio, n_channels, kernel=(1,),
                               apply_w=PF.weight_normalization,
                               w_init=NormalInitializer(0.05))

    for i in range(n_layers):
        with nn.parameter_scope(f'layer_{i}'):
            dilation = 2**i
            padding = ((kernel_size - 1) * dilation) // 2
            with nn.parameter_scope('fused'):
                with nn.parameter_scope('in_layer'):
                    audio_inp = PF.convolution(
                        audio, 2 * n_channels, kernel=(kernel_size,),
                        dilation=(dilation,), pad=(padding,),
                        apply_w=PF.weight_normalization,
                        w_init=NormalInitializer(0.05))
                with nn.parameter_scope('cond_layer'):
                    spec_inp = PF.convolution(
                        spec, 2 * n_channels, kernel=(1,),
                        apply_w=PF.weight_normalization,
                        w_init=NormalInitializer(0.05))
            acts = fused_add_tanh_sigmoid_multiply(
                audio_inp, spec_inp, n_channels)

            with nn.parameter_scope('res_skip'):
                channels = 2 * n_channels if i < n_layers - 1 else n_channels
                res_skip_acts = PF.convolution(
                    acts, channels, kernel=(1,),
                    apply_w=PF.weight_normalization,
                    w_init=NormalInitializer(0.05))

            if i < n_layers - 1:
                # Residual connection plus skip path.
                audio += res_skip_acts[:, :n_channels, :]
                skip_acts = res_skip_acts[:, n_channels:, :]
            else:
                skip_acts = res_skip_acts

            if i == 0:
                output = skip_acts
            else:
                output += skip_acts

    with nn.parameter_scope('end'):
        # Initializing the last layer to 0.
        output = PF.convolution(output, 2 * in_channels, kernel=(1,),
                                w_init=ConstantInitializer(0.0),
                                b_init=ConstantInitializer(0.0))
    return output
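# fused_add_tanh_sigmoid_multiply is referenced above but not defined in this
# section; a minimal sketch assuming the standard WaveNet/WaveGlow gating
# (tanh over the first half of the channels times sigmoid over the second):
import nnabla.functions as F

def fused_add_tanh_sigmoid_multiply(a, b, n_channels):
    inp = a + b  # sum the dilated-conv and conditioning activations
    t_act = F.tanh(inp[:, :n_channels, :])
    s_act = F.sigmoid(inp[:, n_channels:, :])
    return t_act * s_act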
def wn_deconv(*args, **kwargs):
    # Deconvolution with weight normalization applied to the weights.
    return PF.deconvolution(*args, **kwargs,
                            apply_w=PF.weight_normalization,
                            w_init=NormalInitializer(0.02))
def sn_conv(*args, **kwargs):
    # Convolution with spectral normalization applied to the weights.
    return PF.convolution(*args, **kwargs,
                          apply_w=lambda w: PF.spectral_norm(w, dim=0),
                          w_init=NormalInitializer(0.01))
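# A hedged usage sketch of the two wrappers (shapes and names are illustrative):
import nnabla as nn

x = nn.Variable((4, 3, 64, 64))
# Spectrally normalized convolution, e.g. in a discriminator.
h = sn_conv(x, 64, (4, 4), stride=(2, 2), pad=(1, 1), name='conv1')
# Weight-normalized deconvolution, e.g. for upsampling in a generator.
y = wn_deconv(h, 3, (4, 4), stride=(2, 2), pad=(1, 1), name='deconv1')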