def __init__(self, in_channels, out_channels, ksize, stride=1, pad=0,
             dilate=1, wscale=1, bias=0, nobias=False, use_cudnn=True,
             initialW=None, initial_bias=None):
    super(DilatedConvolution2D, self).__init__()
    self.ksize = ksize
    self.stride = _pair(stride)
    self.pad = _pair(pad)
    self.dilate = _pair(dilate)
    self.use_cudnn = use_cudnn
    self.out_channels = out_channels
    self.initialW = initialW
    self.wscale = wscale

    if in_channels is None:
        self.add_uninitialized_param('W')
    else:
        self._initialize_params(in_channels)

    if nobias:
        self.b = None
    else:
        self.add_param('b', out_channels)
        if initial_bias is None:
            initial_bias = bias
        initializers.init_weight(self.b.data, initial_bias)
def __init__(self, in_channels, out_channels, ksize, stride=1, pad=0,
             wscale=1, bias=0, nobias=False, use_cudnn=True,
             initialW=None, initial_bias=None):
    #super(BinaryConv2D_Link, self).__init__()
    super(Convolution2D, self).__init__()
    self.ksize = ksize
    self.stride = _pair(stride)
    self.pad = _pair(pad)
    self.use_cudnn = use_cudnn
    self.out_channels = out_channels
    self.initialW = initialW
    self.wscale = wscale

    if in_channels is None:
        self.add_uninitialized_param('W')
    else:
        self._initialize_params(in_channels)
        kh, kw = _pair(self.ksize)
        W_shape = (self.out_channels, in_channels, kh, kw)
        #self.add_param('W', W_shape)
        # For backward compatibility, the scale of weights is proportional
        # to the square root of wscale.
        initializers.init_weight(self.W.data, self.initialW,
                                 scale=math.sqrt(self.wscale))

    if nobias:
        self.b = None
    else:
        self.add_param('b', out_channels)
        if initial_bias is None:
            initial_bias = bias
        initializers.init_weight(self.b.data, initial_bias)
def __init__(self, in_channels, out_channels, ksize, stride=1, pad=0,
             wscale=1, bias=0, nobias=False, outsize=None, use_cudnn=True,
             initialV=None, dtype=np.float32):
    kh, kw = _pair(ksize)
    self.stride = _pair(stride)
    self.pad = _pair(pad)
    self.outsize = (None, None) if outsize is None else outsize
    self.use_cudnn = use_cudnn
    self.dtype = dtype
    self.nobias = nobias
    self.out_channels = out_channels
    self.in_channels = in_channels

    V_shape = (in_channels, out_channels, kh, kw)
    super(Deconvolution2D, self).__init__(V=V_shape)
    if isinstance(initialV, (np.ndarray, cuda.ndarray)):
        assert initialV.shape == (in_channels, out_channels, kh, kw)
    initializers.init_weight(self.V.data, initialV, scale=math.sqrt(wscale))

    if nobias:
        self.b = None
    else:
        self.add_uninitialized_param("b")
    self.add_uninitialized_param("g")
def __init__(self, in_channels, out_channels, ksize, stride=1, pad=0,
             wscale=1, bias=0, nobias=False, outsize=None, use_cudnn=True,
             initialW=None, initial_bias=None, deterministic=False):
    kh, kw = _pair(ksize)
    self.stride = _pair(stride)
    self.pad = _pair(pad)
    self.outsize = (None, None) if outsize is None else outsize
    self.use_cudnn = use_cudnn
    self.deterministic = deterministic

    W_shape = (in_channels, out_channels, kh, kw)
    super(Deconvolution2D, self).__init__(W=W_shape)
    if isinstance(initialW, (numpy.ndarray, cuda.ndarray)):
        assert initialW.shape == (in_channels, out_channels, kh, kw)
    # For backward compatibility, the scale of weights is proportional to
    # the square root of wscale.
    initializers.init_weight(self.W.data, initialW, scale=math.sqrt(wscale))

    if nobias:
        self.b = None
    else:
        self.add_param('b', out_channels)
        if isinstance(initial_bias, (numpy.ndarray, cuda.ndarray)):
            assert initial_bias.shape == (out_channels,)
        if initial_bias is None:
            initial_bias = bias
        initializers.init_weight(self.b.data, initial_bias)
def __init__(self, in_channels, out_channels, ksize, stride=1, pad=0,
             wscale=1, bias=0, nobias=False, outsize=None, use_cudnn=True,
             initialW=None, initial_bias=None, deterministic=False):
    super(Deconvolution2D, self).__init__()
    self.ksize = ksize
    self.stride = _pair(stride)
    self.pad = _pair(pad)
    self.outsize = (None, None) if outsize is None else outsize
    self.use_cudnn = use_cudnn
    self.initialW = initialW
    self.wscale = wscale
    self.out_channels = out_channels
    self.deterministic = deterministic

    if in_channels is None:
        self.add_uninitialized_param('W')
    else:
        self._initialize_params(in_channels)

    if nobias:
        self.b = None
    else:
        self.add_param('b', out_channels)
        if isinstance(initial_bias, (numpy.ndarray, cuda.ndarray)):
            assert initial_bias.shape == (out_channels,)
        if initial_bias is None:
            initial_bias = bias
        initializers.init_weight(self.b.data, initial_bias)
def __init__(self, size, decay=0.9, eps=2e-5, dtype=numpy.float32,
             use_gamma=True, use_beta=True,
             initial_gamma=None, initial_beta=None):
    super(BatchNormalization, self).__init__()
    if use_gamma:
        self.add_param('gamma', size, dtype=dtype)
        if initial_gamma is None:
            initial_gamma = initializers.One()
        initializers.init_weight(self.gamma.data, initial_gamma)
    if use_beta:
        self.add_param('beta', size, dtype=dtype)
        if initial_beta is None:
            initial_beta = initializers.Zero()
        initializers.init_weight(self.beta.data, initial_beta)
    self.add_persistent('avg_mean', numpy.zeros(size, dtype=dtype))
    self.add_persistent('avg_var', numpy.zeros(size, dtype=dtype))
    self.add_persistent('N', 0)
    self.decay = decay
    self.eps = eps
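# Usage sketch for the link above (hypothetical sizes; assumes a full
# Chainer v1.x environment where this class is defined). Because
# init_weight accepts an initializer object, a scalar, or an ndarray,
# initial_gamma can be passed in any of the three forms:
import numpy
bn_default = BatchNormalization(16, decay=0.99)        # gamma <- One()
bn_scalar = BatchNormalization(16, initial_gamma=0.5)  # scalar fill
bn_array = BatchNormalization(
    16, initial_gamma=numpy.ones(16, dtype=numpy.float32))  # array copy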
def _initialize_params(self, in_size):
    self.add_param('W', (self.out_size, in_size))
    # For backward compatibility, the scale of weights is proportional to
    # the square root of wscale.
    initializers.init_weight(self.W.data, self.initialW,
                             scale=math.sqrt(self.wscale))
def __init__(self, in_size, out_size, wscale=1, bias=0, nobias=False,
             initialW=None, initial_bias=None):
    super(BinaryLinear, self).__init__()
    self.initialW = initialW
    self.wscale = wscale
    self.out_size = out_size

    if in_size is None:
        self.add_uninitialized_param('W')
    else:
        self._initialize_params(in_size)

    if nobias:
        self.b = None
    else:
        self.add_param('b', out_size)
        if initial_bias is None:
            initial_bias = bias
        initializers.init_weight(self.b.data, initial_bias)
def _initialize_params(self, in_channels):
    kh, kw = _pair(self.ksize)
    W_shape = (in_channels, self.out_channels, kh, kw)
    self.add_param('W', W_shape)
    # For backward compatibility, the scale of weights is proportional to
    # the square root of wscale.
    initializers.init_weight(self.W.data, self.initialW,
                             scale=math.sqrt(self.wscale))
def _initialize_params(self, in_channels):
    kh, kw = _pair(self.ksize)
    W_shape = (self.out_channels, in_channels, kh, kw)
    self.add_param('W', W_shape)
    # For backward compatibility, the scale of weights is proportional to
    # the square root of wscale.
    initializers.init_weight(self.W.data, self.initialW,
                             scale=math.sqrt(self.wscale))
def __init__(self, in_size, out_size, lateral_init=None):
    super(GridLSTMBase, self).__init__(
        lateral=linear.Linear(in_size, 4 * out_size, initialW=0,
                              nobias=True),
    )
    self.state_size = out_size
    for i in six.moves.range(0, 4 * out_size, out_size):
        initializers.init_weight(
            self.lateral.W.data[i:i + out_size, :], lateral_init)
def __init__(self, nc, dtype=np.float32):
    super(InstanceNormalization, self).__init__()
    self.nc = nc
    self.dtype = dtype
    self.bn = None
    self.prev_batch = None
    self.add_param('gamma', nc, dtype=dtype)
    initializers.init_weight(self.gamma.data, np.random.uniform(size=nc))
    self.add_param('beta', nc, dtype=dtype)
    initializers.init_weight(self.beta.data, initializers.Zero())
def __init__(self, children, in_size, out_size, lateral_init=None,
             upward_init=None, bias_init=0, forget_bias_init=0):
    super(SLSTM, self).__init__(
        upward=linear.Linear(in_size, 4 * out_size, initialW=0))
    self.state_size = out_size
    self.n_children = children

    for i in range(0, 4 * out_size, out_size):
        initializers.init_weight(self.upward.W.data[i:i + out_size, :],
                                 upward_init)

    a, i, f, o = numpy_extract_gates(
        self.upward.b.data.reshape(1, 4 * out_size, 1))
    initializers.init_weight(a, bias_init)
    initializers.init_weight(i, bias_init)
    initializers.init_weight(f, forget_bias_init)
    initializers.init_weight(o, bias_init)

    # hidden unit gates for each child
    for i in range(self.n_children):
        self.add_link(
            self.U_I_H.format(i),
            linear.Linear(out_size, out_size, initialW=lateral_init,
                          nobias=True))
        self.add_link(
            self.U_O_H.format(i),
            linear.Linear(out_size, out_size, initialW=lateral_init,
                          nobias=True))
        self.add_link(
            self.U_A_H.format(i),
            linear.Linear(out_size, out_size, initialW=lateral_init,
                          nobias=True))
        for j in range(self.n_children):
            self.add_link(
                self.U_F_H.format(i, j),
                linear.Linear(out_size, out_size, initialW=lateral_init,
                              nobias=True))
def __init__(self, in_size, out_size, wscale=1, bias=0, nobias=False,
             initialW=None, initial_bias=None):
    super(Linear, self).__init__(W=(out_size, in_size))
    # For backward compatibility, the scale of weights is proportional to
    # the square root of wscale.
    initializers.init_weight(self.W.data, initialW, scale=math.sqrt(wscale))
    if nobias:
        self.b = None
    else:
        self.add_param('b', out_size)
        if initial_bias is None:
            initial_bias = bias
        initializers.init_weight(self.b.data, initial_bias)
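# A minimal sketch of the initializer forms accepted by init_weight in
# Chainer v1.x, which is what lets initialW above be None, a scalar, an
# ndarray, or an initializer object (shapes here are arbitrary examples):
import numpy
from chainer import initializers

W = numpy.empty((2, 3), dtype=numpy.float32)
initializers.init_weight(W, initializers.Normal(0.05))  # initializer object
initializers.init_weight(W, 0)                          # scalar constant
initializers.init_weight(W, numpy.ones((2, 3)))         # ndarray copy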
def _initialize_params(self):
    bias_initializer = initializers.Zero()
    self.add_param('b', self.state_size * 4, initializer=bias_initializer)
    a, i, f, o = lstm._extract_gates(
        self.b.data.reshape(1, 4 * self.state_size, 1))
    initializers.init_weight(a, self.bias_init)
    initializers.init_weight(i, self.bias_init)
    initializers.init_weight(f, self.forget_bias_init)
    initializers.init_weight(o, self.bias_init)
def __init__(self, ndim, in_channels, out_channels, ksize, stride=1, pad=0,
             initialW=None, initial_bias=None, use_cudnn=True,
             cover_all=False):
    ksize = conv_nd.as_tuple(ksize, ndim)
    self.stride = stride
    self.pad = pad
    self.use_cudnn = use_cudnn
    self.cover_all = cover_all

    W_shape = (out_channels, in_channels) + ksize
    super(ConvolutionND, self).__init__(W=W_shape)
    initializers.init_weight(self.W.data, initialW)

    if initial_bias is None:
        self.b = None
    else:
        self.add_param('b', out_channels)
        initializers.init_weight(self.b.data, initial_bias)
def __init__(self, in_size, out_size, wscale=1, bias=0, nobias=True,
             initialW=None, initial_bias=None):
    super(MultiLinear, self).__init__()
    self.initialW = initialW
    self.wscale = wscale
    self.out_size = out_size
    self.in_size = in_size
    # self.batch_size = batch_size

    if in_size is None:
        self.add_uninitialized_param("W")
    else:
        self._initialize_params(in_size)

    if nobias:
        self.b = None
    else:
        self.add_param("b", out_size)
        if initial_bias is None:
            initial_bias = bias
        initializers.init_weight(self.b.data, initial_bias)
def __init__(self, in_size, out_size, wscale=1, bias=0, nobias=False,
             initialW=None, initial_bias=None):
    super(Linear, self).__init__()
    self.initialW = initialW
    self.wscale = wscale
    self.out_size = out_size

    if in_size is None:
        self.add_uninitialized_param('W')
    else:
        self._initialize_params(in_size)

    if nobias:
        self.b = None
    else:
        self.add_param('b', out_size)
        if initial_bias is None:
            initial_bias = bias
        initializers.init_weight(self.b.data, initial_bias)
def __init__(self, in_channels, out_channels, ksize, stride=1, pad=0,
             wscale=1, bias=0, nobias=False, use_cudnn=True,
             initialW=None, initial_bias=None):
    kh, kw = _pair(ksize)
    self.stride = _pair(stride)
    self.pad = _pair(pad)
    self.use_cudnn = use_cudnn

    W_shape = (out_channels, in_channels, kh, kw)
    super(Convolution2D, self).__init__(W=W_shape)
    # For backward compatibility, the scale of weights is proportional to
    # the square root of wscale.
    initializers.init_weight(self.W.data, initialW, scale=math.sqrt(wscale))

    if nobias:
        self.b = None
    else:
        self.add_param('b', out_channels)
        if initial_bias is None:
            initial_bias = bias
        initializers.init_weight(self.b.data, initial_bias)
def __init__(self, left_size, right_size, out_size, nobias=True,
             initialW=None, initial_bias=None):
    super(Retrieval, self).__init__(W=(left_size, right_size, out_size))
    self.in_sizes = (left_size, right_size)
    self.nobias = nobias

    # TODO(Kenta OONO): I do not know appropriate way of
    # initializing weights in tensor network.
    # This initialization is a modification of
    # that of Linear function.
    '''
    if isinstance(initialW, (numpy.ndarray, cuda.ndarray)):
        assert initialW.shape == self.W.shape
    '''
    initializers.init_weight(self.W.data, initialW)

    if not self.nobias:
        self.add_param('V1', (left_size, out_size))
        self.add_param('V2', (right_size, out_size))
        self.add_param('b', out_size)

        if isinstance(initial_bias, tuple):
            V1, V2, b = initial_bias
        elif initial_bias is None:
            V1 = V2 = None
            b = 0
        else:
            raise ValueError('initial_bias must be tuple or None')

        if isinstance(V1, (numpy.ndarray, cuda.ndarray)):
            assert V1.shape == self.V1.shape
        if isinstance(V2, (numpy.ndarray, cuda.ndarray)):
            assert V2.shape == self.V2.shape
        if isinstance(b, (numpy.ndarray, cuda.ndarray)):
            assert b.shape == self.b.shape
        initializers.init_weight(self.V1.data, V1)
        initializers.init_weight(self.V2.data, V2)
        initializers.init_weight(self.b.data, b)
def __init__(self, left_size, right_size, out_size, nobias=False,
             initialW=None, initial_bias=None):
    super(Bilinear, self).__init__(W=(left_size, right_size, out_size))
    self.in_sizes = (left_size, right_size)
    self.nobias = nobias

    # TODO(Kenta OONO): I do not know appropriate way of
    # initializing weights in tensor network.
    # This initialization is a modification of
    # that of Linear function.
    if isinstance(initialW, (numpy.ndarray, cuda.ndarray)):
        assert initialW.shape == self.W.data.shape
    initializers.init_weight(self.W.data, initialW)

    if not self.nobias:
        self.add_param('V1', (left_size, out_size))
        self.add_param('V2', (right_size, out_size))
        self.add_param('b', out_size)

        if isinstance(initial_bias, tuple):
            V1, V2, b = initial_bias
        elif initial_bias is None:
            V1 = V2 = None
            b = 0
        else:
            raise ValueError('initial_bias must be tuple or None')

        if isinstance(V1, (numpy.ndarray, cuda.ndarray)):
            assert V1.shape == self.V1.data.shape
        if isinstance(V2, (numpy.ndarray, cuda.ndarray)):
            assert V2.shape == self.V2.data.shape
        if isinstance(b, (numpy.ndarray, cuda.ndarray)):
            assert b.shape == self.b.data.shape
        initializers.init_weight(self.V1.data, V1)
        initializers.init_weight(self.V2.data, V2)
        initializers.init_weight(self.b.data, b)
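# Usage sketch of the initial_bias contract enforced above: it must be a
# (V1, V2, b) tuple or None, with shapes matching the V1, V2, and b
# parameters (sizes here are arbitrary examples):
import numpy
V1 = numpy.zeros((4, 2), dtype=numpy.float32)
V2 = numpy.zeros((5, 2), dtype=numpy.float32)
b = numpy.zeros(2, dtype=numpy.float32)
bl = Bilinear(4, 5, 2, initial_bias=(V1, V2, b))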
def __init__(self, in_size, out_size, initialW=None, ignore_label=None):
    super(EmbedID, self).__init__(W=(in_size, out_size))
    if initialW is None:
        initialW = initializers.Normal(1.0)
    initializers.init_weight(self.W.data, initialW)
    self.ignore_label = ignore_label
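# Usage sketch (hypothetical sizes): with initialW=None the embedding
# matrix is drawn from N(0, 1) via the Normal(1.0) default above, and
# ignore_label marks an id to skip at lookup time:
embed = EmbedID(1000, 50, ignore_label=-1)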
def _initialize_params(self, size):
    self.add_param('gamma', size)
    initializers.init_weight(self.gamma.data, self._gamma_initializer)
    self.add_param('beta', size)
    initializers.init_weight(self.beta.data, self._beta_initializer)
def initialize_LSTM(self, LSTM, initializer):
    initializers.init_weight(LSTM.upward.W.data, initializer)
    initializers.init_weight(LSTM.lateral.W.data, initializer)
def initialize_parameters(self):
    G_init = initializers.GlorotNormal()
    #initializers.init_weight(self.W_predict.W.data, G_init)
    initializers.init_weight(self.W_candidate.W.data, G_init)
    self.initialize_LSTM(self.LSTM, G_init)
def __init__(self, in_size, out_size, lateral_init=None, upward_init=None,
             bias_init=0, forget_bias_init=0):
    super(LSTMBase, self).__init__(
        upward=linear.Linear(in_size, 4 * out_size, initialW=0),
        lateral=linear.Linear(out_size, 4 * out_size, initialW=0,
                              nobias=True),
    )
    self.state_size = out_size

    for i in six.moves.range(0, 4 * out_size, out_size):
        initializers.init_weight(
            self.lateral.W.data[i:i + out_size, :], lateral_init)
        initializers.init_weight(
            self.upward.W.data[i:i + out_size, :], upward_init)

    a, i, f, o = lstm._extract_gates(
        self.upward.b.data.reshape(1, 4 * out_size, 1))
    initializers.init_weight(a, bias_init)
    initializers.init_weight(i, bias_init)
    initializers.init_weight(f, forget_bias_init)
    initializers.init_weight(o, bias_init)
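# A small check of why the per-gate bias initialization above works:
# assuming the Chainer v1.x lstm._extract_gates, the returned gate slices
# are numpy views into the underlying array, so writing them (as
# init_weight does) also updates self.upward.b.data in place:
import numpy
from chainer.functions.activation import lstm

out_size = 3
b = numpy.zeros((1, 4 * out_size, 1), dtype=numpy.float32)
a, i, f, o = lstm._extract_gates(b)
f[...] = 1.0                 # e.g. forget_bias_init = 1
assert b.sum() == out_size   # the writes landed in b itself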
def __init__(self, children, in_size, out_size, lateral_init=None,
             upward_init=None, bias_init=0, forget_bias_init=0):
    super(FasterTreeLSTM, self).__init__(
        upward=L.Linear(in_size, 4 * out_size, initialW=0),
        lateral=L.Linear(children * out_size, 3 * children * out_size,
                         initialW=0, nobias=True),
        forget=L.Linear(children * out_size,
                        children * children * out_size,
                        initialW=0, nobias=True)
    )
    self.state_size = out_size
    self.n_children = children

    for i in range(0, 4 * out_size, out_size):
        initializers.init_weight(self.upward.W.data[i:i + out_size, :],
                                 upward_init)
        for j in range(0, 4 * out_size, out_size):
            initializers.init_weight(
                self.lateral.W.data[i + j:i + j + out_size, :],
                lateral_init)
        for j in range(0, self.n_children * out_size, out_size):
            initializers.init_weight(
                self.forget.W.data[i + j:i + j + out_size, :],
                lateral_init)

    a, i, f, o = numpy_extract_gates(
        self.upward.b.data.reshape(1, 4 * out_size, 1))
    initializers.init_weight(a, bias_init)
    initializers.init_weight(i, bias_init)
    initializers.init_weight(f, forget_bias_init)
    initializers.init_weight(o, bias_init)
def _initialize_params(self):
    self.add_param('W', self.initialW.shape)
    # For backward compatibility, the scale of weights is proportional to
    # the square root of wscale.
    initializers.init_weight(self.W.data, self.initialW,
                             scale=math.sqrt(self.wscale))
def _initialize_params(self, in_channels):
    kh, kw = _pair(self.ksize)
    W_shape = (self.out_channels, in_channels, kh, kw)
    self.add_param('W', W_shape)
    initializers.init_weight(self.W.data, self.initialW)
def _initialize_params(self):
    for i in six.moves.range(0, 4 * self.state_size, self.state_size):
        initializers.init_weight(
            self.lateral.W.data[i:i + self.state_size, :],
            self.lateral_init)
        initializers.init_weight(
            self.upward.W.data[i:i + self.state_size, :],
            self.upward_init)
    a, i, f, o = lstm._extract_gates(
        self.upward.b.data.reshape(1, 4 * self.state_size, 1))
    initializers.init_weight(a, self.bias_init)
    initializers.init_weight(i, self.bias_init)
    initializers.init_weight(f, self.forget_bias_init)
    initializers.init_weight(o, self.bias_init)
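# For reference, a rough paraphrase (not the verbatim source) of what
# chainer.initializers.init_weight does in v1.x with the arguments used
# throughout this file: None falls back to a library default, scalars and
# ndarrays become constant fills/copies, and any other initializer object
# is called on the array in place before the wscale-derived scaling:
import numpy
from chainer import initializers

def init_weight_sketch(weights, initializer, scale=1.0):
    if initializer is None:
        initializer = initializers.HeNormal(1 / numpy.sqrt(2))
    elif numpy.isscalar(initializer) or isinstance(initializer,
                                                   numpy.ndarray):
        initializer = initializers.Constant(initializer)
    initializer(weights)  # fills `weights` in place
    weights *= scale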