from chainer import initializers


def create_initializer(init_type, scale=None, fillvalue=None):
    """Return a Chainer initializer selected by name.

    ``scale`` (and ``fillvalue`` for ``'constant'``) is forwarded when
    given; otherwise the initializer's default is used.
    """
    if init_type == 'identity':
        return (initializers.Identity() if scale is None
                else initializers.Identity(scale=scale))
    if init_type == 'constant':
        return initializers.Constant(fillvalue)
    if init_type == 'zero':
        return initializers.Zero()
    if init_type == 'one':
        return initializers.One()
    if init_type == 'normal':
        return (initializers.Normal() if scale is None
                else initializers.Normal(scale))
    if init_type == 'glorotNormal':
        return (initializers.GlorotNormal() if scale is None
                else initializers.GlorotNormal(scale))
    if init_type == 'heNormal':
        return (initializers.HeNormal() if scale is None
                else initializers.HeNormal(scale))
    if init_type == 'orthogonal':
        return (initializers.Orthogonal() if scale is None
                else initializers.Orthogonal(scale))
    if init_type == 'uniform':
        return (initializers.Uniform() if scale is None
                else initializers.Uniform(scale))
    if init_type == 'leCunUniform':
        return (initializers.LeCunUniform() if scale is None
                else initializers.LeCunUniform(scale))
    if init_type == 'glorotUniform':
        return (initializers.GlorotUniform() if scale is None
                else initializers.GlorotUniform(scale))
    if init_type == 'heUniform':
        return (initializers.HeUniform() if scale is None
                else initializers.HeUniform(scale))
    raise ValueError("Unknown initializer type: {0}".format(init_type))
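# A minimal usage sketch for create_initializer above (the array shapes are
# arbitrary): the returned initializer is called on an empty array and fills
# it in place.
import numpy as np

W = np.empty((10, 5), dtype=np.float32)
create_initializer('heNormal')(W)                 # He-normal, default scale
b = np.empty((5,), dtype=np.float32)
create_initializer('constant', fillvalue=0.1)(b)  # every entry set to 0.1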
def check_orthogonality(self, w):
    initializer = initializers.Orthogonal(scale=2.0)
    initializer(w)
    n = 1 if w.ndim == 0 else len(w)
    w = w.astype(numpy.float64).reshape(n, -1)
    dots = w.dot(w.T)
    # With scale=2.0 every row has norm 2, so the Gram matrix is 4 * I.
    testing.assert_allclose(
        dots, numpy.identity(n) * 4, **self.check_options)
def __init__(self, in_capsules, out_capsules, ksize, stride=1, pad=0,
             nobias=False, initialW=None, initial_bias=None):
    super().__init__()
    pi, n_pi = in_capsules
    rho, n_rho = out_capsules
    pi_dim = pi.dim
    rho_dim = rho.dim
    # Build a basis of intertwiners (equivariant linear maps) between the
    # induced input representation and the output representation.
    pi = group.induced_representation(pi, ksize)
    basis = group.intertwiner_basis(pi, rho)
    basis = basis.astype(chainer.config.dtype)
    hom_dim = basis.shape[1]
    # Mix the basis columns with a scaled random orthogonal matrix; the
    # spanned subspace is unchanged, but the columns are decorrelated.
    basis_randomizer = np.empty((hom_dim, hom_dim), dtype=basis.dtype)
    basis_scale = math.sqrt(basis.shape[0] / hom_dim)
    initializers.Orthogonal(scale=basis_scale)(basis_randomizer)
    basis = np.matmul(basis, basis_randomizer)
    basis = basis.reshape((rho_dim, pi_dim * ksize * ksize, hom_dim))
    self.add_persistent('basis', basis)
    if n_pi is None:
        in_channels = None
    else:
        in_channels = n_pi * pi_dim
    out_channels = n_rho * rho_dim
    self.ksize = ksize
    self.stride = stride
    self.pad = pad
    self.in_channels = in_channels
    self.out_channels = out_channels
    with self.init_scope():
        W_initializer = initializers._get_initializer(initialW)
        self.W = variable.Parameter(W_initializer)
        if in_channels is not None:
            self._initialize_params(in_channels)
        if nobias:
            self.b = None
        else:
            if initial_bias is None:
                initial_bias = 0
            bias_initializer = initializers._get_initializer(initial_bias)
            self.b = variable.Parameter(bias_initializer, out_channels)
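# A small standalone sketch of the basis-randomization step above, assuming
# only numpy and chainer.initializers (the basis here is random data, not a
# real intertwiner basis). Multiplying by an invertible matrix mixes the
# columns without changing the subspace they span.
import math
import numpy as np
from chainer import initializers

m, k = 18, 4                                    # hypothetical basis shape
basis = np.random.randn(m, k).astype(np.float32)
randomizer = np.empty((k, k), dtype=np.float32)
initializers.Orthogonal(scale=math.sqrt(m / k))(randomizer)
mixed = basis @ randomizer
# Orthogonal(scale=s) on a square array yields s * Q with Q orthogonal,
# so the randomizer's Gram matrix is (m / k) * I.
print(np.allclose(randomizer @ randomizer.T, (m / k) * np.eye(k), atol=1e-4))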
def setUp(self):
    self.w = numpy.empty(self.shape, dtype=numpy.float32)
    self.initializer = initializers.Orthogonal(scale=1.0)
def setUp(self):
    self.w = numpy.empty(0, dtype=numpy.float32)
    self.initializer = initializers.Orthogonal()
def check_shaped_initializer(self, xp):
    initializer = initializers.Orthogonal(scale=2.0, dtype=self.dtype)
    w = initializers.generate_array(initializer, self.shape, xp)
    self.assertIs(cuda.get_array_module(w), xp)
    self.assertTupleEqual(w.shape, self.shape)
    self.assertEqual(w.dtype, self.dtype)
def check_initializer(self, w):
    initializer = initializers.Orthogonal(scale=2.0)
    initializer(w)
    self.assertTupleEqual(w.shape, self.shape)
    self.assertEqual(w.dtype, self.dtype)
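# A standalone sketch of the property the checks above assert, assuming
# numpy and chainer.initializers: Orthogonal(scale=s) fills an array whose
# rows (after flattening the trailing axes) are mutually orthogonal with
# norm s, so W.dot(W.T) equals s**2 * I.
import numpy as np
from chainer import initializers

w = np.empty((3, 8), dtype=np.float32)
initializers.Orthogonal(scale=2.0)(w)
rows = w.reshape(len(w), -1).astype(np.float64)
print(np.allclose(rows.dot(rows.T), 4.0 * np.eye(3), atol=1e-5))  # True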
def __init__(self, vocab, essay_info_dict, para_info_dict, max_n_spans_para,
             max_n_paras, max_n_tokens, settings, baseline_heuristic=False,
             use_elmo=True, decoder="proposed"):

    ##########################
    # set default attributes #
    ##########################
    self.vocab = vocab
    self.essay_info_dict = essay_info_dict
    self.para_info_dict = para_info_dict
    self.encVocabSize = len(vocab)
    self.eDim = settings.eDim
    self.hDim = settings.hDim
    self.dropout = settings.dropout
    self.dropout_lstm = settings.dropout_lstm
    self.dropout_embedding = settings.dropout_embedding
    self.max_n_para = max_n_paras
    self.max_n_spans = max_n_spans_para
    self.max_n_tokens = max_n_tokens
    self.decoder = decoder
    self.args = settings

    ###############
    # Select LSTM #
    ###############
    self.lstm_ac = settings.lstm_ac
    self.lstm_shell = settings.lstm_shell
    self.lstm_ac_shell = settings.lstm_ac_shell
    self.lstm_type = settings.lstm_type

    ########################
    # position information #
    ########################
    self.position_info_size = self.max_n_spans * 3
    self.relative_position_info_size = 21

    ################
    # elmo setting #
    ################
    self.use_elmo = use_elmo
    if self.use_elmo:
        self.eDim = 1024

    ##########
    # others #
    ##########
    self.baseline_heuristic = baseline_heuristic

    ##############################
    # hidden representation size #
    ##############################
    self.lstm_out = self.hDim * 2
    if self.use_elmo:
        self.bow_feature_size = len(self.vocab)
    else:
        self.bow_feature_size = len(self.vocab) + 3 * self.eDim
    self.bow_rep_size = self.lstm_out

    # the size of representation created with LSTM-minus
    self.span_rep_size = self.lstm_out * 2

    # output of AC layer
    if self.lstm_ac:
        self.ac_rep_size = self.lstm_out
    else:
        self.ac_rep_size = self.span_rep_size

    # output of AM layer
    if self.lstm_shell:
        self.shell_rep_size = self.lstm_out
    else:
        self.shell_rep_size = self.span_rep_size

    # the size of ADU representation
    self.ac_shell_rep_size_in = self.ac_rep_size + \
        self.shell_rep_size + self.position_info_size + self.bow_rep_size

    # output of ADU layer
    if self.lstm_ac_shell:
        self.ac_shell_rep_size_out = self.lstm_out
    else:
        self.ac_shell_rep_size_out = self.ac_shell_rep_size_in

    # output of Encoder (ADU-level)
    self.reps_for_type_classification = self.ac_shell_rep_size_out

    # the size of ADU representations for link identification
    if self.lstm_type:
        self.type_rep_size = self.lstm_out
    else:
        self.type_rep_size = self.ac_shell_rep_size_out

    # the size of ADU pair representation
    self.span_pair_size = self.type_rep_size * 3 + \
        self.relative_position_info_size

    n_ac_shell_lstm_layers = 1

    super(BaseArgStrParser, self).__init__()
    with self.init_scope():
        self.Embed_x = chaLink.EmbedID(
            self.encVocabSize, self.eDim, ignore_label=-1)
        self.Bilstm = chaLink.NStepBiLSTM(
            n_layers=1, in_size=self.eDim, out_size=self.hDim,
            dropout=self.dropout_lstm)
        if self.lstm_ac:
            self.AcBilstm = chaLink.NStepBiLSTM(
                n_layers=1, in_size=self.span_rep_size,
                out_size=self.hDim, dropout=self.dropout_lstm)
        if self.lstm_shell:
            self.ShellBilstm = chaLink.NStepBiLSTM(
                n_layers=1, in_size=self.span_rep_size,
                out_size=self.hDim, dropout=self.dropout_lstm)
        self.AcShellBilstm = chaLink.NStepBiLSTM(
            n_layers=n_ac_shell_lstm_layers,
            in_size=self.ac_shell_rep_size_in,
            out_size=self.hDim, dropout=self.dropout_lstm)
        self.LastBilstm = chaLink.NStepBiLSTM(
            n_layers=1, in_size=self.ac_shell_rep_size_out,
            out_size=self.hDim, dropout=self.dropout_lstm)
        self.AcTypeLayer = chaLink.Linear(
            in_size=self.reps_for_type_classification, out_size=3,
            initialW=chaInit.Uniform(0.05),
            initial_bias=chaInit.Uniform(0.05))
        self.LinkTypeLayer = chaLink.Linear(
            in_size=self.reps_for_type_classification, out_size=2,
            initialW=chaInit.Uniform(0.05),
            initial_bias=chaInit.Uniform(0.05))
        self.RelationLayer = chaLink.Linear(
            in_size=self.span_pair_size, out_size=1,
            initialW=chaInit.Uniform(0.05),
            initial_bias=chaInit.Uniform(0.05))
        self.BowFCLayer = chaLink.Linear(
            in_size=self.bow_feature_size, out_size=self.bow_rep_size,
            initialW=chaInit.Uniform(0.05),
            initial_bias=chaInit.Uniform(0.05))
        self.root_embedding = chainer.Parameter(
            initializer=chaInit.Uniform(0.05), shape=self.type_rep_size)

        # self.position_info[0:12]: forward position
        # self.position_info[12:24]: backward position
        # self.position_info[24:28]: paragraph type
        if self.use_elmo:
            self.elmo_task_gamma = chainer.Parameter(
                initializer=chaInit.Constant(1), shape=1)
            self.elmo_task_s = chainer.Parameter(
                initializer=chaInit.Constant(1), shape=3)

    # Orthogonally initialize the LSTM weight matrices in place; the
    # initializer must be called on each parameter's array (assigning
    # ``param = chaInit.Orthogonal()`` only rebinds the loop variable and
    # leaves the weights untouched). 1-D biases keep their defaults.
    def init_orthogonal(link):
        for param in link.params():
            if param.ndim >= 2:
                chaInit.Orthogonal()(param.array)

    init_orthogonal(self.Bilstm)
    if self.lstm_ac:
        init_orthogonal(self.AcBilstm)
    if self.lstm_shell:
        init_orthogonal(self.ShellBilstm)
    init_orthogonal(self.AcShellBilstm)
    init_orthogonal(self.LastBilstm)
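# A minimal standalone sketch of the in-place re-initialization pattern used
# above, assuming only chainer and numpy: calling an initializer on
# ``param.array`` mutates the weight, and 1-D bias vectors are skipped
# because orthogonal initialization is only defined for matrices.
import numpy as np
import chainer.links as L
import chainer.initializers as I

lstm = L.NStepBiLSTM(n_layers=1, in_size=6, out_size=4, dropout=0.0)
for param in lstm.params():
    if param.ndim >= 2:                      # weight matrices only
        I.Orthogonal(scale=1.0)(param.array)

w = next(p.array for p in lstm.params() if p.ndim >= 2)
rows = w.reshape(len(w), -1)
print(np.allclose(rows.dot(rows.T), np.eye(len(w)), atol=1e-4))  # True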
import chainer
import chainer.functions as F
import chainer.links as L
import chainer.initializers as I
import numpy as np

initW = I.Orthogonal(dtype=np.float32)


class FitNet1(chainer.Chain):

    def __init__(self, class_labels=10):
        super(FitNet1, self).__init__()
        with self.init_scope():
            self.conv1_1 = L.Convolution2D(
                3, 16, ksize=(3, 3), pad=1, initialW=initW)
            self.conv1_2 = L.Convolution2D(
                16, 16, ksize=(3, 3), pad=1, initialW=initW)
            self.conv1_3 = L.Convolution2D(
                16, 16, ksize=(3, 3), pad=1, initialW=initW)
            # NOTE: the source snippet is truncated at the next call; the
            # 32-channel continuation below is an assumed FitNet-style
            # completion, not the original code.
            self.conv2_1 = L.Convolution2D(
                16, 32, ksize=(3, 3), pad=1, initialW=initW)
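# A quick construction check for the (partially reconstructed) FitNet1 above.
# No forward pass is defined in this snippet, so only instantiation and the
# effect of the orthogonal initializer are exercised: each kernel, flattened
# to (out_channels, in_channels * kh * kw), has mutually orthogonal rows.
model = FitNet1()
w = model.conv1_2.W.array.reshape(16, -1)
gram = w.dot(w.T)
print(np.allclose(gram, gram[0, 0] * np.eye(16), atol=1e-4))  # scaled identity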