def _build(self):
    """Initialize the recurrent state (h, c) for a stateful RNN.

    State source, in order of precedence: an explicit ``initial_state``
    pair, an all-zero state sized by ``batch_size``, or the last two
    entries of ``initial_weights``.  Remaining weights are then loaded
    through ``set_weights``.
    """
    have_weights = self.initial_weights is not None
    n_weights = len(self.initial_weights) if have_weights else 0
    if self.initial_state is not None:
        # An explicit state pair wins over everything else.
        self.h = sharedX(self.initial_state[0])
        self.c = sharedX(self.initial_state[1])
        del self.initial_state
    elif self.batch_size is not None:
        # Start from a zero state with the right batch shape.
        self.h = shared_zeros((self.batch_size, self.hidden_dim))
        self.c = shared_zeros((self.batch_size, self.hidden_dim))
    elif have_weights:
        if n_weights != len(self.params) + 2:
            raise Exception("Hidden state not provided in weights")
        # NOTE(review): h comes from weights[-1] and c from weights[-2] —
        # this assumes a serialized order of [..., c, h]; confirm against
        # the matching get_weights/save code.
        self.h = sharedX(self.initial_weights[-1])
        self.c = sharedX(self.initial_weights[-2])
        n_weights -= 2
    else:
        raise Exception(
            "One of the following arguments must be provided for stateful RNNs: hidden_state, batch_size, weights"
        )
    self.state = [self.h, self.c]
    if have_weights:
        # Load the parameter weights (state entries, if any, were consumed above).
        self.set_weights(self.initial_weights[:n_weights])
        del self.initial_weights
def build(self):
    """Allocate trainable weights: maxout pieces plus gate/candidate/output
    transforms, then optionally load ``initial_weights``."""
    input_dim = self.input_shape[2]
    out_dim = self.output_dim
    self.input = T.tensor3()
    scale = 0.05
    # Maxout weight pieces and their per-unit bias.
    self.W_maxout = sharedX(np.random.uniform(
        low=-scale, high=scale, size=(self.n_opt, 2, self.n_pieces)))
    self.b_maxout = shared_zeros((out_dim, self.n_opt, self.n_pieces))
    # Gate: the recurrent part U_g carries one slice per option.
    self.W_g = self.init((input_dim, out_dim))
    self.U_g = sharedX(np.random.uniform(
        low=-scale, high=scale, size=(out_dim, self.n_opt, out_dim)))
    self.b_g = shared_zeros((out_dim))
    # Candidate (c) and output (o) transforms.
    self.W_c = self.init((input_dim, out_dim))
    self.U_c = self.inner_init((out_dim, out_dim))
    self.b_c = shared_zeros((out_dim))
    self.W_o = self.init((input_dim, out_dim))
    self.U_o = self.inner_init((out_dim, out_dim))
    self.b_o = shared_zeros((out_dim))
    self.params = [
        self.W_maxout, self.b_maxout,
        self.W_g, self.U_g, self.b_g,
        self.W_c, self.U_c, self.b_c,
        self.W_o, self.U_o, self.b_o,
    ]
    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
def build(self):
    """Create LSTM gate parameters and the persistent (stateful) h/c state.

    The state comes from, in order of precedence: ``initial_state``,
    zeros sized by ``batch_size``, or the last two entries of
    ``initial_weights``.
    """
    input_dim = self.input_shape[2]
    out_dim = self.output_dim
    self.input = T.tensor3()
    # Input (i), forget (f), candidate (c), output (o) gate weights.
    self.W_i = self.init((input_dim, out_dim))
    self.U_i = self.inner_init((out_dim, out_dim))
    self.b_i = shared_zeros((out_dim))
    self.W_f = self.init((input_dim, out_dim))
    self.U_f = self.inner_init((out_dim, out_dim))
    self.b_f = self.forget_bias_init((out_dim))
    self.W_c = self.init((input_dim, out_dim))
    self.U_c = self.inner_init((out_dim, out_dim))
    self.b_c = shared_zeros((out_dim))
    self.W_o = self.init((input_dim, out_dim))
    self.U_o = self.inner_init((out_dim, out_dim))
    self.b_o = shared_zeros((out_dim))
    self.params = [
        self.W_i, self.U_i, self.b_i,
        self.W_c, self.U_c, self.b_c,
        self.W_f, self.U_f, self.b_f,
        self.W_o, self.U_o, self.b_o,
    ]
    have_weights = self.initial_weights is not None
    n_weights = len(self.initial_weights) if have_weights else 0
    if self.initial_state is not None:
        # An explicit state pair takes precedence.
        self.h = sharedX(self.initial_state[0])
        self.c = sharedX(self.initial_state[1])
        del self.initial_state
    elif self.batch_size is not None:
        self.h = shared_zeros((self.batch_size, out_dim))
        self.c = shared_zeros((self.batch_size, out_dim))
    elif have_weights:
        if n_weights != len(self.params) + 2:
            raise Exception("Hidden state not provided in weights")
        # NOTE(review): h from weights[-1], c from weights[-2] — assumes a
        # serialized order of [..., c, h]; confirm against get_weights.
        self.h = sharedX(self.initial_weights[-1])
        self.c = sharedX(self.initial_weights[-2])
        n_weights -= 2
    else:
        raise Exception(
            "One of the following arguments must be provided for stateful RNNs: hidden_state, batch_size, weights"
        )
    self.state = [self.h, self.c]
    if have_weights:
        self.set_weights(self.initial_weights[:n_weights])
        del self.initial_weights
def __init__(self, input_dim, output_dim, causes_dim, init='glorot_uniform',
             activation='linear', truncate_gradient=-1, gamma=.1,
             n_steps=10, return_mode='all', W_regularizer=None,
             V_regularizer=None, activity_regularizer=None,
             code_shape=None, pool_size=None, **kwargs):
    """Two-layer sparse coding layer (codes W plus "causes" V on top).

    Parameters: `input_dim`/`output_dim`/`causes_dim` are the input, code
    and cause sizes; `gamma` and `n_steps` control the inference loop;
    `code_shape` + `pool_size` (both required together) enable pooling of
    the code layer before the causes.  Optional regularizers attach to W,
    V, and the layer's activity respectively.
    """
    super(Sparse2L, self).__init__()
    self.init = initializations.get(init)
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.causes_dim = causes_dim
    self.gamma = gamma
    self.n_steps = n_steps
    self.truncate_gradient = truncate_gradient
    self.activation = activations.get(activation)
    self.return_mode = return_mode
    self.input = T.matrix()
    # Pooling is enabled only when both shape and size are provided.
    self.pool_flag = False
    if (code_shape is not None) and (pool_size is not None):
        self.code_shape = code_shape
        self.pool_size = pool_size
        self.pool_flag = True
    self.W = self.init((self.output_dim, self.input_dim))
    if self.pool_flag:
        # V maps pooled codes, so its width shrinks by pool_size per axis.
        new_dim = int(np.sqrt(output_dim)/self.pool_size)**2
        self.V = sharedX(np.random.uniform(low=0, high=1,
                                           size=(self.causes_dim, new_dim)))
    else:
        self.V = sharedX(np.random.uniform(low=0, high=.1,
                                           size=(self.causes_dim, self.output_dim)))
    self.params = [self.W, self.V]
    self.regularizers = []
    # BUG FIX: W and V regularizers are handled independently.  Previously
    # V_regularizer was consulted only when W_regularizer was truthy, so
    # passing W without V crashed (None.set_param) and passing V without W
    # silently dropped it.
    if W_regularizer:
        W_regularizer.set_param(self.W)
        self.regularizers.append(W_regularizer)
    if V_regularizer:
        V_regularizer.set_param(self.V)
        self.regularizers.append(V_regularizer)
    if activity_regularizer:
        activity_regularizer.set_layer(self)
        self.regularizers.append(activity_regularizer)
    kwargs['input_shape'] = (None, self.input_dim)
    super(Sparse2L, self).__init__(**kwargs)
def build(self):
    """Allocate two recurrent maxout weight tensors plus gate/candidate/
    output transforms, then optionally load ``initial_weights``."""
    input_dim = self.input_shape[2]
    out_dim = self.output_dim
    self.input = T.tensor3()
    scale = 0.05
    # Two maxout weight tensors, one (out_dim x out_dim) slice per
    # option/piece, and a shared bias.
    maxout_shape = (self.n_opt, out_dim, out_dim, self.n_pieces)
    self.W_maxout_1 = sharedX(
        np.random.uniform(low=-scale, high=scale, size=maxout_shape))
    self.W_maxout_2 = sharedX(
        np.random.uniform(low=-scale, high=scale, size=maxout_shape))
    self.b_maxout = shared_zeros((self.n_opt, out_dim, self.n_pieces))
    # Gate: the recurrent part U_g carries one slice per option.
    self.W_g = self.init((input_dim, out_dim))
    self.U_g = sharedX(np.random.uniform(
        low=-scale, high=scale, size=(out_dim, self.n_opt, out_dim)))
    self.b_g = shared_zeros((out_dim))
    # Candidate (c) and output (o) transforms.
    self.W_c = self.init((input_dim, out_dim))
    self.U_c = self.inner_init((out_dim, out_dim))
    self.b_c = shared_zeros((out_dim))
    self.W_o = self.init((input_dim, out_dim))
    self.U_o = self.inner_init((out_dim, out_dim))
    self.b_o = shared_zeros((out_dim))
    self.params = [
        self.W_maxout_1, self.W_maxout_2, self.b_maxout,
        self.W_g, self.U_g, self.b_g,
        self.W_c, self.U_c, self.b_c,
        self.W_o, self.U_o, self.b_o,
    ]
    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
def __init__(self, input_dim, output_dim, causes_dim, init='glorot_uniform',
             activation='linear', truncate_gradient=-1, gamma=.1,
             n_steps=10, return_mode='all', W_regularizer=None,
             V_regularizer=None, activity_regularizer=None,
             code_shape=None, pool_size=None):
    """Two-layer sparse coding layer (codes W plus "causes" V on top).

    Parameters: `input_dim`/`output_dim`/`causes_dim` are the input, code
    and cause sizes; `gamma` and `n_steps` control the inference loop;
    `code_shape` + `pool_size` (both required together) enable pooling of
    the code layer before the causes.  Optional regularizers attach to W,
    V, and the layer's activity respectively.
    """
    super(Sparse2L, self).__init__()
    self.init = initializations.get(init)
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.causes_dim = causes_dim
    self.gamma = gamma
    self.n_steps = n_steps
    self.truncate_gradient = truncate_gradient
    self.activation = activations.get(activation)
    self.return_mode = return_mode
    self.input = T.matrix()
    # Pooling is enabled only when both shape and size are provided.
    self.pool_flag = False
    if (code_shape is not None) and (pool_size is not None):
        self.code_shape = code_shape
        self.pool_size = pool_size
        self.pool_flag = True
    self.W = self.init((self.output_dim, self.input_dim))
    if self.pool_flag:
        # V maps pooled codes, so its width shrinks by pool_size per axis.
        new_dim = int(np.sqrt(output_dim)/self.pool_size)**2
        self.V = sharedX(np.random.uniform(low=0, high=1,
                                           size=(self.causes_dim, new_dim)))
    else:
        self.V = sharedX(np.random.uniform(low=0, high=.1,
                                           size=(self.causes_dim, self.output_dim)))
    self.params = [self.W, self.V]
    self.regularizers = []
    # BUG FIX: W and V regularizers are handled independently.  Previously
    # V_regularizer was consulted only when W_regularizer was truthy, so
    # passing W without V crashed (None.set_param) and passing V without W
    # silently dropped it.
    if W_regularizer:
        W_regularizer.set_param(self.W)
        self.regularizers.append(W_regularizer)
    if V_regularizer:
        V_regularizer.set_param(self.V)
        self.regularizers.append(V_regularizer)
    if activity_regularizer:
        activity_regularizer.set_layer(self)
        self.regularizers.append(activity_regularizer)
def build(self):
    """Create gate/candidate/output weights plus a packed vector of the
    11 kernel hyper-parameters, then optionally load ``initial_weights``."""
    input_dim = self.input_shape[2]
    out_dim = self.output_dim
    self.input = T.tensor3()
    scale = 0.05
    # Gate: the recurrent part U_g carries one slice per each of 6 options.
    self.W_g = self.init((input_dim, out_dim))
    self.U_g = sharedX(np.random.uniform(
        low=-scale, high=scale, size=(out_dim, 6, out_dim)))
    self.b_g = shared_zeros((out_dim))
    # Candidate (c) and output (o) transforms.
    self.W_c = self.init((input_dim, out_dim))
    self.U_c = self.inner_init((out_dim, out_dim))
    self.b_c = shared_zeros((out_dim))
    self.W_o = self.init((input_dim, out_dim))
    self.U_o = self.inner_init((out_dim, out_dim))
    self.b_o = shared_zeros((out_dim))
    # The 11 kernel scalars (sigma_se, sigma_per, sigma_b_lin, sigma_v_lin,
    # sigma_rq, l_se, l_per, l_lin, l_rq, alpha_rq, p_per) are packed into
    # one shared vector, initialized uniformly around scalar_init.
    scalar_init = 0.5
    self.k_parameters = sharedX(np.random.uniform(
        low=scalar_init - scale, high=scalar_init + scale, size=(11, )))
    self.params = [
        self.k_parameters,
        self.W_g, self.U_g, self.b_g,
        self.W_c, self.U_c, self.b_c,
        self.W_o, self.U_o, self.b_o,
    ]
    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
def build(self):
    """Create gate/candidate/output weights and a packed vector of the
    11 kernel hyper-parameters, then optionally load ``initial_weights``."""
    input_dim = self.input_shape[2]
    out_dim = self.output_dim
    self.input = T.tensor3()
    # Gate: orthogonally-initialized recurrent part, one slice per 6 options.
    self.W_g = self.init((input_dim, out_dim))
    self.U_g = self.inner_init((out_dim, 6, out_dim))
    self.b_g = shared_zeros((out_dim))
    # Candidate (c) and output (o) transforms.
    self.W_c = self.init((input_dim, out_dim))
    self.U_c = self.inner_init((out_dim, out_dim))
    self.b_c = shared_zeros((out_dim))
    self.W_o = self.init((input_dim, out_dim))
    self.U_o = self.inner_init((out_dim, out_dim))
    self.b_o = shared_zeros((out_dim))
    # Small constant used for numerical stability elsewhere in the layer.
    self.EPS = 1e-10
    # The 11 kernel scalars (sigma_se, sigma_per, sigma_b_lin, sigma_v_lin,
    # sigma_rq, l_se, l_per, l_lin, l_rq, alpha_rq, p_per) are packed into
    # one shared vector, initialized uniformly around scalar_init.
    scalar_init = 1
    scale = 0.01
    self.k_parameters = sharedX(np.random.uniform(
        low=scalar_init - scale, high=scalar_init + scale, size=(11, )))
    self.params = [
        self.k_parameters,
        self.W_g, self.U_g, self.b_g,
        self.W_c, self.U_c, self.b_c,
        self.W_o, self.U_o, self.b_o,
    ]
    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
def build(self):
    """Allocate mixture-of-experts weights: per-expert input transforms,
    a gating path, and hidden-to-expert weights."""
    input_dim = self.input_shape[2]
    out_dim = self.output_dim
    self.input = T.tensor3()
    # Per-expert input transform and bias, plus the gating input path.
    self.W_x2e = self.init((self.n_experts, input_dim, out_dim))
    self.W_x2g = self.init((input_dim, out_dim))
    self.b_x2e = shared_zeros((self.n_experts, out_dim))
    self.b_x2g = shared_zeros((out_dim))
    # Hidden-to-expert weights start at zero.
    self.W_h2e = shared_zeros((self.n_experts, out_dim, out_dim))
    # Recurrent gating weight, one slice per expert.
    scale = 0.05
    self.U_g = sharedX(np.random.uniform(
        low=-scale, high=scale, size=(out_dim, self.n_experts, out_dim)))
    self.params = [
        self.W_x2e, self.W_x2g, self.b_x2g, self.b_x2e, self.W_h2e, self.U_g
    ]
    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
def build(self):
    """Set up LSTM gate weights and resolve the persistent hidden/cell
    state for stateful operation."""
    in_dim = self.input_shape[2]
    self.input = T.tensor3()
    # One (W, U, b) triple per gate: input, forget, candidate, output.
    self.W_i = self.init((in_dim, self.output_dim))
    self.U_i = self.inner_init((self.output_dim, self.output_dim))
    self.b_i = shared_zeros((self.output_dim))
    self.W_f = self.init((in_dim, self.output_dim))
    self.U_f = self.inner_init((self.output_dim, self.output_dim))
    self.b_f = self.forget_bias_init((self.output_dim))
    self.W_c = self.init((in_dim, self.output_dim))
    self.U_c = self.inner_init((self.output_dim, self.output_dim))
    self.b_c = shared_zeros((self.output_dim))
    self.W_o = self.init((in_dim, self.output_dim))
    self.U_o = self.inner_init((self.output_dim, self.output_dim))
    self.b_o = shared_zeros((self.output_dim))
    self.params = [
        self.W_i, self.U_i, self.b_i,
        self.W_c, self.U_c, self.b_c,
        self.W_f, self.U_f, self.b_f,
        self.W_o, self.U_o, self.b_o,
    ]
    if self.initial_weights is None:
        num_loadable = 0
    else:
        num_loadable = len(self.initial_weights)
    if self.initial_state is not None:
        # An explicit state pair takes precedence.
        self.h = sharedX(self.initial_state[0])
        self.c = sharedX(self.initial_state[1])
        del self.initial_state
    elif self.batch_size is not None:
        # Zero state with the right batch shape.
        self.h = shared_zeros((self.batch_size, self.output_dim))
        self.c = shared_zeros((self.batch_size, self.output_dim))
    elif self.initial_weights is not None:
        if num_loadable == len(self.params) + 2:
            # NOTE(review): h from weights[-1], c from weights[-2] — assumes
            # a serialized order of [..., c, h]; confirm against get_weights.
            self.h = sharedX(self.initial_weights[-1])
            self.c = sharedX(self.initial_weights[-2])
            num_loadable -= 2
        else:
            raise Exception("Hidden state not provided in weights")
    else:
        raise Exception(
            "One of the following arguments must be provided for stateful RNNs: hidden_state, batch_size, weights"
        )
    self.state = [self.h, self.c]
    if self.initial_weights is not None:
        self.set_weights(self.initial_weights[:num_loadable])
        del self.initial_weights
def __init__(self, n_vocab, dim_word, dim_ctx, dim):
    """Build parameters for an attention-based LSTM caption decoder.

    n_vocab: vocabulary size; dim_word: word-embedding size;
    dim_ctx: image-context feature size; dim: LSTM hidden size.
    """
    self.n_vocab = n_vocab
    self.dim_word = dim_word
    self.dim_ctx = dim_ctx
    self.dim = dim
    ### Word Embedding ###
    self.Wemb = initializations.uniform((n_vocab, self.dim_word))
    ### LSTM initialization NN ###
    # Maps the image context to the LSTM's initial hidden state and memory.
    self.Init_state_W = initializations.uniform((self.dim_ctx, self.dim))
    self.Init_state_b = shared_zeros((self.dim))
    self.Init_memory_W = initializations.uniform((self.dim_ctx, self.dim))
    self.Init_memory_b = shared_zeros((self.dim))
    ### Main LSTM ###
    # Weights for all 4 gates are concatenated along the last axis; the
    # recurrent part is built from 4 orthogonal blocks.
    self.lstm_W = initializations.uniform((self.dim_word, self.dim * 4))
    self.lstm_U = sharedX(np.concatenate([ortho_weight(dim),
                                          ortho_weight(dim),
                                          ortho_weight(dim),
                                          ortho_weight(dim)], axis=1))
    self.lstm_b = shared_zeros((self.dim*4))

    self.Wc = initializations.uniform((self.dim_ctx, self.dim*4))  # image -> LSTM hidden
    self.Wc_att = initializations.uniform((self.dim_ctx, self.dim_ctx))  # image features after one pass through the network
    self.Wd_att = initializations.uniform((self.dim, self.dim_ctx))  # LSTM hidden -> influence on the image (attention)
    self.b_att = shared_zeros((self.dim_ctx))
    self.U_att = initializations.uniform((self.dim_ctx, 1))  # collapses the image's 512 features to one dimension
    self.c_att = shared_zeros((1))
    ### Decoding NeuralNets ###
    # Hidden state -> word space -> vocabulary logits.
    self.decode_lstm_W = initializations.uniform((self.dim, self.dim_word))
    self.decode_lstm_b = shared_zeros((self.dim_word))
    self.decode_word_W = initializations.uniform((self.dim_word, n_vocab))
    self.decode_word_b = shared_zeros((n_vocab))
    # params and param_names are kept in the same order — presumably used
    # for saving/loading parameters by name; verify against callers.
    self.params = [self.Wemb,
                   self.Init_state_W, self.Init_state_b,
                   self.Init_memory_W, self.Init_memory_b,
                   self.lstm_W, self.lstm_U, self.lstm_b,
                   self.Wc, self.Wc_att, self.Wd_att, self.b_att,
                   self.U_att, self.c_att,
                   self.decode_lstm_W, self.decode_lstm_b,
                   self.decode_word_W, self.decode_word_b]
    self.param_names = ['Wemb',
                        'Init_state_W', 'Init_state_b',
                        'Init_memory_W', 'Init_memory_b',
                        'lstm_W', 'lstm_U', 'lstm_b',
                        'Wc', 'Wc_att', 'Wd_att', 'b_att',
                        'U_att', 'c_att',
                        'decode_lstm_W', 'decode_lstm_b',
                        'decode_word_W', 'decode_word_b']
def _build(self):
    """Resolve the stateful RNN's (h, c) state and load any weights.

    Precedence: explicit ``initial_state``, then ``batch_size`` (zero
    state), then the trailing pair of ``initial_weights``.
    """
    if self.initial_weights is None:
        remaining = 0
    else:
        remaining = len(self.initial_weights)
    if self.initial_state is not None:
        self.h = sharedX(self.initial_state[0])
        self.c = sharedX(self.initial_state[1])
        del self.initial_state
    elif self.batch_size is not None:
        self.h = shared_zeros((self.batch_size, self.hidden_dim))
        self.c = shared_zeros((self.batch_size, self.hidden_dim))
    elif self.initial_weights is not None:
        if remaining == len(self.params) + 2:
            # NOTE(review): h from weights[-1], c from weights[-2] — assumes
            # serialized order [..., c, h]; confirm against get_weights.
            self.h = sharedX(self.initial_weights[-1])
            self.c = sharedX(self.initial_weights[-2])
            remaining -= 2
        else:
            raise Exception("Hidden state not provided in weights")
    else:
        raise Exception(
            "One of the following arguments must be provided for stateful RNNs: hidden_state, batch_size, weights"
        )
    self.state = [self.h, self.c]
    if self.initial_weights is not None:
        # Only the parameter portion goes through set_weights.
        self.set_weights(self.initial_weights[:remaining])
        del self.initial_weights
def __init__(self, batch_size, n_filters, filter_width):
    """Layer that zeroes the diagonal of the last two dims of its input.

    Builds a shared mask of shape (batch_size, n_filters, filter_width,
    filter_width) that is 1 everywhere except on the diagonal of the last
    two dimensions, so an element-wise product clears those entries.
    """
    super(ZeroFillDiagonals, self).__init__()
    self.batch_size = batch_size
    self.n_filters = n_filters
    self.filter_width = filter_width
    input_shape = (batch_size, n_filters, filter_width, filter_width)
    mask = np.ones(input_shape)
    diag_indices = np.arange(filter_width)
    # Vectorized over the batch and filter axes (replaces the previous
    # O(batch_size * n_filters) Python double loop with one NumPy
    # advanced-indexing assignment; resulting mask is identical).
    mask[:, :, diag_indices, diag_indices] = 0
    self.mask = sharedX(mask, dtype='int32')
def build(self):
    """Create the mixture-of-experts parameters: expert transforms,
    gating weights, and hidden-to-expert weights."""
    in_dim = self.input_shape[2]
    self.input = T.tensor3()
    # Expert input transforms and biases; gating input path alongside.
    self.W_x2e = self.init((self.n_experts, in_dim, self.output_dim))
    self.W_x2g = self.init((in_dim, self.output_dim))
    self.b_x2e = shared_zeros((self.n_experts, self.output_dim))
    self.b_x2g = shared_zeros((self.output_dim))
    # Zero-initialized hidden-to-expert weights.
    self.W_h2e = shared_zeros(
        (self.n_experts, self.output_dim, self.output_dim))
    # Recurrent gating weight: one slice per expert.
    scale = 0.05
    gate_shape = (self.output_dim, self.n_experts, self.output_dim)
    self.U_g = sharedX(
        np.random.uniform(low=-scale, high=scale, size=gate_shape))
    self.params = [self.W_x2e, self.W_x2g, self.b_x2g, self.b_x2e,
                   self.W_h2e, self.U_g]
    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
def prep_embedding(self, embedding):
    """Store the given embedding's matrix as a shared variable.

    `embedding` is expected to expose an `embed_matrix` array attribute
    (e.g. a pre-trained embedding wrapper) — confirm against callers.
    """
    self.embed_matrix = sharedX(embedding.embed_matrix)