def build(self, input_shape):
    self.input_spec = [InputSpec(shape=input_shape)]
    input_leng, input_dim = input_shape[1:]
    if self.inner_rnn == 'gru':
        self.rnn = GRU(
            activation='relu',
            input_dim=input_dim + self.m_length,
            input_length=input_leng,
            output_dim=self.output_dim, init=self.init,
            inner_init=self.inner_init,
            consume_less='gpu',
            name="{}_inner_rnn".format(self.name))
    elif self.inner_rnn == 'lstm':
        self.rnn = LSTM(
            input_dim=input_dim + self.m_length,
            input_length=input_leng,
            output_dim=self.rnn_size, init=self.init,
            forget_bias_init='zero',
            inner_init=self.inner_init,
            consume_less='gpu',
            name="{}_inner_rnn".format(self.name))
    else:
        raise ValueError('this inner_rnn is not implemented yet.')

    inner_shape = list(input_shape)
    inner_shape[-1] = input_dim + self.m_length
    self.rnn.build(inner_shape)

    self.init_h = K.zeros((self.rnn_size,), name="{}_init_h".format(self.name))
    self.W_d = self.rnn.init((self.rnn_size, 1), name="{}_W_d".format(self.name))
    self.W_u = self.rnn.init((self.rnn_size, 1), name="{}_W_u".format(self.name))
    self.W_v = self.rnn.init((self.rnn_size, self.m_length), name="{}_W_v".format(self.name))
    self.W_o = self.rnn.init((self.rnn_size, self.output_dim), name="{}_W_o".format(self.name))
    self.b_d = K.zeros((1,), name="{}_b_d".format(self.name))
    self.b_u = K.zeros((1,), name="{}_b_u".format(self.name))
    self.b_v = K.zeros((self.m_length,), name="{}_b_v".format(self.name))
    self.b_o = K.zeros((self.output_dim,), name="{}_b_o".format(self.name))

    self.trainable_weights = self.rnn.trainable_weights + [
        self.W_d, self.b_d,
        self.W_v, self.b_v,
        self.W_u, self.b_u,
        self.W_o, self.b_o, self.init_h]

    if self.inner_rnn == 'lstm':
        self.init_c = K.zeros((self.rnn_size,), name="{}_init_c".format(self.name))
        self.trainable_weights = self.trainable_weights + [self.init_c, ]

    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights  # fixed: was `self.initial_weight`
def build(self):
    self.input = T.tensor4()
    if self.inner_rnn == 'gru':
        self.enc = GRU(
            input_length=self.n_steps,
            input_dim=self._input_shape[0]*2*self.N_enc**2 + self.output_dim,
            output_dim=self.output_dim, init=self.init,
            inner_init=self.inner_init)
        self.dec = GRU(
            input_length=self.n_steps,
            input_dim=self.code_dim,
            output_dim=self.output_dim, init=self.init,
            inner_init=self.inner_init)
    elif self.inner_rnn == 'lstm':
        self.enc = LSTM(
            input_length=self.n_steps,
            input_dim=self._input_shape[0]*2*self.N_enc**2 + self.output_dim,
            output_dim=self.output_dim, init=self.init,
            inner_init=self.inner_init)
        self.dec = LSTM(
            input_length=self.n_steps,
            input_dim=self.code_dim,
            output_dim=self.output_dim, init=self.init,
            inner_init=self.inner_init)
    else:
        raise ValueError('This type of inner_rnn is not supported')

    self.enc.build()
    self.dec.build()

    self.init_canvas = shared_zeros(self._input_shape)  # canvas and hidden state
    self.init_h_enc = shared_zeros((self.output_dim))   # initial values
    self.init_h_dec = shared_zeros((self.output_dim))   # should be trained
    self.L_enc = self.enc.init((self.output_dim, 5))    # "read" attention parameters (eq. 21)
    self.L_dec = self.enc.init((self.output_dim, 5))    # "write" attention parameters (eq. 28)
    self.b_enc = shared_zeros((5))                      # "read" attention parameters (eq. 21)
    self.b_dec = shared_zeros((5))                      # "write" attention parameters (eq. 28)
    self.W_patch = self.enc.init((self.output_dim, self.N_dec**2 * self._input_shape[0]))
    self.b_patch = shared_zeros((self.N_dec**2 * self._input_shape[0]))
    self.W_mean = self.enc.init((self.output_dim, self.code_dim))
    self.W_sigma = self.enc.init((self.output_dim, self.code_dim))
    self.b_mean = shared_zeros((self.code_dim))
    self.b_sigma = shared_zeros((self.code_dim))
    self.trainable_weights = self.enc.trainable_weights + self.dec.trainable_weights + [
        self.L_enc, self.L_dec, self.b_enc, self.b_dec,
        self.W_patch, self.b_patch,
        self.W_mean, self.W_sigma, self.b_mean, self.b_sigma,
        self.init_canvas, self.init_h_enc, self.init_h_dec]

    if self.inner_rnn == 'lstm':
        self.init_cell_enc = shared_zeros((self.output_dim))  # initial values
        self.init_cell_dec = shared_zeros((self.output_dim))  # should be trained
        self.trainable_weights = self.trainable_weights + [
            self.init_cell_dec, self.init_cell_enc]
def __init__(self, input_shape, h_dim, z_dim, N_enc=2, N_dec=5, n_steps=64,
             inner_rnn='gru', truncate_gradient=-1, return_sequences=False,
             canvas_activation=T.nnet.sigmoid, init='glorot_uniform',
             inner_init='orthogonal'):
    self.input = T.tensor4()
    self.h_dim = h_dim  # this is 256 for MNIST
    self.z_dim = z_dim  # this is 100 for MNIST
    self.input_shape = input_shape
    self.N_enc = N_enc
    self.N_dec = N_dec
    self.truncate_gradient = truncate_gradient
    self.return_sequences = return_sequences
    self.n_steps = n_steps
    self.canvas_activation = canvas_activation
    self.height = input_shape[1]
    self.width = input_shape[2]
    self.inner_rnn = inner_rnn

    if inner_rnn == 'gru':
        self.enc = GRU(input_dim=self.input_shape[0]*2*self.N_enc**2 + h_dim,
                       output_dim=h_dim, init=init, inner_init=inner_init)
        self.dec = GRU(input_dim=z_dim, output_dim=h_dim, init=init,
                       inner_init=inner_init)
    elif inner_rnn == 'lstm':
        self.enc = LSTM(input_dim=self.input_shape[0]*2*self.N_enc**2 + h_dim,
                        output_dim=h_dim, init=init, inner_init=inner_init)
        self.dec = LSTM(input_dim=z_dim, output_dim=h_dim, init=init,
                        inner_init=inner_init)
    else:
        raise ValueError('This type of inner_rnn is not supported')

    self.init_canvas = shared_zeros(input_shape)  # canvas and hidden state
    self.init_h_enc = shared_zeros((h_dim))       # initial values
    self.init_h_dec = shared_zeros((h_dim))       # should be trained
    self.L_enc = self.enc.init((h_dim, 5))  # "read" attention parameters (eq. 21)
    self.L_dec = self.enc.init((h_dim, 5))  # "write" attention parameters (eq. 28)
    self.b_enc = shared_zeros((5))          # "read" attention parameters (eq. 21)
    self.b_dec = shared_zeros((5))          # "write" attention parameters (eq. 28)
    self.W_patch = self.enc.init((h_dim, self.N_dec**2 * self.input_shape[0]))
    self.b_patch = shared_zeros((self.N_dec**2 * self.input_shape[0]))
    self.W_mean = self.enc.init((h_dim, z_dim))
    self.W_sigma = self.enc.init((h_dim, z_dim))
    self.b_mean = shared_zeros((z_dim))
    self.b_sigma = shared_zeros((z_dim))
    self.params = self.enc.params + self.dec.params + [
        self.L_enc, self.L_dec, self.b_enc, self.b_dec,
        self.W_patch, self.b_patch,
        self.W_mean, self.W_sigma, self.b_mean, self.b_sigma]
def build(self):
    input_leng, input_dim = self.input_shape[1:]
    self.input = T.tensor3()
    if self.inner_rnn == 'gru':
        self.rnn = GRU(
            activation='relu',
            input_dim=input_dim + self.m_length,
            input_length=input_leng,
            output_dim=self.output_dim, init=self.init,
            inner_init=self.inner_init)
    elif self.inner_rnn == 'lstm':
        self.rnn = LSTM(
            input_dim=input_dim + self.m_length,
            input_length=input_leng,
            output_dim=self.rnn_size, init=self.init,
            forget_bias_init='zero',
            inner_init=self.inner_init)
    else:
        raise ValueError('this inner_rnn is not implemented yet.')

    self.rnn.build()

    self.init_h = K.zeros((self.rnn_size,))
    self.W_d = self.rnn.init((self.rnn_size, 1))
    self.W_u = self.rnn.init((self.rnn_size, 1))
    self.W_v = self.rnn.init((self.rnn_size, self.m_length))
    self.W_o = self.rnn.init((self.rnn_size, self.output_dim))
    self.b_d = K.zeros((1,), name="b_d")
    self.b_u = K.zeros((1,), name="b_u")
    self.b_v = K.zeros((self.m_length,))
    self.b_o = K.zeros((self.output_dim,))

    self.trainable_weights = self.rnn.trainable_weights + [
        self.W_d, self.b_d,
        self.W_v, self.b_v,
        self.W_u, self.b_u,
        self.W_o, self.b_o, self.init_h]

    if self.inner_rnn == 'lstm':
        self.init_c = K.zeros((self.rnn_size,))
        self.trainable_weights = self.trainable_weights + [self.init_c, ]
def create_o_test_model(train_model, examples, hidden_size, embed_size, glove,
                        batch_size=64, prem_len=22):
    graph = Graph()

    hypo_layer = LSTM(output_dim=hidden_size,
                      batch_input_shape=(batch_size, 1, embed_size),
                      return_sequences=True, stateful=True, trainable=False)
    graph.add_input(name='hypo_input', batch_input_shape=(batch_size, 1),
                    dtype='int32')
    graph.add_node(make_fixed_embeddings(glove, 1), name='hypo_word_vec',
                   input='hypo_input')
    graph.add_node(hypo_layer, name='hypo', input='hypo_word_vec')

    graph.add_input(name='premise',
                    batch_input_shape=(batch_size, prem_len, embed_size))
    graph.add_input(name='creative', batch_input_shape=(batch_size, embed_size))
    attention = LstmAttentionLayer(hidden_size, return_sequences=True,
                                   stateful=True, trainable=False,
                                   feed_state=False)
    graph.add_node(attention, name='attention',
                   inputs=['premise', 'hypo', 'creative'], merge_mode='join')

    graph.add_input(name='train_input', batch_input_shape=(batch_size, 1),
                    dtype='int32')
    hs = HierarchicalSoftmax(len(glove), input_dim=hidden_size,
                             input_length=1, trainable=False)
    graph.add_node(hs, name='softmax', inputs=['attention', 'train_input'],
                   merge_mode='join')
    graph.add_output(name='output', input='softmax')

    hypo_layer.set_weights(train_model.nodes['hypo'].get_weights())
    attention.set_weights(train_model.nodes['attention'].get_weights())
    hs.set_weights(train_model.nodes['softmax'].get_weights())

    graph.compile(loss={'output': hs_categorical_crossentropy}, optimizer='adam')

    func_premise = theano.function([train_model.inputs['premise_input'].get_input()],
                                   train_model.nodes['premise'].get_output(False),
                                   allow_input_downcast=True)
    func_noise = theano.function([train_model.inputs['noise_input'].get_input(),
                                  train_model.inputs['class_input'].get_input()],
                                 train_model.nodes['creative'].get_output(False),
                                 allow_input_downcast=True)
    return graph, func_premise, func_noise
def build(self, input_shape):
    input_leng, input_dim = input_shape[1:]
    # self.input = T.tensor3()
    self.lstm = LSTM(
        input_dim=input_dim + self.m_length,
        input_length=input_leng,
        output_dim=self.output_dim, init=self.init,
        forget_bias_init='zero',
        inner_init=self.inner_init)
    self.lstm.build(input_shape)

    # initial memory, state, read and write vectors
    self.M = theano.shared((.001 * np.ones((1,)).astype(floatX)))
    self.init_h = backend.zeros((self.output_dim,))
    self.init_wr = self.lstm.init((self.n_slots,))
    self.init_ww = self.lstm.init((self.n_slots,))

    # write
    self.W_e = self.lstm.init((self.output_dim, self.m_length))  # erase
    self.b_e = backend.zeros((self.m_length,))
    self.W_a = self.lstm.init((self.output_dim, self.m_length))  # add
    self.b_a = backend.zeros((self.m_length,))

    # get_w parameters for reading operation
    self.W_k_read = self.lstm.init((self.output_dim, self.m_length))
    self.b_k_read = self.lstm.init((self.m_length,))
    self.W_c_read = self.lstm.init((self.output_dim, 3))
    self.b_c_read = backend.zeros((3,))
    self.W_s_read = self.lstm.init((self.output_dim, self.shift_range))
    self.b_s_read = backend.zeros((self.shift_range,))  # b_s lol! not intentional

    # get_w parameters for writing operation
    self.W_k_write = self.lstm.init((self.output_dim, self.m_length))
    self.b_k_write = self.lstm.init((self.m_length,))
    self.W_c_write = self.lstm.init((self.output_dim, 3))  # 3 = beta, g, gamma see eq. 5, 7, 9
    self.b_c_write = backend.zeros((3,))
    self.W_s_write = self.lstm.init((self.output_dim, self.shift_range))
    self.b_s_write = backend.zeros((self.shift_range,))

    self.C = circulant(self.n_slots, self.shift_range)

    self.trainable_weights = self.lstm.trainable_weights + [
        self.W_e, self.b_e,
        self.W_a, self.b_a,
        self.W_k_read, self.b_k_read,
        self.W_c_read, self.b_c_read,
        self.W_s_read, self.b_s_read,
        self.W_k_write, self.b_k_write,
        self.W_s_write, self.b_s_write,
        self.W_c_write, self.b_c_write,
        self.M, self.init_h, self.init_wr, self.init_ww]

    self.init_c = backend.zeros((self.output_dim,))
    self.trainable_weights = self.trainable_weights + [self.init_c, ]
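# The `circulant` helper referenced above is not defined in this snippet.
# A minimal NumPy sketch of the usual NTM shift tensor, assuming shifts
# centered on zero (e.g. shift_range=3 -> shifts +1, 0, -1); the name
# `circulant_sketch` is ours, not the original helper:
import numpy as np

def circulant_sketch(n_slots, shift_range):
    # (shift_range, n_slots, n_slots): slice k rotates an address
    # distribution over the memory slots by the k-th shift
    eye = np.eye(n_slots)
    shifts = range(shift_range // 2, -(shift_range // 2) - 1, -1)
    return np.stack([np.roll(eye, s, axis=1) for s in shifts]).astype('float32')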
def create_input(sentence):
    # Reconstructed header: the original snippet starts mid-function.
    # One-hot encode the sentence into shape (1, len(sentence), len(chars)).
    x = np.zeros((1, len(sentence), len(chars)))
    for t, char in enumerate(sentence):
        x[0, t, char_indices[char]] = 1.0
    return x


sentence = "^" + sys.argv[1]
sentence = "".join([c for c in sentence if c in char_indices])
x = create_input(sentence)

# build the model: 2 stacked LSTM
print("Build model...")
model = Sequential()
first_layer = LSTM(512, return_sequences=True, input_shape=(None, len(chars)))
model.add(first_layer)
model.add(Dropout(0.5))
second_layer = LSTM(512, return_sequences=True)
model.add(second_layer)
model.add(Dropout(0.5))
model.add(TimeDistributedDense(len(chars)))
model.add(Activation("softmax"))

print("creating function")
layer_output = theano.function([model.get_input(train=False)],
                               second_layer.get_output(train=False))
W = layer_output(x)[0]
print(W.shape)

dists = []
for i in xrange(W.shape[0]):
    for j in xrange(i + 1, W.shape[0]):
        # m = (W[i] + W[j]) / 2
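        # Illustrative completion: the original loop body is truncated
        # after the midpoint line above. Assuming the intent is a pairwise
        # distance between the per-timestep hidden-state vectors in W
        # (np assumed imported, as in the other snippets):
        dists.append(np.linalg.norm(W[i] - W[j]))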
class Stack(Recurrent):
    """Neural Turing Machines

    Non obvious parameter:
    ----------------------
    shift_range: int, number of available shifts, ex. if 3, available shifts are (-1, 0, 1)
    n_slots: number of memory locations
    m_length: memory length at each location

    Known issues:
    -------------
    Theano may complain when n_slots == 1.
    """
    def __init__(self, output_dim, n_slots, m_length,
                 inner_rnn='lstm', rnn_size=64, stack=True,
                 init='glorot_uniform', inner_init='orthogonal',
                 input_dim=None, input_length=None, **kwargs):
        self.output_dim = output_dim
        self.n_slots = n_slots + 1  # because we start at time 1
        self.m_length = m_length
        self.init = init
        self.inner_init = inner_init
        self.inner_rnn = inner_rnn
        self.rnn_size = rnn_size
        self.stack = stack
        self.input_dim = input_dim
        self.input_length = input_length
        if self.input_dim:
            kwargs['input_shape'] = (self.input_length, self.input_dim)
        super(Stack, self).__init__(**kwargs)

    def build(self):
        input_leng, input_dim = self.input_shape[1:]
        self.input = T.tensor3()
        if self.inner_rnn == 'gru':
            self.rnn = GRU(
                activation='relu',
                input_dim=input_dim + self.m_length,
                input_length=input_leng,
                output_dim=self.output_dim, init=self.init,
                inner_init=self.inner_init)
        elif self.inner_rnn == 'lstm':
            self.rnn = LSTM(
                input_dim=input_dim + self.m_length,
                input_length=input_leng,
                output_dim=self.rnn_size, init=self.init,
                forget_bias_init='zero',
                inner_init=self.inner_init)
        else:
            raise ValueError('this inner_rnn is not implemented yet.')

        self.rnn.build()

        self.init_h = K.zeros((self.rnn_size,))
        self.W_d = self.rnn.init((self.rnn_size, 1))
        self.W_u = self.rnn.init((self.rnn_size, 1))
        self.W_v = self.rnn.init((self.rnn_size, self.m_length))
        self.W_o = self.rnn.init((self.rnn_size, self.output_dim))
        self.b_d = K.zeros((1,), name="b_d")
        self.b_u = K.zeros((1,), name="b_u")
        self.b_v = K.zeros((self.m_length,))
        self.b_o = K.zeros((self.output_dim,))

        self.trainable_weights = self.rnn.trainable_weights + [
            self.W_d, self.b_d,
            self.W_v, self.b_v,
            self.W_u, self.b_u,
            self.W_o, self.b_o, self.init_h]

        if self.inner_rnn == 'lstm':
            self.init_c = K.zeros((self.rnn_size,))
            self.trainable_weights = self.trainable_weights + [self.init_c, ]
        # self.trainable_weights = [self.W_d]

    def get_initial_states(self, X):
        batch_size = X.shape[0]
        init_r = K.zeros((self.m_length,)).dimshuffle('x', 0).repeat(batch_size, axis=0)
        init_V = K.zeros((self.n_slots, self.m_length)).dimshuffle('x', 0, 1).repeat(batch_size, axis=0)
        init_S = K.zeros((self.n_slots,)).dimshuffle('x', 0).repeat(batch_size, axis=0)
        init_h = self.init_h.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        itime = K.zeros((1,), dtype=np.int32)
        if self.inner_rnn == 'lstm':
            init_c = self.init_c.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        return [init_r, init_V, init_S, itime, init_h, init_c]

    @property
    def output_shape(self):
        input_shape = self.input_shape
        if self.return_sequences:
            return input_shape[0], input_shape[1], self.output_dim
        else:
            return input_shape[0], self.output_dim

    def get_full_output(self, train=False):
        """This method is for research and visualization purposes.

        Use it as
            X = model.get_input()  # full model
            Y = ntm.get_output()   # this layer
            F = theano.function([X], Y, allow_input_downcast=True)
            [memory, read_address, write_address, rnn_state] = F(x)

        if inner_rnn == "lstm" use it as
            [memory, read_address, write_address, rnn_cell, rnn_state] = F(x)
        """
        # input shape: (nb_samples, time (padded with zeros), input_dim)
        X = self.get_input(train)
        assert K.ndim(X) == 3
        if K._BACKEND == 'tensorflow':
            if not self.input_shape[1]:
                raise Exception('When using TensorFlow, you should define ' +
                                'explicitly the number of timesteps of ' +
                                'your sequences. Make sure the first layer ' +
                                'has a "batch_input_shape" argument ' +
                                'including the samples axis.')

        mask = self.get_output_mask(train)
        if mask:
            # apply mask
            X *= K.cast(K.expand_dims(mask), X.dtype)
            masking = True
        else:
            masking = False

        if self.stateful:
            initial_states = self.states
        else:
            initial_states = self.get_initial_states(X)

        states = rnn_states(self.step, X, initial_states,
                            go_backwards=self.go_backwards,
                            masking=masking)
        return states

    def step(self, x, states):
        r_tm1, V_tm1, s_tm1, time = states[:4]
        h_tm1 = states[4:]

        def print_name_shape(name, x):
            # debugging helper: prints the symbolic shape, contributes zero
            return T.cast(K.sum(theano.printing.Print(name)(x.shape)) * 0, "float32")

        r_tm1 = r_tm1 + print_name_shape("out\nr_tm1", r_tm1) + \
            print_name_shape("V_tm1", V_tm1) + \
            print_name_shape("s_tm1", s_tm1) + \
            print_name_shape("x", x) + \
            print_name_shape("h_tm1_0", h_tm1[0]) + \
            print_name_shape("h_tm1_1", h_tm1[1])

        op_t, h_t = self._update_controller(
            T.concatenate([x, r_tm1], axis=-1), h_tm1)

        # op_t = op_t + print_name_shape("W_d", self.W_d.get_value())
        op_t = op_t + print_name_shape("afterop_t", op_t)
        # op_t = op_t[:, 0, :]
        ao = K.dot(op_t, self.W_d)
        ao = ao + print_name_shape("ao", ao)
        d_t = K.sigmoid(ao + self.b_d) + print_name_shape("afterop2_t", op_t)
        u_t = K.sigmoid(K.dot(op_t, self.W_u) + self.b_u) + print_name_shape("d_t", op_t)
        v_t = K.tanh(K.dot(op_t, self.W_v) + self.b_v) + print_name_shape("u_t", u_t)
        o_t = K.tanh(K.dot(op_t, self.W_o) + self.b_o) + print_name_shape("v_t", v_t)
        o_t = o_t + print_name_shape("afterbulk_t", o_t)

        time = time + 1
        V_t, s_t, r_t = _update_neural_stack(self, V_tm1, s_tm1, d_t[::, 0],
                                             u_t[::, 0], v_t, time[0],
                                             stack=self.stack)
        # V_t, s_t, r_t = V_tm1, s_tm1, T.sum(V_tm1, axis=1)

        V_t = V_t + print_name_shape("o_t", o_t) + \
            print_name_shape("r_t", r_t) + \
            print_name_shape("V_t", V_t) + \
            print_name_shape("s_t", s_t)
        # T.cast(theano.printing.Print("time")(time[0]), "float32")
        # time = T.set_subtensor(time[0], time[0] +)

        return o_t, [r_t, V_t, s_t, time] + h_t

    def _update_controller(self, inp, h_tm1):
        """We have to update the inner RNN inside the NTM, this is the
        function to do it. Pretty much copy+pasta from Keras
        """
        def print_name_shape(name, x, shape=True):
            if shape:
                return T.cast(K.sum(theano.printing.Print(name)(x.shape)) * 0, "float32")
            else:
                return theano.printing.Print(name)(x)

        # 1 is for gru, 2 is for lstm
        if len(h_tm1) in [1, 2]:
            if hasattr(self.rnn, "get_constants"):
                BW, BU = self.rnn.get_constants(inp)
                h_tm1 += (BW, BU)

        # update state
        op_t, h = self.rnn.step(inp + print_name_shape("inp", inp), h_tm1)
        return op_t + print_name_shape("opt", op_t) + \
            print_name_shape("h", h[0]) + print_name_shape("h", h[1]), h
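# A minimal end-to-end usage sketch for the Stack layer above, assuming
# the old Keras 0.x / Theano API the rest of these snippets target
# (all dimensions here are illustrative, not from the original):
from keras.models import Sequential
from keras.layers.core import TimeDistributedDense, Activation

model = Sequential()
model.add(Stack(output_dim=64, n_slots=32, m_length=20,
                inner_rnn='lstm', rnn_size=64,
                input_dim=8, input_length=50, return_sequences=True))
model.add(TimeDistributedDense(8))
model.add(Activation('softmax'))
model.compile(loss='binary_crossentropy', optimizer='rmsprop')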
token = Tokenizer(num_words=5000)
token.fit_on_texts(text_train)
X_train_seq = token.texts_to_sequences(text_train)
X_test_seq = token.texts_to_sequences(text_test)
X_train = sequence.pad_sequences(X_train_seq, maxlen=300)
X_test = sequence.pad_sequences(X_test_seq, maxlen=300)

print(len(X_train_seq[104]))
print(len(X_train[104]))
print(len(X_train_seq[6]))
print(len(X_train[1]))
print((X_train[6]))

model = Sequential()
model.add(Embedding(output_dim=32, input_dim=5000, input_length=300))
model.add(Dropout(0.5))
model.add(LSTM(32))
model.add(Dense(units=256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(units=1, activation='sigmoid'))
model.summary()

model.compile(loss='binary_crossentropy', optimizer='adam',
              metrics=['accuracy'])
train_history = model.fit(X_train, y_train, validation_split=0.2,
                          epochs=10, batch_size=100, verbose=2)
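# A natural follow-up, assuming y_test holds the 0/1 labels matching
# the X_test prepared above:
scores = model.evaluate(X_test, y_test, verbose=1)
print('Test loss: %.4f, test accuracy: %.4f' % (scores[0], scores[1]))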
X_train = sequence.pad_sequences(X_train, maxlen=max_len)
# y_train = to_categorical(y_train)
# y_test = to_categorical(y_test)

max_features = 5000
print('Build model...')
embedding_vecor_length = 32
model = Sequential()
model.add(Embedding(max_features, embedding_vecor_length, input_length=max_len))
model.add(Dropout(0.2))
model.add(LSTM(100))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam',
              metrics=['accuracy'])
print(model.summary())

checkpointer = callbacks.ModelCheckpoint(filepath="logs/checkpoint-{epoch:02d}.hdf5",
                                         verbose=1, save_best_only=True,
                                         monitor='val_acc', mode='max')
csv_logger = CSVLogger('logs/training_set_iranalysis.csv', separator=',',
                       append=False)

# NOTE: validation_data and evaluate both reuse the training set here,
# so the "Test" numbers printed below are really training-set numbers.
model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=1000,
          validation_data=(X_train, y_train), shuffle=True,
          callbacks=[checkpointer, csv_logger])
score, acc = model.evaluate(X_train, y_train, batch_size=32)
print('Test score:', score)
print('Test accuracy:', acc)
# model.fit(imgs, Y_label, epochs=5, batch_size=64)

print(len(test_img_y))
test_img_x = np.array(test_img_x)
num = int(test_img_x.shape[0] / num_of_frames)
test_img_x = np.reshape(test_img_x, (num, num_of_frames, dim_x, dim_y))
test_img_y = np.array(test_img_y)
test_img_x = np.reshape(test_img_x,
                        (test_img_x.shape[0], num_of_frames, dim_x * dim_y))

# ***********************************************************
model = Sequential()
model.add(LSTM(50, input_shape=(num_of_frames, dim_x * dim_y),
               return_sequences=True, dropout=0.2))
# model.add(LSTM(50, return_sequences=True, dropout=0.2))
model.add(Flatten())
model.add(Dense(6, activation='softmax'))
# model.add(BatchNormalization())

# categorical_crossentropy matches the 6-way softmax output above
# (the original used binary_crossentropy, which misreports accuracy here)
model.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()

history = model.fit(img_x, img_y, epochs=100, batch_size=124,
                    validation_data=(test_img_x, test_img_y))
end = time.time()
print("\n\nTime to train the LSTM MODEL: ", end - start)

import matplotlib.pyplot as plt
from matplotlib import style
plt.rcParams['figure.figsize'] = [10, 10]
def train_model(path, max_sentence_len=40, overlap_size=0, num_epochs=20,
                full_model_filename=None, initial_epoch=0, filter_max=True):
    if overlap_size is None:
        overlap_size = max_sentence_len - 1
    assert (0 <= overlap_size < max_sentence_len)

    print('\nLoading GloVe...')
    nlp = spacy.load('en_vectors_web_lg')
    word_model = nlp.vocab

    print('\nPreparing the sentences...')
    data_driven_vocabulary = set()
    with open(path, 'r', encoding='utf-8') as f:
        docs = f.readlines()
    sentences = []
    for doc in docs:
        tokens = tokenize(doc, word_model)
        current_sentences = filter_sentences(tokens, max_sentence_len,
                                             overlap_size,
                                             filter_max=filter_max)
        sentences.extend(current_sentences)
        data_driven_vocabulary = data_driven_vocabulary.union(tokens)
    num_unique_words = len(data_driven_vocabulary)
    print('Num sentences: {}'.format(len(sentences)))
    print('Num unique words: {}'.format(num_unique_words))

    # Work on the full GloVe matrix
    true_pretrained_weights = word_model.vectors.data
    num_word_features = true_pretrained_weights.shape[1]

    def true_word2idx(my_word):
        my_key = word_model.strings[my_word]
        try:
            my_row = word_model.vectors.key2row[my_key]
        except KeyError:
            print('Word {} unknown'.format(my_word))
            my_row = 2091  # the row for 'cat' word
        return my_row

    def true_idx2word(my_row):
        my_key = list(word_model.vectors.keys())[my_row]
        my_word = word_model.strings[my_key]
        return my_word

    # Trim the GloVe matrix to lower RAM usage
    sorted_data_driven_vocabulary = sorted(list(data_driven_vocabulary))
    word_indices = dict(
        (c, i) for i, c in enumerate(sorted_data_driven_vocabulary))
    indices_word = dict(
        (i, c) for i, c in enumerate(sorted_data_driven_vocabulary))

    def word2idx(my_word):
        return word_indices.get(my_word, None)

    def idx2word(my_row):
        return indices_word[my_row]

    pretrained_weights = np.zeros((num_unique_words, num_word_features))
    for my_row in range(num_unique_words):
        true_row = true_word2idx(idx2word(my_row))
        pretrained_weights[my_row] = true_pretrained_weights[true_row, :]

    vocab_size, embedding_size = pretrained_weights.shape
    print('Result embedding shape:', pretrained_weights.shape)

    print('\nPreparing the data for LSTM...')
    train_x = np.zeros([len(sentences), max_sentence_len], dtype=np.int32)
    train_y = np.zeros([len(sentences)], dtype=np.int32)
    for i, sentence in enumerate(sentences):
        for t, word in enumerate(sentence[:-1]):
            train_x[i, t] = word2idx(word)
        train_y[i] = word2idx(sentence[-1])
    print('train_x shape:', train_x.shape)
    print('train_y shape:', train_y.shape)

    print('\nTraining LSTM...')
    model = Sequential()
    model.add(
        Embedding(input_dim=vocab_size,
                  output_dim=embedding_size,
                  embeddings_initializer=Constant(pretrained_weights),
                  trainable=False))
    model.add(LSTM(512, return_sequences=True))
    model.add(Dropout(0.5))
    model.add(LSTM(512, return_sequences=False))
    model.add(Dropout(0.5))
    model.add(Dense(units=vocab_size, activation='softmax'))
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

    def on_epoch_end(epoch, _):
        print('\nGenerating text after epoch: %d' % epoch)
        generate_examples(model, sorted_data_driven_vocabulary, word_model)

    print_callback = LambdaCallback(on_epoch_end=on_epoch_end)
    save_callback = ModelCheckpoint(
        filepath='model.word_level_rnn_with_embeddings.epoch_{epoch:02d}.hdf5',
        save_weights_only=False)

    if full_model_filename is not None:
        try:
            print('Loading model {} with initial epoch = {}'.format(
                full_model_filename, initial_epoch))
            model = load_model(full_model_filename)
        except FileNotFoundError:
            print('Model not found. Setting initial epoch to 0.')
            initial_epoch = 0

    model.fit(train_x, train_y,
              batch_size=128,
              epochs=num_epochs,
              initial_epoch=initial_epoch,
              callbacks=[print_callback, save_callback])
    return model, sorted_data_driven_vocabulary
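# `generate_examples` is called in the epoch callback above but is not
# shown. A minimal greedy-sampling sketch, assuming lookup helpers like
# the word2idx/idx2word defined inside train_model (the function name
# and its signature are illustrative, not the original implementation):
def generate_examples_sketch(model, word2idx, idx2word, seed_words,
                             num_words=20):
    words = list(seed_words)
    for _ in range(num_words):
        # fall back to index 0 for out-of-vocabulary words
        x = np.array([[word2idx(w) or 0 for w in words[-40:]]])
        probs = model.predict(x, verbose=0)[0]
        words.append(idx2word(int(np.argmax(probs))))
    return ' '.join(words)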
import time

from keras.layers.core import Dense, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from sklearn.metrics import mean_squared_error
from lstm_multime_pre import data_pre

time_scale = 1  # set the time scale (renamed from `str`, which shadowed the builtin)
if time_scale == 1:
    X_train, X_test, y_train, y_test, y_scale = data_pre(1)
elif time_scale == 5:
    X_train, X_test, y_train, y_test, y_scale = data_pre(5)
elif time_scale == 10:
    X_train, X_test, y_train, y_test, y_scale = data_pre(10)

model = Sequential()
model.add(LSTM(128, return_sequences=True, input_shape=(1, 3)))
model.add(Dropout(0.2))
model.add(LSTM(64))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

start = time.time()
model.compile(loss='mse', optimizer='adam')
model.fit(X_train, y_train, batch_size=72, epochs=500,
          validation_split=0.1, verbose=1)
print("Compilation Time : ", time.time() - start)

print('Saving the model')
if time_scale == 1:
    model.save('model_1_multime.h5')
elif time_scale == 5:
    model.save('model_5_multime.h5')
elif time_scale == 10:
    model.save('model_10_multime.h5')
train_y_state = train_y_state.reshape([time_length, 1])
print("Y is ", train_y_state[0:2, 0])
print(train_x.shape, Y.shape)
print(train_y_state.shape)

# define X-fold cross validation
model = Sequential()
# parameter settings
# model.add(RepeatVector(seq_inout_length, input_dim=input_num))
# with stateful=True you must pass a 3-D batch_input_shape (batch size included)
model.add(LSTM(units=n_hidden, return_sequences=False, stateful=False,
               batch_input_shape=(None, seq_in_length, input_num)))
# model.add(TimeDistributed(Dense(units=output_num)))
model.add(Activation('tanh'))
model.add(Dense(output_num))
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
# model.compile(optimizer='adam', loss='mape', metrics=['acc'])
# model.compile(optimizer='adam', loss=mix_mse_mape, metrics=['acc'])
model.summary()

### make callbacks
### add for TensorBoard
tb_cb = keras.callbacks.TensorBoard(log_dir=abspath_tflog,
                                    # histogram_freq=1,
                                    write_grads=True,
trainnumlength = 30
df = pd.read_csv('traindata1023_30.csv', header=None)  # read the stock data
data = df.iloc[:, 0:df.shape[1]].values  # take the training data
# df_test = pd.read_csv('traindata_30.csv', header=None)  # read the stock data
# data_test = df.iloc[:, 0:trainnumlength+1].values  # take columns 1-20 as training data

## Build the network
EMBEDDING_SIZE = 128
HIDDEN_LAYER_SIZE = 64
BATCH_SIZE = 32
NUM_EPOCHS = 10

model = Sequential()
model.add(Embedding(8500, EMBEDDING_SIZE, input_length=trainnumlength - 1))
model.add(LSTM(HIDDEN_LAYER_SIZE, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1))
model.add(Activation("sigmoid"))
model.compile(loss="binary_crossentropy", optimizer="adam",
              metrics=["accuracy"])

## Train the network
# NOTE: the model is saved and reloaded here without ever calling fit()
model.save('btcmodel1.h5')
del model
model = load_model('btcmodel1.h5')

# Convert labels to categorical one-hot encoding
# one_hot_labels = keras.utils.to_categorical(labels, num_classes=10)


def main():
x1_test = sequence.pad_sequences(x1_test, maxlen=max_len)
x2_test = tk_train.texts_to_sequences(test.question2.values.astype(str))
x2_test = sequence.pad_sequences(x2_test, maxlen=max_len)

print("[Building the network]")
question1 = Input(shape=(max_len,))
question2 = Input(shape=(max_len,))

q1 = Embedding(word_index + 1, 300, weights=[embedding_matrix],
               input_length=max_len, trainable=False)(question1)
q1 = Bidirectional(LSTM(128, return_sequences=True), merge_mode="sum")(q1)

q2 = Embedding(word_index + 1, 300, weights=[embedding_matrix],
               input_length=max_len, trainable=False)(question2)
q2 = Bidirectional(LSTM(128, return_sequences=True), merge_mode="sum")(q2)

attention = dot([q1, q2], [1, 1])
attention = Flatten()(attention)
attention = Dense((max_len * 128))(attention)
attention = Reshape((max_len, 128))(attention)

merged = add([q1, attention])
merged = Flatten()(merged)
text_file.write(row + '\n')
row = ''
for k in range(0, num_features - 5):
    row += str(y_a[i, k])
    row += ','
text_file.write(row + '\n')
text_file.write('batch end\n')
print('Matrix file has been created...')

# build the model:
print('Build model...')
main_input = Input(shape=(maxlen, num_features), name='main_input')

# train a 2-layer LSTM with one shared layer
l1 = LSTM(par_neurons, consume_less='gpu', init='glorot_uniform',
          return_sequences=True, dropout_W=par_dropout)(main_input)  # the shared layer
b1 = BatchNormalization()(l1)
l2_1 = LSTM(par_neurons, consume_less='gpu', init='glorot_uniform',
            return_sequences=False, dropout_W=par_dropout)(b1)  # the layer specialized in activity prediction
b2_1 = BatchNormalization()(l2_1)
l2_2 = LSTM(par_neurons, consume_less='gpu', init='glorot_uniform',
            return_sequences=False, dropout_W=par_dropout)(b1)  # completion inferred: the snippet is truncated here; the second specialized head presumably also reads the shared layer b1
def main():
    start_time = time.time()
    parser = argparse.ArgumentParser(
        prog='trainLSTM_MLP.py',
        description='Train LSTM-MLP model for visual question answering')
    parser.add_argument('--mlp-hidden-units', type=int, default=1024,
                        metavar='<mlp-hidden-units>')
    parser.add_argument('--lstm-hidden-units', type=int, default=512,
                        metavar='<lstm-hidden-units>')
    parser.add_argument('--mlp-hidden-layers', type=int, default=3,
                        metavar='<mlp-hidden-layers>')
    parser.add_argument('--lstm-hidden-layers', type=int, default=1,
                        metavar='<lstm-hidden-layers>')
    parser.add_argument('--dropout', type=float, default=0.5,
                        metavar='<dropout-rate>')
    parser.add_argument('--mlp-activation', type=str, default='tanh',
                        metavar='<activation-function>')
    parser.add_argument('--num-epochs', type=int, default=100,
                        metavar='<num-epochs>')
    parser.add_argument('--batch-size', type=int, default=128,
                        metavar='<batch-size>')
    parser.add_argument('--learning-rate', type=float, default=0.001,
                        metavar='<learning-rate>')
    parser.add_argument('--dev-accuracy-path', type=str, required=True,
                        metavar='<accuracy-path>')
    args = parser.parse_args()

    word_vec_dim = 300
    vgg_img_dim = 4096
    inc_img_dim = 2048
    max_len = 30

    ######################
    #     Load Data      #
    ######################
    print('Loading data...')
    train_id_pairs, train_image_ids = LoadIds('train')
    dev_id_pairs, dev_image_ids = LoadIds('dev')
    train_questions = LoadQuestions('train')
    dev_questions = LoadQuestions('dev')
    train_choices = LoadChoices('train')
    dev_choices = LoadChoices('dev')
    train_answers = LoadAnswers('train')
    dev_answers = LoadAnswers('dev')
    print('Finished loading data.')
    print('Time: %f s' % (time.time() - start_time))

    ######################
    # Model Descriptions #
    ######################
    print('Generating and compiling model...')
    # VGG model (VGG features)
    vgg_model = Sequential()
    vgg_model.add(Reshape(input_shape=(vgg_img_dim, ), dims=(vgg_img_dim, )))

    # Inception model
    inception_model = Sequential()
    inception_model.add(
        Reshape(input_shape=(inc_img_dim, ), dims=(inc_img_dim, )))

    # language model (LSTM)
    language_model = Sequential()
    if args.lstm_hidden_layers == 1:
        language_model.add(
            LSTM(output_dim=args.lstm_hidden_units,
                 return_sequences=False,
                 input_shape=(max_len, word_vec_dim)))
    else:
        language_model.add(
            LSTM(output_dim=args.lstm_hidden_units,
                 return_sequences=True,
                 input_shape=(max_len, word_vec_dim)))
        for i in range(args.lstm_hidden_layers - 2):
            language_model.add(
                LSTM(output_dim=args.lstm_hidden_units,
                     return_sequences=True))
        language_model.add(
            LSTM(output_dim=args.lstm_hidden_units, return_sequences=False))

    # feedforward model (MLP)
    model = Sequential()
    model.add(
        Merge([language_model, vgg_model, inception_model],
              mode='concat', concat_axis=1))
    for i in range(args.mlp_hidden_layers):
        model.add(Dense(args.mlp_hidden_units, init='uniform'))
        model.add(Activation(args.mlp_activation))
        model.add(Dropout(args.dropout))
    model.add(Dense(word_vec_dim))
    model.add(Activation('softmax'))

    json_string = model.to_json()
    model_filename = 'models/2feats_lstm_units_%i_layers_%i_mlp_units_%i_layers_%i_%s_lr%.1e_dropout%.1f' % (
        args.lstm_hidden_units, args.lstm_hidden_layers,
        args.mlp_hidden_units, args.mlp_hidden_layers,
        args.mlp_activation, args.learning_rate, args.dropout)
    open(model_filename + '.json', 'w').write(json_string)

    # loss and optimizer
    rmsprop = RMSprop(lr=args.learning_rate)
    model.compile(loss='categorical_crossentropy', optimizer=rmsprop)
    print('Compilation finished.')
    print('Time: %f s' % (time.time() - start_time))

    ########################################
    #  Load CNN Features and Word Vectors  #
    ########################################
    # load VGG features
    print('Loading VGG features...')
    VGG_features, vgg_img_map = LoadVGGFeatures()
    print('VGG features loaded')
    print('Time: %f s' % (time.time() - start_time))

    # load Inception features
    print('Loading Inception features...')
    INC_features, inc_img_map = LoadInceptionFeatures()
    print('Inception features loaded')
    print('Time: %f s' % (time.time() - start_time))

    # load GloVe vectors
    print('Loading GloVe vectors...')
    word_embedding, word_map = LoadGloVe()
    print('GloVe vectors loaded')
    print('Time: %f s' % (time.time() - start_time))

    ######################
    #    Make Batches    #
    ######################
    print('Making batches...')
    # training batches
    train_question_batches = [
        b for b in MakeBatches(
            train_questions, args.batch_size, fillvalue=train_questions[-1])
    ]
    train_answer_batches = [
        b for b in MakeBatches(train_answers['toks'], args.batch_size,
                               fillvalue=train_answers['toks'][-1])
    ]
    train_image_batches = [
        b for b in MakeBatches(
            train_image_ids, args.batch_size, fillvalue=train_image_ids[-1])
    ]
    train_indices = list(range(len(train_question_batches)))

    # validation batches
    dev_question_batches = [
        b for b in MakeBatches(
            dev_questions, args.batch_size, fillvalue=dev_questions[-1])
    ]
    dev_answer_batches = [
        b for b in MakeBatches(dev_answers['labs'], args.batch_size,
                               fillvalue=dev_answers['labs'][-1])
    ]
    dev_choice_batches = [
        b for b in MakeBatches(
            dev_choices, args.batch_size, fillvalue=dev_choices[-1])
    ]
    dev_image_batches = [
        b for b in MakeBatches(
            dev_image_ids, args.batch_size, fillvalue=dev_image_ids[-1])
    ]
    print('Finished making batches.')
    print('Time: %f s' % (time.time() - start_time))

    ######################
    #      Training      #
    ######################
    acc_file = open(args.dev_accuracy_path, 'w')
    dev_accs = []
    max_acc = -1
    max_acc_epoch = -1

    # define interrupt handler
    def PrintDevAcc():
        print('Max validation accuracy epoch: %i' % max_acc_epoch)
        print(dev_accs)

    def InterruptHandler(sig, frame):
        print(str(sig))
        PrintDevAcc()
        sys.exit(-1)

    signal.signal(signal.SIGINT, InterruptHandler)
    signal.signal(signal.SIGTERM, InterruptHandler)

    # print training information
    print('-' * 80)
    print('Training Information')
    print('# of LSTM hidden units: %i' % args.lstm_hidden_units)
    print('# of LSTM hidden layers: %i' % args.lstm_hidden_layers)
    print('# of MLP hidden units: %i' % args.mlp_hidden_units)
    print('# of MLP hidden layers: %i' % args.mlp_hidden_layers)
    print('Dropout: %f' % args.dropout)
    print('MLP activation function: %s' % args.mlp_activation)
    print('# of training epochs: %i' % args.num_epochs)
    print('Batch size: %i' % args.batch_size)
    print('Learning rate: %f' % args.learning_rate)
    print('# of train questions: %i' % len(train_questions))
    print('# of dev questions: %i' % len(dev_questions))
    print('-' * 80)
    acc_file.write('-' * 80 + '\n')
    acc_file.write('Training Information\n')
    acc_file.write('# of LSTM hidden units: %i\n' % args.lstm_hidden_units)
    acc_file.write('# of LSTM hidden layers: %i\n' % args.lstm_hidden_layers)
    acc_file.write('# of MLP hidden units: %i\n' % args.mlp_hidden_units)
    acc_file.write('# of MLP hidden layers: %i\n' % args.mlp_hidden_layers)
    acc_file.write('Dropout: %f\n' % args.dropout)
    acc_file.write('MLP activation function: %s\n' % args.mlp_activation)
    acc_file.write('# of training epochs: %i\n' % args.num_epochs)
    acc_file.write('Batch size: %i\n' % args.batch_size)
    acc_file.write('Learning rate: %f\n' % args.learning_rate)
    acc_file.write('# of train questions: %i\n' % len(train_questions))
    acc_file.write('# of dev questions: %i\n' % len(dev_questions))
    acc_file.write('-' * 80 + '\n')

    # start training
    print('Training started...')
    for k in range(args.num_epochs):
        print('-' * 80)
        print('Epoch %i' % (k + 1))
        progbar = generic_utils.Progbar(len(train_indices) * args.batch_size)
        # shuffle batch indices
        random.shuffle(train_indices)
        for i in train_indices:
            X_question_batch = GetQuestionsTensor(train_question_batches[i],
                                                  word_embedding, word_map)
            X_vgg_image_batch = GetImagesMatrix(train_image_batches[i],
                                                vgg_img_map, VGG_features)
            X_inc_image_batch = GetImagesMatrix(train_image_batches[i],
                                                inc_img_map, INC_features)
            Y_answer_batch = GetAnswersMatrix(train_answer_batches[i],
                                              word_embedding, word_map)
            loss = model.train_on_batch(
                [X_question_batch, X_vgg_image_batch, X_inc_image_batch],
                Y_answer_batch)
            loss = loss[0].tolist()
            progbar.add(args.batch_size, values=[('train loss', loss)])
        print('Time: %f s' % (time.time() - start_time))

        # evaluate on dev set
        pbar = generic_utils.Progbar(
            len(dev_question_batches) * args.batch_size)
        dev_correct = 0
        # feed forward
        for i in range(len(dev_question_batches)):
            X_question_batch = GetQuestionsTensor(dev_question_batches[i],
                                                  word_embedding, word_map)
            X_vgg_image_batch = GetImagesMatrix(dev_image_batches[i],
                                                vgg_img_map, VGG_features)
            X_inc_image_batch = GetImagesMatrix(dev_image_batches[i],
                                                inc_img_map, INC_features)
            prob = model.predict_proba(
                [X_question_batch, X_vgg_image_batch, X_inc_image_batch],
                args.batch_size, verbose=0)

            # get word vecs of choices
            choice_feats = GetChoicesTensor(dev_choice_batches[i],
                                            word_embedding, word_map)
            similarity = np.zeros((5, args.batch_size), float)
            # calculate cosine distances
            for j in range(5):
                similarity[j] = np.diag(
                    cosine_similarity(prob, choice_feats[j]))
            # take argmax of cosine distances
            pred = np.argmax(similarity, axis=0) + 1

            if i != (len(dev_question_batches) - 1):
                dev_correct += np.count_nonzero(dev_answer_batches[i] == pred)
            else:
                num_padding = args.batch_size * len(
                    dev_question_batches) - len(dev_questions)
                last_idx = args.batch_size - num_padding
                # fixed: was `dev_answer_batches[:last_idx]`, which compared
                # the list of batches rather than the last batch
                dev_correct += np.count_nonzero(
                    dev_answer_batches[i][:last_idx] == pred[:last_idx])
            pbar.add(args.batch_size)

        dev_acc = float(dev_correct) / len(dev_questions)
        dev_accs.append(dev_acc)
        print('Validation Accuracy: %f' % dev_acc)
        print('Time: %f s' % (time.time() - start_time))

        if dev_acc > max_acc:
            max_acc = dev_acc
            max_acc_epoch = k
            model.save_weights(model_filename + '_best.hdf5', overwrite=True)
        model.save_weights(model_filename + '_epoch_{:03d}.hdf5'.format(k + 1))

    print(dev_accs)
    for acc in dev_accs:
        acc_file.write('%f\n' % acc)
    print('Best validation accuracy: %f; epoch#%i' % (max_acc,
                                                      (max_acc_epoch + 1)))
    acc_file.write('Best validation accuracy: %f; epoch#%i\n' %
                   (max_acc, (max_acc_epoch + 1)))
    print('Training finished.')
    acc_file.write('Training finished.\n')
    print('Time: %f s' % (time.time() - start_time))
    acc_file.write('Time: %f s\n' % (time.time() - start_time))
    acc_file.close()
ACTIVATION = 'tanh'
LOSS = 'mean_squared_error'
OPTIMIZER = 'adam'

# Load the data.
accuracies = []
for speaker in SPEAKERS:
    features_test, labels_test, vocab = get_features_test(speakers=[speaker],
                                                          is_single=True,
                                                          is_demo=False)
    features_train, labels_train, vocab = get_features_train(
        speakers=[speaker], is_single=True)

    model = Sequential()
    model.add(
        LSTM(128, input_shape=features_train[0].shape, return_sequences=True))
    model.add(LSTM(128))
    model.add(Dense(len(vocab), activation=ACTIVATION))
    model.compile(loss=LOSS, optimizer=OPTIMIZER, metrics=['accuracy'])

    print(f"Current Speaker Training: {speaker}")
    print(model.summary())

    history = model.fit(
        features_train, labels_train,
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
        verbose=True,
        validation_split=0.2
    )  # 0.18 means the full dataset is split roughly 70/15/15
def model_lstm(len_seq=30, im_size=(40, 40), fc_size=128,
               save_weight='untrained_weight.h5',
               save_topo='untrained_topo.json', save_result=True,
               lr=0.001, momentum=0.6, decay=0.0005, nesterov=True,
               rho=0.9, epsilon=1e-6, opt='sgd', load_cache=False,
               cnn=False, dict_size=53, filter_len=5):
    try:
        if load_cache:
            return read_lstm(weights_filename=save_weight,
                             topo_filename=save_topo)
    except:
        pass

    start_time = time.time()

    # Starting LSTM Model here
    model = Sequential()
    model.add(Dense(fc_size, input_shape=(len_seq, im_size[0] * im_size[1])))
    # Masking layer
    model.add(Masking(mask_value=0.0))
    # First LSTM layer
    model.add(LSTM(fc_size, return_sequences=True))
    # Second LSTM layer
    model.add(LSTM(fc_size, return_sequences=False))
    # Final Dense layer
    model.add(Dense(dict_size))
    # softmax layer
    model.add(Activation('softmax'))

    # Build and pass optimizer
    # NOTE: only 'sgd' is handled; any other value leaves `optimizer` undefined
    if opt == 'sgd':
        optimizer = SGD(lr=lr, momentum=momentum, decay=decay,
                        nesterov=nesterov)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer)

    end_time = time.time()
    print(" Total time for compilation %d" % (end_time - start_time))

    if save_result:
        save_lstm(model, save_weight, save_topo)
    return model
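# Example call with the defaults defined above (30-frame sequences of
# 40x40 images, SGD optimizer):
model = model_lstm(len_seq=30, im_size=(40, 40), fc_size=128,
                   opt='sgd', save_result=False)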
for k, comp in enumerate(tag_index):
    if comp == col:
        tag_row.append(k)
tag.append(tag_row)
# print(str(i) + "-th row:" + str(tag[i]))

tag = MultiLabelBinarizer().fit_transform(tag)
# print(tag.shape)

########## Layers ##########
print('Start Building Model...')
model = Sequential()
model.add(Embedding(len(text_index) + 1, 1024, input_length=306))
model.add(LSTM(128, activation='relu', dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(128, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(38, activation='sigmoid'))
'''
embedding_layer = Embedding(len(text_index),
                            64,
                            input_length = 306,
                            trainable = False)
'''

########## Compilation ##########
adamax = Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
model.compile(loss='binary_crossentropy',
              metrics=[fmeasure, precision, recall],
              optimizer=adamax)  # completion inferred: the original is truncated after `metrics`; the Adamax defined above is the natural optimizer
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
import lstm, time

# Step 1: load data
X_train, y_train, X_test, y_test = lstm.load_data('close.csv', 50, True)

# Step 2: build model
model = Sequential()
model.add(LSTM(input_dim=1, output_dim=50, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(100, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(output_dim=1))
model.add(Activation('linear'))

start = time.time()
model.compile(loss='mse', optimizer='rmsprop')
print('compilation time : %f\n' % (time.time() - start))

# Step 3: train the model
model.fit(X_train, y_train, batch_size=128, nb_epoch=1, validation_split=0.05)
# model.fit(X_train, y_train, batch_size=512, nb_epoch=1, validation_split=0.05)

# Step 4: plot the predictions
predictions = lstm.predict_sequences_multiple(model, X_test, 50, 50)
lstm.plot_results_multiple(predictions, y_test, 50)
def main():
    input_text = ['1 2 3 4 5',
                  '6 7 8 9 10',
                  '11 12 13 14 15',
                  '16 17 18 19 20',
                  '21 22 23 24 25']
    tar_text = ['one two three four five',
                'six seven eight nine ten',
                'eleven twelve thirteen fourteen fifteen',
                'sixteen seventeen eighteen nineteen twenty',
                'twenty_one twenty_two twenty_three twenty_four twenty_five']

    input_list = []
    tar_list = []
    for tmp_input in input_text:
        input_list.append(tokenize(tmp_input))
    for tmp_tar in tar_text:
        tar_list.append(tokenize(tmp_tar))

    vocab = sorted(reduce(lambda x, y: x | y,
                          (set(tmp_list) for tmp_list in input_list + tar_list)))
    # Reserve 0 for masking via pad_sequences
    vocab_size = len(vocab) + 1  # Keras embeddings must use len(vocab) + 1
    input_maxlen = max(map(len, (x for x in input_list)))
    tar_maxlen = max(map(len, (x for x in tar_list)))
    output_dim = vocab_size
    hidden_dim = 20

    print('-')
    print('Vocab size:', vocab_size, 'unique words')
    print('Input max length:', input_maxlen, 'words')
    print('Target max length:', tar_maxlen, 'words')
    print('Dimension of hidden vectors:', hidden_dim)
    print('Number of training stories:', len(input_list))
    print('Number of test stories:', len(input_list))
    print('-')
    print('Vectorizing the word sequences...')

    word_to_idx = dict((c, i + 1) for i, c in enumerate(vocab))  # encoding maps words to integer indices
    idx_to_word = dict((i + 1, c) for i, c in enumerate(vocab))  # decoding maps indices back to words
    inputs_train, tars_train = vectorize_stories(input_list, tar_list,
                                                 word_to_idx, input_maxlen,
                                                 tar_maxlen, vocab_size)

    decoder_mode = 1  # 0: simplest mode; 1: feed-back mode [1]; 2: peek mode [2]; 3: attention mode [3]
    if decoder_mode == 3:
        encoder_top_layer = LSTM(hidden_dim, return_sequences=True)
    else:
        encoder_top_layer = LSTM(hidden_dim)

    if decoder_mode == 0:
        decoder_top_layer = LSTM(hidden_dim, return_sequences=True)
        decoder_top_layer.get_weights()
    elif decoder_mode == 1:
        decoder_top_layer = LSTMDecoder(hidden_dim=hidden_dim,
                                        output_dim=hidden_dim,
                                        output_length=tar_maxlen,
                                        state_input=False,
                                        return_sequences=True)
    elif decoder_mode == 2:
        decoder_top_layer = LSTMDecoder2(hidden_dim=hidden_dim,
                                         output_dim=hidden_dim,
                                         output_length=tar_maxlen,
                                         state_input=False,
                                         return_sequences=True)
    elif decoder_mode == 3:
        decoder_top_layer = AttentionDecoder(hidden_dim=hidden_dim,
                                             output_dim=hidden_dim,
                                             output_length=tar_maxlen,
                                             state_input=False,
                                             return_sequences=True)

    en_de_model = Sequential()
    en_de_model.add(Embedding(input_dim=vocab_size,
                              output_dim=hidden_dim,
                              input_length=input_maxlen))
    en_de_model.add(encoder_top_layer)
    if decoder_mode == 0:
        en_de_model.add(RepeatVector(tar_maxlen))
    en_de_model.add(decoder_top_layer)
    en_de_model.add(TimeDistributedDense(output_dim))
    en_de_model.add(Activation('softmax'))

    print('Compiling...')
    time_start = time.time()
    en_de_model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    time_end = time.time()
    print('Compiled, cost time:%fsecond!' % (time_end - time_start))

    for iter_num in range(5000):
        en_de_model.fit(inputs_train, tars_train, batch_size=3, nb_epoch=1,
                        show_accuracy=True)
        out_predicts = en_de_model.predict(inputs_train)
        for i_idx, out_predict in enumerate(out_predicts):
            predict_sequence = []
            for predict_vector in out_predict:
                next_index = np.argmax(predict_vector)
                next_token = idx_to_word[next_index]
                predict_sequence.append(next_token)
            print('Target output:', tar_text[i_idx])
            print('Predict output:', predict_sequence)
        print('Current iter_num is:%d' % iter_num)
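# `tokenize` and `vectorize_stories` are used above but not shown. Given
# the space-separated inputs, a minimal whitespace tokenizer would be
# enough for this toy dataset (our sketch, not the original):
def tokenize(sentence):
    return sentence.split()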
if __name__ == "__main__":
    # This neural network is the Q-function, run it like this:
    # model.predict(state.reshape(1,64), batch_size=1)
    batch_size = 7
    num_features = 7
    epochs = 10
    gamma = 0.95  # since the reward can be several time steps away, make gamma high
    epsilon = 1
    batchSize = 100
    buffer = 200
    replay = []
    learning_progress = []

    model = Sequential()
    model.add(LSTM(64, input_shape=(1, num_features),
                   return_sequences=True, stateful=False))
    model.add(Dropout(0.5))
    model.add(LSTM(64, input_shape=(1, num_features),
                   return_sequences=False, stateful=False))
    model.add(Dropout(0.5))
    model.add(Dense(7, init='lecun_uniform'))
    model.add(Activation('linear'))  # linear output so we can have a range of real-valued outputs

    rms = RMSprop()
    adam = Adam()
    model.compile(loss='mse', optimizer=adam)
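    # A minimal epsilon-greedy step on top of this Q-network. The `state`
    # array is hypothetical; note the model as built expects input of
    # shape (batch, 1, num_features), not the (1, 64) from the comment:
    qval = model.predict(state.reshape(1, 1, num_features), batch_size=1)
    if np.random.random() < epsilon:
        action = np.random.randint(0, 7)   # explore: random action
    else:
        action = int(np.argmax(qval))      # exploit: greedy action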
def build(self):
    input_leng, input_dim = self.input_shape[1:]
    self.input = T.tensor3()
    if self.inner_rnn == 'gru':
        self.rnn = GRU(
            activation='relu',
            input_dim=input_dim + self.m_length,
            input_length=input_leng,
            output_dim=self.output_dim, init=self.init,
            inner_init=self.inner_init)
    elif self.inner_rnn == 'lstm':
        self.rnn = LSTM(
            input_dim=input_dim + self.m_length,
            input_length=input_leng,
            output_dim=self.output_dim, init=self.init,
            forget_bias_init='zero',
            inner_init=self.inner_init)
    else:
        raise ValueError('this inner_rnn is not implemented yet.')

    self.rnn.build()

    # initial memory, state, read and write vectors
    self.M = theano.shared((.001 * np.ones((1,)).astype(floatX)))
    print(self.M)
    self.init_h = K.zeros((self.output_dim,))
    self.init_wr = self.rnn.init((self.n_slots,))
    self.init_ww = self.rnn.init((self.n_slots,))

    # write
    self.W_e = self.rnn.init((self.output_dim, self.m_length))  # erase
    self.b_e = K.zeros((self.m_length,))
    self.W_a = self.rnn.init((self.output_dim, self.m_length))  # add
    self.b_a = K.zeros((self.m_length,))

    # get_w parameters for reading operation
    self.W_k_read = self.rnn.init((self.output_dim, self.m_length))
    self.b_k_read = self.rnn.init((self.m_length,))
    self.W_c_read = self.rnn.init((self.output_dim, 3))  # 3 = beta, g, gamma see eq. 5, 7, 9
    self.b_c_read = K.zeros((3,))
    self.W_s_read = self.rnn.init((self.output_dim, self.shift_range))
    self.b_s_read = K.zeros((self.shift_range,))  # b_s lol! not intentional

    # get_w parameters for writing operation
    self.W_k_write = self.rnn.init((self.output_dim, self.m_length))
    self.b_k_write = self.rnn.init((self.m_length,))
    self.W_c_write = self.rnn.init((self.output_dim, 3))  # 3 = beta, g, gamma see eq. 5, 7, 9
    self.b_c_write = K.zeros((3,))
    self.W_s_write = self.rnn.init((self.output_dim, self.shift_range))
    self.b_s_write = K.zeros((self.shift_range,))

    self.C = _circulant(self.n_slots, self.shift_range)

    self.trainable_weights = self.rnn.trainable_weights + [
        self.W_e, self.b_e,
        self.W_a, self.b_a,
        self.W_k_read, self.b_k_read,
        self.W_c_read, self.b_c_read,
        self.W_s_read, self.b_s_read,
        self.W_k_write, self.b_k_write,
        self.W_s_write, self.b_s_write,
        self.W_c_write, self.b_c_write,
        self.M, self.init_h, self.init_wr, self.init_ww]

    if self.inner_rnn == 'lstm':
        self.init_c = K.zeros((self.output_dim,))
        self.trainable_weights = self.trainable_weights + [self.init_c, ]
# print(X_train.shape)
# print(Y_train.shape)
# print(X_test.shape)
# print(Y_test.shape)
# X_train = np.random.randn(10, 20, 3)
# Y_train = np.random.randn(10, 20)
# print(X_train.shape)
# print("++++++++")
# print(Y_train.shape)

learning_rate = 0.001

model = Sequential()
model.add(LSTM(input_dim=5, output_dim=512, return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(512, return_sequences=False))
model.add(Dropout(0.5))
model.add(Dense(output_dim=1))
model.add(Activation('linear'))

start = time.time()
model.compile(loss="mean_squared_error", optimizer=Adam(learning_rate))
print('compilation time : ', time.time() - start)
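# A minimal training call, assuming windowed input of shape
# (samples, timesteps, 5) and scalar targets, matching input_dim=5 and
# the single linear output above (old-Keras nb_epoch spelling kept):
model.fit(X_train, Y_train, batch_size=128, nb_epoch=10,
          validation_split=0.05)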
def _build_network(self, vocab_size, maxlen, emb_weights=[], c_emb_weights=[],
                   hidden_units=256, trainable=True, batch_size=1):
    print('Building model...')

    context_input = Input(name='context', batch_shape=(batch_size, maxlen))
    if len(c_emb_weights) == 0:
        c_emb = Embedding(vocab_size, 256, input_length=maxlen,
                          embeddings_initializer='glorot_normal',
                          trainable=trainable)(context_input)
    else:
        c_emb = Embedding(vocab_size, c_emb_weights.shape[1],
                          input_length=maxlen, weights=[c_emb_weights],
                          trainable=trainable)(context_input)

    c_cnn1 = Convolution1D(int(hidden_units / 2), 5,
                           kernel_initializer='he_normal',
                           bias_initializer='he_normal',
                           activation='sigmoid', padding='valid',
                           use_bias=True, input_shape=(1, maxlen))(c_emb)
    c_cnn2 = Convolution1D(hidden_units, 5,
                           kernel_initializer='he_normal',
                           bias_initializer='he_normal',
                           activation='sigmoid', padding='valid',
                           use_bias=True, input_shape=(1, maxlen - 2))(c_cnn1)

    c_lstm1 = LSTM(hidden_units, kernel_initializer='he_normal',
                   recurrent_initializer='orthogonal',
                   bias_initializer='he_normal', activation='sigmoid',
                   recurrent_activation='sigmoid',
                   kernel_regularizer=regularizers.l2(0.01),
                   activity_regularizer=regularizers.l2(0.01),
                   recurrent_regularizer=regularizers.l2(0.01),
                   dropout=0.25, recurrent_dropout=.0,
                   unit_forget_bias=False, return_sequences=False)(c_cnn2)
    c_lstm2 = LSTM(hidden_units, kernel_initializer='he_normal',
                   recurrent_initializer='orthogonal',
                   bias_initializer='he_normal', activation='sigmoid',
                   recurrent_activation='sigmoid',
                   kernel_regularizer=regularizers.l2(0.01),
                   activity_regularizer=regularizers.l2(0.01),
                   recurrent_regularizer=regularizers.l2(0.01),
                   dropout=0.25, recurrent_dropout=.0,
                   unit_forget_bias=False, return_sequences=False,
                   go_backwards=True)(c_cnn2)

    c_merged = add([c_lstm1, c_lstm2])
    c_merged = Dropout(0.25)(c_merged)

    text_input = Input(name='text', batch_shape=(batch_size, maxlen))
    if len(emb_weights) == 0:
        emb = Embedding(vocab_size, 256, input_length=maxlen,
                        embeddings_initializer='glorot_normal',
                        trainable=trainable)(text_input)
    else:
        emb = Embedding(vocab_size, c_emb_weights.shape[1],
                        input_length=maxlen, weights=[emb_weights],
                        trainable=trainable)(text_input)

    t_cnn1 = Convolution1D(int(hidden_units / 2), 5,
                           kernel_initializer='he_normal',
                           bias_initializer='he_normal',
                           activation='sigmoid', padding='valid',
                           use_bias=True, input_shape=(1, maxlen))(emb)
    t_cnn2 = Convolution1D(hidden_units, 5,
                           kernel_initializer='he_normal',
                           bias_initializer='he_normal',
                           activation='sigmoid', padding='valid',
                           use_bias=True, input_shape=(1, maxlen - 2))(t_cnn1)

    t_lstm1 = LSTM(hidden_units, kernel_initializer='he_normal',
                   recurrent_initializer='he_normal',
                   bias_initializer='he_normal', activation='sigmoid',
                   recurrent_activation='sigmoid',
                   kernel_regularizer=regularizers.l2(0.01),
                   activity_regularizer=regularizers.l2(0.01),
                   recurrent_regularizer=regularizers.l2(0.01),
                   dropout=0.25, recurrent_dropout=0.25,
                   unit_forget_bias=False, return_sequences=False)(t_cnn2)
    t_lstm2 = LSTM(hidden_units, kernel_initializer='he_normal',
                   recurrent_initializer='he_normal',
                   bias_initializer='he_normal', activation='sigmoid',
                   recurrent_activation='sigmoid',
                   kernel_regularizer=regularizers.l2(0.01),
                   activity_regularizer=regularizers.l2(0.01),
                   recurrent_regularizer=regularizers.l2(0.01),
                   dropout=0.25, recurrent_dropout=0.25,
                   unit_forget_bias=False, return_sequences=False,
                   go_backwards=True)(t_cnn2)

    t_merged = add([t_lstm1, t_lstm2])
    t_merged = Dropout(0.25)(t_merged)

    awc_input = Input(name='awc', batch_shape=(batch_size, 11))
    t_merged = Reshape((-1, 1))(t_merged)
    t_merged = multiply([t_merged, awc_input])
    t_merged = Flatten()(t_merged)

    merged = concatenate([c_merged, t_merged], axis=1)

    dnn_1 = Dense(hidden_units, kernel_initializer="he_normal",
                  activation='sigmoid')(merged)
    dnn_1 = Dropout(0.25)(dnn_1)
    dnn_2 = Dense(2, activation='sigmoid')(dnn_1)
    softmax = Activation('softmax')(dnn_2)

    model = Model(inputs=[context_input, text_input, awc_input],
                  outputs=softmax)
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    print('No. of parameters:', model.count_params())
    print(model.summary())
    return model
class NeuralTuringMachine(Recurrent): def __init__(self, output_dim, memory_size, shift_range=3, init='glorot_uniform', inner_init='orthogonal', input_dim=None, input_length=None, **kwargs): self.output_dim = output_dim self.n_slots = memory_size[1] self.m_length = memory_size[0] self.shift_range = shift_range self.init = init self.inner_init = inner_init self.input_dim = input_dim self.input_length = input_length if self.input_dim: kwargs['input_shape'] = (self.input_length, self.input_dim) super(NeuralTuringMachine, self).__init__(**kwargs) def build(self, input_shape): input_leng, input_dim = input_shape[1:] # self.input = T.tensor3() self.lstm = LSTM( input_dim=input_dim + self.m_length, input_length=input_leng, output_dim=self.output_dim, init=self.init, forget_bias_init='zero', inner_init=self.inner_init) self.lstm.build(input_shape) # initial memory, state, read and write vecotrs self.M = theano.shared((.001 * np.ones((1,)).astype(floatX))) self.init_h = backend.zeros((self.output_dim)) self.init_wr = self.lstm.init((self.n_slots,)) self.init_ww = self.lstm.init((self.n_slots,)) # write self.W_e = self.lstm.init((self.output_dim, self.m_length)) # erase self.b_e = backend.zeros((self.m_length)) self.W_a = self.lstm.init((self.output_dim, self.m_length)) # add self.b_a = backend.zeros((self.m_length)) # get_w parameters for reading operation self.W_k_read = self.lstm.init((self.output_dim, self.m_length)) self.b_k_read = self.lstm.init((self.m_length,)) self.W_c_read = self.lstm.init((self.output_dim, 3)) self.b_c_read = backend.zeros((3)) self.W_s_read = self.lstm.init((self.output_dim, self.shift_range)) self.b_s_read = backend.zeros((self.shift_range)) # b_s lol! not intentional # get_w parameters for writing operation self.W_k_write = self.lstm.init((self.output_dim, self.m_length)) self.b_k_write = self.lstm.init((self.m_length,)) self.W_c_write = self.lstm.init((self.output_dim, 3)) # 3 = beta, g, gamma see eq. 
5, 7, 9 self.b_c_write = backend.zeros((3)) self.W_s_write = self.lstm.init((self.output_dim, self.shift_range)) self.b_s_write = backend.zeros((self.shift_range)) self.C = circulant(self.n_slots, self.shift_range) self.trainable_weights = self.lstm.trainable_weights + [ self.W_e, self.b_e, self.W_a, self.b_a, self.W_k_read, self.b_k_read, self.W_c_read, self.b_c_read, self.W_s_read, self.b_s_read, self.W_k_write, self.b_k_write, self.W_s_write, self.b_s_write, self.W_c_write, self.b_c_write, self.M, self.init_h, self.init_wr, self.init_ww] self.init_c = backend.zeros((self.output_dim)) self.trainable_weights = self.trainable_weights + [self.init_c, ] def read(self, w, M): return (w[:, :, None] * M).sum(axis=1) def write(self, w, e, a, M): Mtilda = M * (1 - w[:, :, None] * e[:, None, :]) Mout = Mtilda + w[:, :, None] * a[:, None, :] return Mout def get_content_w(self, beta, k, M): num = beta[:, None] * cosine_similarity(M, k) return soft_max(num) def get_location_w(self, g, s, C, gamma, wc, w_tm1): wg = g[:, None] * wc + (1 - g[:, None]) * w_tm1 Cs = (C[None, :, :, :] * wg[:, None, None, :]).sum(axis=3) wtilda = (Cs * s[:, :, None]).sum(axis=1) wout = re_norm(wtilda ** gamma[:, None]) return wout def get_controller_output(self, h, W_k, b_k, W_c, b_c, W_s, b_s): k = T.tanh(T.dot(h, W_k) + b_k) # + 1e-6 c = T.dot(h, W_c) + b_c beta = T.nnet.relu(c[:, 0]) + 1e-4 g = T.nnet.sigmoid(c[:, 1]) gamma = T.nnet.relu(c[:, 2]) + 1.0001 s = T.nnet.softmax(T.dot(h, W_s) + b_s) return k, beta, g, gamma, s def get_output_shape_for(self, input_shape): if self.return_sequences: return input_shape[0], input_shape[1], self.output_dim else: return input_shape[0], self.output_dim def step(self, x, states): M_tm1, wr_tm1, ww_tm1 = states[:3] # reshape M_tm1 = M_tm1.reshape((x.shape[0], self.n_slots, self.m_length)) # read h_tm1 = states[3:] k_read, beta_read, g_read, gamma_read, s_read = self.get_controller_output( h_tm1[0], self.W_k_read, self.b_k_read, self.W_c_read, self.b_c_read, self.W_s_read, self.b_s_read) wc_read = self.get_content_w(beta_read, k_read, M_tm1) wr_t = self.get_location_w(g_read, s_read, self.C, gamma_read, wc_read, wr_tm1) M_read = self.read(wr_t, M_tm1) # update controller h_t = update_controller(self, x, h_tm1, M_read) # write k_write, beta_write, g_write, gamma_write, s_write = self.get_controller_output( h_t[0], self.W_k_write, self.b_k_write, self.W_c_write, self.b_c_write, self.W_s_write, self.b_s_write) wc_write = self.get_content_w(beta_write, k_write, M_tm1) ww_t = self.get_location_w(g_write, s_write, self.C, gamma_write, wc_write, ww_tm1) e = T.nnet.sigmoid(T.dot(h_t[0], self.W_e) + self.b_e) a = T.tanh(T.dot(h_t[0], self.W_a) + self.b_a) M_t = self.write(ww_t, e, a, M_tm1) M_t = M_t.flatten(ndim=2) return h_t[0], [M_t, wr_t, ww_t] + h_t
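# --- Usage sketch (added note, not from the original source): a hedged example of
# driving the NeuralTuringMachine layer above with the Keras 1.x Sequential API it
# targets. memory_size unpacks as (m_length, n_slots); all sizes are illustrative.
from keras.models import Sequential
from keras.layers import Dense

model = Sequential()
# memory of 8 slots, each 20 wide; 12-step sequences of 10-dim inputs
model.add(NeuralTuringMachine(64, memory_size=(20, 8), shift_range=3,
                              input_dim=10, input_length=12))
model.add(Dense(10, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam')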
# Keras Model model = Sequential() # Embedding layer (lookup table of trainable word vectors) model.add(Embedding(input_dim=max_features, output_dim=num_features, input_length=maxlen, weights=[W], trainable=False)) model.add(Dropout(0.25)) model.add(Convolution1D(nb_filter=nb_filter, filter_length=kernel_size, border_mode='valid', activation='relu', subsample_length=1 )) model.add(MaxPooling1D(pool_length=2)) # lstm layer: model.add(LSTM(hidden_dim)) # We project onto a single unit output layer, and squash it with a sigmoid: model.add(Dense(1)) model.add(Activation('sigmoid')) model.compile(loss='binary_crossentropy', optimizer='sgd') model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch) y_pred = model.predict(X_test, batch_size=batch_size).flatten() for i in range(len(y_pred)): if y_pred[i] >= 0.5: y_pred[i] = 1 else: y_pred[i] = 0
class NeuralTuringMachine(Recurrent): def __init__(self, output_dim, memory_size, shift_range=3, init='glorot_uniform', inner_init='orthogonal', input_dim=None, input_length=None, **kwargs): self.output_dim = output_dim self.n_slots = memory_size[1] self.m_length = memory_size[0] self.shift_range = shift_range self.init = init self.inner_init = inner_init self.input_dim = input_dim self.input_length = input_length self.u = None if self.input_dim: kwargs['input_shape'] = (self.input_length, self.input_dim) super(NeuralTuringMachine, self).__init__(**kwargs) def build(self, input_shape): self.u = input_shape input_leng, input_dim = input_shape[1:] # self.input = T.tensor3() self.rnn = LSTM( input_dim=input_dim + self.m_length, input_length=input_leng, output_dim=self.output_dim, init=self.init, forget_bias_init='zero', inner_init=self.inner_init) self.rnn.build(input_shape) self.M = theano.shared((.001 * np.ones((1,)).astype(floatX))) self.init_h = K.zeros((self.output_dim)) self.init_wr = self.rnn.init((self.n_slots,)) self.init_ww = self.rnn.init((self.n_slots,)) # write self.W_e = self.rnn.init((self.output_dim, self.m_length)) # erase self.b_e = K.zeros((self.m_length)) self.W_a = self.rnn.init((self.output_dim, self.m_length)) # add self.b_a = K.zeros((self.m_length)) # get_w parameters for reading operation self.W_k_read = self.rnn.init((self.output_dim, self.m_length)) self.b_k_read = self.rnn.init((self.m_length,)) self.W_c_read = self.rnn.init((self.output_dim, 3)) # 3 = beta, g, gamma see eq. 5, 7, 9 self.b_c_read = K.zeros((3)) self.W_s_read = self.rnn.init((self.output_dim, self.shift_range)) self.b_s_read = K.zeros((self.shift_range)) # b_s lol! not intentional # get_w parameters for writing operation self.W_k_write = self.rnn.init((self.output_dim, self.m_length)) self.b_k_write = self.rnn.init((self.m_length,)) self.W_c_write = self.rnn.init((self.output_dim, 3)) # 3 = beta, g, gamma see eq. 
5, 7, 9 self.b_c_write = K.zeros((3)) self.W_s_write = self.rnn.init((self.output_dim, self.shift_range)) self.b_s_write = K.zeros((self.shift_range)) self.C = _circulant(self.n_slots, self.shift_range) self.trainable_weights = self.rnn.trainable_weights + [ self.W_e, self.b_e, self.W_a, self.b_a, self.W_k_read, self.b_k_read, self.W_c_read, self.b_c_read, self.W_s_read, self.b_s_read, self.W_k_write, self.b_k_write, self.W_s_write, self.b_s_write, self.W_c_write, self.b_c_write, self.M, self.init_h, self.init_wr, self.init_ww] self.init_c = K.zeros((self.output_dim)) self.trainable_weights = self.trainable_weights + [self.init_c, ] def _read(self, w, M): return (w[:, :, None] * M).sum(axis=1) def _write(self, w, e, a, M): Mtilda = M * (1 - w[:, :, None] * e[:, None, :]) Mout = Mtilda + w[:, :, None] * a[:, None, :] return Mout def _get_content_w(self, beta, k, M): num = beta[:, None] * _cosine_distance(M, k) return _softmax(num) def _get_location_w(self, g, s, C, gamma, wc, w_tm1): wg = g[:, None] * wc + (1 - g[:, None]) * w_tm1 Cs = (C[None, :, :, :] * wg[:, None, None, :]).sum(axis=3) wtilda = (Cs * s[:, :, None]).sum(axis=1) wout = _renorm(wtilda ** gamma[:, None]) return wout def _get_controller_output(self, h, W_k, b_k, W_c, b_c, W_s, b_s): k = T.tanh(T.dot(h, W_k) + b_k) # + 1e-6 c = T.dot(h, W_c) + b_c beta = T.nnet.relu(c[:, 0]) + 1e-4 g = T.nnet.sigmoid(c[:, 1]) gamma = T.nnet.relu(c[:, 2]) + 1.0001 s = T.nnet.softmax(T.dot(h, W_s) + b_s) return k, beta, g, gamma, s def get_initial_states(self, X): batch_size = X.shape[0] init_M = self.M.dimshuffle(0, 'x', 'x').repeat( batch_size, axis=0).repeat(self.n_slots, axis=1).repeat( self.m_length, axis=2) init_M = init_M.flatten(ndim=2) init_h = self.init_h.dimshuffle(('x', 0)).repeat(batch_size, axis=0) init_wr = self.init_wr.dimshuffle(('x', 0)).repeat(batch_size, axis=0) init_ww = self.init_ww.dimshuffle(('x', 0)).repeat(batch_size, axis=0) init_c = self.init_c.dimshuffle(('x', 0)).repeat(batch_size, axis=0) return [init_M, T.nnet.softmax(init_wr), T.nnet.softmax(init_ww), init_h, init_c] @property def output_shape(self): input_shape = self.input_shape if self.return_sequences: return input_shape[0], input_shape[1], self.output_dim else: return input_shape[0], self.output_dim def call(self, x, mask=None): input_shape = self.u print(input_shape) if K._BACKEND == 'tensorflow': if not input_shape[1]: raise Exception('When using TensorFlow, you should define ' 'explicitly the number of timesteps of ' 'your sequences.\n' 'If your first layer is an Embedding, ' 'make sure to pass it an "input_length" ' 'argument. Otherwise, make sure ' 'the first layer has ' 'an "input_shape" or "batch_input_shape" ' 'argument, including the time axis. 
' 'Found input shape at layer ' + self.name + ': ' + str(input_shape)) if self.stateful: initial_states = self.states else: initial_states = self.get_initial_states(x) constants = self.get_constants(x) preprocessed_input = self.preprocess_input(x) last_output, outputs, states = K.rnn(self.step, preprocessed_input, initial_states, go_backwards=self.go_backwards, mask=mask, constants=constants, unroll=self.unroll, input_length=input_shape[1]) if self.stateful: self.updates = [] for i in range(len(states)): self.updates.append((self.states[i], states[i])) if self.return_sequences: return outputs else: return last_output def step(self, x, states): M_tm1, wr_tm1, ww_tm1 = states[:3] # reshape M_tm1 = M_tm1.reshape((x.shape[0], self.n_slots, self.m_length)) # read h_tm1 = states[3:] k_read, beta_read, g_read, gamma_read, s_read = self._get_controller_output( h_tm1[0], self.W_k_read, self.b_k_read, self.W_c_read, self.b_c_read, self.W_s_read, self.b_s_read) wc_read = self._get_content_w(beta_read, k_read, M_tm1) wr_t = self._get_location_w(g_read, s_read, self.C, gamma_read, wc_read, wr_tm1) M_read = self._read(wr_t, M_tm1) # update controller h_t = _update_controller(self, x, h_tm1, M_read) # write k_write, beta_write, g_write, gamma_write, s_write = self._get_controller_output( h_t[0], self.W_k_write, self.b_k_write, self.W_c_write, self.b_c_write, self.W_s_write, self.b_s_write) wc_write = self._get_content_w(beta_write, k_write, M_tm1) ww_t = self._get_location_w(g_write, s_write, self.C, gamma_write, wc_write, ww_tm1) e = T.nnet.sigmoid(T.dot(h_t[0], self.W_e) + self.b_e) a = T.tanh(T.dot(h_t[0], self.W_a) + self.b_a) M_t = self._write(ww_t, e, a, M_tm1) M_t = M_t.flatten(ndim=2) return h_t[0], [M_t, wr_t, ww_t] + h_t
def get_lstm_layers(input, nhidden, drop_rate=0.3, nlayers=1, droph=False, activation='tanh', inner_activation='relu', bidirectional=False, stateful=False, return_sequences=True, init='orthogonal', bottleneck=None, prefix=""): logger.debug("get_lstm_layers") logger.debug("prefix %s" % prefix) xcurr = input if gethostname() == "schaffner.inf.ed.ac.uk": consume_less = "gpu" else: consume_less = "cpu" logger.debug(consume_less) if bottleneck is not None: bottlelayer, bottlesize = bottleneck logger.info("****Bottleneck %d %d" % (bottlelayer, bottlesize)) else: bottlelayer, bottlesize = (None, None) for i in range(nlayers): if bottleneck is not None: if i == bottlelayer: logger.info("bottleneck: %d %d %d" % (i, bottlelayer, bottlesize)) xcurr = TimeDistributed(Dense(bottlesize, activation=activation))(xcurr) logger.info("layer: %d" % i) xl = LSTM(nhidden, activation=activation, inner_activation=inner_activation, inner_init=init, #dropout_W=drop_rate, #dropout_U=drop_rate, #W_regularizer=l2(0.01), U_regularizer=l2(0.01), b_regularizer=l2(0.01), consume_less=consume_less, return_sequences=return_sequences, stateful=stateful) xl.name = prefix + xl.name xf = xl(xcurr) if droph: dl = Dropout(drop_rate) dl.name = prefix + dl.name xf = dl(xf) if bidirectional: print("Bidirectional") xbl = LSTM(nhidden, activation=activation, go_backwards=True, inner_activation=inner_activation, inner_init=init, # dropout_W=drop_rate, # dropout_U=drop_rate, #W_regularizer=l2(0.01), U_regularizer=l2(0.01), b_regularizer=l2(0.01), consume_less=consume_less, return_sequences=return_sequences, stateful=stateful) xbl.name = prefix + xbl.name xb = xbl(xcurr) if droph: dlb = Dropout(drop_rate) dlb.name = prefix + dlb.name xb = dlb(xb) xcurr = merge([xf, xb], mode='concat') #print type(xcurr) #print xcurr #xcurr.name = prefix + xcurr.name else: xcurr = xf return xcurr
def lrcn(self): """Build a CNN into RNN. Starting version from: https://github.com/udacity/self-driving-car/blob/master/ steering-models/community-models/chauffeur/models.py Heavily influenced by VGG-16: https://arxiv.org/abs/1409.1556 Also known as an LRCN: https://arxiv.org/pdf/1411.4389.pdf """ model = Sequential() model.add( TimeDistributed(Conv2D(32, (7, 7), strides=(2, 2), activation='relu', padding='same'), input_shape=self.input_shape)) model.add( TimeDistributed( Conv2D(32, (3, 3), kernel_initializer="he_normal", activation='relu'))) model.add(TimeDistributed(MaxPooling2D((2, 2), strides=(2, 2)))) model.add( TimeDistributed( Conv2D(64, (3, 3), padding='same', activation='relu'))) model.add( TimeDistributed( Conv2D(64, (3, 3), padding='same', activation='relu'))) model.add(TimeDistributed(MaxPooling2D((2, 2), strides=(2, 2)))) model.add( TimeDistributed( Conv2D(128, (3, 3), padding='same', activation='relu'))) model.add( TimeDistributed( Conv2D(128, (3, 3), padding='same', activation='relu'))) model.add(TimeDistributed(MaxPooling2D((2, 2), strides=(2, 2)))) model.add( TimeDistributed( Conv2D(256, (3, 3), padding='same', activation='relu'))) model.add( TimeDistributed( Conv2D(256, (3, 3), padding='same', activation='relu'))) model.add(TimeDistributed(MaxPooling2D((2, 2), strides=(2, 2)))) model.add( TimeDistributed( Conv2D(512, (3, 3), padding='same', activation='relu'))) model.add( TimeDistributed( Conv2D(512, (3, 3), padding='same', activation='relu'))) model.add(TimeDistributed(MaxPooling2D((2, 2), strides=(2, 2)))) model.add(TimeDistributed(Flatten())) model.add(Dropout(0.5)) model.add(LSTM(256, return_sequences=False, dropout=0.5)) model.add(Dense(self.nb_classes, activation='softmax')) return model
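# --- Usage sketch (assumption, not from the source): lrcn() only needs a holder
# object exposing input_shape and nb_classes. The holder name and shapes below are
# illustrative: 40 RGB frames of 80x80 per clip, 10 classes.
class VideoModelHolder(object):  # hypothetical name
    def __init__(self, nb_classes=10, seq_length=40):
        self.nb_classes = nb_classes
        self.input_shape = (seq_length, 80, 80, 3)  # (time, height, width, channels)

VideoModelHolder.lrcn = lrcn  # attach the function defined above as a method
model = VideoModelHolder().lrcn()
model.compile(loss='categorical_crossentropy', optimizer='adam')
model.summary()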
random_state=43) batch_size = 32 epochs = 1 hash_bits = 128 def custom_activation(x): return (K.sigmoid(x) * 10) visible = Input(shape=(X.shape[1], X.shape[2])) blstm_1 = Bidirectional( LSTM(1024, dropout=0.1, recurrent_dropout=0.5, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))(visible) blstm_2 = Bidirectional( LSTM(1024, dropout=0.1, recurrent_dropout=0.5, input_shape=(X.shape[1], X.shape[2]), return_sequences=False))(blstm_1) Dense_2 = Dense(hash_bits, activation=custom_activation)(blstm_2) batchNorm = BatchNormalization()(Dense_2) enver = Dense(128, activation='sigmoid')(batchNorm) batchNorm2 = BatchNormalization()(enver) Dense_3 = Dense(4, activation='sigmoid')(batchNorm2) model = Model(inputs=visible, outputs=Dense_3) print(model.summary())
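# --- Added note: the snippet stops after summary(); the model still needs to be
# compiled before training. A hedged choice (the four independent sigmoid outputs
# suggest multi-label targets; y here is an assumed label array of shape (n, 4)):
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X, y, batch_size=batch_size, epochs=epochs, validation_split=0.1)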
print('Loading data...') (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features, test_split=0.2) print(len(X_train), 'train sequences') print(len(X_test), 'test sequences') print("Pad sequences (samples x time)") X_train = sequence.pad_sequences(X_train, maxlen=maxlen) X_test = sequence.pad_sequences(X_test, maxlen=maxlen) print('X_train shape:', X_train.shape) print('X_test shape:', X_test.shape) print('Build model...') model = Sequential() model.add(Embedding(max_features, 128, input_length=maxlen)) model.add(LSTM(128)) # try using a GRU instead, for fun model.add(Dropout(0.5)) model.add(Dense(1)) model.add(Activation('sigmoid')) # try using different optimizers and different optimizer configs model.compile(loss='binary_crossentropy', optimizer='adam', class_mode="binary") print("Train...") model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=3, validation_data=(X_test, y_test), show_accuracy=True) score, acc = model.evaluate(X_test, y_test, batch_size=batch_size, show_accuracy=True)
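# --- Added variant, acting on the "try using a GRU instead, for fun" comment in
# the script above; everything else (data prep, training, evaluation) is unchanged.
from keras.layers import GRU

model = Sequential()
model.add(Embedding(max_features, 128, input_length=maxlen))
model.add(GRU(128))  # drop-in replacement for LSTM(128)
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', class_mode="binary")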
class Stack(Recurrent): """ Stack and queue network output_dim = output dimension n_slots = number of memory slots m_length = dimension of the memory rnn_size = output size of the memory controller inner_rnn = "lstm"; only lstm is supported stack = True to create a neural stack or False to create a neural queue from Learning to Transduce with Unbounded Memory [[http://arxiv.org/pdf/1506.02516.pdf]] """ def __init__(self, output_dim, n_slots, m_length, inner_rnn='lstm', rnn_size=64, stack=True, init='glorot_uniform', inner_init='orthogonal', input_dim=None, input_length=None, **kwargs): self.output_dim = output_dim self.n_slots = n_slots + 1 # because we start at time 1 self.m_length = m_length self.init = init self.inner_init = inner_init if inner_rnn != "lstm": raise ValueError("Only lstm is supported") self.inner_rnn = inner_rnn self.rnn_size = rnn_size self.stack = stack self.input_dim = input_dim self.input_length = input_length if self.input_dim: kwargs['input_shape'] = (self.input_length, self.input_dim) super(Stack, self).__init__(**kwargs) def build(self, input_shape): self.input_spec = [InputSpec(shape=input_shape)] input_leng, input_dim = input_shape[1:] if self.inner_rnn == 'gru': self.rnn = GRU( activation='relu', input_dim=input_dim+self.m_length, input_length=input_leng, output_dim=self.output_dim, init=self.init, inner_init=self.inner_init, consume_less='gpu', name="{}_inner_rnn".format(self.name)) elif self.inner_rnn == 'lstm': self.rnn = LSTM( input_dim=input_dim+self.m_length, input_length=input_leng, output_dim=self.rnn_size, init=self.init, forget_bias_init='zero', inner_init=self.inner_init, consume_less='gpu', name="{}_inner_rnn".format(self.name)) else: raise ValueError('this inner_rnn is not implemented yet.') inner_shape = list(input_shape) inner_shape[-1] = input_dim+self.m_length self.rnn.build(inner_shape) self.init_h = K.zeros((self.rnn_size), name="{}_init_h".format(self.name)) self.W_d = self.rnn.init((self.rnn_size,1), name="{}_W_d".format(self.name)) self.W_u = self.rnn.init((self.rnn_size,1), name="{}_W_u".format(self.name)) self.W_v = self.rnn.init((self.rnn_size,self.m_length), name="{}_W_v".format(self.name)) self.W_o = self.rnn.init((self.rnn_size,self.output_dim), name="{}_W_o".format(self.name)) self.b_d = K.zeros((1,), name="{}_b_d".format(self.name)) self.b_u = K.zeros((1,), name="{}_b_u".format(self.name)) self.b_v = K.zeros((self.m_length,), name="{}_b_v".format(self.name)) self.b_o = K.zeros((self.output_dim,), name="{}_b_o".format(self.name)) self.trainable_weights = self.rnn.trainable_weights + [ self.W_d, self.b_d, self.W_v, self.b_v, self.W_u, self.b_u, self.W_o, self.b_o, self.init_h] if self.inner_rnn == 'lstm': self.init_c = K.zeros((self.rnn_size), name="{}_init_c".format(self.name)) self.trainable_weights = self.trainable_weights + [self.init_c, ] if self.initial_weights is not None: self.set_weights(self.initial_weights) del self.initial_weights def get_initial_states(self, X): batch_size = X.shape[0] init_r = K.zeros((self.m_length)).dimshuffle('x',0).repeat(batch_size,axis=0) init_V = K.zeros((self.n_slots,self.m_length)).dimshuffle('x',0,1).repeat(batch_size,axis=0) init_S = K.zeros((self.n_slots)).dimshuffle('x',0).repeat(batch_size,axis=0) init_h = self.init_h.dimshuffle(('x', 0)).repeat(batch_size, axis=0) itime = K.zeros((1,), dtype=np.int32) if self.inner_rnn == 'lstm': init_c = self.init_c.dimshuffle(('x', 0)).repeat(batch_size, axis=0) return [init_r, init_V, init_S, itime, init_h, init_c] def get_output_shape_for(self, input_shape): if
self.return_sequences: return input_shape[0], input_shape[1], self.output_dim else: return input_shape[0], self.output_dim def step(self, x, states): r_tm1, V_tm1, s_tm1, time = states[:4] h_tm1 = states[4:] op_t, h_t = _update_controller(self, T.concatenate([x, r_tm1], axis=-1), h_tm1) d_t = K.sigmoid(K.dot(op_t, self.W_d) + self.b_d) u_t = K.sigmoid(K.dot(op_t, self.W_u) + self.b_u) v_t = K.tanh(K.dot(op_t, self.W_v) + self.b_v) o_t = K.tanh(K.dot(op_t, self.W_o) + self.b_o) time = time + 1 V_t, s_t, r_t = _update_neural_stack(self, V_tm1, s_tm1, d_t[:, 0], u_t[:, 0], v_t, time[0], stack=self.stack) return o_t, [r_t, V_t, s_t, time] + h_t def get_config(self): config = {'output_dim': self.output_dim, 'n_slots': self.n_slots, 'm_length': self.m_length, 'init': self.init, 'inner_init': self.inner_init, 'inner_rnn': self.inner_rnn, 'rnn_size': self.rnn_size, 'stack': self.stack} base_config = super(Stack, self).get_config() return dict(list(base_config.items()) + list(config.items()))
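# --- Usage sketch (added, hedged): the Stack layer above under the Keras 1.x
# Sequential API it targets; stack=False gives the neural queue instead. All
# sizes below are illustrative.
from keras.models import Sequential

model = Sequential()
# 10-step sequences of 8-dim inputs; 32 memory slots of width 16; LSTM controller of size 64
model.add(Stack(output_dim=8, n_slots=32, m_length=16, rnn_size=64,
                stack=True, input_dim=8, input_length=10))
model.compile(loss='mse', optimizer='adam')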
def build(self, input_shape): self.input_spec = [InputSpec(shape=input_shape)] input_leng, input_dim = input_shape[1:] if self.inner_rnn == 'gru': self.rnn = GRU( activation='relu', input_dim=input_dim+self.m_length, input_length=input_leng, output_dim=self.output_dim, init=self.init, inner_init=self.inner_init, consume_less='gpu', name="{}_inner_rnn".format(self.name)) elif self.inner_rnn == 'lstm': self.rnn = LSTM( input_dim=input_dim+self.m_length, input_length=input_leng, output_dim=self.output_dim, init=self.init, forget_bias_init='zero', inner_init=self.inner_init, consume_less='gpu', name="{}_inner_rnn".format(self.name)) else: raise ValueError('this inner_rnn is not implemented yet.') inner_shape = list(input_shape) inner_shape[-1] = input_dim+self.m_length self.rnn.build(inner_shape) # initial memory, state, read and write vectors self.M = theano.shared((.001*np.ones((1,)).astype(floatX)), name="{}_M".format(self.name)) self.init_h = K.zeros((self.output_dim), name="{}_init_h".format(self.name)) self.init_wr = self.rnn.init((self.n_slots,), name="{}_init_wr".format(self.name)) self.init_ww = self.rnn.init((self.n_slots,), name="{}_init_ww".format(self.name)) # write self.W_e = self.rnn.init((self.output_dim, self.m_length), name="{}_W_e".format(self.name)) # erase self.b_e = K.zeros((self.m_length), name="{}_b_e".format(self.name)) self.W_a = self.rnn.init((self.output_dim, self.m_length), name="{}_W_a".format(self.name)) # add self.b_a = K.zeros((self.m_length), name="{}_b_a".format(self.name)) # get_w parameters for reading operation self.W_k_read = self.rnn.init((self.output_dim, self.m_length), name="{}_W_k_read".format(self.name)) self.b_k_read = self.rnn.init((self.m_length, ), name="{}_b_k_read".format(self.name)) self.W_c_read = self.rnn.init((self.output_dim, 3), name="{}_W_c_read".format(self.name)) # 3 = beta, g, gamma see eq. 5, 7, 9 self.b_c_read = K.zeros((3), name="{}_b_c_read".format(self.name)) self.W_s_read = self.rnn.init((self.output_dim, self.shift_range), name="{}_W_s_read".format(self.name)) self.b_s_read = K.zeros((self.shift_range), name="{}_b_s_read".format(self.name)) # b_s lol! not intentional # get_w parameters for writing operation self.W_k_write = self.rnn.init((self.output_dim, self.m_length), name="{}_W_k_write".format(self.name)) self.b_k_write = self.rnn.init((self.m_length, ), name="{}_b_k_write".format(self.name)) self.W_c_write = self.rnn.init((self.output_dim, 3), name="{}_W_c_write".format(self.name)) # 3 = beta, g, gamma see eq. 5, 7, 9 self.b_c_write = K.zeros((3), name="{}_b_c_write".format(self.name)) self.W_s_write = self.rnn.init((self.output_dim, self.shift_range), name="{}_W_s_write".format(self.name)) self.b_s_write = K.zeros((self.shift_range), name="{}_b_s_write".format(self.name)) self.C = _circulant(self.n_slots, self.shift_range) self.trainable_weights = self.rnn.trainable_weights + [ self.W_e, self.b_e, self.W_a, self.b_a, self.W_k_read, self.b_k_read, self.W_c_read, self.b_c_read, self.W_s_read, self.b_s_read, self.W_k_write, self.b_k_write, self.W_s_write, self.b_s_write, self.W_c_write, self.b_c_write, self.M, self.init_h, self.init_wr, self.init_ww] if self.inner_rnn == 'lstm': self.init_c = K.zeros((self.output_dim), name="{}_init_c".format(self.name)) self.trainable_weights = self.trainable_weights + [self.init_c, ] if self.initial_weights is not None: self.set_weights(self.initial_weights) del self.initial_weights
def __init__(self): self.word2em = load_glove() print("Length of word2em :: ", len(self.word2em)) #print("start word :: \n ", self.word2em['start']) #self.target_word2idx = np.load( # '../chatbot_train/models/' + DATA_SET_NAME + '/word-glove-target-word2idx.npy').item() #self.target_idx2word = np.load( # '../chatbot_train/models/' + DATA_SET_NAME + '/word-glove-target-idx2word.npy').item() #context = np.load('../chatbot_train/models/' + DATA_SET_NAME + '/word-glove-context.npy').item() self.input_texts, self.target_texts, self.target_counter = read_input() for idx, (input_words, target_words) in enumerate( zip(self.input_texts, self.target_texts)): if idx > 10: break print([input_words, target_words]) self.target_word2idx, self.target_idx2word, self.context, input_texts_word2em = get_target( self) self.max_encoder_seq_length = self.context['encoder_max_seq_length'] self.max_decoder_seq_length = self.context['decoder_max_seq_length'] self.num_decoder_tokens = self.context['num_decoder_tokens'] print(self.context) encoder_inputs = Input(shape=(None, GLOVE_EMBEDDING_SIZE), name='encoder_inputs') encoder_lstm = LSTM(units=HIDDEN_UNITS, return_state=True, name="encoder_lstm") encoder_outputs, encoder_state_h, encoder_state_c = encoder_lstm( encoder_inputs) encoder_states = [encoder_state_h, encoder_state_c] decoder_inputs = Input(shape=(None, GLOVE_EMBEDDING_SIZE), name='decoder_inputs') decoder_lstm = LSTM(units=HIDDEN_UNITS, return_sequences=True, return_state=True, name='decoder_lstm') decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states) decoder_dense = Dense(self.num_decoder_tokens, activation='softmax', name='decoder_dense') decoder_outputs = decoder_dense(decoder_outputs) self.model = Model([encoder_inputs, decoder_inputs], decoder_outputs) #plot_model(self.model, to_file='RNN_model.png', show_shapes=True) #self.model.load_weights('../chatbot_train/models/' + DATA_SET_NAME + '/word-glove-weights.h5') self.model.compile(optimizer='rmsprop', loss='categorical_crossentropy') Xtrain, Xtest, Ytrain, Ytest = train_test_split(input_texts_word2em, self.target_texts, test_size=0.2, random_state=42) print("Length of train data:: ", len(Xtrain)) print("Length of test data:: ", len(Xtest)) train_gen = generate_batch(Xtrain, Ytrain, self) test_gen = generate_batch(Xtest, Ytest, self) train_num_batches = len(Xtrain) // BATCH_SIZE test_num_batches = len(Xtest) // BATCH_SIZE #checkpoint = ModelCheckpoint(filepath=WEIGHT_FILE_PATH, save_best_only=True) self.model.fit_generator( generator=train_gen, steps_per_epoch=train_num_batches, epochs=NUM_EPOCHS, verbose=1, validation_data=test_gen, validation_steps=test_num_batches) #, callbacks=[checkpoint]) self.model.save_weights(WEIGHT_FILE_PATH) self.encoder_model = Model(encoder_inputs, encoder_states) decoder_state_inputs = [ Input(shape=(HIDDEN_UNITS, )), Input(shape=(HIDDEN_UNITS, )) ] decoder_outputs, state_h, state_c = decoder_lstm( decoder_inputs, initial_state=decoder_state_inputs) decoder_states = [state_h, state_c] decoder_outputs = decoder_dense(decoder_outputs) self.decoder_model = Model([decoder_inputs] + decoder_state_inputs, [decoder_outputs] + decoder_states)
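# --- Added sketch: an inference loop for the encoder/decoder pair built above.
# The 'start'/'end' markers and the greedy argmax are assumptions consistent with
# this code (it prints word2em['start'] elsewhere), not taken from the source.
import numpy as np

def decode_sequence(self, input_seq, max_len=20):
    states = self.encoder_model.predict(input_seq)
    target = np.zeros((1, 1, GLOVE_EMBEDDING_SIZE))
    target[0, 0, :] = self.word2em['start']  # assumed start-of-sequence embedding
    decoded = []
    for _ in range(max_len):
        out, h, c = self.decoder_model.predict([target] + states)
        idx = int(np.argmax(out[0, -1, :]))
        word = self.target_idx2word[idx]
        if word == 'end':  # assumed end-of-sequence marker
            break
        decoded.append(word)
        if word in self.word2em:
            target[0, 0, :] = self.word2em[word]
        states = [h, c]
    return ' '.join(decoded)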
# Split the data into a training set and a test set N_train = int(len(df) * 0.8) N_test = len(df) - N_train X_train, X_test, y_train, y_test = \ train_test_split(X, Y, test_size=N_test, shuffle=False) # Define the network sizes: does a larger hidden layer give better accuracy? n_in = 1 # len(X[0][0]) n_out = 1 # len(Y[0]) n_hidden = 300 # Build the model (straightforward to write with the Keras framework) model = Sequential() model.add(LSTM(n_hidden, batch_input_shape=(None, maxlen, n_in), kernel_initializer='random_uniform', return_sequences=False)) model.add(Dense(n_in, kernel_initializer='random_uniform')) model.add(Activation("linear")) opt = Adam(lr=0.001, beta_1=0.9, beta_2=0.999) model.compile(loss="mean_squared_error", optimizer=opt) early_stopping = EarlyStopping(monitor='loss', patience=10, verbose=1) hist = model.fit(X_train, y_train, batch_size=maxlen, epochs=50, callbacks=[early_stopping]) # Plot the loss loss = hist.history['loss'] epochs = len(loss) plt.rc('font', family='serif')
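# --- Added sketch: the script above assumes X and Y were already built as sliding
# windows over the series in df (window length maxlen, one-step-ahead target).
# This prep step is my assumption; it is not shown in the source.
import numpy as np

maxlen = 25  # window length, assumed
series = df.values.astype('float32').flatten()
X = np.array([series[i:i + maxlen] for i in range(len(series) - maxlen)])
Y = np.array([series[i + maxlen] for i in range(len(series) - maxlen)])
X = X.reshape(len(X), maxlen, 1)  # (samples, timesteps, n_in=1)
Y = Y.reshape(len(Y), 1)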
model4.add( Convolution1D(nb_filter=nb_filter, filter_length=filter_length, border_mode='valid', activation='relu', subsample_length=1)) model4.add(GlobalMaxPooling1D()) model4.add(Dropout(0.2)) model4.add(Dense(300)) model4.add(Dropout(0.2)) model4.add(BatchNormalization()) model5 = Sequential() model5.add(Embedding(len(word_index) + 1, 300, input_length=40, dropout=0.2)) model5.add(LSTM(300, dropout_W=0.2, dropout_U=0.2)) model6 = Sequential() model6.add(Embedding(len(word_index) + 1, 300, input_length=40, dropout=0.2)) model6.add(LSTM(300, dropout_W=0.2, dropout_U=0.2)) merged_model = Sequential() merged_model.add( Merge([model1, model2, model3, model4, model5, model6], mode='concat')) merged_model.add(BatchNormalization()) merged_model.add(Dense(300)) merged_model.add(PReLU()) merged_model.add(Dropout(0.2)) merged_model.add(BatchNormalization())
with open('conversation.pickle', 'rb') as f: vec_x, vec_y = pickle.load(f) vec_x = np.array(vec_x, dtype=np.float64) vec_y = np.array(vec_y, dtype=np.float64) x_train, x_test, y_train, y_test = train_test_split(vec_x, vec_y, test_size=0.2, random_state=1) model = Sequential() model.add( LSTM(output_dim=300, input_shape=x_train.shape[1:], return_sequences=True, init='glorot_normal', inner_init='glorot_normal', activation='sigmoid')) model.add( LSTM(output_dim=300, input_shape=x_train.shape[1:], return_sequences=True, init='glorot_normal', inner_init='glorot_normal', activation='sigmoid')) model.add( LSTM(output_dim=300, input_shape=x_train.shape[1:], return_sequences=True, init='glorot_normal', inner_init='glorot_normal', activation='sigmoid'))
yx = kMagnData.shape[0] yy = kMagnData.shape[1] yz = kMagnData.shape[2] ox = yConcat.shape[0] oy = yConcat.shape[1] oz = yConcat.shape[2] phiModel = Sequential() print(yConcat.shape) phiModel.add(LSTM(input_dim=yz, output_dim=yz, return_sequences=True)) phiModel.add(LSTM(input_dim=yz, output_dim=yz, return_sequences=True)) phiModel.add(LSTM(input_dim=yz, output_dim=yz, return_sequences=True)) phiModel.add(LSTM(input_dim=yz, output_dim=yz, return_sequences=True)) phiLstmLayer1 = LSTM(input_dim=yz, output_dim=yz, return_sequences=True) phiLstmLayer1.trainable = False phiModel.add(phiLstmLayer1) if os.path.isfile('./lstm-weights/phi-' + settings['lstm-file']): phiModel.load_weights('./lstm-weights/phi-' + settings['lstm-file']) phiModel = modelWeightsLoader(phiModel, './autoencoder-weights/' + settings['phase-encoder'] + '-phase-AE', {18:4}) phiModel.compile(loss='mean_squared_error', optimizer='rmsprop') magnModel = Sequential() magnModel.add(LSTM(input_dim=yz, output_dim=yz, return_sequences=True)) magnModel.add(LSTM(input_dim=yz, output_dim=yz, return_sequences=True)) magnModel.add(LSTM(input_dim=yz, output_dim=yz, return_sequences=True)) magnModel.add(LSTM(input_dim=yz, output_dim=yz, return_sequences=True)) magLstmLayer1 = LSTM(input_dim=yz, output_dim=yz, return_sequences=True)
def build_doc_scorer(self, r_query_idf, permute_idxs): p = self.p ng_fsizes = self.NGRAM_NFILTER maxpool_poses = self._cascade_poses() filter_sizes = list() added_fs = set() for ng in sorted(ng_fsizes): # n-gram in input for n_x, n_y in ng_fsizes[ng]: dim_name = self._get_dim_name(n_x, n_y) if dim_name not in added_fs: filter_sizes.append((n_x, n_y)) added_fs.add(dim_name) re_input, cov_sim_layers, pool_sdim_layer, pool_sdim_layer_context, pool_filter_layer, ex_filter_layer, re_lq_ds =\ self._cov_dsim_layers(p['simdim'], p['maxqlen'], filter_sizes, p['nfilter'], top_k=p['kmaxpool'], poses=maxpool_poses, selecter=p['distill']) query_idf = Reshape( (p['maxqlen'], 1))(Activation('softmax', name='softmax_q_idf')(Flatten()(r_query_idf))) if p['combine'] < 0: raise RuntimeError( "combine should be 0 (LSTM) or the number of feedforward dimensions" ) elif p['combine'] == 0: rnn_layer = LSTM(1, dropout=0.0, recurrent_regularizer=None, recurrent_dropout=0.0, unit_forget_bias=True, \ name="lstm_merge_score_idf", recurrent_activation="hard_sigmoid", bias_regularizer=None, \ activation="tanh", recurrent_initializer="orthogonal", kernel_regularizer=None, kernel_initializer="glorot_uniform") else: dout = Dense(1, name='dense_output') d1 = Dense(p['combine'], activation='relu', name='dense_1') d2 = Dense(p['combine'], activation='relu', name='dense_2') rnn_layer = lambda x: dout(d1(d2(Flatten()(x)))) def _permute_scores(inputs): scores, idxs = inputs return tf.gather_nd(scores, backend.cast(idxs, 'int32')) self.vis_out = None self.visout_count = 0 def _scorer(doc_inputs, dataid): self.visout_count += 1 self.vis_out = {} doc_qts_scores = [query_idf] for ng in sorted(ng_fsizes): if p['distill'] == 'firstk': input_ng = max(ng_fsizes) else: input_ng = ng for n_x, n_y in ng_fsizes[ng]: dim_name = self._get_dim_name(n_x, n_y) if n_x == 1 and n_y == 1: doc_cov = doc_inputs[input_ng] re_doc_cov = doc_cov else: doc_cov = cov_sim_layers[dim_name](re_input( doc_inputs[input_ng])) re_doc_cov = re_lq_ds[dim_name]( pool_filter_layer[dim_name](Permute( (1, 3, 2))(doc_cov))) self.vis_out['conv%s' % ng] = doc_cov if p['context']: ng_signal = pool_sdim_layer_context[dim_name]( [re_doc_cov, doc_inputs['context']]) else: ng_signal = pool_sdim_layer[dim_name](re_doc_cov) doc_qts_scores.append(ng_signal) if len(doc_qts_scores) == 1: doc_qts_score = doc_qts_scores[0] else: doc_qts_score = Concatenate(axis=2)(doc_qts_scores) if permute_idxs is not None: doc_qts_score = Lambda(_permute_scores)( [doc_qts_score, permute_idxs]) doc_score = rnn_layer(doc_qts_score) return doc_score return _scorer
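# --- Added toy comparison (hedged): the combine switch above picks the score
# head. combine == 0 merges the per-query-term signals with a single-unit LSTM;
# combine > 0 flattens them through two ReLU layers plus a linear output.
# Shapes below are assumptions for illustration only.
import numpy as np
from keras.layers import Input, Dense, Flatten, LSTM
from keras.models import Model

qlen, nsig = 16, 4  # assumed query length and number of n-gram signals
x = Input(shape=(qlen, nsig))
lstm_head = LSTM(1)(x)  # combine == 0
ff_head = Dense(1)(Dense(32, activation='relu')(Dense(32, activation='relu')(Flatten()(x))))  # combine == 32
m = Model(x, [lstm_head, ff_head])
print(m.predict(np.random.random((2, qlen, nsig))))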
def main(): f = open("X_train.pkl", 'rb') X_train = pickle.load(f) ''' f=open('word2index.pkl','r') word2index=pickle.load(f) f=open('index2word.pkl','r') index2word=pickle.load(f) inputs_train, tars_train = vectorize_stories(X_train, X_train, word2index, maxlen, maxlen, vocab_size) ''' X_train = pad_sequences(X_train, maxlen=maxlen) decoder_mode = 1 # 0: plain encoder-decoder, 1: feed-back mode [1], 2: peek mode [2], 3: attention mode [3] if decoder_mode == 3: encoder_top_layer = LSTM(hidden_dim, return_sequences=True) else: encoder_top_layer = LSTM(hidden_dim) if decoder_mode == 0: decoder_top_layer = LSTM(hidden_dim, return_sequences=True) decoder_top_layer.get_weights() elif decoder_mode == 1: decoder_top_layer = LSTMDecoder(hidden_dim=hidden_dim, output_dim=hidden_dim , output_length=maxlen, state_input=False, return_sequences=True) elif decoder_mode == 2: decoder_top_layer = LSTMDecoder2(hidden_dim=hidden_dim, output_dim=hidden_dim , output_length=maxlen, state_input=False, return_sequences=True) elif decoder_mode == 3: decoder_top_layer = AttentionDecoder(hidden_dim=hidden_dim, output_dim=hidden_dim , output_length=maxlen, state_input=False, return_sequences=True) en_de_model = Sequential() en_de_model.add(Embedding(input_dim=vocab_size, output_dim=hidden_dim, input_length=maxlen)) en_de_model.add(encoder_top_layer) if decoder_mode == 0: en_de_model.add(RepeatVector(maxlen)) en_de_model.add(decoder_top_layer) en_de_model.add(TimeDistributedDense(vocab_size)) en_de_model.add(Activation('softmax')) print('Compiling...') time_start = time.time() en_de_model.compile(loss='categorical_crossentropy', optimizer='rmsprop') time_end = time.time() print('Compiled, cost time: %f second!' % (time_end - time_start)) for iter_num in range(5000): en_de_model.fit(X_train, X_train, batch_size=3, nb_epoch=1, show_accuracy=True) out_predicts = en_de_model.predict(X_train) for i_idx, out_predict in enumerate(out_predicts): predict_sequence = [] ''' for predict_vector in out_predict: next_index = np.argmax(predict_vector) next_token = index2word[next_index] predict_sequence.append(next_token) ''' print('Target output:', X_train[i_idx]) print('Predict output:', predict_sequence) print('Current iter_num is:%d' % iter_num)
data = data[:, :n_grams] # making labels labels = data[:, -1] labels_final = np.zeros([len(labels), max(labels) + 1]) for i in range(len(labels_final)): labels_final[i][int(labels[i])] = 1.0 ''' ML Part ''' model = Sequential() model.add(Embedding(len(id2char), 128, input_length=input_length)) model.add(LSTM(64)) model.add(Dropout(0.2)) model.add(Dense(128)) model.add(Dense(len(id2char), activation='softmax')) print(model.summary()) model.compile('rmsprop', loss='categorical_crossentropy', metrics=['accuracy']) model.fit(data, labels_final, epochs=1, batch_size=5000, validation_split=0.2) input_string = u'No part of this book may be reproduced or transmitted in any form or by any means' input_string = input_string[:50] print(input_string, end='') # Encoding (strings are immutable, so map the seed to a list of character ids) input_string = [char2id[c] for c in input_string]
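# --- Added sketch: generating a continuation from the encoded seed (greedy
# argmax; my assumption, the source stops after encoding). Requires
# len(input_string) >= input_length so the window is always full.
import numpy as np

generated = list(input_string)  # character ids of the seed
for _ in range(200):
    window = np.array(generated[-input_length:]).reshape(1, input_length)
    next_id = int(np.argmax(model.predict(window)[0]))
    generated.append(next_id)
    print(id2char[next_id], end='')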
class NeuralTuringMachine(Recurrent): """ Neural Turing Machines Non-obvious parameters: ---------------------- shift_range: int, number of available shifts, e.g. if 3, available shifts are (-1, 0, 1) n_slots: number of memory locations m_length: memory length at each location Known issues: ------------- Theano may complain when n_slots == 1. """ def __init__(self, output_dim, n_slots, m_length, shift_range=3, inner_rnn='gru', init='glorot_uniform', inner_init='orthogonal', input_dim=None, input_length=None, **kwargs): self.output_dim = output_dim self.n_slots = n_slots self.m_length = m_length self.shift_range = shift_range self.init = init self.inner_init = inner_init self.inner_rnn = inner_rnn self.input_dim = input_dim self.input_length = input_length if self.input_dim: kwargs['input_shape'] = (self.input_length, self.input_dim) super(NeuralTuringMachine, self).__init__(**kwargs) def build(self): input_leng, input_dim = self.input_shape[1:] self.input = T.tensor3() if self.inner_rnn == 'gru': self.rnn = GRU( activation='relu', input_dim=input_dim+self.m_length, input_length=input_leng, output_dim=self.output_dim, init=self.init, inner_init=self.inner_init) elif self.inner_rnn == 'lstm': self.rnn = LSTM( input_dim=input_dim+self.m_length, input_length=input_leng, output_dim=self.output_dim, init=self.init, forget_bias_init='zero', inner_init=self.inner_init) else: raise ValueError('this inner_rnn is not implemented yet.') self.rnn.build() # initial memory, state, read and write vectors self.M = theano.shared((.001*np.ones((1,)).astype(floatX))) self.init_h = K.zeros((self.output_dim)) self.init_wr = self.rnn.init((self.n_slots,)) self.init_ww = self.rnn.init((self.n_slots,)) # write self.W_e = self.rnn.init((self.output_dim, self.m_length)) # erase self.b_e = K.zeros((self.m_length)) self.W_a = self.rnn.init((self.output_dim, self.m_length)) # add self.b_a = K.zeros((self.m_length)) # get_w parameters for reading operation self.W_k_read = self.rnn.init((self.output_dim, self.m_length)) self.b_k_read = self.rnn.init((self.m_length, )) self.W_c_read = self.rnn.init((self.output_dim, 3)) # 3 = beta, g, gamma see eq. 5, 7, 9 self.b_c_read = K.zeros((3)) self.W_s_read = self.rnn.init((self.output_dim, self.shift_range)) self.b_s_read = K.zeros((self.shift_range)) # b_s lol! not intentional # get_w parameters for writing operation self.W_k_write = self.rnn.init((self.output_dim, self.m_length)) self.b_k_write = self.rnn.init((self.m_length, )) self.W_c_write = self.rnn.init((self.output_dim, 3)) # 3 = beta, g, gamma see eq.
5, 7, 9 self.b_c_write = K.zeros((3)) self.W_s_write = self.rnn.init((self.output_dim, self.shift_range)) self.b_s_write = K.zeros((self.shift_range)) self.C = _circulant(self.n_slots, self.shift_range) self.trainable_weights = self.rnn.trainable_weights + [ self.W_e, self.b_e, self.W_a, self.b_a, self.W_k_read, self.b_k_read, self.W_c_read, self.b_c_read, self.W_s_read, self.b_s_read, self.W_k_write, self.b_k_write, self.W_s_write, self.b_s_write, self.W_c_write, self.b_c_write, self.M, self.init_h, self.init_wr, self.init_ww] if self.inner_rnn == 'lstm': self.init_c = K.zeros((self.output_dim)) self.trainable_weights = self.trainable_weights + [self.init_c, ] def _read(self, w, M): return (w[:, :, None]*M).sum(axis=1) def _write(self, w, e, a, M): Mtilda = M * (1 - w[:, :, None]*e[:, None, :]) Mout = Mtilda + w[:, :, None]*a[:, None, :] return Mout def _get_content_w(self, beta, k, M): num = beta[:, None] * _cosine_distance(M, k) return _softmax(num) def _get_location_w(self, g, s, C, gamma, wc, w_tm1): wg = g[:, None] * wc + (1-g[:, None])*w_tm1 Cs = (C[None, :, :, :] * wg[:, None, None, :]).sum(axis=3) wtilda = (Cs * s[:, :, None]).sum(axis=1) wout = _renorm(wtilda ** gamma[:, None]) return wout def _get_controller_output(self, h, W_k, b_k, W_c, b_c, W_s, b_s): k = T.tanh(T.dot(h, W_k) + b_k) # + 1e-6 c = T.dot(h, W_c) + b_c beta = T.nnet.relu(c[:, 0]) + 1e-4 g = T.nnet.sigmoid(c[:, 1]) gamma = T.nnet.relu(c[:, 2]) + 1.0001 s = T.nnet.softmax(T.dot(h, W_s) + b_s) return k, beta, g, gamma, s def get_initial_states(self, X): batch_size = X.shape[0] init_M = self.M.dimshuffle(0, 'x', 'x').repeat( batch_size, axis=0).repeat(self.n_slots, axis=1).repeat( self.m_length, axis=2) init_M = init_M.flatten(ndim=2) init_h = self.init_h.dimshuffle(('x', 0)).repeat(batch_size, axis=0) init_wr = self.init_wr.dimshuffle(('x', 0)).repeat(batch_size, axis=0) init_ww = self.init_ww.dimshuffle(('x', 0)).repeat(batch_size, axis=0) if self.inner_rnn == 'lstm': init_c = self.init_c.dimshuffle(('x', 0)).repeat(batch_size, axis=0) return [init_M, T.nnet.softmax(init_wr), T.nnet.softmax(init_ww), init_h, init_c] else: return [init_M, T.nnet.softmax(init_wr), T.nnet.softmax(init_ww), init_h] @property def output_shape(self): input_shape = self.input_shape if self.return_sequences: return input_shape[0], input_shape[1], self.output_dim else: return input_shape[0], self.output_dim def get_full_output(self, train=False): """ This method is for research and visualization purposes. Use it as X = model.get_input() # full model Y = ntm.get_output() # this layer F = theano.function([X], Y, allow_input_downcast=True) [memory, read_address, write_address, rnn_state] = F(x) if inner_rnn == "lstm" use it as [memory, read_address, write_address, rnn_cell, rnn_state] = F(x) """ # input shape: (nb_samples, time (padded with zeros), input_dim) X = self.get_input(train) assert K.ndim(X) == 3 if K._BACKEND == 'tensorflow': if not self.input_shape[1]: raise Exception('When using TensorFlow, you should define ' + 'explicitly the number of timesteps of ' + 'your sequences. 
Make sure the first layer ' + 'has a "batch_input_shape" argument ' + 'including the samples axis.') mask = self.get_output_mask(train) if mask: # apply mask X *= K.cast(K.expand_dims(mask), X.dtype) masking = True else: masking = False if self.stateful: initial_states = self.states else: initial_states = self.get_initial_states(X) states = rnn_states(self.step, X, initial_states, go_backwards=self.go_backwards, masking=masking) return states def step(self, x, states): M_tm1, wr_tm1, ww_tm1 = states[:3] # reshape M_tm1 = M_tm1.reshape((x.shape[0], self.n_slots, self.m_length)) # read h_tm1 = states[3:] k_read, beta_read, g_read, gamma_read, s_read = self._get_controller_output( h_tm1[0], self.W_k_read, self.b_k_read, self.W_c_read, self.b_c_read, self.W_s_read, self.b_s_read) wc_read = self._get_content_w(beta_read, k_read, M_tm1) wr_t = self._get_location_w(g_read, s_read, self.C, gamma_read, wc_read, wr_tm1) M_read = self._read(wr_t, M_tm1) # update controller h_t = _update_controller(self, x, h_tm1, M_read) # write k_write, beta_write, g_write, gamma_write, s_write = self._get_controller_output( h_t[0], self.W_k_write, self.b_k_write, self.W_c_write, self.b_c_write, self.W_s_write, self.b_s_write) wc_write = self._get_content_w(beta_write, k_write, M_tm1) ww_t = self._get_location_w(g_write, s_write, self.C, gamma_write, wc_write, ww_tm1) e = T.nnet.sigmoid(T.dot(h_t[0], self.W_e) + self.b_e) a = T.tanh(T.dot(h_t[0], self.W_a) + self.b_a) M_t = self._write(ww_t, e, a, M_tm1) M_t = M_t.flatten(ndim=2) return h_t[0], [M_t, wr_t, ww_t] + h_t
def pre_train_model(cursor, word_model): checkpoint_path = "data/cp.ckpt" cursor.execute("SELECT count(cleaned) from tweets") (tweet_count, ) = cursor.fetchone() tweet_count = min(tweet_count, tweet_limit) cursor.execute(""" SELECT max_sentence_length FROM metadata WHERE id = (SELECT MAX(id) FROM metadata) """) (max_sentence_len, ) = cursor.fetchone() pretrained_weights = word_model.wv.vectors vocab_size, embedding_size = pretrained_weights.shape print('Result embedding shape:', pretrained_weights.shape) # print('Checking similar words:') # for word in ['model', 'network', 'train', 'learn']: # most_similar = ', '.join( # '%s (%.2f)' % (similar, dist) for similar, dist in word_model.wv.most_similar(word)[:8]) # print(' %s -> %s' % (word, most_similar)) def word2idx(word): return word_model.wv.vocab[word].index def idx2word(idx): return word_model.wv.index2word[idx] print('\nPreparing the data for LSTM...') train_x = np.zeros([tweet_count, max_sentence_len], dtype=np.int32) train_y = np.zeros([tweet_count], dtype=np.int32) for i, sentence in enumerate(get_tokens(cursor)): for t, word in enumerate(sentence[:-1]): train_x[i, t] = word2idx(word) train_y[i] = word2idx(sentence[-1]) print('train_x shape:', train_x.shape) print('train_y shape:', train_y.shape) print('\nTraining LSTM...') model = Sequential() model.add( Embedding(input_dim=vocab_size, output_dim=embedding_size, weights=[pretrained_weights])) model.add(LSTM(units=embedding_size)) model.add(Dense(units=vocab_size)) model.add(Activation('softmax')) model.compile(optimizer='adam', loss='sparse_categorical_crossentropy') def sample(preds, temperature=1.0): if temperature <= 0: return np.argmax(preds) preds = np.asarray(preds).astype('float64') preds = np.log(preds) / temperature exp_preds = np.exp(preds) preds = exp_preds / np.sum(exp_preds) probas = np.random.multinomial(1, preds, 1) return np.argmax(probas) def generate_next(text, num_generated=15): word_idxs = [word2idx(word) for word in text.lower().split()] for _ in range(num_generated): prediction = model.predict(x=np.array(word_idxs)) idx = sample(prediction[-1], temperature=0.7) word_idxs.append(idx) if idx == eos: break pieces = list(map(lambda idx: idx2word(idx), word_idxs)) if use_nltk: return ' '.join(pieces) result = sp.decode_pieces(pieces) return result def on_epoch_end(epoch, _): print('\nGenerating text after epoch: %d' % epoch) texts = [sos, sos, sos] for text in texts: sample = generate_next(text) print('%s... -> %s' % (text, sample)) # Create a callback that saves the model's weights cp_callback = keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, save_weights_only=True, verbose=1) model.summary() model.fit( train_x, train_y, batch_size=128, epochs=20, callbacks=[cp_callback, LambdaCallback(on_epoch_end=on_epoch_end)]) return model
class DRAW(Recurrent): '''DRAW Parameters: =========== h_dim : encoder/decoder dimension z_dim : random sample dimension (reparametrization trick output) input_shape : (n_channels, rows, cols) N_enc : Size of the encoder's filter bank (MNIST default: 2) N_dec : Size of the decoder's filter bank (MNIST default: 5) n_steps : number of sampling steps (or how long it takes to draw, default 64) inner_rnn : str with rnn type ('gru' default) truncate_gradient : int (-1 default) return_sequences : bool (False default) ''' theano_rng = theano_rng() def __init__(self, input_shape, h_dim, z_dim, N_enc=2, N_dec=5, n_steps=64, inner_rnn='gru', truncate_gradient=-1, return_sequences=False, canvas_activation=T.nnet.sigmoid, init='glorot_uniform', inner_init='orthogonal'): self.input = T.tensor4() self.h_dim = h_dim # this is 256 for MNIST self.z_dim = z_dim # this is 100 for MNIST self.input_shape = input_shape self.N_enc = N_enc self.N_dec = N_dec self.truncate_gradient = truncate_gradient self.return_sequences = return_sequences self.n_steps = n_steps self.canvas_activation = canvas_activation self.height = input_shape[1] self.width = input_shape[2] self.inner_rnn = inner_rnn if inner_rnn == 'gru': self.enc = GRU(input_dim=self.input_shape[0]*2*self.N_enc**2 + h_dim, output_dim=h_dim, init=init, inner_init=inner_init) self.dec = GRU(input_dim=z_dim, output_dim=h_dim, init=init, inner_init=inner_init) elif inner_rnn == 'lstm': self.enc = LSTM(input_dim=self.input_shape[0]*2*self.N_enc**2 + h_dim, output_dim=h_dim, init=init, inner_init=inner_init) self.dec = LSTM(input_dim=z_dim, output_dim=h_dim, init=init, inner_init=inner_init) else: raise ValueError('This type of inner_rnn is not supported') self.init_canvas = shared_zeros(input_shape) # canvas and hidden state self.init_h_enc = shared_zeros((h_dim)) # initial values self.init_h_dec = shared_zeros((h_dim)) # should be trained self.L_enc = self.enc.init((h_dim, 5)) # "read" attention parameters (eq. 21) self.L_dec = self.enc.init((h_dim, 5)) # "write" attention parameters (eq. 28) self.b_enc = shared_zeros((5)) # "read" attention parameters (eq. 21) self.b_dec = shared_zeros((5)) # "write" attention parameters (eq. 28) self.W_patch = self.enc.init((h_dim, self.N_dec**2*self.input_shape[0])) self.b_patch = shared_zeros((self.N_dec**2*self.input_shape[0])) self.W_mean = self.enc.init((h_dim, z_dim)) self.W_sigma = self.enc.init((h_dim, z_dim)) self.b_mean = shared_zeros((z_dim)) self.b_sigma = shared_zeros((z_dim)) self.params = self.enc.params + self.dec.params + [ self.L_enc, self.L_dec, self.b_enc, self.b_dec, self.W_patch, self.b_patch, self.W_mean, self.W_sigma, self.b_mean, self.b_sigma] # self.init_canvas, self.init_h_enc, self.init_h_dec] def init_updates(self): self.get_output(train=True) # populate regularizers list def _get_attention_params(self, h, L, b, N): p = T.dot(h, L) + b gx = self.width * (p[:, 0]+1) / 2. gy = self.height * (p[:, 1]+1) / 2. sigma2 = T.exp(p[:, 2]) delta = T.exp(p[:, 3]) * (max(self.width, self.height) - 1) / (N - 1.) gamma = T.exp(p[:, 4]) return gx, gy, sigma2, delta, gamma def _get_filterbank(self, gx, gy, sigma2, delta, N): small = 1e-4 i = T.arange(N) a = T.arange(self.width) b = T.arange(self.height) mx = gx[:, None] + delta[:, None] * (i - N/2. - .5) my = gy[:, None] + delta[:, None] * (i - N/2. - .5) Fx = T.exp(-(a - mx[:, :, None])**2 / 2. / sigma2[:, None, None]) Fx /= (Fx.sum(axis=-1)[:, :, None] + small) Fy = T.exp(-(b - my[:, :, None])**2 / 2. 
/ sigma2[:, None, None]) Fy /= (Fy.sum(axis=-1)[:, :, None] + small) return Fx, Fy def _read(self, x, gamma, Fx, Fy): Fyx = (Fy[:, None, :, :, None] * x[:, :, None, :, :]).sum(axis=3) FxT = Fx.dimshuffle(0, 2, 1) FyxFx = (Fyx[:, :, :, :, None] * FxT[:, None, None, :, :]).sum(axis=3) return gamma[:, None, None, None] * FyxFx def _get_patch(self, h): write_patch = T.dot(h, self.W_patch) + self.b_patch write_patch = write_patch.reshape((h.shape[0], self.input_shape[0], self.N_dec, self.N_dec)) return write_patch def _write(self, write_patch, gamma, Fx, Fy): Fyx = (Fy[:, None, :, :, None] * write_patch[:, :, :, None, :]).sum(axis=2) FyxFx = (Fyx[:, :, :, :, None] * Fx[:, None, None, :, :]).sum(axis=3) return FyxFx / gamma[:, None, None, None] def _get_sample(self, h, eps): mean = T.dot(h, self.W_mean) + self.b_mean # eps = self.theano_rng.normal(avg=0., std=1., size=mean.shape) logsigma = T.dot(h, self.W_sigma) + self.b_sigma sigma = T.exp(logsigma) if self._train_state: sample = mean + eps * sigma else: sample = mean + 0 * eps * sigma kl = -.5 - logsigma + .5 * (mean**2 + sigma**2) # kl = .5 * (mean**2 + sigma**2 - logsigma - 1) return sample, kl.sum(axis=-1) def _get_rnn_input(self, x, rnn): if self.inner_rnn == 'gru': x_z = T.dot(x, rnn.W_z) + rnn.b_z x_r = T.dot(x, rnn.W_r) + rnn.b_r x_h = T.dot(x, rnn.W_h) + rnn.b_h return x_z, x_r, x_h elif self.inner_rnn == 'lstm': xi = T.dot(x, rnn.W_i) + rnn.b_i xf = T.dot(x, rnn.W_f) + rnn.b_f xc = T.dot(x, rnn.W_c) + rnn.b_c xo = T.dot(x, rnn.W_o) + rnn.b_o return xi, xf, xc, xo def _get_rnn_state(self, rnn, *args): mask = 1. # no masking if self.inner_rnn == 'gru': x_z, x_r, x_h, h_tm1 = args h = rnn._step(x_z, x_r, x_h, mask, h_tm1, rnn.U_z, rnn.U_r, rnn.U_h) return h elif self.inner_rnn == 'lstm': xi, xf, xc, xo, h_tm1, cell_tm1 = args h, cell = rnn._step(xi, xf, xo, xc, mask, h_tm1, cell_tm1, rnn.U_i, rnn.U_f, rnn.U_o, rnn.U_c) return h, cell def _get_initial_states(self, X): if self.inner_rnn == 'gru': batch_size = X.shape[0] canvas = self.init_canvas.dimshuffle('x', 0, 1, 2).repeat(batch_size, axis=0) init_enc = self.init_h_enc.dimshuffle('x', 0).repeat(batch_size, axis=0) init_dec = self.init_h_dec.dimshuffle('x', 0).repeat(batch_size, axis=0) else: canvas = alloc_zeros_matrix(*X.shape) # + self.init_canvas[None, :, :, :] init_enc = alloc_zeros_matrix(X.shape[0], self.h_dim) # + self.init_h_enc[None, :] init_dec = alloc_zeros_matrix(X.shape[0], self.h_dim) # + self.init_h_dec[None, :] return canvas, init_enc, init_dec def _step(self, eps, canvas, h_enc, h_dec, x, *args): x_hat = x - self.canvas_activation(canvas) gx, gy, sigma2, delta, gamma = self._get_attention_params( h_dec, self.L_enc, self.b_enc, self.N_enc) Fx, Fy = self._get_filterbank(gx, gy, sigma2, delta, self.N_enc) read_x = self._read(x, gamma, Fx, Fy).flatten(ndim=2) read_x_hat = self._read(x_hat, gamma, Fx, Fy).flatten(ndim=2) enc_input = T.concatenate([read_x, read_x_hat, h_dec], axis=-1) x_enc_z, x_enc_r, x_enc_h = self._get_rnn_input(enc_input, self.enc) new_h_enc = self._get_rnn_state(self.enc, x_enc_z, x_enc_r, x_enc_h, h_enc) sample, kl = self._get_sample(new_h_enc, eps) x_dec_z, x_dec_r, x_dec_h = self._get_rnn_input(sample, self.dec) new_h_dec = self._get_rnn_state(self.dec, x_dec_z, x_dec_r, x_dec_h, h_dec) gx_w, gy_w, sigma2_w, delta_w, gamma_w = self._get_attention_params( new_h_dec, self.L_dec, self.b_dec, self.N_dec) Fx_w, Fy_w = self._get_filterbank(gx_w, gy_w, sigma2_w, delta_w, self.N_dec) write_patch = self._get_patch(new_h_dec) new_canvas = canvas + 
self._write(write_patch, gamma_w, Fx_w, Fy_w) return new_canvas, new_h_enc, new_h_dec, kl def _step_lstm(self, eps, canvas, h_enc, cell_enc, h_dec, cell_dec, x, *args): x_hat = x - self.canvas_activation(canvas) gx, gy, sigma2, delta, gamma = self._get_attention_params( h_dec, self.L_enc, self.b_enc, self.N_enc) Fx, Fy = self._get_filterbank(gx, gy, sigma2, delta, self.N_enc) read_x = self._read(x, gamma, Fx, Fy).flatten(ndim=2) read_x_hat = self._read(x_hat, gamma, Fx, Fy).flatten(ndim=2) enc_input = T.concatenate([read_x, read_x_hat, h_dec.flatten(ndim=2)], axis=1) x_enc_i, x_enc_f, x_enc_c, x_enc_o = self._get_rnn_input(enc_input, self.enc) new_h_enc, new_cell_enc = self._get_rnn_state( self.enc, x_enc_i, x_enc_f, x_enc_c, x_enc_o, h_enc, cell_enc) sample, kl = self._get_sample(new_h_enc, eps) x_dec_i, x_dec_f, x_dec_c, x_dec_o = self._get_rnn_input(sample, self.dec) new_h_dec, new_cell_dec = self._get_rnn_state( self.dec, x_dec_i, x_dec_f, x_dec_c, x_dec_o, h_dec, cell_dec) gx_w, gy_w, sigma2_w, delta_w, gamma_w = self._get_attention_params( new_h_dec, self.L_dec, self.b_dec, self.N_dec) Fx_w, Fy_w = self._get_filterbank(gx_w, gy_w, sigma2_w, delta_w, self.N_dec) write_patch = self._get_patch(new_h_dec) new_canvas = canvas + self._write(write_patch, gamma_w, Fx_w, Fy_w) return new_canvas, new_h_enc, new_cell_enc, new_h_dec, new_cell_dec, kl def get_output(self, train=False): self._train_state = train X, eps = self.get_input(train).values() eps = eps.dimshuffle(1, 0, 2) canvas, init_enc, init_dec = self._get_initial_states(X) if self.inner_rnn == 'gru': outputs, updates = scan(self._step, sequences=eps, outputs_info=[canvas, init_enc, init_dec, None], non_sequences=[X, ] + self.params, # n_steps=self.n_steps, truncate_gradient=self.truncate_gradient) elif self.inner_rnn == 'lstm': outputs, updates = scan(self._step_lstm, sequences=eps, outputs_info=[0*canvas, 0*init_enc, 0*init_enc, 0*init_dec, 0*init_dec, None], non_sequences=[X, ] + self.params, truncate_gradient=self.truncate_gradient) kl = outputs[-1].sum(axis=0).mean() if train: # self.updates = updates self.regularizers = [SimpleCost(kl), ] if self.return_sequences: return [outputs[0].dimshuffle(1, 0, 2, 3, 4), kl] else: return [outputs[0][-1], kl]
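# --- Added note: for intuition, the attentional read in _read above reduces, per
# batch item and channel, to patch = gamma * Fy . x . Fx^T. A numpy restatement
# with random filterbanks standing in for the row-normalized Gaussians (shapes
# follow the code):
import numpy as np

B, C, H, W, N = 2, 1, 28, 28, 2  # batch, channels, image size, filter bank size
x = np.random.random((B, C, H, W))
Fx = np.random.random((B, N, W))
Fy = np.random.random((B, N, H))
gamma = np.ones(B)
patch = gamma[:, None, None, None] * np.einsum('bnh,bchw,bmw->bcnm', Fy, x, Fx)
print(patch.shape)  # (2, 1, 2, 2): an N x N glimpse per channel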
print('Loading data...') (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features, test_split=0.2) print(len(X_train), 'train sequences') print(len(X_test), 'test sequences') print("Pad sequences (samples x time)") X_train = sequence.pad_sequences(X_train, maxlen=maxlen) X_test = sequence.pad_sequences(X_test, maxlen=maxlen) print('X_train shape:', X_train.shape) print('X_test shape:', X_test.shape) print('Build model...') model = Sequential() model.add(Embedding(max_features, 128, input_length=maxlen, dropout=0.5)) model.add(LSTM(128, dropout_W=0.5, dropout_U=0.5)) # try using a GRU instead, for fun model.add(Dropout(0.5)) model.add(Dense(1)) model.add(Activation('sigmoid')) # try using different optimizers and different optimizer configs model.compile(loss='binary_crossentropy', optimizer='adam', class_mode="binary") print("Train...") model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=15, validation_data=(X_test, y_test),
class NeuralTuringMachine(Recurrent): """ Neural Turing Machines Parameters: ----------- shift_range: int, number of available shifts, e.g. if 3, available shifts are (-1, 0, 1) n_slots: number of memory locations m_length: memory length at each location inner_rnn: str, supported values are 'gru' and 'lstm' output_dim: hidden state size (RNN controller output_dim) Known issues and TODO: ---------------------- Theano may complain when n_slots == 1. Add multiple reading and writing heads. """ def __init__(self, output_dim, n_slots, m_length, shift_range=3, inner_rnn='gru', truncate_gradient=-1, return_sequences=False, init='glorot_uniform', inner_init='orthogonal', input_dim=None, input_length=None, **kwargs): self.output_dim = output_dim self.n_slots = n_slots self.m_length = m_length self.shift_range = shift_range self.init = init self.inner_init = inner_init self.inner_rnn = inner_rnn self.return_sequences = return_sequences self.truncate_gradient = truncate_gradient self.input_dim = input_dim self.input_length = input_length if self.input_dim: kwargs['input_shape'] = (self.input_length, self.input_dim) super(NeuralTuringMachine, self).__init__(**kwargs) def build(self): input_leng, input_dim = self.input_shape[1:] self.input = T.tensor3() if self.inner_rnn == 'gru': self.rnn = GRU( input_dim=input_dim+self.m_length, input_length=input_leng, output_dim=self.output_dim, init=self.init, inner_init=self.inner_init) elif self.inner_rnn == 'lstm': self.rnn = LSTM( input_dim=input_dim+self.m_length, input_length=input_leng, output_dim=self.output_dim, init=self.init, inner_init=self.inner_init) else: raise ValueError('this inner_rnn is not implemented yet.') self.rnn.build() # initial memory, state, read and write vectors self.M = theano.shared((.001*np.ones((1,)).astype(floatX))) self.init_h = shared_zeros((self.output_dim)) self.init_wr = self.rnn.init((self.n_slots,)) self.init_ww = self.rnn.init((self.n_slots,)) # write self.W_e = self.rnn.init((self.output_dim, self.m_length)) # erase self.b_e = shared_zeros((self.m_length)) self.W_a = self.rnn.init((self.output_dim, self.m_length)) # add self.b_a = shared_zeros((self.m_length)) # get_w parameters for reading operation self.W_k_read = self.rnn.init((self.output_dim, self.m_length)) self.b_k_read = self.rnn.init((self.m_length, )) self.W_c_read = self.rnn.init((self.output_dim, 3)) # 3 = beta, g, gamma see eq. 5, 7, 9 in Graves et al., 2014 self.b_c_read = shared_zeros((3)) self.W_s_read = self.rnn.init((self.output_dim, self.shift_range)) self.b_s_read = shared_zeros((self.shift_range)) # get_w parameters for writing operation self.W_k_write = self.rnn.init((self.output_dim, self.m_length)) self.b_k_write = self.rnn.init((self.m_length, )) self.W_c_write = self.rnn.init((self.output_dim, 3)) # 3 = beta, g, gamma see eq.
        self.b_c_write = shared_zeros((3))
        self.W_s_write = self.rnn.init((self.output_dim, self.shift_range))
        self.b_s_write = shared_zeros((self.shift_range))

        self.C = _circulant(self.n_slots, self.shift_range)

        self.params = self.rnn.params + [
            self.W_e, self.b_e, self.W_a, self.b_a,
            self.W_k_read, self.b_k_read, self.W_c_read, self.b_c_read,
            self.W_s_read, self.b_s_read,
            self.W_k_write, self.b_k_write, self.W_s_write, self.b_s_write,
            self.W_c_write, self.b_c_write,
            self.M, self.init_h, self.init_wr, self.init_ww]

        if self.inner_rnn == 'lstm':
            self.init_c = shared_zeros((self.output_dim))
            self.params = self.params + [self.init_c, ]

    def _read(self, w, M):
        return (w[:, :, None] * M).sum(axis=1)

    def _write(self, w, e, a, M, mask):
        Mtilda = M * (1 - w[:, :, None] * e[:, None, :])
        Mout = Mtilda + w[:, :, None] * a[:, None, :]
        return mask[:, None, None] * Mout + (1 - mask[:, None, None]) * M

    def _get_content_w(self, beta, k, M):
        num = beta[:, None] * _cosine_distance(M, k)
        return _softmax(num)

    def _get_location_w(self, g, s, C, gamma, wc, w_tm1, mask):
        wg = g[:, None] * wc + (1 - g[:, None]) * w_tm1
        Cs = (C[None, :, :, :] * wg[:, None, None, :]).sum(axis=3)
        wtilda = (Cs * s[:, :, None]).sum(axis=1)
        wout = _renorm(wtilda ** gamma[:, None])
        return mask[:, None] * wout + (1 - mask[:, None]) * w_tm1

    def _get_controller_output(self, h, W_k, b_k, W_c, b_c, W_s, b_s):
        k = T.tanh(T.dot(h, W_k) + b_k)  # + 1e-6
        c = T.dot(h, W_c) + b_c
        beta = T.nnet.relu(c[:, 0]) + 1e-6
        g = T.nnet.sigmoid(c[:, 1])
        gamma = T.nnet.relu(c[:, 2]) + 1
        s = T.nnet.softmax(T.dot(h, W_s) + b_s)
        return k, beta, g, gamma, s

    def _get_initial_states(self, batch_size):
        init_M = self.M.dimshuffle(0, 'x', 'x').repeat(
            batch_size, axis=0).repeat(self.n_slots, axis=1).repeat(
            self.m_length, axis=2)
        init_h = self.init_h.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        init_wr = self.init_wr.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        init_ww = self.init_ww.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        if self.inner_rnn == 'lstm':
            init_c = self.init_c.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
            return init_M, T.nnet.softmax(init_wr), T.nnet.softmax(init_ww), init_h, init_c
        else:
            return init_M, T.nnet.softmax(init_wr), T.nnet.softmax(init_ww), init_h

    def _step(self, x, mask, M_tm1, wr_tm1, ww_tm1, *args):
        # read
        if self.inner_rnn == 'lstm':
            h_tm1 = args[0:2][::-1]  # (cell_tm1, h_tm1)
        else:
            h_tm1 = args[0:1]  # (h_tm1, )
        k_read, beta_read, g_read, gamma_read, s_read = self._get_controller_output(
            h_tm1[-1], self.W_k_read, self.b_k_read, self.W_c_read,
            self.b_c_read, self.W_s_read, self.b_s_read)
        wc_read = self._get_content_w(beta_read, k_read, M_tm1)
        wr_t = self._get_location_w(g_read, s_read, self.C, gamma_read,
                                    wc_read, wr_tm1, mask)
        M_read = self._read(wr_t, M_tm1)

        # update controller
        h_t = _update_controller(self, x, h_tm1, M_read, mask)

        # write
        k_write, beta_write, g_write, gamma_write, s_write = self._get_controller_output(
            h_t[-1], self.W_k_write, self.b_k_write, self.W_c_write,
            self.b_c_write, self.W_s_write, self.b_s_write)
        wc_write = self._get_content_w(beta_write, k_write, M_tm1)
        ww_t = self._get_location_w(g_write, s_write, self.C, gamma_write,
                                    wc_write, ww_tm1, mask)
        e = T.nnet.sigmoid(T.dot(h_t[-1], self.W_e) + self.b_e)
        a = T.tanh(T.dot(h_t[-1], self.W_a) + self.b_a)
        M_t = self._write(ww_t, e, a, M_tm1, mask)

        return (M_t, wr_t, ww_t) + h_t

    def get_output(self, train=False):
        outputs = self.get_full_output(train)
        if self.return_sequences:
            return outputs[-1]
        else:
            return outputs[-1][:, -1]
    @property
    def output_shape(self):
        input_shape = self.input_shape
        if self.return_sequences:
            return input_shape[0], input_shape[1], self.output_dim
        else:
            return input_shape[0], self.output_dim

    def get_full_output(self, train=False):
        """This method is for research and visualization purposes.

        Use it as:
            X = model.get_input()  # full model
            Y = ntm.get_output()   # this layer
            F = theano.function([X], Y, allow_input_downcast=True)
            [memory, read_address, write_address, rnn_state] = F(x)

        If inner_rnn == "lstm", use it as:
            [memory, read_address, write_address, rnn_cell, rnn_state] = F(x)
        """
        X = self.get_input(train)
        padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)[:, :, 0]
        X = X.dimshuffle((1, 0, 2))

        init_states = self._get_initial_states(X.shape[1])
        outputs, updates = theano.scan(self._step,
                                       sequences=[X, padded_mask],
                                       outputs_info=init_states,
                                       non_sequences=self.params,
                                       truncate_gradient=self.truncate_gradient)

        out = [outputs[0].dimshuffle((1, 0, 2, 3)),
               outputs[1].dimshuffle((1, 0, 2)),
               outputs[2].dimshuffle((1, 0, 2)),
               outputs[3].dimshuffle((1, 0, 2))]
        if self.inner_rnn == 'lstm':
            out += [outputs[4].dimshuffle((1, 0, 2))]  # was `out + [...]`, which silently dropped the cell states
        return out
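# A usage sketch, not from the original source: wiring the NTM above into a
# Sequential model for a copy-style task, in the same old (pre-1.0) Keras API
# this class targets. All sizes below are illustrative assumptions.
from keras.models import Sequential
from keras.layers.core import TimeDistributedDense, Activation

model = Sequential()
model.add(NeuralTuringMachine(64, n_slots=50, m_length=20, shift_range=3,
                              inner_rnn='gru', return_sequences=True,
                              input_dim=8, input_length=20))
model.add(TimeDistributedDense(8))  # project back to the bit width of the input
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam')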
                     MAX_SEQLEN, make_categorical=True)
print(Xtrain.shape, Xtest.shape, Ytrain.shape, Ytest.shape)

# define network
EMBED_SIZE = 32
HIDDEN_SIZE = 32
BATCH_SIZE = 32
NUM_EPOCHS = 5

model = Sequential()
model.add(Embedding(len(s_word2id), EMBED_SIZE, input_length=MAX_SEQLEN,
                    dropout=0.2))
model.add(LSTM(HIDDEN_SIZE, dropout_W=0.2, dropout_U=0.2))
#model.add(GRU(HIDDEN_SIZE, dropout_W=0.2, dropout_U=0.2))
#model.add(Bidirectional(LSTM(HIDDEN_SIZE, dropout_W=0.2, dropout_U=0.2)))
model.add(RepeatVector(MAX_SEQLEN))
model.add(LSTM(HIDDEN_SIZE, return_sequences=True))
#model.add(GRU(HIDDEN_SIZE, return_sequences=True))
#model.add(Bidirectional(LSTM(HIDDEN_SIZE, return_sequences=True)))
model.add(TimeDistributed(Dense(len(t_pos2id))))
model.add(Activation("softmax"))

model.compile(loss="categorical_crossentropy", optimizer="adam",
              metrics=["accuracy"])

# fit call was truncated in the source; remaining args assumed from the
# constants defined above
history = model.fit(Xtrain, Ytrain, batch_size=BATCH_SIZE,
                    nb_epoch=NUM_EPOCHS, validation_data=(Xtest, Ytest))
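# Not in the original fragment: a short follow-up sketch that scores the
# tagger, assuming the Keras 1.x API used above; with metrics=["accuracy"],
# evaluate() returns [loss, accuracy].
score, acc = model.evaluate(Xtest, Ytest, batch_size=BATCH_SIZE)
print("Test score: %.3f, accuracy: %.3f" % (score, acc))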
class NeuralTuringMachine(Recurrent):
    """Neural Turing Machines

    Non-obvious parameters:
    -----------------------
    shift_range: int, number of available shifts, e.g. if 3, the available
        shifts are (-1, 0, 1)
    n_slots: number of memory locations
    m_length: memory length at each location

    Known issues:
    -------------
    Theano may complain when n_slots == 1.
    """
    def __init__(self, output_dim, n_slots, m_length, shift_range=3,
                 inner_rnn='lstm',
                 init='glorot_uniform', inner_init='orthogonal',
                 input_dim=4, input_length=5, **kwargs):
        self.output_dim = output_dim
        self.n_slots = n_slots
        self.m_length = m_length
        self.shift_range = shift_range
        self.init = init
        self.inner_init = inner_init
        self.inner_rnn = inner_rnn
        self.input_dim = input_dim
        self.input_length = input_length
        if self.input_dim:
            kwargs['input_shape'] = (self.input_length, self.input_dim)
        super(NeuralTuringMachine, self).__init__(**kwargs)

    def build(self):
        input_leng, input_dim = self.input_shape[1:]
        self.input = T.tensor3()

        if self.inner_rnn == 'gru':
            self.rnn = GRU(
                activation='relu',
                input_dim=input_dim + self.m_length,
                input_length=input_leng,
                output_dim=self.output_dim, init=self.init,
                inner_init=self.inner_init)
        elif self.inner_rnn == 'lstm':
            self.rnn = LSTM(
                input_dim=input_dim + self.m_length,
                input_length=input_leng,
                output_dim=self.output_dim, init=self.init,
                forget_bias_init='zero',
                inner_init=self.inner_init)
        else:
            raise ValueError('this inner_rnn is not implemented yet.')
        self.rnn.build()

        # initial memory, state, read and write vectors
        self.M = theano.shared((.001 * np.ones((1,)).astype(floatX)))
        self.init_h = K.zeros((self.output_dim))
        self.init_wr = self.rnn.init((self.n_slots,))
        self.init_ww = self.rnn.init((self.n_slots,))

        # write
        self.W_e = self.rnn.init((self.output_dim, self.m_length))  # erase
        self.b_e = K.zeros((self.m_length))
        self.W_a = self.rnn.init((self.output_dim, self.m_length))  # add
        self.b_a = K.zeros((self.m_length))

        # get_w parameters for reading operation
        self.W_k_read = self.rnn.init((self.output_dim, self.m_length))
        self.b_k_read = self.rnn.init((self.m_length,))
        self.W_c_read = self.rnn.init((self.output_dim, 3))  # 3 = beta, g, gamma, see eq. 5, 7, 9
        self.b_c_read = K.zeros((3))
        self.W_s_read = self.rnn.init((self.output_dim, self.shift_range))
        self.b_s_read = K.zeros((self.shift_range))

        # get_w parameters for writing operation
        self.W_k_write = self.rnn.init((self.output_dim, self.m_length))
        self.b_k_write = self.rnn.init((self.m_length,))
        self.W_c_write = self.rnn.init((self.output_dim, 3))  # 3 = beta, g, gamma, see eq. 5, 7, 9
        self.b_c_write = K.zeros((3))
        self.W_s_write = self.rnn.init((self.output_dim, self.shift_range))
        self.b_s_write = K.zeros((self.shift_range))

        self.C = _circulant(self.n_slots, self.shift_range)

        self.trainable_weights = self.rnn.trainable_weights + [
            self.W_e, self.b_e, self.W_a, self.b_a,
            self.W_k_read, self.b_k_read, self.W_c_read, self.b_c_read,
            self.W_s_read, self.b_s_read,
            self.W_k_write, self.b_k_write, self.W_s_write, self.b_s_write,
            self.W_c_write, self.b_c_write,
            self.M, self.init_h, self.init_wr, self.init_ww]

        if self.inner_rnn == 'lstm':
            self.init_c = K.zeros((self.output_dim))
            self.trainable_weights = self.trainable_weights + [self.init_c, ]

    def _read(self, w, M):
        return (w[:, :, None] * M).sum(axis=1)

    def _write(self, w, e, a, M):
        Mtilda = M * (1 - w[:, :, None] * e[:, None, :])
        Mout = Mtilda + w[:, :, None] * a[:, None, :]
        return Mout

    def _get_content_w(self, beta, k, M):
        num = beta[:, None] * _cosine_distance(M, k)
        return _softmax(num)

    def _get_location_w(self, g, s, C, gamma, wc, w_tm1):
        wg = g[:, None] * wc + (1 - g[:, None]) * w_tm1
        Cs = (C[None, :, :, :] * wg[:, None, None, :]).sum(axis=3)
        wtilda = (Cs * s[:, :, None]).sum(axis=1)
        wout = _renorm(wtilda ** gamma[:, None])
        return wout

    def _get_controller_output(self, h, W_k, b_k, W_c, b_c, W_s, b_s):
        k = T.tanh(T.dot(h, W_k) + b_k)  # + 1e-6
        c = T.dot(h, W_c) + b_c
        beta = T.nnet.relu(c[:, 0]) + 1e-4
        g = T.nnet.sigmoid(c[:, 1])
        gamma = T.nnet.relu(c[:, 2]) + 1.0001
        s = T.nnet.softmax(T.dot(h, W_s) + b_s)
        return k, beta, g, gamma, s

    def get_initial_states(self, X):
        batch_size = X.shape[0]
        init_M = self.M.dimshuffle(0, 'x', 'x').repeat(
            batch_size, axis=0).repeat(self.n_slots, axis=1).repeat(
            self.m_length, axis=2)
        init_M = init_M.flatten(ndim=2)

        init_h = self.init_h.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        init_wr = self.init_wr.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        init_ww = self.init_ww.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        if self.inner_rnn == 'lstm':
            init_c = self.init_c.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
            return [init_M, T.nnet.softmax(init_wr), T.nnet.softmax(init_ww),
                    init_h, init_c]
        else:
            return [init_M, T.nnet.softmax(init_wr), T.nnet.softmax(init_ww),
                    init_h]

    @property
    def output_shape(self):
        input_shape = self.input_shape
        if self.return_sequences:
            return input_shape[0], input_shape[1], self.output_dim
        else:
            return input_shape[0], self.output_dim

    def step(self, x, states):
        M_tm1, wr_tm1, ww_tm1 = states[:3]
        # reshape the flattened memory back to (batch, n_slots, m_length)
        M_tm1 = M_tm1.reshape((x.shape[0], self.n_slots, self.m_length))

        # read
        h_tm1 = states[3:]
        k_read, beta_read, g_read, gamma_read, s_read = self._get_controller_output(
            h_tm1[0], self.W_k_read, self.b_k_read, self.W_c_read,
            self.b_c_read, self.W_s_read, self.b_s_read)
        wc_read = self._get_content_w(beta_read, k_read, M_tm1)
        wr_t = self._get_location_w(g_read, s_read, self.C, gamma_read,
                                    wc_read, wr_tm1)
        M_read = self._read(wr_t, M_tm1)

        # update controller
        h_t = _update_controller(self, x, h_tm1, M_read)

        # write
        k_write, beta_write, g_write, gamma_write, s_write = self._get_controller_output(
            h_t[0], self.W_k_write, self.b_k_write, self.W_c_write,
            self.b_c_write, self.W_s_write, self.b_s_write)
        wc_write = self._get_content_w(beta_write, k_write, M_tm1)
        ww_t = self._get_location_w(g_write, s_write, self.C, gamma_write,
                                    wc_write, ww_tm1)
        e = T.nnet.sigmoid(T.dot(h_t[0], self.W_e) + self.b_e)
        a = T.tanh(T.dot(h_t[0], self.W_a) + self.b_a)
        M_t = self._write(ww_t, e, a, M_tm1)
        # flatten the memory again so it fits Keras' 2D state interface
        M_t = M_t.flatten(ndim=2)

        return h_t[0], [M_t, wr_t, ww_t] + h_t
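# The _circulant helper referenced in both build() methods above is not shown
# in this file; it is presumably defined alongside _cosine_distance, _softmax,
# _renorm and _update_controller. A NumPy-level sketch of its assumed behavior:
# a stack of `n_shifts` permutation (shift) matrices of size (leng, leng), so
# that (C[None, :, :, :] * wg[:, None, None, :]).sum(axis=3) in
# _get_location_w produces every shifted copy of the gated weights at once.
# The real helper would wrap the array in a Theano shared variable.
import numpy as np

def _circulant_sketch(leng, n_shifts):
    # eye rolled by each allowed shift, e.g. (1, 0, -1) for n_shifts == 3
    eye = np.eye(leng)
    shifts = range(n_shifts // 2, -(n_shifts // 2) - 1, -1)
    C = np.asarray([np.roll(eye, s, axis=1) for s in shifts])
    return C  # shape: (n_shifts, leng, leng)

C = _circulant_sketch(8, 3)  # C.shape == (3, 8, 8)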
class Stack(Recurrent):
    """Stack and queue network

    output_dim = output dimension
    n_slots = number of memory slots
    m_length = dimension of the memory
    rnn_size = output size of the memory controller
    inner_rnn = "lstm", only lstm is supported
    stack = True to create a neural stack or False to create a neural queue

    from Learning to Transduce with Unbounded Memory
    [[http://arxiv.org/pdf/1506.02516.pdf]]
    """
    def __init__(self, output_dim, n_slots, m_length,
                 inner_rnn='lstm', rnn_size=64, stack=True,
                 init='glorot_uniform', inner_init='orthogonal',
                 input_dim=None, input_length=None, **kwargs):
        self.output_dim = output_dim
        self.n_slots = n_slots + 1  # because we start at time 1
        self.m_length = m_length
        self.init = init
        self.inner_init = inner_init
        if inner_rnn != 'lstm':
            raise ValueError('Only lstm is supported')
        self.inner_rnn = inner_rnn
        self.rnn_size = rnn_size
        self.stack = stack

        self.input_dim = input_dim
        self.input_length = input_length
        if self.input_dim:
            kwargs['input_shape'] = (self.input_length, self.input_dim)
        super(Stack, self).__init__(**kwargs)

    def build(self):
        input_leng, input_dim = self.input_shape[1:]
        self.input = T.tensor3()

        if self.inner_rnn == 'gru':
            self.rnn = GRU(
                activation='relu',
                input_dim=input_dim + self.m_length,
                input_length=input_leng,
                output_dim=self.output_dim, init=self.init,
                inner_init=self.inner_init)
        elif self.inner_rnn == 'lstm':
            self.rnn = LSTM(
                input_dim=input_dim + self.m_length,
                input_length=input_leng,
                output_dim=self.rnn_size, init=self.init,
                forget_bias_init='zero',
                inner_init=self.inner_init)
        else:
            raise ValueError('this inner_rnn is not implemented yet.')
        self.rnn.build()

        self.init_h = K.zeros((self.rnn_size))

        self.W_d = self.rnn.init((self.rnn_size, 1))
        self.W_u = self.rnn.init((self.rnn_size, 1))
        self.W_v = self.rnn.init((self.rnn_size, self.m_length))
        self.W_o = self.rnn.init((self.rnn_size, self.output_dim))

        self.b_d = K.zeros((1,), name='b_d')
        self.b_u = K.zeros((1,), name='b_u')
        self.b_v = K.zeros((self.m_length,))
        self.b_o = K.zeros((self.output_dim,))

        self.trainable_weights = self.rnn.trainable_weights + [
            self.W_d, self.b_d,
            self.W_v, self.b_v,
            self.W_u, self.b_u,
            self.W_o, self.b_o, self.init_h]

        if self.inner_rnn == 'lstm':
            self.init_c = K.zeros((self.rnn_size))
            self.trainable_weights = self.trainable_weights + [self.init_c, ]

    def get_initial_states(self, X):
        batch_size = X.shape[0]
        init_r = K.zeros((self.m_length)).dimshuffle('x', 0).repeat(batch_size, axis=0)
        init_V = K.zeros((self.n_slots, self.m_length)).dimshuffle('x', 0, 1).repeat(batch_size, axis=0)
        init_S = K.zeros((self.n_slots)).dimshuffle('x', 0).repeat(batch_size, axis=0)
        init_h = self.init_h.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        itime = K.zeros((1,), dtype=np.int32)

        if self.inner_rnn == 'lstm':
            init_c = self.init_c.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
            return [init_r, init_V, init_S, itime, init_h, init_c]

    @property
    def output_shape(self):
        input_shape = self.input_shape
        if self.return_sequences:
            return input_shape[0], input_shape[1], self.output_dim
        else:
            return input_shape[0], self.output_dim

    def step(self, x, states):
        r_tm1, V_tm1, s_tm1, time = states[:4]
        h_tm1 = states[4:]

        op_t, h_t = _update_controller(self, T.concatenate([x, r_tm1], axis=-1),
                                       h_tm1)

        d_t = K.sigmoid(K.dot(op_t, self.W_d) + self.b_d)
        u_t = K.sigmoid(K.dot(op_t, self.W_u) + self.b_u)
        v_t = K.tanh(K.dot(op_t, self.W_v) + self.b_v)
        o_t = K.tanh(K.dot(op_t, self.W_o) + self.b_o)
        time = time + 1
        V_t, s_t, r_t = _update_neural_stack(self, V_tm1, s_tm1, d_t[:, 0],
                                             u_t[:, 0], v_t, time[0],
                                             stack=self.stack)

        return o_t, [r_t, V_t, s_t, time] + h_t
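# A usage sketch, not from the original source: dropping the Stack layer into
# a Sequential model for a toy transduction task, in the Keras 1.x style used
# above (step/get_initial_states). All sizes are illustrative assumptions;
# stack=False would build the neural queue variant instead.
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.layers.wrappers import TimeDistributed

model = Sequential()
model.add(Stack(output_dim=64, n_slots=16, m_length=20, rnn_size=64,
                stack=True, return_sequences=True,
                input_dim=8, input_length=16))
model.add(TimeDistributed(Dense(8)))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')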