def get_nn_model(token_dict_size):
    _logger.info('Initializing NN model with the following params:')
    _logger.info('Input dimension: %s (token vector size)' % TOKEN_REPRESENTATION_SIZE)
    _logger.info('Hidden dimension: %s' % HIDDEN_LAYER_DIMENSION)
    _logger.info('Output dimension: %s (token dict size)' % token_dict_size)
    _logger.info('Input seq length: %s' % INPUT_SEQUENCE_LENGTH)
    _logger.info('Output seq length: %s' % MAX_NORMALIZED_TOKEN_LENGTH)
    _logger.info('Batch size: %s' % SAMPLES_BATCH_SIZE)

    model = Sequential()
    seq2seq = SimpleSeq2Seq(
        input_dim=TOKEN_REPRESENTATION_SIZE,
        input_length=INPUT_SEQUENCE_LENGTH,
        hidden_dim=HIDDEN_LAYER_DIMENSION,
        output_dim=token_dict_size,
        output_length=MAX_NORMALIZED_TOKEN_LENGTH,
        depth=1
    )
    model.add(seq2seq)
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    # use previously saved weights if they exist
    _logger.info('Looking for a model %s' % NN_MODEL_PATH)
    if os.path.isfile(NN_MODEL_PATH):
        _logger.info('Loading previously calculated weights...')
        model.load_weights(NN_MODEL_PATH)

    _logger.info('Model is built')
    return model
def train(self, train_names, train_codes, hyperparams, pct_train=0.8, lr=0.01, num_epoch=100):
    if self.model_name in ('SimpleSeq2Seq', 'AttentionSeq2Seq'):
        train_name, train_code, val_name, val_code, naming_data, hyperparams['n_tokens'] = \
            trainModel.split_data(train_names, train_codes,
                                  hyperparams['output_length'],
                                  hyperparams['input_length'], pct_train)
        hyperparams['is_embedding'] = False
        train_name = trainModel.one_hot_name(train_name, hyperparams['n_tokens'])

    required_params = ['output_dim', 'output_length', 'input_length', 'is_embedding', 'n_tokens']
    for param in required_params:
        assert param in hyperparams, param

    if self.model_name == 'SimpleSeq2Seq':
        model = SimpleSeq2Seq(**hyperparams)
    elif self.model_name == 'AttentionSeq2Seq':
        model = AttentionSeq2Seq(**hyperparams)
    elif self.model_name == 'Seq2Seq':
        model = Seq2Seq(**hyperparams)
    else:
        raise TypeError('Unknown model name: %s' % self.model_name)

    my_adam = optimizers.Adam(lr=lr, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    model.compile(optimizer=my_adam, loss='categorical_crossentropy')

    print('fit...')
    model.fit(train_code, train_name, epochs=num_epoch)

    print('predict...')
    predict_probs = model.predict(val_code)
    predict_idx = np.argmax(predict_probs, axis=2)

    print('evaluate...')
    exact_match, _ = trainModel.exact_match(naming_data, predict_idx, val_name)
    precision, recall, f1, _, _ = trainModel.evaluate_tokens(naming_data, predict_idx, val_name)

    return model, exact_match, precision, recall, f1, naming_data
def createModel():
    model = SimpleSeq2Seq(input_dim=112, output_length=1, output_dim=5, depth=1)
    model.compile(loss='mse', optimizer='rmsprop')
    return model
def main(args):
    if len(args) < 1:
        sys.stderr.write("Error - one required argument: <data directory>\n")
        sys.exit(-1)

    working_dir = args[0]
    data_file = os.path.join(working_dir, 'thyme_relational_onePathsBetweenArgs.txt')

    sys.stderr.write("loading data file...\n")

    # learn alphabet from training data
    provider = dataset_noLabel.DatasetProvider(data_file)
    # now load training examples and labels
    train_x = provider.load(data_file)

    sys.stderr.write("finished loading data file...\n")

    # turn x into numpy array among other things
    maxlen = max([len(seq) for seq in train_x])
    train_x = [x[0:maxlen] for x in train_x]
    train_x = pad_sequences(train_x, maxlen=maxlen)

    # prepare embedding matrix
    nb_words = len(provider.word2int)

    train_y = np.zeros((train_x.shape[0], maxlen, nb_words), dtype=np.bool)
    inst = 0
    for x in train_x:
        train_y[inst, 0, x[0]] = 1
        train_y[inst, 1, x[1]] = 1
        inst = inst + 1

    pickle.dump(maxlen, open(os.path.join(working_dir, 's2s_maxlen.p'), "wb"))
    pickle.dump(provider.word2int, open(os.path.join(working_dir, 's2s_word2int.p'), "wb"))
    # pickle.dump(nb_words, open(os.path.join(working_dir, 'wd_nb_words.p'), "wb"))
    # pickle.dump(train_x, open(os.path.join(working_dir, 'train_x.p'), "wb"))

    sys.stderr.write("training encoder...\n")

    model = Sequential()
    model.add(Embedding(len(provider.word2int), EMBEDDING_DIM, input_length=maxlen, weights=None))
    seq2seq = SimpleSeq2Seq(
        input_dim=EMBEDDING_DIM,
        input_length=maxlen,
        hidden_dim=10,
        output_dim=nb_words,
        output_length=maxlen,
        depth=1)
    model.add(seq2seq)

    model.compile(optimizer='RMSprop', loss='mse')
    model.fit(train_x, train_y, batch_size=50, nb_epoch=3)

    json_string = model.to_json()
    open(os.path.join(working_dir, 's2s_encoder_0.json'), 'w').write(json_string)
    model.save_weights(os.path.join(working_dir, 's2s_encoder_0.h5'), overwrite=True)
def train(self, input_file, pct_train=0.65, pct_val=0.05, pct_test=0.3):
    assert self.parameters is None, "The model has already been trained!"
    assert "input_length" in self.hyperparameters, 'input_length'

    train_data, val_data, test_data, self.naming_data = \
        DataGenerator.get_data_for_simple_seq2seq_with_validation_and_test(
            input_file, self.hyperparameters['output_length'],
            self.hyperparameters['input_length'], pct_train, pct_val, pct_test)
    train_name, train_code = train_data
    val_name, val_code = val_data
    self.test_name, self.test_code = test_data
    self.hyperparameters['n_tokens'] = self.naming_data.all_tokens_dictionary.get_n_tokens()

    def one_hot_name(names, max_name_size, name_dim):
        X = np.zeros((len(names), max_name_size, name_dim))
        for i, name in enumerate(names):
            for j, token in enumerate(name):
                X[i, j, token] = 1.0
        return X

    train_name = one_hot_name(train_name,
                              self.hyperparameters['output_length'],
                              self.hyperparameters['n_tokens'])
    print('n_tokens: ', self.hyperparameters['n_tokens'])
    with open('train_name.txt', 'w') as f:
        f.write(str(train_name))

    model = SimpleSeq2Seq(**self.hyperparameters)
    my_rms = optimizers.RMSprop(lr=0.0001, rho=0.9, epsilon=1e-06)
    my_adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    model.compile(optimizer=my_adam, loss='categorical_crossentropy')
    self.model = model

    print('fit...')
    model.fit(train_code, train_name, epochs=20)

    print('predict...')
    predict_probs = model.predict(val_code)
    print('predict_probs.shape: ', predict_probs.shape)
    predict_idx = np.argmax(predict_probs, axis=2)
    print('predict_idx.shape: ', predict_idx.shape)

    print('Exact match evaluate...')
    exact_match_accuracy = self.exact_match(predict_idx, val_name)
    print('exact_match_accuracy: ', exact_match_accuracy)

    suggestions = self.show_names(predict_idx)
    original_names = self.show_names(val_name)
    with open('suggestions.txt', 'w') as f:
        for i in range(len(suggestions)):
            f.write('original name: ' + str(original_names[i]) + '\n')
            f.write('suggestions: ' + str(suggestions[i]) + '\n')
            f.write('\n')
def simple_s2s(input_length, input_dim, output_length, output_dim, hidden_dim, depth, dropout=0.0):
    model = SimpleSeq2Seq(
        input_shape=(input_length, input_dim),
        hidden_dim=hidden_dim,
        output_length=output_length,
        output_dim=output_dim,
        depth=depth,
        dropout=dropout
    )
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    return model
def simple_seq2seq_model(input_dim, hidden_dim, output_length, output_dim, input_length):
    model = Sequential()
    model_seq = SimpleSeq2Seq(input_dim=input_dim,
                              hidden_dim=hidden_dim,
                              output_length=output_length,
                              output_dim=output_dim,
                              input_length=input_length,
                              readout_activation='softmax')
    model.add(model_seq)
    model.add(TimeDistributed(Dense(output_dim)))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    return model
def test_SimpleSeq2Seq():
    x = np.random.random((samples, input_length, input_dim))
    y = np.random.random((samples, output_length, output_dim))

    models = []
    models += [SimpleSeq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                             output_length=output_length,
                             input_shape=(input_length, input_dim))]
    models += [SimpleSeq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                             output_length=output_length,
                             input_shape=(input_length, input_dim), depth=2)]

    for model in models:
        model.compile(loss='mse', optimizer='sgd')
        model.fit(x, y, nb_epoch=1)
def model_2(input_length, output_length, vocab_size, hidden_size=256):
    model = Sequential()
    model.add(SimpleSeq2Seq(
        input_dim=vocab_size,
        input_length=input_length,
        output_length=output_length,
        output_dim=vocab_size,
        hidden_dim=hidden_size,
    ))
    model.add(Dense(output_length * vocab_size))
    model.add(Reshape((output_length, vocab_size)))
    model.add(TimeDistributed(Dense(vocab_size, activation='softmax')))
    return model
def mnist_test():
    # Load the dataset
    f = open('../../mnist.pkl', 'rb')  # binary mode for pickle
    train_set, valid_set, test_set = cPickle.load(f)
    print(train_set[0].shape)

    model = SimpleSeq2Seq(input_dim=28, output_dim=28, output_length=28)
    model.compile(loss='mse', optimizer='rmsprop')

    X = train_set[0].reshape(50000, 28, 28)
    # a = model.layers[0].get_weights()
    # print model.layers[0].set_weights(a)

    plt.imshow(X[0])
    plt.savefig('infig')
    model.fit(X, X)
    aa = model.predict(X)
    plt.imshow(aa[0])
    plt.savefig('outfig')
def create_model(self, input_shape, output_shape):
    self.model = Sequential()
    # self.model.add(Embedding(input_dim=input_shape[0], output_dim=input_shape[1]*2, input_length=input_shape[1]))
    # self.model.add(BatchNormalization())
    self.model.add(
        SimpleSeq2Seq(output_dim=output_shape[2],
                      output_length=output_shape[1],
                      input_shape=(input_shape[1], input_shape[2]),
                      unroll=True))
    """
    self.model.add(LSTM(512, return_sequences=True, unroll=True))
    self.model.add(LSTM(1024, return_sequences=False, unroll=True))
    self.model.add(Dense(output_classes))
    self.model.add(Activation("softmax"))
    """
    self.model.compile(optimizer=RMSprop(),
                       loss='categorical_crossentropy',
                       metrics=['accuracy'])
    self.model.summary()
def couplet():
    maxlen = 30            # length of input sequence and output sequence
    max_features = 10000   # number of words
    embedding_dim = 256    # word embedding size

    data_dir = os.path.join(current_dir, "..", "data/couplet", "train")
    tr_handler = preprocess(os.path.join(data_dir, "in.txt"),
                            os.path.join(data_dir, "out.txt"),
                            max_features, maxlen)
    tr_handler.preprocess()
    tr_x, tr_y = tr_handler.gen_all()
    tr_y = tr_y.reshape(*tr_y.shape, 1)

    # model setting
    model = Sequential()
    # model.add(Masking(mask_value=0))
    model.add(Embedding(max_features + 4, embedding_dim, input_length=maxlen + 2))
    model.add(SimpleSeq2Seq(input_length=maxlen + 2,
                            input_dim=embedding_dim,
                            hidden_dim=256,
                            output_length=maxlen + 2,
                            output_dim=embedding_dim,
                            depth=2,
                            dropout=0.2))
    model.add(TimeDistributed(Dense(max_features + 4, activation='softmax')))

    masked_categorical_crossentropy = get_loss(10003)
    model.compile(optimizer='rmsprop',
                  loss=masked_categorical_crossentropy,
                  metrics=['accuracy'])
    plot_model(model, to_file='model.png', show_shapes=True)

    # training
    out_batch = NBatchLogger(display=100, data_handler=tr_handler)
    model.fit(tr_x, tr_y, epochs=100000, batch_size=64, callbacks=[out_batch])
    print(model.predict(tr_x[:1])[0])
# Manual seeds
os.environ['PYTHONHASHSEED'] = '0'  # Necessary for python3
np.random.seed(29)
rn.seed(29)
tf.set_random_seed(29)

hiddenDim = 256
depth = 2

myInput = Input(shape=(framesPerWord, nOfMouthPixels,))
attn = SimpleSeq2Seq(input_dim=nOfMouthPixels,
                     hidden_dim=hiddenDim,
                     output_length=2,
                     output_dim=wordsVocabSize,
                     depth=depth)(myInput)
actn = Activation("softmax")(attn)
Seq2SeqLipReaderModel = Model(inputs=myInput, outputs=actn)

lr = 5e-4
adam = Adam(lr=lr)
Seq2SeqLipReaderModel.compile(optimizer=adam,
                              loss='categorical_crossentropy',
                              metrics=['accuracy'])
Seq2SeqLipReaderModel.summary()

filenamePre = 'SimpleSeq2Seq-h' + \
    str(hiddenDim) + '-depth' + str(depth) + \
    '-Adam-%1.e' % lr + '-GRIDcorpus-s'
def get_model(story_maxlen, seq_maxlen, num_inputs, answer_maxlen,
              char_embedding_size=30, hidden_dim=150,
              encoder_depth=1, decoder_depth=1,
              conv_nfilters=[], query_lstm_dims=[], hierarchical_lstm_dims=[],
              query_maxlen=None, dropout=0.5,
              seq2seq_model="attention", hierarchical=False):
    assert seq2seq_model in ["attention", "simple", "seq2seq"]

    if hierarchical:
        assert story_maxlen is not None, "Need story maxlen for hierarchical model"
        input_shape = (story_maxlen, seq_maxlen)
    else:
        input_shape = (seq_maxlen, )

    # TODO Need to manually add this for correct output dims? Probably not since we add our own dense layer on top.
    # We WOULD need this if we eliminate the last dense layer and have only an activation there.
    # hidden_dims_decoder.append(num_inputs)

    input1 = Input(shape=input_shape)
    inputs = [input1]

    embed_inner_input = Input((input_shape[-1], ))
    embed1 = Embedding(num_inputs, char_embedding_size)(embed_inner_input)
    embed1 = Dropout(dropout)(embed1)

    pool = embed1
    for conv_nfilter in conv_nfilters:
        conv_len3 = Convolution1D(conv_nfilter, 3, activation="tanh", border_mode="same")(pool)
        conv_len3 = Dropout(dropout)(conv_len3)
        conv_len2 = Convolution1D(conv_nfilter, 2, activation="tanh", border_mode="same")(pool)
        conv_len2 = Dropout(dropout)(conv_len2)

        pool1 = MaxPooling1D(pool_length=2, stride=2, border_mode='valid')(conv_len3)
        pool2 = MaxPooling1D(pool_length=2, stride=2, border_mode='valid')(conv_len2)

        # could try merge with sum here instead, and no dense layer
        pool = concatenate([pool1, pool2])
        pool = TimeDistributed(Dense(conv_nfilter, activation="tanh"))(pool)

    if hierarchical:
        ln.debug("Inner LSTM input len is %s" % (pool._keras_shape[1]))
        seq_embedding = pool
        for hierarchical_lstm_dim in hierarchical_lstm_dims[:-1]:
            seq_embedding = LSTM(hierarchical_lstm_dim, return_sequences=True)(seq_embedding)
        seq_embedding = LSTM(hierarchical_lstm_dims[-1], return_sequences=False)(seq_embedding)

        embed_inner_model = Model(embed_inner_input, seq_embedding)
        seqs_embedded = TimeDistributed(embed_inner_model)(input1)
    else:
        embed_inner_model = Model(embed_inner_input, pool)
        seqs_embedded = embed_inner_model(input1)

    ln.debug("Will attend over %s time steps" % seqs_embedded._keras_shape[1])

    if query_maxlen is not None:
        input2 = Input(shape=(query_maxlen, ))
        inputs.append(input2)
        embed2 = Embedding(num_inputs, char_embedding_size)(input2)
        # conv_embed2 = Convolution1D(hidden_dim, 2, activation="relu", border_mode="same")(embed2)
        # pool2 = MaxPooling1D(pool_length=2, border_mode='valid')(conv_embed2)

        query_encoded = embed2
        for query_lstm_dim in query_lstm_dims[:-1]:
            query_encoded = LSTM(query_lstm_dim, return_sequences=True)(query_encoded)
        query_encoded = LSTM(query_lstm_dims[-1], return_sequences=False)(query_encoded)
        query_encoded = RepeatVector(seqs_embedded._keras_shape[1])(query_encoded)

        seqs_embedded = concatenate([seqs_embedded, query_encoded])

    if seq2seq_model == "attention":
        decoded = AttentionSeq2Seq(batch_input_shape=seqs_embedded._keras_shape,
                                   hidden_dim=hidden_dim,
                                   output_dim=hidden_dim,
                                   depth=(encoder_depth, decoder_depth),
                                   output_length=answer_maxlen,
                                   # TODO add dropout once it works
                                   )(seqs_embedded)
    elif seq2seq_model == "seq2seq":
        decoded = Seq2Seq(batch_input_shape=seqs_embedded._keras_shape,
                          hidden_dim=hidden_dim,
                          output_dim=hidden_dim,
                          depth=(encoder_depth, decoder_depth),
                          output_length=answer_maxlen,
                          peek=True)(seqs_embedded)
    else:
        decoded = SimpleSeq2Seq(batch_input_shape=seqs_embedded._keras_shape,
                                hidden_dim=hidden_dim,
                                output_dim=hidden_dim,
                                depth=(encoder_depth, decoder_depth),
                                output_length=answer_maxlen,
                                dropout=dropout)(seqs_embedded)

    pred = TimeDistributed(Dense(num_inputs, activation="softmax"))(decoded)

    model = Model(inputs=inputs, outputs=pred)
    return model
Y.append(word_model.wv[dialog[1]])
if i % 100 == 0:
    print i, "/", total_dialogs

X = np.array(X)
Y = np.array(Y)
num_ex, sequence_length, vec_size = X.shape
print X.shape

print u"Building the model."
# vec_size - dimensionality of a word vector
# sequence_length - sentence length
model = SimpleSeq2Seq(input_dim=vec_size,
                      hidden_dim=vec_size,
                      output_length=sequence_length,
                      output_dim=vec_size,
                      depth=2)
model.compile(loss='mse', optimizer='rmsprop', metrics=['acc'])
model.summary()
plot_model(model, to_file='model.png')

print u"Setting up tensorboard."
tensorboard = TensorBoard(log_dir='data/generated/tensorboard',
                          histogram_freq=0,
                          write_graph=True,
                          write_images=False)
tensorboard.set_model(model)

total_iterations = 100000
import seq2seq
from seq2seq.models import SimpleSeq2Seq

model = SimpleSeq2Seq(input_dim=5, hidden_dim=10, output_length=8, output_dim=8)
model.compile(loss='mse', optimizer='rmsprop')
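# --- Usage sketch (not from the original sources) ---
# A minimal, hedged example of exercising the model above on synthetic data.
# The sample count, the 12 input timesteps, and the single epoch are assumptions;
# only input_dim=5, output_length=8, and output_dim=8 come from the snippet above.
# SimpleSeq2Seq consumes 3D input (samples, timesteps, input_dim) and
# produces 3D output (samples, output_length, output_dim).
import numpy as np

x = np.random.random((64, 12, 5))   # (samples, input timesteps, input_dim)
y = np.random.random((64, 8, 8))    # (samples, output_length, output_dim)

model.fit(x, y, epochs=1)           # older Keras versions use nb_epoch=1 instead
preds = model.predict(x)            # expected shape: (64, 8, 8)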
time_steps = 10
epsilon_std = 1

padded_idxes = pad_sequences(idxes, maxlen=time_steps, dtype='int32',
                             padding='pre', truncating='pre', value=0.)
x_train, x_test = train_test_split(padded_idxes, test_size=0.2, random_state=42)
x_train
# x_train, x_test = np.zeros((50, time_steps)), np.zeros((10, time_steps))

optimizer, loss, x, y = SimpleSeq2Seq(output_dim=embedding_size, output_length=time_steps,
                                      latent_dim=latent_dim, batch_size=batch_size,
                                      epsilon_std=epsilon_std, lookup_matrix=lookup_matrix,
                                      input_shape=(time_steps, embedding_size))

with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    epochs = 0
    print_freq = 15
    while epochs < 30:
        print "Epochs:" + str(epochs)
        i = 0
        while i < len(x_train):
            samples = x_train[i:i + batch_size, :]
            i += batch_size
            _, loss_val = sess.run([optimizer, loss], feed_dict={
def define_seq2seq_model(self):
    self.model = SimpleSeq2Seq(input_dim=self.n_in,
                               hidden_dim=512,
                               output_length=self.n_out,
                               output_dim=self.n_out,
                               depth=3)
    self.model.compile(loss=self.loss_function, optimizer=self.optimizer)
y_val.append(answers[sample][:, :])

(X_train, y_train) = np.asarray(questions), np.asarray(answers)
X_val, y_val = np.asarray(X_val), np.asarray(y_val)

# print('Pad sequences (samples x time)')
# X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
# X_val = sequence.pad_sequences(X_val, maxlen=maxlen)
# print('X_train shape:', X_train.shape)
# print('X_val shape:', X_val.shape)
# print('y_val shape:', y_val.shape)

print('Building model...')
# model = Sequential()
model = SimpleSeq2Seq(input_dim=26, hidden_dim=50, output_length=8, output_dim=26)
model.compile(loss='mse', optimizer='adam')
# model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy', 'perplexity'])

lookup = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
          'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']

for i in xrange(10):
    print('Training...')
    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=128,
              validation_data=(X_val, y_val),
def build_model(self, input_dim, hidden_dim, output_length, output_dim, depth):
    # model = SimpleSeq2Seq(input_dim=, hidden_dim=10, output_length=8, output_dim=20, depth=(4, 5))
    self.model = SimpleSeq2Seq(input_dim=self.input_dim,
                               hidden_dim=self.hidden_dim,
                               output_length=self.output_length,
                               output_dim=self.output_dim,
                               depth=self.depth)
    self.model.compile(loss='mse', optimizer='rmsprop')
# np.savetxt(f_handle, iids, delimiter=",", fmt="%s")
# f_handle.close()

iids = np.genfromtxt(
    '/home/simon/Documents/LiClipse Workspace/MoviePrediction/iids.csv',
    delimiter=',', dtype=int)

X, Y, ds = getnewdata(iids[2], config)
Xt, Yt, ds = getnewdata(
    iids[randint(config.getTestIndex()[0], config.getTestIndex()[1])], config)

model = Sequential()
s2s = SimpleSeq2Seq(batch_input_shape=(1, X.shape[1], X.shape[2]),
                    hidden_dim=1,
                    output_length=config.getWindows()[1],
                    output_dim=1)
model.add(s2s)
model.add(Dense(40, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(2999, activation='softmax'))

opt = optimizers.Nadam()
model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

for rounds in range(0, config.getEpochs()):
    for index in range(config.getTrainIndex()[0], config.getTrainIndex()[1]):
        print "Index ", index, " Round ", rounds
        X, Y, ds = getnewdata(iids[index], config)
        r = randint(config.getTestIndex()[0], config.getTestIndex()[1])
import seq2seq
from seq2seq.models import SimpleSeq2Seq

model = SimpleSeq2Seq(input_dim=5, hidden_dim=10, output_length=8, output_dim=20, depth=(4, 5))
model.compile(loss='mse', optimizer='rmsprop')
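# Note (assumption, based on the seq2seq library's README): passing depth as a
# tuple builds a deeper model, here 4 stacked recurrent layers in the encoder
# and 5 in the decoder. Below is a hedged sketch with made-up data shapes;
# only input_dim=5, output_length=8, and output_dim=20 come from the snippet above.
import numpy as np

x = np.random.random((32, 7, 5))     # (samples, input timesteps, input_dim)
y = np.random.random((32, 8, 20))    # (samples, output_length, output_dim)

model.fit(x, y, epochs=1)            # nb_epoch=1 on older Keras versions
assert model.predict(x).shape == (32, 8, 20)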
y_valid = valid["y"] test = handler.getTest(padded=True, one_hot=True) X_test = test["X"] y_test = test["y"] print "dimensiones del dataset:" print "entrenamiento: " + str(X_train.shape) print "validacion cruzada " + str(X_valid.shape) print "testeo: " + str(X_test.shape) print "-------------------------------" model = SimpleSeq2Seq(input_dim=X_train.shape[2], hidden_dim=512, output_length=handler.max_output_length, output_dim=y_train.shape[2], depth=(1, 1)) model.compile(loss='mse', optimizer='rmsprop', metrics=['accuracy']) print model.summary() model_dir = "model/vmodel3.json" weights_dir = "model/vmodel3.h5" print "Salvando modelo en: " + model_dir model_json = model.to_json() with open(model_dir, "w") as json_file: json_file.write(model_json) print "arquitectura salvada!" ITER = 500 print "Entrenando modelo: ..."