# assumes `time` and `my_callbacks` are imported and that compute_recall_ks is available
def Train(model, batch_size, n_epochs,
          train_c_new, train_r_new, train_l_new,
          dev_c_new, dev_r_new, dev_l_new,
          test_c_new, test_r_new):
    # test_c_new / test_r_new are needed for the per-epoch test-set evaluation below
    print("Now training the model...")
    histories = my_callbacks.Histories()
    start_time = time.time()

    bestAcc = 0.0
    patience = 0
    print("\tbatch_size={}, nb_epoch={}".format(batch_size, n_epochs))

    for ep in range(1, n_epochs):
        # one epoch per fit call; histories records the validation score for it
        model.fit([train_c_new, train_r_new], train_l_new,
                  batch_size=batch_size, epochs=1,
                  callbacks=[histories],
                  validation_data=([dev_c_new, dev_r_new], dev_l_new),
                  verbose=1)
        curAcc = histories.accs[0]
        if curAcc >= bestAcc:
            bestAcc = curAcc
            patience = 0
        else:
            patience = patience + 1

        # classify the test set
        y_pred = model.predict([test_c_new, test_r_new])
        print("Performance on test set after Epoch: " + str(ep) + "...!")
        recall_k = compute_recall_ks(y_pred[:, 0])

        # stop training once patience exceeds 10
        if patience > 10:
            print("Early stopping at epoch: " + str(ep))
            break

    print("---Training finished, model training time takes %s seconds ---" % (time.time() - start_time))
    return model
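# The training loop above relies on compute_recall_ks to report recall@k on the
# test predictions. A minimal sketch of that helper is given below, assuming the
# standard UDC evaluation layout: scores arrive in consecutive groups of 10
# candidates with the ground-truth response at position 0 of each group. The
# helper actually shipped with this repository may differ in detail.
import numpy as np

def compute_recall_ks(probas, group_size=10, ks=(1, 2, 5)):
    recalls = {}
    n_groups = len(probas) // group_size
    for k in ks:
        n_correct = 0
        for i in range(n_groups):
            group = np.asarray(probas[i * group_size:(i + 1) * group_size])
            top_k = np.argsort(-group)[:k]   # indices of the k highest scores
            if 0 in top_k:                   # ground truth sits at index 0
                n_correct += 1
        recalls[k] = n_correct / float(n_groups)
        print("recall@{} in 1-in-{}: {:.4f}".format(k, group_size, recalls[k]))
    return recalls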
neg_branch = shared_cnn(neg_input)

# the output is the pair of latent coherence scores (positive, negative)
concatenated = concatenate([pos_branch, neg_branch], axis=-1, name="coherence_out")

final_model = Model([pos_input, neg_input], concatenated)
final_model.compile(loss={'coherence_out': ranking_loss}, optimizer=opts.learn_alg)

# set up the callback
histories = my_callbacks.Histories()

print(shared_cnn.summary())
# print(final_model.summary())
print("------------------------------------------------")

# build the model name
if opts.f_list != "":
    ff = opts.f_list
    m_type = "Ext.CNN."
else:
    ff = "None"
    m_type = "CNN."

model_name = opts.model_dir + m_type + str(opts.p_num) + "_" + str(opts.dropout_ratio) + "_" + str(opts.emb_size) + "_" + str(opts.maxlen) + "_" \
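# The compile call above refers to a custom ranking_loss over the concatenated
# "coherence_out" pair (positive score, negative score). A plausible minimal
# sketch is the pairwise hinge objective below; the margin value and the exact
# form used in this repository are assumptions.
from keras import backend as K

def ranking_loss(y_true, y_pred):
    # y_pred holds the two branch scores side by side: [:, 0] = positive
    # document, [:, 1] = negative document; y_true is unused because the
    # objective only compares the pair.
    pos_score = y_pred[:, 0]
    neg_score = y_pred[:, 1]
    margin = 1.0
    return K.mean(K.maximum(0.0, margin - pos_score + neg_score))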
def main():
    parser = argparse.ArgumentParser()
    parser.register('type', 'bool', str2bool)
    parser.add_argument('--emb_dim', type=int, default=300, help='Embeddings dimension')
    parser.add_argument('--hidden_size', type=int, default=300, help='Hidden size')
    parser.add_argument('--batch_size', type=int, default=256, help='Batch size')
    parser.add_argument('--n_epochs', type=int, default=50, help='Num epochs')
    parser.add_argument('--lr', type=float, default=0.001, help='Learning rate')
    parser.add_argument('--optimizer', type=str, default='adam', help='Optimizer')
    parser.add_argument('--n_recurrent_layers', type=int, default=1, help='Num recurrent layers')
    parser.add_argument('--input_dir', type=str, default='./dataset/', help='Input dir')
    parser.add_argument('--save_model', type='bool', default=True, help='Whether to save the model')
    parser.add_argument('--model_fname', type=str, default='model/dual_encoder_lstm_classifier.h5', help='Model filename')
    parser.add_argument('--embedding_file', type=str, default='embeddings/glove.840B.300d.txt', help='Embedding filename')
    parser.add_argument('--seed', type=int, default=1337, help='Random seed')

    args = parser.parse_args()
    print('Model args: ', args)
    np.random.seed(args.seed)

    print("Starting...")

    # first, build an index mapping words in the embeddings set
    # to their embedding vector
    print('Now indexing word vectors...')
    embeddings_index = {}
    f = open(args.embedding_file, 'r')
    for line in f:
        values = line.split()
        word = values[0]
        try:
            coefs = np.asarray(values[1:], dtype='float32')
        except ValueError:
            continue
        embeddings_index[word] = coefs
    f.close()

    MAX_SEQUENCE_LENGTH, MAX_NB_WORDS, word_index = pickle.load(
        open(args.input_dir + 'params.pkl', 'rb'))
    print("MAX_SEQUENCE_LENGTH: {}".format(MAX_SEQUENCE_LENGTH))
    print("MAX_NB_WORDS: {}".format(MAX_NB_WORDS))

    print("Now loading embedding matrix...")
    num_words = min(MAX_NB_WORDS, len(word_index)) + 1
    embedding_matrix = np.zeros((num_words, args.emb_dim))
    for word, i in word_index.items():
        if i >= MAX_NB_WORDS:
            continue
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # words not found in the embedding index will be all-zeros
            embedding_matrix[i] = embedding_vector

    print("Now building dual encoder lstm model...")
    # define the shared LSTM encoder
    encoder = Sequential()
    encoder.add(
        Embedding(output_dim=args.emb_dim,
                  input_dim=MAX_NB_WORDS,
                  input_length=MAX_SEQUENCE_LENGTH,
                  weights=[embedding_matrix],
                  mask_zero=True,
                  trainable=True))
    encoder.add(LSTM(units=args.hidden_size))

    context_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    response_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')

    # encode the context and the response with the shared encoder
    context_branch = encoder(context_input)
    response_branch = encoder(response_input)

    # element-wise product of the two encodings
    # (multiply replaces the removed Keras 1 merge(..., mode='mul'))
    concatenated = multiply([context_branch, response_branch])
    out = Dense(1, activation="sigmoid")(concatenated)

    dual_encoder = Model([context_input, response_input], out)
    dual_encoder.compile(loss='binary_crossentropy', optimizer=args.optimizer)

    print(encoder.summary())
    print(dual_encoder.summary())

    print("Now loading UDC data...")
    train_c, train_r, train_l = pickle.load(
        open(args.input_dir + 'train.pkl', 'rb'))
    test_c, test_r, test_l = pickle.load(
        open(args.input_dir + 'test.pkl', 'rb'))
    dev_c, dev_r, dev_l = pickle.load(open(args.input_dir + 'dev.pkl', 'rb'))
    print('Found %s training samples.' % len(train_c))
    print('Found %s dev samples.' % len(dev_c))
    print('Found %s test samples.' % len(test_c))

    print("Now training the model...")
    histories = my_callbacks.Histories()
    bestAcc = 0.0
    patience = 0
    print("\tbatch_size={}, nb_epoch={}".format(args.batch_size, args.n_epochs))

    for ep in range(1, args.n_epochs):
        dual_encoder.fit([train_c, train_r], train_l,
                         batch_size=args.batch_size, epochs=1,
                         callbacks=[histories],
                         validation_data=([dev_c, dev_r], dev_l),
                         verbose=1)
        curAcc = histories.accs[0]
        if curAcc >= bestAcc:
            bestAcc = curAcc
            patience = 0
        else:
            patience = patience + 1

        # classify the test set
        y_pred = dual_encoder.predict([test_c, test_r])
        print("Performance on test set after Epoch: " + str(ep) + "...!")
        recall_k = compute_recall_ks(y_pred[:, 0])

        # stop training once patience exceeds 10
        if patience > 10:
            print("Early stopping at epoch: " + str(ep))
            break

    if args.save_model:
        print("Now saving the model... at {}".format(args.model_fname))
        dual_encoder.save(args.model_fname)
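# Every training loop in these scripts reads histories.accs[0] right after a
# single-epoch fit call, so my_callbacks.Histories has to record one validation
# score per epoch. Below is one possible shape for that callback (an assumption,
# not the repository's exact implementation); it scores the validation split
# itself because the models are compiled without an accuracy metric.
import numpy as np
import keras

class Histories(keras.callbacks.Callback):

    def on_train_begin(self, logs=None):
        self.accs = []
        self.losses = []

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        self.losses.append(logs.get('val_loss'))
        # in older Keras, self.validation_data = [input_1, ..., input_n, labels, sample_weights]
        *val_inputs, val_labels, _ = self.validation_data
        y_prob = self.model.predict(val_inputs, verbose=0)
        acc = np.mean((y_prob[:, 0] > 0.5) == np.asarray(val_labels).ravel())
        self.accs.append(acc)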
def main():
    parser = argparse.ArgumentParser()
    parser.register('type', 'bool', str2bool)
    parser.add_argument('--emb_dim', type=int, default=300, help='Embeddings dimension')
    parser.add_argument('--hidden_size', type=int, default=300, help='Hidden size')
    parser.add_argument('--batch_size', type=int, default=256, help='Batch size')
    parser.add_argument('--n_epochs', type=int, default=50, help='Num epochs')
    parser.add_argument('--lr', type=float, default=0.001, help='Learning rate')
    parser.add_argument('--optimizer', type=str, default='adam', help='Optimizer')
    parser.add_argument('--dropout_ratio', type=float, default=0.5, help='Ratio of cells to drop out')
    parser.add_argument('--n_recurrent_layers', type=int, default=1, help='Num recurrent layers')
    parser.add_argument('--w_size', type=int, default=5, help='Window size (length of neighborhood in words)')
    parser.add_argument('--pool_length', type=int, default=6, help='Length for max pooling')
    parser.add_argument('--nb_filter', type=int, default=150, help='Number of filters applied in the convolution over words')
    parser.add_argument('--input_dir', type=str, default='./dataset/', help='Input dir')
    parser.add_argument('--save_model', type='bool', default=True, help='Whether to save the model')
    parser.add_argument('--model_fname', type=str, default='model/dual_encoder_lstm_classifier.h5', help='Model filename')
    parser.add_argument('--embedding_file', type=str, default='embeddings/glove.840B.300d.txt', help='Embedding filename')
    parser.add_argument('--seed', type=int, default=1337, help='Random seed')

    args = parser.parse_args()
    print('Model args: ', args)
    np.random.seed(args.seed)

    if not os.path.exists(args.model_fname):
        print("No pre-trained model...")
        print("Start building model...")

        # first, build an index mapping words in the embeddings set
        # to their embedding vector
        print('Indexing word vectors.')
        embeddings_index = {}
        f = open(args.embedding_file, 'r')
        for line in f:
            values = line.split()
            word = values[0]
            try:
                coefs = np.asarray(values[1:], dtype='float32')
            except ValueError:
                continue
            embeddings_index[word] = coefs
        f.close()

        print("Now loading UDC data...")
        train_c, train_r, train_l = pickle.load(
            open(args.input_dir + 'train.pkl', 'rb'))
        test_c, test_r, test_l = pickle.load(
            open(args.input_dir + 'test.pkl', 'rb'))
        dev_c, dev_r, dev_l = pickle.load(
            open(args.input_dir + 'dev.pkl', 'rb'))
        print('Found %s training samples.' % len(train_c))
        print('Found %s dev samples.' % len(dev_c))
        print('Found %s test samples.' % len(test_c))

        MAX_SEQUENCE_LENGTH, MAX_NB_WORDS, word_index = pickle.load(
            open(args.input_dir + 'params.pkl', 'rb'))
        print("MAX_SEQUENCE_LENGTH: {}".format(MAX_SEQUENCE_LENGTH))
        print("MAX_NB_WORDS: {}".format(MAX_NB_WORDS))

        vocabs, E = init_vocab(args.emb_dim)

        print("Now loading entity-grid data...")
        train_egrid, train_label = load_and_numberize_egrids_with_labels(
            filelist="./dataset/list.train", maxlen=MAX_SEQUENCE_LENGTH,
            w_size=args.w_size, vocabs=vocabs)
        dev_egrid, dev_label = load_and_numberize_egrids_with_labels(
            filelist="./dataset/list.dev", maxlen=MAX_SEQUENCE_LENGTH,
            w_size=args.w_size, vocabs=vocabs)
        test_egrid, test_label = load_and_numberize_egrids_with_labels(
            filelist="./dataset/list.test", maxlen=MAX_SEQUENCE_LENGTH,
            w_size=args.w_size, vocabs=vocabs)
        # (optional sanity check: train_label should match list(train_l),
        #  and similarly for dev/test)

        print("Now loading embedding matrix...")
        num_words = min(MAX_NB_WORDS, len(word_index)) + 1
        embedding_matrix = np.zeros((num_words, args.emb_dim))
        for word, i in word_index.items():
            if i >= MAX_NB_WORDS:
                continue
            embedding_vector = embeddings_index.get(word)
            if embedding_vector is not None:
                # words not found in the embedding index will be all-zeros
                embedding_matrix[i] = embedding_vector

        print("Now building the dual encoder lstm model...")
        encoder = Sequential()
        encoder.add(
            Embedding(output_dim=args.emb_dim,
                      input_dim=MAX_NB_WORDS,
                      input_length=MAX_SEQUENCE_LENGTH,
                      weights=[embedding_matrix],
                      mask_zero=True,
                      trainable=True))
        encoder.add(LSTM(units=args.hidden_size))

        print("Now building the CNN egrid model...")
        sent_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
        x = Embedding(output_dim=args.emb_dim,
                      weights=[E],
                      input_dim=len(vocabs),
                      input_length=MAX_SEQUENCE_LENGTH,
                      trainable=True)(sent_input)
        # Conv1D/MaxPooling1D use the Keras 2 argument names
        # (filters/kernel_size/padding/strides and pool_size)
        x = Conv1D(filters=args.nb_filter,
                   kernel_size=args.w_size,
                   padding='valid',
                   activation='relu',
                   strides=1)(x)
        x = MaxPooling1D(pool_size=args.pool_length)(x)
        x = Dropout(args.dropout_ratio)(x)
        x = Flatten()(x)
        x = Dropout(args.dropout_ratio)(x)
        x = Dense(300)(x)
        cnn = Model(sent_input, x)

        context_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
        response_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
        egrid_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')

        # the context and response branches share the LSTM encoder;
        # the entity-grid branch goes through the CNN
        context_branch = encoder(context_input)
        response_branch = encoder(response_input)
        context_branch_cnn = cnn(egrid_input)

        # multiply/concatenate replace the removed Keras 1 merge(..., mode=...)
        concatenated = multiply([context_branch, response_branch])
        concatenated = concatenate([concatenated, context_branch_cnn])
        out = Dense(1, activation="sigmoid")(concatenated)

        model = Model([context_input, response_input, egrid_input], out)
        model.compile(loss='binary_crossentropy', optimizer=args.optimizer)
        print(model.summary())

        print("Now training the model...")
        histories = my_callbacks.Histories()
        bestAcc = 0.0
        patience = 0
        print("\tbatch_size={}, nb_epoch={}".format(args.batch_size, args.n_epochs))

        for ep in range(1, args.n_epochs):
            model.fit([train_c, train_r, train_egrid], train_l,
                      batch_size=args.batch_size, epochs=1,
                      callbacks=[histories],
                      validation_data=([dev_c, dev_r, dev_egrid], dev_l),
                      verbose=1)
            curAcc = histories.accs[0]
            if curAcc >= bestAcc:
                bestAcc = curAcc
                patience = 0
            else:
                patience = patience + 1

            # classify the test set
            y_pred = model.predict([test_c, test_r, test_egrid])
            print("Performance on test set after Epoch: " + str(ep) + "...!")
            recall_k = compute_recall_ks(y_pred[:, 0])

            # stop training once patience exceeds 10
            if patience > 10:
                print("Early stopping at epoch: " + str(ep))
                break

        if args.save_model:
            print("Now saving the model... at {}".format(args.model_fname))
            model.save(args.model_fname)
    else:
        print("Found pre-trained model...")
        model = K_load_model(args.model_fname)

    return model
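# The entity-grid branch above depends on init_vocab to provide the grid-token
# vocabulary and its initial embedding matrix E. A hypothetical minimal version
# is sketched below, assuming the usual entity-grid roles (S = subject,
# O = object, X = other, "-" = absent) plus a padding token; the real helper
# may use a richer token set or a different initialisation.
import numpy as np

def init_vocab(emb_dim, seed=1337):
    tokens = ["<pad>", "S", "O", "X", "-"]
    vocabs = {tok: idx for idx, tok in enumerate(tokens)}
    rng = np.random.RandomState(seed)
    E = 0.01 * rng.standard_normal((len(vocabs), emb_dim))
    E[vocabs["<pad>"]] = 0.0   # keep the padding row at zero
    return vocabs, E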
def main():
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    parser = argparse.ArgumentParser()
    parser.register('type', 'bool', str2bool)
    parser.add_argument('--emb_dim', type=int, default=300, help='Embeddings dimension')
    parser.add_argument('--emb_trainable', type='bool', default=True, help='Whether to fine-tune embeddings')
    parser.add_argument('--hidden_size', type=int, default=300, help='Hidden size')
    parser.add_argument('--hidden_size_lstm', type=int, default=200, help='Hidden size of the word-level LSTM')
    parser.add_argument('--batch_size', type=int, default=256, help='Batch size')
    parser.add_argument('--n_epochs', type=int, default=50, help='Num epochs')
    parser.add_argument('--lr', type=float, default=0.001, help='Learning rate')
    parser.add_argument('--optimizer', type=str, default='adam', help='Optimizer')
    parser.add_argument('--n_recurrent_layers', type=int, default=1, help='Num recurrent layers')
    parser.add_argument('--input_dir', type=str, default='./dataset/', help='Input dir')
    parser.add_argument('--save_model', type='bool', default=True, help='Whether to save the model')
    parser.add_argument('--model_fname', type=str, default='model/model.h5', help='Model filename')
    parser.add_argument('--embedding_file', type=str, default='embeddings/embeddings.vec', help='Embedding filename')
    parser.add_argument('--seed', type=int, default=1337, help='Random seed')

    args = parser.parse_args()
    print('Model args: ', args)
    np.random.seed(args.seed)

    print("Starting...")

    # first, build an index mapping words in the embeddings set
    # to their embedding vector
    print('Now indexing word vectors...')
    embeddings_index = {}
    f = open(args.embedding_file, 'r')
    for line in f:
        values = line.split()
        word = values[0]
        try:
            coefs = np.asarray(values[1:], dtype='float32')
        except ValueError:
            continue
        embeddings_index[word] = coefs
    f.close()

    MAX_SEQUENCE_LENGTH, MAX_NB_WORDS, word_index = pickle.load(
        open(args.input_dir + 'params.pkl', 'rb'))
    print("MAX_SEQUENCE_LENGTH: {}".format(MAX_SEQUENCE_LENGTH))
    print("MAX_NB_WORDS: {}".format(MAX_NB_WORDS))

    print("Now loading embedding matrix...")
    num_words = min(MAX_NB_WORDS, len(word_index))
    embedding_matrix = np.zeros((num_words, args.emb_dim))
    for word, i in word_index.items():
        if i >= MAX_NB_WORDS:
            continue
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector

    print("Now building dual encoder lstm model...")
    # define the LSTM encoder; it returns both the final state and the
    # embedded sequence so that word-level similarity can be computed
    encoder_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    embedding = Embedding(output_dim=args.emb_dim,
                          input_dim=MAX_NB_WORDS,
                          input_length=MAX_SEQUENCE_LENGTH,
                          weights=[embedding_matrix],
                          mask_zero=True,
                          trainable=args.emb_trainable)
    embedded_input = embedding(encoder_input)
    output = LSTM(units=args.hidden_size)(embedded_input)
    encoder = Model(encoder_input, [output, embedded_input])
    print(encoder.summary())

    context_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    response_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')

    # encode the context and the response
    context_branch, context_embed = encoder(context_input)
    response_branch, response_embed = encoder(response_input)

    # compute the sequence-level similarity vector
    S = keras.layers.multiply([context_branch, response_branch])
    # compute the word-level similarity matrix
    embed_mul = keras.layers.dot([context_embed, response_embed], axes=2)
    # transform the word-level similarity matrix into a vector
    W = LSTM(units=200)(embed_mul)

    # concatenate the word- and sequence-level similarity vectors
    concatenated = keras.layers.concatenate([S, W])
    out = Dense(1, activation="sigmoid")(concatenated)

    model = Model([context_input, response_input], out)
    model.compile(loss='binary_crossentropy', optimizer=args.optimizer)
    print(model.summary())

    print("Now loading data...")
    train_c, train_r, train_l = pickle.load(
        open(args.input_dir + 'train.pkl', 'rb'))
    test_c, test_r, test_l = pickle.load(
        open(args.input_dir + 'test.pkl', 'rb'))
    dev_c, dev_r, dev_l = pickle.load(open(args.input_dir + 'dev.pkl', 'rb'))
    print('Found %s training samples.' % len(train_c))
    print('Found %s dev samples.' % len(dev_c))
    print('Found %s test samples.' % len(test_c))

    print("Now training the model...")
    histories = my_callbacks.Histories()
    bestAcc = 0.0
    patience = 0
    print("\tbatch_size={}, nb_epoch={}".format(args.batch_size, args.n_epochs))

    for ep in range(1, args.n_epochs):
        model.fit([train_c, train_r], train_l,
                  batch_size=args.batch_size, epochs=1,
                  callbacks=[histories],
                  validation_data=([dev_c, dev_r], dev_l),
                  verbose=1)
        curAcc = histories.accs[0]
        if curAcc >= bestAcc:
            bestAcc = curAcc
            patience = 0
            # keep a checkpoint of the best model so far
            if args.save_model:
                print("Now saving the model... at {}".format(args.model_fname))
                model.save(args.model_fname)
        else:
            patience = patience + 1

        # stop training once patience exceeds 5
        if patience > 5:
            print("Early stopping at epoch: " + str(ep))
            break
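# Usage note: once the loop above has saved the best checkpoint, the classifier
# can be reloaded for inference with Keras' standard loader. The path mirrors
# the --model_fname default used in this script; no custom_objects are needed
# because the model only uses built-in layers and binary cross-entropy.
from keras.models import load_model

scorer = load_model('model/model.h5')
probas = scorer.predict([test_c, test_r])   # test_c / test_r as loaded from test.pkl
recall_k = compute_recall_ks(probas[:, 0])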