def predict(main_config, model_config, model, experiment_name):
    """Interactive prediction loop: restores the latest checkpoint of a trained
    model and scores pairs of sentences typed on the command line."""
    model_class = MODELS[model]
    model_dir = str(main_config['DATA']['model_dir'])

    vectorizer = DatasetVectorizer(model_dir)
    max_doc_len = vectorizer.max_sentence_len
    vocabulary_size = vectorizer.vocabulary_size

    model = model_class(max_doc_len, vocabulary_size, main_config, model_config)

    with tf.Session() as session:
        saver = tf.train.Saver()
        last_checkpoint = tf.train.latest_checkpoint(
            '{}/{}'.format(model_dir, experiment_name))
        saver.restore(session, last_checkpoint)
        while True:
            x1 = input('First sentence:')
            x2 = input('Second sentence:')
            x1_sen = vectorizer.vectorize(x1)
            x2_sen = vectorizer.vectorize(x2)
            feed_dict = {
                model.x1: x1_sen,
                model.x2: x2_sen,
                model.is_training: False,
            }
            prediction = session.run([model.temp_sim], feed_dict=feed_dict)
            print(prediction)
def predict(main_config, model_config, model):
    model_name = '{}_{}'.format(model, main_config['PARAMS']['embedding_size'])
    model = MODELS[model_name]
    model_dir = str(main_config['DATA']['model_dir'])

    vectorizer = DatasetVectorizer(model_dir)
    max_doc_len = vectorizer.max_sentence_len
    vocabulary_size = vectorizer.vocabulary_size

    model = model(max_doc_len, vocabulary_size, main_config, model_config)

    with tf.Session() as session:
        saver = tf.train.Saver()
        last_checkpoint = tf.train.latest_checkpoint(
            '{}/{}/model'.format(model_dir, model_name))
        saver.restore(session, last_checkpoint)
        while True:
            x = input('Text:')
            x_sen = vectorizer.vectorize(x)
            feed_dict = {model.x: x_sen}
            prediction = session.run([model.temp_sim], feed_dict=feed_dict)
            print(prediction)
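# Illustrative only: a minimal CLI entry-point sketch showing how the train/predict
# routines in this file might be wired together. The argparse flag names, choices,
# and defaults are assumptions and not part of the original project; init_config()
# is the project helper also used by the GUI code below.
import argparse


def main():
    parser = argparse.ArgumentParser(description='Siamese nets runner (hypothetical wrapper)')
    parser.add_argument('mode', choices=['train', 'predict'])
    parser.add_argument('model', help='key into the MODELS registry, e.g. multihead')
    parser.add_argument('--dataset', default='SNLI', help='dataset name (default is arbitrary)')
    parser.add_argument('--experiment-name', default='default_run')
    args = parser.parse_args()

    main_config = init_config()               # project helper, assumed available
    model_config = init_config(args.model)    # per-model config, as in load_model() below

    if args.mode == 'train':
        train(main_config, model_config, args.model, args.experiment_name, args.dataset)
    else:
        predict(main_config, model_config, args.model, args.experiment_name)


if __name__ == '__main__':
    main()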
def train(main_config, model_config, model_name, experiment_name, dataset_name):
    """Trains a model on the given dataset, periodically logging train/dev accuracy,
    saving checkpoints, and evaluating on the test split at the end."""
    main_cfg = MainConfig(main_config)
    model_class = MODELS[model_name]
    dataset = dataset_type.get_dataset(dataset_name)

    train_data = dataset.train_set_pairs()
    vectorizer = DatasetVectorizer(main_cfg.model_dir, raw_sentence_pairs=train_data)

    dataset_helper = Dataset(vectorizer, dataset, main_cfg.batch_size)

    max_sentence_len = vectorizer.max_sentence_len
    vocabulary_size = vectorizer.vocabulary_size

    # Fixed mini-batch of training data used only for monitoring train accuracy.
    train_mini_sen1, train_mini_sen2, train_mini_labels = dataset_helper.pick_train_mini_batch()
    train_mini_labels = train_mini_labels.reshape(-1, 1)

    test_sentence1, test_sentence2 = dataset_helper.test_instances()
    test_labels = dataset_helper.test_labels()
    test_labels = test_labels.reshape(-1, 1)

    num_batches = dataset_helper.num_batches

    model = model_class(max_sentence_len, vocabulary_size, main_config, model_config)

    model_saver = ModelSaver(main_cfg.model_dir, experiment_name, main_cfg.checkpoints_to_keep)

    config = tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=main_cfg.log_device_placement,
    )

    with tf.Session(config=config) as session:
        global_step = 0
        init = tf.global_variables_initializer()
        session.run(init)

        log_saver = LogSaver(main_cfg.logs_path, experiment_name, dataset_name, session.graph)
        model_evaluator = ModelEvaluator(model, session)

        metrics = {'acc': 0.0}
        time_per_epoch = []

        log('Training model for {} epochs'.format(main_cfg.num_epochs))
        for epoch in tqdm(range(main_cfg.num_epochs), desc='Epochs'):
            start_time = time.time()

            train_sentence1, train_sentence2 = dataset_helper.train_instances(shuffle=True)
            train_labels = dataset_helper.train_labels()

            train_batch_helper = BatchHelper(
                train_sentence1, train_sentence2, train_labels, main_cfg.batch_size)

            # Small eval set for measuring dev accuracy.
            dev_sentence1, dev_sentence2, dev_labels = dataset_helper.dev_instances()
            dev_labels = dev_labels.reshape(-1, 1)

            tqdm_iter = tqdm(range(num_batches), total=num_batches,
                             desc='Batches', leave=False, postfix=metrics)
            for batch in tqdm_iter:
                global_step += 1
                sentence1_batch, sentence2_batch, labels_batch = train_batch_helper.next(batch)
                feed_dict_train = {
                    model.x1: sentence1_batch,
                    model.x2: sentence2_batch,
                    model.is_training: True,
                    model.labels: labels_batch,
                }
                loss, _ = session.run([model.loss, model.opt], feed_dict=feed_dict_train)

                if batch % main_cfg.eval_every == 0:
                    feed_dict_train = {
                        model.x1: train_mini_sen1,
                        model.x2: train_mini_sen2,
                        model.is_training: False,
                        model.labels: train_mini_labels,
                    }
                    train_accuracy, train_summary = session.run(
                        [model.accuracy, model.summary_op], feed_dict=feed_dict_train)
                    log_saver.log_train(train_summary, global_step)

                    feed_dict_dev = {
                        model.x1: dev_sentence1,
                        model.x2: dev_sentence2,
                        model.is_training: False,
                        model.labels: dev_labels,
                    }
                    dev_accuracy, dev_summary = session.run(
                        [model.accuracy, model.summary_op], feed_dict=feed_dict_dev)
                    log_saver.log_dev(dev_summary, global_step)

                    tqdm_iter.set_postfix(
                        dev_acc='{:.2f}'.format(float(dev_accuracy)),
                        train_acc='{:.2f}'.format(float(train_accuracy)),
                        loss='{:.2f}'.format(float(loss)),
                        epoch=epoch)

                if global_step % main_cfg.save_every == 0:
                    model_saver.save(session, global_step=global_step)

            model_evaluator.evaluate_dev(dev_sentence1, dev_sentence2, dev_labels)

            end_time = time.time()
            total_time = timer(start_time, end_time)
            time_per_epoch.append(total_time)

        model_saver.save(session, global_step=global_step)

        model_evaluator.evaluate_test(test_sentence1, test_sentence2, test_labels)
        model_evaluator.save_evaluation(
            '{}/{}'.format(main_cfg.model_dir, experiment_name),
            time_per_epoch[-1],
            dataset)
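# The `timer` helper called in the epoch loop above is not shown in this file.
# Below is a plausible sketch (an assumption, not the project's actual implementation)
# that formats the elapsed wall-clock time between two time.time() readings as
# hours:minutes:seconds, which is then stored in time_per_epoch.
def timer(start, end):
    hours, rem = divmod(end - start, 3600)
    minutes, seconds = divmod(rem, 60)
    return '{:0>2}:{:0>2}:{:05.2f}'.format(int(hours), int(minutes), seconds)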
class MultiheadSiameseNetGuiDemo:
    """Tkinter demo: type two sentences, pick a trained model from the model
    directory, and display the predicted similarity (optionally with attention
    weight visualizations for multihead models)."""

    def __init__(self, master):
        self.frame = master
        self.frame.title('Multihead Siamese Nets')

        sample1 = StringVar(master, value=SAMPLE_SENTENCE1)
        sample2 = StringVar(master, value=SAMPLE_SENTENCE2)

        # Input entries and control widgets.
        self.first_sentence_entry = Entry(
            self.frame,
            width=50,
            font="Helvetica {}".format(GUI_FONT_SIZE),
            textvariable=sample1)
        self.second_sentence_entry = Entry(
            self.frame,
            width=50,
            font="Helvetica {}".format(GUI_FONT_SIZE),
            textvariable=sample2)
        self.predictButton = Button(self.frame,
                                    text='Predict',
                                    font="Helvetica {}".format(GUI_FONT_SIZE),
                                    command=self.predict)
        self.clearButton = Button(self.frame,
                                  text='Clear',
                                  command=self.clear,
                                  font="Helvetica {}".format(GUI_FONT_SIZE))
        self.resultLabel = Label(self.frame,
                                 text='Result',
                                 font="Helvetica {}".format(GUI_FONT_SIZE))
        self.first_sentence_label = Label(
            self.frame,
            text='Sentence 1',
            font="Helvetica {}".format(GUI_FONT_SIZE))
        self.second_sentence_label = Label(
            self.frame,
            text='Sentence 2',
            font="Helvetica {}".format(GUI_FONT_SIZE))

        self.main_config = init_config()
        self.model_dir = str(self.main_config['DATA']['model_dir'])
        model_dirs = [os.path.basename(x[0]) for x in os.walk(self.model_dir)]

        self.visualize_attentions = IntVar()
        self.visualize_attentions_checkbox = Checkbutton(
            master,
            text="Visualize attention weights",
            font="Helvetica {}".format(int(GUI_FONT_SIZE / 2)),
            variable=self.visualize_attentions,
            onvalue=1,
            offvalue=0)

        variable = StringVar(master)
        variable.set('Choose a model...')
        self.model_type = OptionMenu(master,
                                     variable,
                                     *model_dirs,
                                     command=self.load_model)
        self.model_type.configure(font=('Helvetica', GUI_FONT_SIZE))

        # Grid layout.
        self.first_sentence_entry.grid(row=0, column=1, columnspan=4)
        self.first_sentence_label.grid(row=0, column=0, sticky=E)
        self.second_sentence_entry.grid(row=1, column=1, columnspan=4)
        self.second_sentence_label.grid(row=1, column=0, sticky=E)
        self.model_type.grid(row=2, column=1, sticky=W + E, ipady=1)
        self.predictButton.grid(row=2, column=2, sticky=W + E, ipady=1)
        self.clearButton.grid(row=2, column=3, sticky=W + E, ipady=1)
        self.resultLabel.grid(row=2, column=4, sticky=W + E, ipady=1)

        # Model / vectorizer state; the model itself is loaded lazily via load_model().
        self.vectorizer = DatasetVectorizer(self.model_dir)
        self.max_doc_len = self.vectorizer.max_sentence_len
        self.vocabulary_size = self.vectorizer.vocabulary_size
        self.session = tf.Session()
        self.model = None

    def predict(self):
        """Scores the two entered sentences with the loaded model and colors the
        result green (similar) or red (dissimilar) around a 0.5 threshold."""
        if self.model:
            sentence1 = self.first_sentence_entry.get()
            sentence2 = self.second_sentence_entry.get()
            x1_sen = self.vectorizer.vectorize(sentence1)
            x2_sen = self.vectorizer.vectorize(sentence2)
            feed_dict = {
                self.model.x1: x1_sen,
                self.model.x2: x2_sen,
                self.model.is_training: False
            }
            if self.visualize_attentions.get():
                prediction, at1, at2 = np.squeeze(
                    self.session.run([
                        self.model.predictions,
                        self.model.debug_vars['attentions_x1'],
                        self.model.debug_vars['attentions_x2']
                    ], feed_dict=feed_dict))
                visualization.visualize_attention_weights(at1, sentence1)
                visualization.visualize_attention_weights(at2, sentence2)
            else:
                prediction = np.squeeze(
                    self.session.run(self.model.predictions, feed_dict=feed_dict))
            prediction = np.round(prediction, 2)
            self.resultLabel['text'] = prediction
            if prediction < 0.5:
                self.resultLabel.configure(foreground="red")
            else:
                self.resultLabel.configure(foreground="green")
        else:
            messagebox.showerror("Error", "Choose a model to make a prediction.")

    def clear(self):
        self.first_sentence_entry.delete(0, 'end')
        self.second_sentence_entry.delete(0, 'end')
        self.resultLabel['text'] = ''

    def load_model(self, model_name):
        """Rebuilds the graph for the selected model and restores its latest checkpoint."""
        if 'multihead' in model_name:
            self.visualize_attentions_checkbox.grid(row=2, column=0, sticky=W + E, ipady=1)
        else:
            self.visualize_attentions_checkbox.grid_forget()
        tf.reset_default_graph()
        self.session = tf.Session()
        logger.info('Loading model: %s', model_name)
        model = MODELS[model_name.split('_')[0]]
        model_config = init_config(model_name.split('_')[0])
        self.model = model(self.max_doc_len, self.vocabulary_size,
                           self.main_config, model_config)
        saver = tf.train.Saver()
        last_checkpoint = tf.train.latest_checkpoint('{}/{}'.format(
            self.model_dir, model_name))
        saver.restore(self.session, last_checkpoint)
        logger.info('Loaded model from: %s', last_checkpoint)
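# A minimal way to launch the Tkinter demo above. This is standard Tkinter
# boilerplate shown as a usage sketch; it may not match the project's actual
# entry-point script.
from tkinter import Tk

if __name__ == '__main__':
    root = Tk()
    MultiheadSiameseNetGuiDemo(root)
    root.mainloop()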
def train(main_config, model_config, model_name, dataset_name):
    """Variant of the training routine that additionally dumps an attention/energy
    map (model.e) for the test set to attention_maps.pdf after training."""
    main_cfg = MainConfig(main_config)
    model_class = MODELS[model_name]
    dataset = DATASETS[dataset_name]()
    model_name = '{}_{}'.format(model_name, main_config['PARAMS']['embedding_size'])

    train_data = dataset.train_set_pairs()
    vectorizer = DatasetVectorizer(train_data, main_cfg.model_dir)

    dataset_helper = Dataset(vectorizer, dataset, main_cfg.batch_size)

    max_sentence_len = vectorizer.max_sentence_len
    vocabulary_size = vectorizer.vocabulary_size

    # Fixed mini-batch of training data used only for monitoring train accuracy.
    train_mini_sen1, train_mini_sen2, train_mini_labels = dataset_helper.pick_train_mini_batch()
    train_mini_labels = train_mini_labels.reshape(-1, 1)

    test_sentence1, test_sentence2 = dataset_helper.test_instances()
    test_labels = dataset_helper.test_labels()
    test_labels = test_labels.reshape(-1, 1)

    num_batches = dataset_helper.num_batches

    model = model_class(max_sentence_len, vocabulary_size, main_config, model_config)

    model_saver = ModelSaver(main_cfg.model_dir, model_name, main_cfg.checkpoints_to_keep)

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=main_cfg.log_device_placement)

    with tf.Session(config=config) as session:
        global_step = 0
        init = tf.global_variables_initializer()
        session.run(init)

        log_saver = LogSaver(main_cfg.logs_path, model_name, dataset_name, session.graph)
        model_evaluator = ModelEvaluator(model, session)

        metrics = {'acc': 0.0}
        time_per_epoch = []
        for epoch in tqdm(range(main_cfg.num_epochs), desc='Epochs'):
            start_time = time.time()

            train_sentence1, train_sentence2 = dataset_helper.train_instances(shuffle=True)
            train_labels = dataset_helper.train_labels()

            train_batch_helper = BatchHelper(train_sentence1, train_sentence2,
                                             train_labels, main_cfg.batch_size)

            # Small eval set for measuring dev accuracy.
            dev_sentence1, dev_sentence2, dev_labels = dataset_helper.dev_instances()
            dev_labels = dev_labels.reshape(-1, 1)

            tqdm_iter = tqdm(range(num_batches), total=num_batches,
                             desc="Batches", leave=False, postfix=metrics)
            for batch in tqdm_iter:
                global_step += 1
                sentence1_batch, sentence2_batch, labels_batch = train_batch_helper.next(batch)
                feed_dict_train = {
                    model.x1: sentence1_batch,
                    model.x2: sentence2_batch,
                    model.is_training: True,
                    model.labels: labels_batch
                }
                loss, _ = session.run([model.loss, model.opt], feed_dict=feed_dict_train)

                if batch % main_cfg.eval_every == 0:
                    feed_dict_train = {
                        model.x1: train_mini_sen1,
                        model.x2: train_mini_sen2,
                        model.is_training: False,
                        model.labels: train_mini_labels
                    }
                    train_accuracy, train_summary = session.run(
                        [model.accuracy, model.summary_op], feed_dict=feed_dict_train)
                    log_saver.log_train(train_summary, global_step)

                    feed_dict_dev = {
                        model.x1: dev_sentence1,
                        model.x2: dev_sentence2,
                        model.is_training: False,
                        model.labels: dev_labels
                    }
                    dev_accuracy, dev_summary = session.run(
                        [model.accuracy, model.summary_op], feed_dict=feed_dict_dev)
                    log_saver.log_dev(dev_summary, global_step)

                    tqdm_iter.set_postfix(
                        dev_acc='{:.2f}'.format(float(dev_accuracy)),
                        train_acc='{:.2f}'.format(float(train_accuracy)),
                        loss='{:.2f}'.format(float(loss)),
                        epoch=epoch)

                if global_step % main_cfg.save_every == 0:
                    model_saver.save(session, global_step=global_step)

            model_evaluator.evaluate_dev(dev_sentence1, dev_sentence2, dev_labels)

            end_time = time.time()
            total_time = timer(start_time, end_time)
            time_per_epoch.append(total_time)

        model_saver.save(session, global_step=global_step)

        # Plot the model's attention/energy weights (model.e) for the test set.
        feed_dict_train = {
            model.x1: test_sentence1,
            model.x2: test_sentence2,
            model.is_training: False,
            model.labels: test_labels
        }
        # train_accuracy, train_summary, train_e = session.run(
        #     [model.accuracy, model.summary_op, model.e], feed_dict=feed_dict_train)
        train_e = session.run([model.e], feed_dict=feed_dict_train)

        plt.clf()
        f = plt.figure(figsize=(8, 8.5))
        ax = f.add_subplot(1, 1, 1)
        i = ax.imshow(train_e[0][0], interpolation='nearest', cmap='gray')
        cbaxes = f.add_axes([0.2, 0, 0.6, 0.03])
        cbar = f.colorbar(i, cax=cbaxes, orientation='horizontal')
        cbar.ax.set_xlabel('Probability', labelpad=2)
        f.savefig('attention_maps.pdf', bbox_inches='tight')
        f.show()
        plt.show()

        feed_dict_test = {
            model.x1: test_sentence1,
            model.x2: test_sentence2,
            model.is_training: False,
            model.labels: test_labels
        }
        test_accuracy, test_summary = session.run(
            [model.accuracy, model.summary_op], feed_dict=feed_dict_test)
        print('tst_acc: {:.2f} loss: {:.2f}'.format(test_accuracy, loss))

        model_evaluator.evaluate_test(test_sentence1, test_sentence2, test_labels)
        model_evaluator.save_evaluation(
            '{}/{}'.format(main_cfg.model_dir, model_name),
            time_per_epoch[-1],
            dataset)
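# For reference, a minimal sketch of what the BatchHelper used in both training
# loops could look like: it slices the already-shuffled training arrays into
# fixed-size mini-batches indexed by batch number. This is an assumption about
# its behavior (including the label reshape), not the project's actual code.
class BatchHelper:

    def __init__(self, sentence1, sentence2, labels, batch_size):
        self.sentence1 = sentence1
        self.sentence2 = sentence2
        self.labels = labels
        self.batch_size = batch_size

    def next(self, batch_idx):
        # Return the batch_idx-th slice of sentences and labels.
        start = batch_idx * self.batch_size
        end = start + self.batch_size
        return (self.sentence1[start:end],
                self.sentence2[start:end],
                self.labels[start:end].reshape(-1, 1))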