def train_max_epochs(self, args, train0, train1, dev0, dev1, vocab,
                     no_of_epochs, writer, time, save_epochs_flag=False,
                     save_batch_flag=False, save_batch=5):
    print("No of epochs: ", no_of_epochs)
    self.train()
    self.enc_optim = optim.AdamW(self.encoder.parameters(), lr=args.learning_rate,
                                 betas=(self.beta1, self.beta2))
    self.gen_optim = optim.AdamW(self.generator.parameters(), lr=args.learning_rate,
                                 betas=(self.beta1, self.beta2))
    self.discrim1_optim = optim.AdamW(self.discriminator1.parameters(), lr=args.learning_rate,
                                      betas=(self.beta1, self.beta2))
    self.discrim2_optim = optim.AdamW(self.discriminator2.parameters(), lr=args.learning_rate,
                                      betas=(self.beta1, self.beta2))

    Path(args.saves_path).mkdir(parents=True, exist_ok=True)
    saves_path = os.path.join(args.saves_path, utils.get_filename(args, time, "model"))
    Path(saves_path).mkdir(parents=True, exist_ok=True)

    flag = True
    with autograd.detect_anomaly():
        for epoch in range(no_of_epochs):
            random.shuffle(train0)
            random.shuffle(train1)
            batches0, batches1, _1, _2 = utils.get_batches(train0, train1, vocab.word2id,
                                                           args.batch_size, noisy=True)
            dev_batches0 = []
            dev_batches1 = []
            if self.args.dev:
                dev_batches0, dev_batches1, _, _ = utils.get_batches(dev0, dev1, vocab.word2id,
                                                                     args.batch_size, noisy=True)
            # batches0, batches1, _1, _2 = utils.get_batches_bpe(train0, train1, vocab.word2id,
            #                                                    args.batch_size, noisy=True)
            random.shuffle(batches0)
            random.shuffle(batches1)
            print("Epoch: ", epoch)
            self.logger.info("Epoch: " + str(epoch))
            train_flag = self(args, batches0, batches1, dev_batches0, dev_batches1, vocab,
                              no_of_epochs, epoch, writer, time, save_epochs_flag=False,
                              save_batch_flag=False, save_batch=5)
            if train_flag:
                break
def build_and_train_network(lstm_sizes, vocab_size, embed_size, epochs,
                            batch_size, learning_rate, keep_prob,
                            train_x, val_x, train_y, val_y):
    inputs_, labels_, keep_prob_ = model_inputs()
    embed = build_embedding_layer(inputs_, vocab_size, embed_size)
    initial_state, lstm_outputs, lstm_cell, final_state = build_lstm_layers(
        lstm_sizes, embed, keep_prob_, batch_size)
    print(lstm_outputs)
    print(labels_)
    print(learning_rate)
    predictions, loss, optimizer = build_cost_fn_and_opt(
        lstm_outputs, labels_, learning_rate)
    accuracy = build_accuracy(predictions, labels_)
    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        n_batches = len(train_x) // batch_size
        for e in range(epochs):
            state = sess.run(initial_state)
            train_acc = []
            for ii, (x, y) in enumerate(
                    utl.get_batches(train_x, train_y, batch_size), 1):
                feed = {
                    inputs_: x,
                    labels_: y[:, None],
                    keep_prob_: keep_prob,
                    initial_state: state
                }
                loss_, state, _, batch_acc = sess.run(
                    [loss, final_state, optimizer, accuracy], feed_dict=feed)
                train_acc.append(batch_acc)

                if (ii + 1) % n_batches == 0:
                    val_acc = []
                    val_state = sess.run(
                        lstm_cell.zero_state(batch_size, tf.float32))
                    for xx, yy in utl.get_batches(val_x, val_y, batch_size):
                        feed = {
                            inputs_: xx,
                            labels_: yy[:, None],
                            keep_prob_: 1,
                            initial_state: val_state
                        }
                        val_batch_acc, val_state = sess.run(
                            [accuracy, final_state], feed_dict=feed)
                        val_acc.append(val_batch_acc)
                    print(
                        "Epoch: {}/{}...".format(e + 1, epochs),
                        "Batch: {}/{}...".format(ii + 1, n_batches),
                        "Train Loss: {:.3f}...".format(loss_),
                        "Train Accuracy: {:.3f}...".format(np.mean(train_acc)),
                        "Val Accuracy: {:.3f}".format(np.mean(val_acc)))
        saver.save(sess, "checkpoints/sentiment.ckpt")
def load_data():
    #
    # NOTE: Loading and use of data structures is pretty f****d up here.
    # Some things require getting data from generators, others require NumPy arrays.
    # In the end we use both, and sometimes re-load the data from disk and/or re-transform
    # it more than once.
    #
    click.echo('Loading raw training data from %s...' % TRAIN_PATH)
    TRAIN_BATCHES = utils.get_batches(TRAIN_PATH, shuffle=False, batch_size=BATCH_SIZE)

    click.echo('Loading array from generator...')
    TRAIN_ARRAY = utils.get_data(TRAIN_PATH)
    click.echo('\tshape: %s' % (TRAIN_ARRAY.shape, ))
    click.echo()

    # TRAIN_DATA = os.path.join(MODEL_PATH, 'train_data.bc')
    # click.echo('Saving processed training data to %s...' % TRAIN_DATA)
    # utils.save_array(TRAIN_DATA, TRAIN_ARRAY)

    click.echo('Loading raw validation data from %s...' % VALID_PATH)
    VALID_BATCHES = utils.get_batches(VALID_PATH, shuffle=False, batch_size=BATCH_SIZE)

    click.echo('Loading array from generator...')
    VALID_ARRAY = utils.get_data(VALID_PATH)
    click.echo('\tshape: %s' % (VALID_ARRAY.shape, ))
    click.echo()

    return TRAIN_BATCHES, VALID_BATCHES, TRAIN_ARRAY, VALID_ARRAY
def train(model_name):
    """
    # TODO Docstring.
    """
    img_rows = 224
    img_cols = 224
    channel = 3
    batch_size = 64
    data_path = "../data/compcars/data/image/"
    imagenet_model_path = "../imagenet_models/"

    # Get images
    batches = utils.get_batches(
        data_path + 'train',
        gen=image.ImageDataGenerator(
            preprocessing_function=utils.vgg_preprocess,
            rotation_range=10,
            width_shift_range=0.1,
            height_shift_range=0.1,
            shear_range=0.15,
            zoom_range=0.1,
            channel_shift_range=10.,
            horizontal_flip=True),
        batch_size=batch_size)
    val_batches = utils.get_batches(
        data_path + 'valid',
        gen=image.ImageDataGenerator(
            preprocessing_function=utils.vgg_preprocess),
        batch_size=batch_size)

    # Create model
    if model_name == "inception_v1":
        model = googlenet_model(img_rows, img_cols, channel, batches.nb_class,
                                imagenet_model_path)
    elif model_name == "vgg19":
        model = vgg19_model(img_rows, img_cols, channel, batches.nb_class,
                            imagenet_model_path)

    # Train and save intermediate results
    history = model.fit_generator(batches,
                                  validation_data=val_batches,
                                  samples_per_epoch=batches.nb_sample,
                                  nb_val_samples=val_batches.nb_sample,
                                  nb_epoch=50)
    model.save_weights(model_name + '_50.h5')
    history = model.fit_generator(batches,
                                  validation_data=val_batches,
                                  samples_per_epoch=batches.nb_sample,
                                  nb_val_samples=val_batches.nb_sample,
                                  nb_epoch=50)
    model.save_weights(model_name + '_100.h5')
def run(self, data, n_epochs, batch_size):
    x_train, y_train, x_test, y_test = data
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for j in range(n_epochs):
            batches = get_batches(x_train, y_train, batch_size=batch_size)
            self.train_epoch(sess, batches, epoch=j)
            print('\tEVALUATION: ', end='')
            test_batches = get_batches(x_test, y_test, batch_size=batch_size, shuffle=False)
            self.evaluate(sess, test_batches)
def _init(self):
    logger.info("Initializing ...")
    self.entity2id, self.id2entity, self.entid2tags = utils.generate_entity_property_idx(
        self.entityPath)
    self.property2id, self.id2property, self.proid2tags = utils.generate_entity_property_idx(
        self.propertyPath)
    self.entid2tycid = utils.generate_entity_tyc_idx(
        self.tycWordsPath, self.entity2id)
    self.train2id = utils.generate_data_idx(self.trainPath, self.entity2id,
                                            self.property2id)
    self.train2id_set = set([' '.join(map(str, t)) for t in self.train2id])  # use for sampling
    self.conid2attrid = utils.generate_conceptid_to_attributesid(
        self.conceptAttrPath, self.entity2id, self.property2id, self.max_attr_size)
    self.conAttr2id, self.conAttr2id_set = utils.generate_concept_attributes_idx(
        self.conceptAttrPath, self.entity2id, self.property2id)
    self.dev2id = utils.generate_data_idx(self.devPath, self.entity2id,
                                          self.property2id)
    self.test2id = utils.generate_data_idx(self.testPath, self.entity2id,
                                           self.property2id)
    self.test_entity_candidate_ids = utils.read_sample_candidates(
        self.test_entity_candi_path, self.entity2id)
    self.test_attr_candidate_ids = utils.read_sample_candidates(
        self.test_attr_candi_path, self.property2id)
    self.sample_ent_cand_ids = utils.read_sample_candidates(
        self.sample_ent_candi_path, self.entity2id)
    self.sample_attr_cand_ids = utils.read_sample_candidates(
        self.sample_attr_candi_path, self.property2id)

    self.trainTotal = len(self.train2id)
    self.conceptAttrTotal = len(self.conid2attrid)
    self.devTotal = len(self.dev2id)
    self.testTotal = len(self.test2id)
    self.entityTotal = len(self.entity2id)
    self.propertyTotal = len(self.property2id)

    # tencent init
    if self.embeddingPath is not None:
        self.ent_embeddings = utils.load_embeddings(
            self.entity2id, self.embeddingPath, self.entityTotal, self.ent_size)
        self.rel_embeddings = utils.load_embeddings(
            self.property2id, self.embeddingPath, self.propertyTotal, self.rel_size)

    self.dev2id_batches = utils.get_batches(self.dev2id, self.batch_size)
    self.test2id_batches = utils.get_batches(self.test2id, self.batch_size)
def augment_data():
    gen = image.ImageDataGenerator(rotation_range=15,
                                   width_shift_range=0.1,
                                   height_shift_range=0.1,
                                   zoom_range=0.1,
                                   horizontal_flip=True)
    train_batches = utils.get_batches(TRAIN_PATH, gen, batch_size=BATCH_SIZE)
    # NB: We don't want to augment or shuffle the validation set
    valid_batches = utils.get_batches(VALID_PATH, shuffle=False, batch_size=BATCH_SIZE)
    return train_batches, valid_batches
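# --- Hedged usage sketch for augment_data() above. Assumes the module-level
# TRAIN_PATH, VALID_PATH, and BATCH_SIZE constants are defined and that a
# compiled Keras 1.x model named `model` already exists (both are assumptions,
# not part of the original snippet). The fit_generator keywords mirror the
# Keras 1.x calls used elsewhere in this collection.
# train_batches, valid_batches = augment_data()
# model.fit_generator(train_batches,
#                     samples_per_epoch=train_batches.nb_sample,
#                     nb_epoch=10,
#                     validation_data=valid_batches,
#                     nb_val_samples=valid_batches.nb_sample)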
def predict(questions, ranking_model, conf, res):
    # question_data, input_data, output_data = transform_batch_data_filtered(questions, conf, res, is_test=True)
    # print_batch_shapes(input_data)
    batches = get_batches(questions, 10)
    predicted_data_batches = []
    for question_batch in batches:
        question_data, input_data, output_data = transform_batch_data_filtered(
            question_batch, conf, res, is_test=True)
        predicted_data = BetterDict(input_data)
        predicted_data.update(question_data)
        print_batch_shapes(input_data)
        if conf.use_predicate_and_subject_outputs:
            predicted_answer_scores, predicted_subject_scores, predicted_predicate_scores = ranking_model.predict(
                input_data, batch_size=conf.batch_size)
            predicted_data.predicted_answer_scores = predicted_answer_scores
            predicted_data.predicted_subject_scores = predicted_subject_scores
            predicted_data.predicted_predicate_scores = predicted_predicate_scores
        else:
            predicted_answer_scores = ranking_model.predict(
                input_data, batch_size=conf.batch_size)
            predicted_data.predicted_answer_scores = predicted_answer_scores
        predicted_data_batches.append(predicted_data)
    return predicted_data_batches
def get_neighbourhood(self, checkpoint, x_test, temp=1.0, num_samples=10):
    answer_logits = []
    pred_sentences = []
    x_test_repeated = np.repeat(x_test, num_samples, axis=0)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, checkpoint)

        for batch_i, (input_batch, output_batch, sent_lengths) in enumerate(
                utils.get_batches(x_test_repeated, self.batch_size)):
            result = sess.run(self.inference_logits,
                              feed_dict={self.input_data: input_batch,
                                         self.source_sentence_length: sent_lengths,
                                         self.keep_prob: 1.0,
                                         self.word_dropout_keep_prob: 1.0,
                                         self.z_temperature: temp})
            answer_logits.extend(result)

        for idx, (actual, pred) in enumerate(zip(x_test_repeated, answer_logits)):
            pred_sentences.append(" ".join([self.idx_word[i] for i in pred
                                            if i not in [self.pad, self.eos]]))

        for j in range(len(pred_sentences)):
            if j % num_samples == 0:
                print('\nA: {}'.format(" ".join([self.idx_word[i] for i in x_test_repeated[j]
                                                 if i not in [self.pad, self.eos]])))
            print('G: {}'.format(pred_sentences[j]))
def test_network(model_dir, batch_size, test_x, test_y):
    inputs_, labels_, keep_prob_ = tl.model_inputs()
    embed = tl.build_embedding_layer(inputs_, vocab_size, embed_size)
    initial_state, lstm_outputs, lstm_cell, final_state = tl.build_lstm_layers(
        lstm_sizes, embed, keep_prob_, batch_size)
    predictions, loss, optimizer = tl.build_cost_fn_and_opt(
        lstm_outputs, labels_, learning_rate)
    accuracy = tl.build_accuracy(predictions, labels_)
    saver = tf.train.Saver()

    test_acc = []
    with tf.Session() as sess:
        saver.restore(sess, tf.train.latest_checkpoint(model_dir))
        test_state = sess.run(lstm_cell.zero_state(batch_size, tf.float32))
        for ii, (x, y) in enumerate(utl.get_batches(test_x, test_y, batch_size), 1):
            feed = {
                inputs_: x,
                labels_: y[:, None],
                keep_prob_: 1,
                initial_state: test_state
            }
            batch_acc, test_state = sess.run([accuracy, final_state], feed_dict=feed)
            test_acc.append(batch_acc)
        print("Test Accuracy: {:.3f}".format(np.mean(test_acc)))
def test1(sess, net):
    saver = tf.train.Saver()
    ckpt = tf.train.get_checkpoint_state('model/checkpoints/')
    if ckpt is None:
        print 'Please train your model first'
        return
    path = ckpt.model_checkpoint_path
    print 'loading pre-trained model from %s.....' % path
    saver.restore(sess, path)

    batch_x, batch_y, sequence_lengths, batch_x_ids = \
        utils.get_batches(all_sentences, all_tags, id_to_word_table, embeddings, 10)
    for index, (x, y, sequence_length_) in enumerate(zip(batch_x, batch_y, sequence_lengths)):
        tf_unary_scores, tf_transition_params = sess.run(
            [net.outputs, net.transition_params], feed_dict={net.x: [x]})
        tf_unary_scores_ = tf_unary_scores[0][:sequence_length_]
        # Compute the highest scoring sequence.
        viterbi_sequence, _ = tf.contrib.crf.viterbi_decode(
            tf_unary_scores_, tf_transition_params)
        utils.display_predict(batch_x_ids[index][:sequence_length_],
                              viterbi_sequence, id_to_word_table, id_to_tag_table)
def train(file_train, file_test, epochs, batch_size, input_dim_size,
          hidden_dim_size, output_dim_size, learning_rate):
    model = NeuralNetwork(input_dim_size, hidden_dim_size, output_dim_size)
    optimizer = utils.SGD(model, learning_rate)

    train_loader = load.DataLoader(file_train)
    train_features, train_labels = train_loader.load()

    for epoch in range(epochs):
        for minibatch_features, minibatch_labels in utils.get_batches(
                train_features, train_labels, batch_size, shuffle=True):
            # Forward pass
            minibatch_features_reshaped = minibatch_features.T
            z1, minibatch_predicted_labels = model.forward(
                minibatch_features_reshaped)

            # Backward pass
            grads = model.backward(x=minibatch_features,
                                   z1=z1,
                                   y=minibatch_predicted_labels,
                                   t=minibatch_labels)

            # Update the parameters
            optimizer.update(grads)

        # Inference and evaluation on the test data
        accuracy = inference_test.infer(file_test=file_test, model_trained=model)
        print('[{}] EPOCH {} Accuracy:{:.8f}'.format(datetime.datetime.today(),
                                                     epoch, accuracy))

    print('[{}] Finished Training'.format(datetime.datetime.today()))
    return model
def gradient_descent(data, word2Ind, N, V, C, num_iters, alpha=0.03):
    W1, W2, b1, b2 = initialize_model(N, V, random_seed=282)
    batch_size = 128
    iters = 0
    for x, y in get_batches(data, word2Ind, V, C, batch_size):
        z, h = forward_prop(x, W1, W2, b1, b2)
        yhat = softmax(z)
        cost = compute_cost(y, yhat, batch_size)
        if ((iters + 1) % 10 == 0):
            print("Iteration ", iters + 1, " cost: ", cost)
        grad_W1, grad_W2, grad_b1, grad_b2 = back_prop(x, yhat, y, h, W1, W2,
                                                       b1, b2, batch_size)
        W1 -= alpha * grad_W1
        W2 -= alpha * grad_W2
        b1 -= alpha * grad_b1
        b2 -= alpha * grad_b2
        iters += 1
        if (iters == num_iters):
            break
        if (iters % 100 == 0):
            alpha *= 0.66
    return W1, W2, b1, b2
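# --- Hedged usage sketch for gradient_descent() above. `data` (a tokenized
# corpus) and `word2Ind` (a word-to-index mapping) are placeholders assumed to
# come from earlier preprocessing; the hyperparameter values below are
# illustrative only, not the author's settings.
# V = len(word2Ind)                     # vocabulary size
# W1, W2, b1, b2 = gradient_descent(data, word2Ind, N=50, V=V, C=2,
#                                   num_iters=150, alpha=0.03)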
def train(self, data, labels, loss, epochs=10, batch_size=256,
          learning_rate=0.001, data_test=None, labels_test=None):
    for epoch in range(epochs):
        compteur = 0
        nbBatch = int(np.shape(data)[0] / batch_size)
        for i, (X, Y) in enumerate(
                utils.get_batches(data, labels, batch_size=batch_size)):
            compteur += np.shape(X)[-1]
            Y = utils.oneHotEncoding(Y, self.outputSize)
            self.inputLayer.forward(X)
            if abs(i / float(nbBatch) - 0.5) < 1 / float(nbBatch) or abs(
                    i / float(nbBatch) - 0.3) < 1 / float(nbBatch) or abs(
                    i / float(nbBatch) - 0.8) < 1 / float(nbBatch):
                valueLoss = loss.compute_loss(Y, self.outputLayer.activatedOutput)
                precision = loss_functions.precision(Y, self.outputLayer.activatedOutput)
                print(
                    f"TRAIN ({compteur}/{len(labels)}) Batch: {i+1} Epochs: {epoch+1} - Loss : {valueLoss} Precision : {precision}"
                )
            grad = loss.compute_grad(Y, self.outputLayer.activatedOutput)
            self.outputLayer.backward(grad)
            self.inputLayer.optimize(learning_rate)
            self.inputLayer.initGrad()
        if data_test is not None and labels_test is not None:
            self.evaluate(data_test, labels_test, loss)
def test_loss(mdl, sess, x_test):
    # get test batches
    batches = get_batches(x_test.shape[0], mdl.batch_size)
    batch_lengths = np.array([len(batch) for batch in batches])

    # initialize results
    loss = np.zeros(len(batches))
    mse = np.zeros(len(batches))

    # loop over the batches
    for j in range(len(batches)):
        # load a feed dictionary
        feed_dict = mdl.feed_dict_samples(x_test[batches[j]], False)

        # compute metrics
        loss[j], mse[j] = sess.run([mdl.loss_op, mdl.mse_op], feed_dict=feed_dict)

        # print update
        per = 100 * (j + 1) / len(batches)
        update_str = 'Evaluating test set performance. Percent Complete = {:.2f}%'.format(per)
        print('\r' + update_str, end='')

    # take the average, weighted by batch length
    loss = np.sum(loss * batch_lengths) / np.sum(batch_lengths)
    mse = np.sum(mse * batch_lengths) / np.sum(batch_lengths)

    # print results
    print('\nTest Loss = {:f}, Test MSE = {:f}'.format(loss, mse))

    return loss, mse
def predict_with_session(self, sess, data, users_test, movies_test):
    # much faster to reconstruct the whole table and make predictions from it
    data_reconstructed = np.zeros((self.number_of_users, self.number_of_movies))

    for users in get_batches(list(range(self.number_of_users)),
                             batch_size=1024, do_shuffle=False):
        user_ratings = [data[i, :] for i in users]
        ratings_reconstructed = sess.run(self.input_reconstructed,
                                         feed_dict={
                                             self.input_: user_ratings,
                                             self.input_pos: users,
                                             self.training: False
                                         })
        data_reconstructed[users] = ratings_reconstructed

    predictions = np.zeros(len(users_test))
    for i, (user, movie) in enumerate(zip(users_test, movies_test)):
        predictions[i] = data_reconstructed[user, movie]
    return predictions
def predict(self, checkpoint, x_test):
    pred_logits = []
    hypotheses_test = []
    references_test = []

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, checkpoint)

        for batch_i, (input_batch, output_batch, sent_lengths) in enumerate(
                utils.get_batches(x_test, self.batch_size)):
            result = sess.run(self.validate_sent,
                              feed_dict={self.input_data: input_batch,
                                         self.source_sentence_length: sent_lengths,
                                         self.keep_prob: 1.0})
            pred_logits.extend(result)

            for pred, actual in zip(result, output_batch):
                hypotheses_test.append(
                    word_tokenize(" ".join(
                        [self.idx_word[i] for i in pred
                         if i not in [self.pad, -1, self.eos]])))
                references_test.append([word_tokenize(
                    " ".join([self.idx_word[i] for i in actual
                              if i not in [self.pad, -1, self.eos]]))])

    bleu_scores = utils.calculate_bleu_scores(references_test, hypotheses_test)
    print('BLEU 1 to 4 : {}'.format(' | '.join(map(str, bleu_scores))))
    return pred_logits
def test(self, path, batch_size=8):
    batches_test = utils.get_batches(path, shuffle=False,
                                     batch_size=batch_size, class_mode=None)
    return batches_test, self.model.predict_generator(batches_test,
                                                      batches_test.nb_sample)
def get_latent_space(self, data, log_path=None):
    if log_path is None:
        log_path = DEFAULT_LOG_PATH

    with tf.Graph().as_default():
        with tf.Session() as sess:
            self.build_graph()

            saver = tf.train.Saver()
            saver.restore(sess, tf.train.latest_checkpoint(log_path))

            data_latent = np.zeros((data.shape[0], self.layers[-1]))
            for rows in get_batches(list(range(data.shape[0])),
                                    batch_size=64, do_shuffle=False):
                rows_features = [data[i, :] for i in rows]
                rows_latent = sess.run(self.intermediate_representation,
                                       feed_dict={
                                           self.input_: rows_features,
                                           self.training: False
                                       })
                data_latent[rows] = rows_latent

    return data_latent
def probe(parser, buckets, probe_idx):
    pr_h = []
    pr_H = []
    pr_M = []
    pr_HL = []
    pr_ML = []
    pr_lcoeff = []
    pr_rcoeff = []
    pr_lcoeff_rel = []
    pr_rcoeff_rel = []

    for mini_batch in utils.get_batches(buckets, parser, False):
        outputs, dims = parser.build_graph(mini_batch)
        seq_len, batch_size = dims
        h, H, M, HL, ML, arc_left_coeff, arc_right_coeff, rel_left_coeff, rel_right_coeff = \
            map(lambda t: t.npvalue(), outputs)
        for i in range(batch_size):
            pr_h.append(h[:, probe_idx, i])
            pr_H.append(H[:, probe_idx, i])
            pr_M.append(M[:, probe_idx, i])
            pr_HL.append(HL[:, probe_idx, i])
            pr_ML.append(ML[:, probe_idx, i])
            pr_lcoeff.append(arc_left_coeff[:, probe_idx, i])
            pr_rcoeff.append(arc_right_coeff[:, probe_idx, i])
            pr_lcoeff_rel.append(rel_left_coeff[:, probe_idx, i])
            pr_rcoeff_rel.append(rel_right_coeff[:, probe_idx, i])
        dy.renew_cg()

    return [
        pr_h, pr_H, pr_M, pr_HL, pr_ML, pr_lcoeff, pr_rcoeff,
        pr_lcoeff_rel, pr_rcoeff_rel
    ]
def get_batches_and_data(path, target_size=(224, 224)):
    batches = utils.get_batches(path, shuffle=False, batch_size=1,
                                class_mode=None, target_size=target_size)
    array = np.concatenate([batches.next() for i in range(batches.nb_sample)])
    batches.reset()
    return batches, array
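# --- Hedged usage sketch for get_batches_and_data() above. The 'data/valid'
# directory is an illustrative path, not one defined in the original snippet;
# the printed shape assumes TensorFlow (channels-last) image ordering.
# val_batches, val_array = get_batches_and_data('data/valid')
# print(val_array.shape)        # e.g. (nb_sample, 224, 224, 3) for the default target_size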
def reconstruct(self, data, log_path=None):
    if log_path is None:
        log_path = DEFAULT_LOG_PATH

    with tf.Graph().as_default():
        with tf.Session() as sess:
            self.build_graph()

            saver = tf.train.Saver()
            saver.restore(sess, tf.train.latest_checkpoint(log_path))

            data_reconstructed = np.zeros((data.shape[0], self.number_of_features))
            for rows in get_batches(list(range(data.shape[0])),
                                    batch_size=64, do_shuffle=False):
                rows_features = [data[i, :] for i in rows]
                rows_reconstructed = sess.run(self.input_reconstructed,
                                              feed_dict={
                                                  self.input_: rows_features,
                                                  self.training: False
                                              })
                data_reconstructed[rows] = rows_reconstructed

    return data_reconstructed
def get_loaded_models_and_batches(img_rows, img_cols, channel, batch_size,
                                  data_path, model_path, imagenet_model_path):
    """
    # TODO Docstring.
    """
    # Get training set image batches. TODO Might want to pickle these and not load every time.
    batches = utils.get_batches(
        data_path + 'train',
        gen=image.ImageDataGenerator(
            preprocessing_function=utils.vgg_preprocess),
        batch_size=batch_size,
        shuffle=False,
        class_mode=None)

    # Construct models.
    model_vgg19 = vgg19_model(img_rows, img_cols, channel, batches.nb_class,
                              imagenet_model_path)
    model_inception_v1 = googlenet_model(img_rows, img_cols, channel,
                                         batches.nb_class, imagenet_model_path)

    # Load weights.
    model_vgg19.load_weights(model_path + 'vgg19_model_60.h5')
    model_inception_v1.load_weights(model_path + 'inception_model_adam_100.h5')

    return batches, model_vgg19, model_inception_v1
def validate(self, sess, x_val):
    # Calculate BLEU on validation data
    hypotheses_val = []
    references_val = []

    for batch_i, (input_batch, output_batch, sent_lengths) in enumerate(
            utils.get_batches(x_val, self.batch_size)):
        pred_sentences, self._validate_logits = sess.run(
            [self.validate_sent, self.validate_logits],
            feed_dict={self.input_data: input_batch,
                       self.source_sentence_length: sent_lengths,
                       self.keep_prob: 1.0})

        for pred, actual in zip(pred_sentences, output_batch):
            hypotheses_val.append(
                word_tokenize(" ".join([self.idx_word[i] for i in pred
                                        if i not in [self.pad, -1, self.eos]])))
            references_val.append(
                [word_tokenize(" ".join([self.idx_word[i] for i in actual
                                         if i not in [self.pad, -1, self.eos]]))])

    self.val_pred = [" ".join(sent) for sent in hypotheses_val]
    self.val_ref = [" ".join(sent[0]) for sent in references_val]

    bleu_scores = utils.calculate_bleu_scores(references_val, hypotheses_val)
    self.epoch_bleu_score_val['1'].append(bleu_scores[0])
    self.epoch_bleu_score_val['2'].append(bleu_scores[1])
    self.epoch_bleu_score_val['3'].append(bleu_scores[2])
    self.epoch_bleu_score_val['4'].append(bleu_scores[3])
def run(self, sess, saver, writer, train, test, frequencies):
    #def run(self, sess, writer, train, test, frequencies):
    self.word_to_id = self.load_word_lookup(frequencies)

    for epoch in range(self.num_epochs):
        print '-----Epoch', epoch, '-----'
        batches = utils.get_batches(train, self.config.batch_size)
        start_time = datetime.datetime.now()

        for batch in tqdm(batches):
            embedded, labels = self.load_embeddings(batch[0], batch[1])
            loss = self.train_on_batch(sess, embedded, labels)
            summary = tf.summary.scalar('loss', loss)  # for logging
            self.writer.add_summary(summary, self.global_step)
            self.global_step += 1

            # training status
            if self.global_step % self.print_interval == 0:
                perplexity = math.exp(float(loss)) if loss < 300 else float('inf')
                tqdm.write("----- Step %d -- Loss %.2f -- Perplexity %.2f"
                           % (self.global_step, loss, perplexity))
                # run test periodically
                loss = self.predict_on_batch(sess, embedded)

            # save checkpoint
            #if self.global_step % self.save_interval == 0:
            #    self.save_session(sess)

        end_time = datetime.datetime.now()
        print 'Epoch finished in ', end_time - start_time, 'ms'
def optimize(self, X_train, y_train, X_val, y_val):
    num_data = len(X_train)
    eval_size = min(len(X_val), len(X_train))

    # Epoch level loop
    best_acc = 0.0
    loss_hist_length = 15
    recent_losses = [2.4] * loss_hist_length
    for cur_epoch in range(1, self.FLAGS.epochs + 1):
        # Randomly shuffle data and divide into batches
        X_batches, y_batches, num_batches = get_batches(
            X_train, y_train, self.FLAGS.batch_size)

        # Training loop
        for i, (X_batch, y_batch) in enumerate(zip(X_batches, y_batches), start=1):
            # Optimize using batch
            loss, norm, step = self.step(X_batch, y_batch)
            recent_losses[i % loss_hist_length] = loss
            avg_loss = float(np.mean(recent_losses))
            self.experiment.log_loss(avg_loss)  # Use smoothed loss
            self.experiment.log_metric("norm", norm)

            # Print relevant params
            num_complete = int(20 * (self.FLAGS.batch_size * i / num_data))
            sys.stdout.write('\r')
            sys.stdout.write(
                "EPOCH %d: (Batch Loss: %.3f | Avg Loss %.3f) [%-20s] (%d/%d) [norm: %.2f] [step: %d] [lr: %f]"
                % (cur_epoch, loss, avg_loss, '=' * num_complete,
                   min(i * self.FLAGS.batch_size, num_data), num_data, norm,
                   step, self.lr))
            sys.stdout.flush()
            self.experiment.log_step(int(step))
        sys.stdout.write('\n')

        # Evaluate accuracy
        train_acc = self.evaluate(X_train, y_train, eval_size)
        print("Training Accuracy: {}\ton {} examples".format(train_acc, eval_size))
        self.experiment.log_metric("train_acc", train_acc)
        val_acc = self.evaluate(X_val, y_val, eval_size)
        print("Validation Accuracy: {}\ton {} examples".format(val_acc, eval_size))
        self.experiment.log_accuracy(val_acc)

        # Early stopping
        if val_acc > best_acc:
            best_acc = val_acc
            save_path = self.saver.save(
                self.sess, os.path.join(self.save_dir, self.FLAGS.save_name))
            print("Model saved to: {}".format(save_path))

        # Decay learning rate
        if cur_epoch % 10 == 0:
            self.lr /= 2
def generate_answers(sess, model, dataset, rev_vocab):
    """
    Loop over the dev or test dataset and generate answers.

    Note: the output format must be answers[uuid] = "real answer".
    You must provide a string of words, not just a list or start/end indices.

    In the main() function the answers are dumped to a JSON file; evaluate.py takes
    that JSON along with the original JSON file and outputs F1 and EM scores.

    You must implement this function in order to submit to the leaderboard.

    :param sess: active TF session
    :param model: a built QASystem model
    :param rev_vocab: a list of vocabulary that maps index to actual words
    :return:
    """
    val_questions = [
        map(int, dataset["val_questions"][i].split())
        for i in xrange(len(dataset["val_questions"]))
    ]
    val_context = [
        map(int, dataset["val_context"][i].split())
        for i in xrange(len(dataset["val_context"]))
    ]

    questions_padded, questions_masked = pad_inputs(val_questions, FLAGS.max_question_size)
    context_padded, context_masked = pad_inputs(val_context, FLAGS.max_paragraph_size)

    answers = {}
    unified_dataset = zip(questions_padded, questions_masked, context_padded,
                          context_masked, dataset["val_question_uuids"])
    batches, num_batches = get_batches(unified_dataset, FLAGS.batch_size)

    for batch in tqdm(batches):
        val_questions, val_question_masks, val_paragraphs, val_paragraph_masks, uuids = zip(*batch)
        a_s, a_e = model.answer(sess, val_questions, val_paragraphs,
                                val_question_masks, val_paragraph_masks)
        for s, e, paragraph, uuid in zip(a_s, a_e, val_paragraphs, uuids):
            # The slice of the context paragraph that is our answer
            token_answer = paragraph[s:e + 1]
            sentence = [rev_vocab[token] for token in token_answer]
            our_answer = ' '.join(word for word in sentence)
            answers[uuid] = our_answer

    print("Generated {}/{} answers".format(len(answers), len(dataset["val_question_uuids"])))
    return answers
def train(self, x_train, x_val, labels_train, labels_val):
    print('[INFO] Training process started')

    learning_rate = self.initial_learning_rate
    iter_i = 0

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter(self.logs_dir, sess.graph)

        for epoch_i in range(1, self.epochs + 1):
            start_time = time.time()
            for batch_i, (input_batch, output_batch, labels_batch, sent_lengths) in enumerate(
                    utils.get_batches(x_train, labels_train, self.batch_size)):
                try:
                    iter_i += 1
                    _, _summary, self.train_xent, p_means = sess.run(
                        [
                            self.train_op, self.summary_op, self.xent_loss,
                            self.prior_means
                        ],
                        feed_dict={
                            self.input_data: input_batch,  # <batch x maxlen>
                            self.labels: labels_batch,
                            self.target_data: output_batch,  # <batch x maxlen>
                            self.lr: learning_rate,
                            self.source_sentence_length: sent_lengths,
                            self.target_sentence_length: sent_lengths,
                            self.keep_prob: self.dropout_keep_prob,
                            self.z_temperature: self.z_temp,
                            self.lambda_coeff: self.lambda_val,
                        })
                    writer.add_summary(_summary, iter_i)
                except Exception as e:
                    print(iter_i, e)
                    exit(1)

                # Reduce learning rate, but not below its minimum value
                learning_rate = np.max([
                    self.min_learning_rate,
                    learning_rate * self.learning_rate_decay
                ])

            time_consumption = time.time() - start_time
            self.monitor(x_val, labels_val, sess, epoch_i, time_consumption)

            if epoch_i % 10 == 0 and epoch_i != 0:
                self.generate_samples(sess, epoch_i)
def predict(self, context, model_input):
    predictions = []
    for batch in get_batches(list(model_input.text), size=2):
        input_ids = self.tokenizer(batch, truncation=True, padding=True,
                                   return_tensors="pt")["input_ids"]
        predictions.append(
            sigmoid(self.model(input_ids).logits.detach().numpy()))
    return np.concatenate(predictions)
def get_batches(self, batch_type, gen=image.ImageDataGenerator(),
                shuffle=False, batch_size=None):
    if batch_size is None:
        batch_size = self.batch_size
    # Pass the resolved batch_size (the original always passed self.batch_size,
    # silently ignoring the argument).
    return get_batches(os.path.join(self.path, batch_type), gen=gen,
                       shuffle=shuffle, batch_size=batch_size)
def train_NN(self, labels=None, noise_level=None, save_state=None,
             batch_size=100, epoch_size=10):
    # train NN and return accuracy and predictions
    if save_state == 'before':
        print('Training with naive parameters...')
    if save_state == 'after':
        print('Training with updated parameters...')

    sess = tf.Session()
    sess.run(tf.initialize_all_variables())

    batches_x, batches_y = ut.get_batches(self.train_x, labels,
                                          batch_size=batch_size,
                                          epoch_size=epoch_size)
    for i in range(len(batches_x)):
        batch_xs, batch_ys = batches_x[i], batches_y[i]
        sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

    prob_y = sess.run(y, feed_dict={x: self.train_x})
    acc = sess.run(accuracy, feed_dict={x: self.test_x, y_: self.test_y})

    save_path = saver.save(sess, self.model_path + noise_level + save_state + ".ckpt")
    print("Model saved in file: %s" % save_path)
    print("Accuracy was: %s" % acc)

    return acc, prob_y