def run_epoch(self, session, train_set, train_raw, epoch):
    """Train for one pass over ``train_set`` and return the epoch's F1 score.

    Batches are produced by ``minibatches``. A batch whose first component
    does not contain exactly ``self.config.batch_size`` items is skipped.
    For every remaining batch the model's step op is run, the batch is
    trained through ``self.train_single_batch``, the returned summary is
    written to ``self.train_writer`` and the embedding saver is invoked.

    Args:
        session: Session used to run the model ops.
        train_set: Sized training data passed to ``minibatches``.
        train_raw: Not used by this method; kept for interface compatibility.
        epoch: Not used by this method; kept for interface compatibility.

    Returns:
        The result of ``f1_score`` computed over every label/prediction pair
        gathered during the epoch.

    Raises:
        ZeroDivisionError: If ``train_set`` holds fewer than
            ``self.config.batch_size`` examples (zero full batches).
    """
    batch_size = self.config.batch_size
    total_batches = int(len(train_set) / batch_size)
    train_minibatches = minibatches(train_set, batch_size, self.config.dataset)

    training_loss = 0.0
    infer_label = []
    prediction_all = []
    for batch in tqdm(train_minibatches, desc="Trainings", total=total_batches):
        # Only full-sized batches are trained on.
        if len(batch[0]) != batch_size:
            continue

        session.run(self.model.inc_step)
        (loss, _accuracy, summary, global_step,
         infer_label_batch, prediction) = self.train_single_batch(session, *batch)

        # extend() appends in place without building a throwaway list.
        infer_label.extend(infer_label_batch)
        prediction_all.extend(prediction)

        self.train_writer.add_summary(summary, global_step)
        # Saved after every batch to the same path with a constant third
        # argument (presumably the saver's global_step suffix - confirm).
        self.saver_embed.save(session, './temp/embedding_test.ckpt', 1)
        training_loss += loss

    training_loss = training_loss / total_batches

    print(classification_report(infer_label, prediction_all,
                                target_names=["can't", 'can']))
    score = f1_score(y_true=infer_label, y_pred=prediction_all)
    print("Loss", training_loss)
    print("F1_score", score)
    return score
def validate(self, session, validation_set, validation_raw, epoch):
    """Evaluate the model on ``validation_set`` and return its F1 score.

    Batches are produced by ``minibatches``. A batch whose first component
    does not contain exactly ``self.config.batch_size`` items is skipped.
    Every remaining batch is evaluated through ``self.validate_single_batch``
    and its summary is written to ``self.valid_writer``.

    Args:
        session: Session used to run the model ops.
        validation_set: Sized validation data passed to ``minibatches``.
        validation_raw: Not used by this method; kept for interface
            compatibility.
        epoch: Not used by this method; kept for interface compatibility.

    Returns:
        The result of ``f1_score(..., average='weighted')`` computed over
        every label/prediction pair gathered during the pass.

    Raises:
        ZeroDivisionError: If ``validation_set`` holds fewer than
            ``self.config.batch_size`` examples (zero full batches).
    """
    batch_size = self.config.batch_size
    total_batches = int(len(validation_set) / batch_size)

    validation_loss = 0.0
    infer_label = []
    prediction_all = []
    validate_minibatches = minibatches(validation_set, batch_size,
                                       self.config.dataset)
    for batch in tqdm(validate_minibatches, total=total_batches, desc="Validate"):
        # Only full-sized batches are evaluated.
        if len(batch[0]) != batch_size:
            continue

        (loss, _accuracy, summary, global_step,
         infer_label_batch, prediction) = self.validate_single_batch(session, *batch)

        self.valid_writer.add_summary(summary, global_step)
        validation_loss += loss
        # extend() appends in place without building a throwaway list.
        infer_label.extend(infer_label_batch)
        prediction_all.extend(prediction)

    validation_loss = validation_loss / total_batches

    print(classification_report(infer_label, prediction_all,
                                target_names=["can't", 'can']))
    score = f1_score(y_true=infer_label, y_pred=prediction_all,
                     average='weighted')
    print("Loss", validation_loss)
    print("F1_score", score)
    return score
def validate(self, session, dataset):
    """Return the loss and accuracy of ``self.test`` averaged over all batches.

    ``dataset`` is split by ``minibatches`` into batches of
    ``self.config.batch_size``; each batch is passed to ``self.test`` and the
    returned loss and accuracy are summed. The sums are divided by the number
    of batches processed, so the results are per-batch means (this assumes
    ``self.test`` reports per-batch averages - confirm against its
    implementation).

    Args:
        session: Session used to run the model ops.
        dataset: Data passed to ``minibatches``.

    Returns:
        A ``(valid_loss, valid_accuracy)`` tuple. ``(0.0, 0.0)`` is returned
        when ``minibatches`` yields no batch.
    """
    valid_loss = 0.0
    valid_accuracy = 0.0
    batch_count = 0
    for batch in minibatches(dataset, self.config.batch_size):
        loss, accuracy, _prediction = self.test(session, batch)
        valid_loss += loss
        valid_accuracy += accuracy
        batch_count += 1

    if batch_count == 0:
        # Nothing was evaluated; avoid dividing by zero.
        return 0.0, 0.0

    # Average over the number of batches, not over the batch size.
    return valid_loss / batch_count, valid_accuracy / batch_count
def run_epoch(self, sess, train_examples, dev_set, train_examples_raw, dev_set_raw):
    """Train on every mini-batch once, then evaluate on the development set.

    Each batch from ``minibatches`` is fed to ``self.train_on_batch``; the
    returned loss updates the progress bar and, when ``self.report`` is
    truthy, is also logged through ``self.report.log_train_loss``. After the
    training pass ``self.evaluate`` is run on the development data and its
    token-level and entity-level results are logged.

    Args:
        sess: Session used to run the model ops.
        train_examples: Sized training data passed to ``minibatches``.
        dev_set: Development data passed to ``self.evaluate``.
        train_examples_raw: Not used by this method.
        dev_set_raw: Raw development data passed to ``self.evaluate``.

    Returns:
        The last element of the entity scores returned by ``self.evaluate``
        (logged as F1).
    """
    bar_target = 1 + int(len(train_examples) / self.config.batch_size)
    progress = Progbar(target=bar_target)
    batches = minibatches(train_examples, self.config.batch_size)
    for step, batch in enumerate(batches, 1):
        batch_loss = self.train_on_batch(sess, *batch)
        progress.update(step, [("train loss", batch_loss)])
        if self.report:
            self.report.log_train_loss(batch_loss)
    print("")

    logger.info("Evaluating on development data")
    token_cm, entity_scores = self.evaluate(sess, dev_set, dev_set_raw)
    logger.debug("Token-level confusion matrix:\n" + token_cm.as_table())
    logger.debug("Token-level scores:\n" + token_cm.summary())
    logger.info("Entity level P/R/F1: %.2f/%.2f/%.2f", *entity_scores)

    return entity_scores[-1]
def output(self, sess, inputs_raw, inputs=None):
    """Run the model over the examples and return consolidated predictions.

    When ``inputs`` is not supplied it is derived from ``inputs_raw`` by
    ``self.helper.vectorize`` followed by ``self.preprocess_sequence_data``.
    The inputs are then processed in unshuffled mini-batches and the
    per-batch predictions are concatenated in order.

    Args:
        sess: Session used to run the model ops.
        inputs_raw: Raw examples; also forwarded to
            ``self.consolidate_predictions``.
        inputs: Optional pre-featurized examples.

    Returns:
        Whatever ``self.consolidate_predictions(inputs_raw, inputs, preds)``
        returns.
    """
    if inputs is None:
        vectorized = self.helper.vectorize(inputs_raw)
        inputs = self.preprocess_sequence_data(vectorized)

    collected = []
    progress = Progbar(target=1 + int(len(inputs) / self.config.batch_size))
    batches = minibatches(inputs, self.config.batch_size, shuffle=False)
    for step, batch in enumerate(batches, 1):
        # Drop the second component of the batch before predicting
        # (presumably the labels - confirm against the batch layout).
        model_args = batch[:1] + batch[2:]
        collected.extend(self.predict_on_batch(sess, *model_args))
        progress.update(step, [])

    return self.consolidate_predictions(inputs_raw, inputs, collected)
def run_epoch(self, sess, train_set, valid_set, train_raw, valid_raw, epoch):
    """Train for one pass over ``train_set`` and return the summed batch loss.

    For every mini-batch the model is trained through
    ``self.train_on_batch``, the returned summary is written to
    ``self.writer`` at step ``epoch * batch_count + i``, the batch loss is
    printed, and ``self.evaluate_answer`` is run on both the training pair
    and the validation pair.

    Args:
        sess: Session used to run the model ops.
        train_set: Sized training data passed to ``minibatches``.
        valid_set: Validation data evaluated together with ``valid_raw``.
        train_raw: Raw training data evaluated together with ``train_set``.
        valid_raw: Raw validation data evaluated together with ``valid_set``.
        epoch: Index of the current epoch; offsets the summary step.

    Returns:
        A ``(global_loss, summary)`` tuple: the sum of all batch losses and
        the summary of the last batch (``None`` if there were no batches).
    """
    batch_size = self.config.batch_size
    # Number of batches per epoch, used to give each summary a unique step.
    batch_count = int(np.ceil(len(train_set) * 1.0 / batch_size))

    train_dataset = [train_set, train_raw]
    # Evaluate on the validation data, not on a second copy of the train data.
    valid_dataset = [valid_set, valid_raw]

    global_loss = 0
    summary = None  # stays None when minibatches yields nothing
    for i, batch in enumerate(minibatches(train_set, batch_size)):
        loss, summary = self.train_on_batch(sess, *batch)
        self.writer.add_summary(summary, epoch * batch_count + i)
        print("Loss-", loss)

        # NOTE(review): evaluation runs once per batch here; if per-epoch
        # evaluation was intended, move these two calls below the loop.
        self.evaluate_answer(sess, train_dataset)
        self.evaluate_answer(sess, valid_dataset)

        global_loss += loss

    return global_loss, summary
def predict_on_batch(self, session, dataset):
    """Return the result of ``self.answer`` for every mini-batch of ``dataset``.

    Args:
        session: Session forwarded to ``self.answer``.
        dataset: Data split by ``minibatches`` into batches of
            ``self.config.batch_size``.

    Returns:
        A list with one ``self.answer`` result per batch, in batch order.
    """
    return [
        self.answer(session, batch)
        for batch in minibatches(dataset, self.config.batch_size)
    ]