def run(cls, eval_flag=True):
    """
    @param eval_flag: if eval_flag is True, the evaluation output is given.
    """
    ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    wsd = cls()
    TRAIN_DIR = os.path.join(ROOT, 'train/')
    TEST_DIR = os.path.join(ROOT, 'test/')
    TEST_NAME_FILE = os.path.join(ROOT, 'test/namefile')
    RESULT_PATH = os.path.join(ROOT, 'result/%s_result.txt' % cls.__name__)
    cls.result_path = RESULT_PATH
    # clear the file RESULT_PATH
    with open(RESULT_PATH, 'wb') as f:
        pass
    result_obj = open(RESULT_PATH, 'ab')
    count = 0
    test_words = cls.get_words(TEST_NAME_FILE)
    for word in test_words:
        test_path = os.path.join(TEST_DIR, word)
        train_path = os.path.join(TRAIN_DIR, word)
        features_label = wsd.load_features(train_path)
        wsd.train(features_label)
        result = wsd.classify(test_path)
        wsd.dump_result(result, result_obj)
        count += 1
        print 'Finish %d of %d: %s' % (count, len(test_words), word)
    result_obj.close()
    print 'Write testing results to %s' % RESULT_PATH
    if eval_flag:
        answerfile = os.path.join(ROOT, 'result/test_answer')
        evaluate(RESULT_PATH, answerfile)
    return None
def main():
    HiddenNum = 15
    learningRate = 0.3
    itNum = 40
    bpt = bpTest()
    outfile = bpt.predict("../train/", "../test/", "../result/",
                          HiddenNum, itNum, learningRate)
    util.evaluate(outfile, "../result/test_answer")
def main():
    #trainfile = "../train/中医"
    #testfile = "../test/中医"
    #bpt = bpTest(trainfile, testfile)
    bpt = bpTest()
    outfile = bpt.predict("../train/", "../test/", "../result/")
    util.evaluate(outfile, "../result/test_answer")
def evaluate_batch(data_source, model, max_batches, eval_file):
    answer_dict = {}
    total_loss, step_cnt = 0, 0
    for step, data in enumerate(data_source):
        if step >= max_batches and max_batches > 0:
            break
        context_idxs = Variable(data['context_idxs'], volatile=True)
        ques_idxs = Variable(data['ques_idxs'], volatile=True)
        context_char_idxs = Variable(data['context_char_idxs'], volatile=True)
        ques_char_idxs = Variable(data['ques_char_idxs'], volatile=True)
        context_lens = Variable(data['context_lens'], volatile=True)
        y1 = Variable(data['y1'], volatile=True)
        y2 = Variable(data['y2'], volatile=True)
        graph = data['graph']
        graph_q = data['graph_q']
        elmo = data['elmo']
        elmo_q = data['elmo_q']
        if elmo is not None:
            elmo.volatile = True
            elmo_q.volatile = True
        logit1, logit2, yp1, yp2 = model(context_idxs, ques_idxs,
                                         context_char_idxs, ques_char_idxs,
                                         context_lens, return_yp=True,
                                         pre_att=graph, pre_att_q=graph_q,
                                         elmo=elmo, elmo_q=elmo_q)
        loss = criterion(logit1, y1) + criterion(logit2, y2)
        answer_dict_, _ = convert_tokens(eval_file, data['ids'],
                                         yp1.data.cpu().numpy().tolist(),
                                         yp2.data.cpu().numpy().tolist())
        answer_dict.update(answer_dict_)
        total_loss += loss.data[0]
        step_cnt += 1
    loss = total_loss / step_cnt
    metrics = evaluate(eval_file, answer_dict)
    metrics['loss'] = loss
    return metrics
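# Snippets like the one above use the pre-0.4 PyTorch API
# (Variable(..., volatile=True), loss.data[0]). A minimal sketch of the same
# loop shape on PyTorch >= 0.4; the model signature and the free-standing
# criterion are carried over from the snippet as assumptions.
import torch

def evaluate_batch_no_grad(data_source, model, criterion, max_batches):
    total_loss, step_cnt = 0.0, 0
    with torch.no_grad():  # replaces volatile=True: no autograd tracking
        for step, data in enumerate(data_source):
            if 0 < max_batches <= step:
                break
            logit1, logit2, yp1, yp2 = model(
                data['context_idxs'], data['ques_idxs'],
                data['context_char_idxs'], data['ques_char_idxs'],
                data['context_lens'], return_yp=True)
            loss = criterion(logit1, data['y1']) + criterion(logit2, data['y2'])
            total_loss += loss.item()  # .item() replaces .data[0]
            step_cnt += 1
    return total_loss / max(step_cnt, 1)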
def evaluate_batch(model, num_batches, eval_file, sess, data_type, handle, str_handle):
    answer_dict = {}
    losses = []
    for _ in tqdm(range(1, num_batches + 1)):
        qa_id, loss, yp1, yp2 = sess.run(
            [model.qa_id, model.loss, model.yp1, model.yp2],
            feed_dict={handle: str_handle})
        answer_dict_, _ = convert_tokens(eval_file, qa_id.tolist(),
                                         yp1.tolist(), yp2.tolist())
        answer_dict.update(answer_dict_)
        losses.append(loss)
    loss = np.mean(losses)
    metrics = evaluate(eval_file, answer_dict)
    metrics["loss"] = loss
    loss_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/loss".format(data_type), simple_value=metrics["loss"]),
    ])
    f1_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/f1".format(data_type), simple_value=metrics["f1"]),
    ])
    em_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/em".format(data_type), simple_value=metrics["exact_match"]),
    ])
    return metrics, [loss_sum, f1_sum, em_sum]
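# The (metrics, summaries) pair returned above is typically consumed by writing
# the summary protos to a tf.summary.FileWriter so loss/F1/EM show up in
# TensorBoard. A minimal sketch (TF1 API; the log directory and step are
# illustrative, not taken from the original code):
import tensorflow as tf

def log_eval_summaries(summaries, global_step, log_dir="log/event"):
    writer = tf.summary.FileWriter(log_dir)
    for summ in summaries:
        writer.add_summary(summ, global_step)
    writer.flush()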
def test(config):
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    print("Loading model...")
    test_batch = get_dataset(config.test_record_file,
                             get_record_parser(config, is_test=True),
                             config).make_one_shot_iterator()

    model = Model(config, test_batch, word_mat, char_mat, trainable=False)
    graph_handler = GraphHandler(config, model)

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        #saver = tf.train.Saver()
        graph_handler.initialize(sess)
        #saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        losses = []
        answer_dict = {}
        remapped_dict = {}
        ensember_dict = {}
        for step in tqdm(range(total // config.batch_size + 1)):
            start_logits, stop_logits, qa_id, loss, yp1, yp2 = sess.run([
                model.start_logits, model.stop_logits, model.qa_id,
                model.loss, model.yp1, model.yp2
            ])
            answer_dict_, remapped_dict_ = convert_tokens(
                eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)
            losses.append(loss)
            # keep the raw logits per example for later ensembling
            start_logits = start_logits.tolist()
            stop_logits = stop_logits.tolist()
            for id, start, stop in zip(qa_id, start_logits, stop_logits):
                ensember_dict[str(id)] = {'yp1': start, 'yp2': stop}
        loss = np.mean(losses)
        metrics = evaluate(eval_file, answer_dict)
        answer_path = config.answer_file + "_" + str(config.load_step)
        with open(answer_path, "w") as fh:
            json.dump(remapped_dict, fh)
        print("Exact Match: {}, F1: {}".format(metrics['exact_match'],
                                               metrics['f1']))
        ensember_dict['loss'] = loss
        ensember_dict['exact_match'] = metrics['exact_match']
        ensember_dict['f1'] = metrics['f1']
        file_name = config.model_name + '_' + config.run_id + '.pklz'
        save_path = os.path.join(config.result_path, file_name)
        with gzip.open(save_path, 'wb', compresslevel=3) as fh:
            pickle.dump(ensember_dict, fh)
def test(config, dataset="test"):
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    if dataset == "test":
        test_eval_file = config.test_eval_file
        test_meta = config.test_meta
        test_record_file = config.test_record_file
    elif dataset == "addsent":
        test_eval_file = config.addsent_eval_file
        test_meta = config.addsent_meta
        test_record_file = config.addsent_record_file
    elif dataset == "addonesent":
        test_eval_file = config.addonesent_eval_file
        test_meta = config.addonesent_meta
        test_record_file = config.addonesent_record_file
    with open(test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    print("Loading model...")
    test_batch = get_dataset(test_record_file,
                             get_record_parser(config, is_test=True),
                             config).make_one_shot_iterator()

    model = Model(config, test_batch, word_mat, char_mat, trainable=False)

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        losses = []
        answer_dict = {}
        remapped_dict = {}
        for step in tqdm(range(total // config.batch_size + 1)):
            qa_id, loss, yp1, yp2 = sess.run(
                [model.qa_id, model.loss, model.yp1, model.yp2])
            answer_dict_, remapped_dict_ = convert_tokens(
                eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)
            losses.append(loss)
        loss = np.mean(losses)
        metrics = evaluate(eval_file, answer_dict)
        with open(config.answer_file, "w") as fh:
            json.dump(remapped_dict, fh)
        print("Exact Match: {}, F1: {}".format(metrics['exact_match'],
                                               metrics['f1']))
def evaluate_batch(data_source, model, max_batches, eval_file, config):
    answer_dict = {}
    sp_dict = {}
    total_loss, step_cnt = 0, 0
    iter = data_source
    for step, data in enumerate(iter):
        if step >= max_batches and max_batches > 0:
            break
        context_idxs = Variable(data['context_idxs'], volatile=True)
        ques_idxs = Variable(data['ques_idxs'], volatile=True)
        context_char_idxs = Variable(data['context_char_idxs'], volatile=True)
        ques_char_idxs = Variable(data['ques_char_idxs'], volatile=True)
        context_lens = Variable(data['context_lens'], volatile=True)
        y1 = Variable(data['y1'], volatile=True)
        y2 = Variable(data['y2'], volatile=True)
        q_type = Variable(data['q_type'], volatile=True)
        is_support = Variable(data['is_support'], volatile=True)
        start_mapping = Variable(data['start_mapping'], volatile=True)
        end_mapping = Variable(data['end_mapping'], volatile=True)
        all_mapping = Variable(data['all_mapping'], volatile=True)
        # evidence-extraction targets
        subject_y1 = Variable(data['subject_y1'])
        subject_y2 = Variable(data['subject_y2'])
        object_y1 = Variable(data['object_y1'])
        object_y2 = Variable(data['object_y2'])
        relations = Variable(data['relations'])

        model_results = model(context_idxs, ques_idxs, context_char_idxs,
                              ques_char_idxs, relations, context_lens,
                              start_mapping, end_mapping, all_mapping,
                              return_yp=True)
        (logit1, logit2, predict_type, predict_support,
         logit_subject_start, logit_subject_end,
         logit_object_start, logit_object_end,
         k_relations, loss_relation, yp1, yp2, sy1, sy2, oy1, oy2) = model_results

        loss_1 = (nll_sum(predict_type, q_type) + nll_sum(logit1, y1) +
                  nll_sum(logit2, y2)) / context_idxs.size(0)
        loss_2 = nll_average(predict_support.view(-1, 2), is_support.view(-1))
        loss_3_r = torch.sum(loss_relation)
        loss_3_s = (nll_sum(logit_subject_start, subject_y1) +
                    nll_sum(logit_subject_end, subject_y2)) / context_idxs.size(0)
        loss_3_o = (nll_sum(logit_object_start, object_y1) +
                    nll_sum(logit_object_end, object_y2)) / context_idxs.size(0)
        loss = loss_1 + config.sp_lambda * loss_2 + config.evi_lambda * (
            loss_3_s + loss_3_r + loss_3_o)

        answer_dict_ = convert_tokens(
            eval_file, data['ids'],
            yp1.data.cpu().numpy().tolist(),
            yp2.data.cpu().numpy().tolist(),
            np.argmax(predict_type.data.cpu().numpy(), 1))
        answer_dict.update(answer_dict_)
        total_loss += loss.item()
        # total_loss += loss.data[0]
        step_cnt += 1
    loss = total_loss / step_cnt
    metrics = evaluate(eval_file, answer_dict)
    metrics['loss'] = loss
    return metrics
def test(config):
    gpu_options = tf.GPUOptions(visible_device_list="2")
    sess_config = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)
    sess_config.gpu_options.allow_growth = True

    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    print("Loading model...")
    test_batch = get_dataset(config.test_record_file,
                             get_record_parser(config, is_test=True),
                             config).make_one_shot_iterator()

    model = Model(config, test_batch, word_mat, char_mat, trainable=False)

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        losses = []
        answer_dict = {}
        remapped_dict = {}
        for step in tqdm(range(total // config.batch_size + 1)):
            qa_id, loss, yp1, yp2 = sess.run(
                [model.qa_id, model.loss, model.yp1, model.yp2])
            answer_dict_, remapped_dict_, outlier = convert_tokens(
                eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)
            losses.append(loss)
            print("\n", loss)
            if loss > 50:
                # inspect unusually bad batches
                for i, j, k in zip(qa_id.tolist(), yp1.tolist(), yp2.tolist()):
                    print(answer_dict[str(i)], j, k)
                #print("IDs: {} Losses: {} Yp1: {} Yp2: {}".format(qa_id.tolist(),
                #      loss.tolist(), yp1.tolist(), yp2.tolist()))
        loss = np.mean(losses)
        # Evaluate with answer_dict here; evaluate-v1.1.py evaluates with
        # remapped_dict, since only that is saved. The two dicts differ
        # slightly; see evaluate-v1.1.py.
        metrics = evaluate(eval_file, answer_dict)
        with open(config.answer_file, "w") as fh:
            json.dump(remapped_dict, fh)
        print("Exact Match: {}, F1: {} Rouge-l-f: {} Rouge-l-p: {} Rouge-l-r: {}".format(
            metrics['exact_match'], metrics['f1'], metrics['rouge-l-f'],
            metrics['rouge-l-p'], metrics['rouge-l-r']))
def train_model():
    rnn_clf = RNNSequenceClassifier(num_classes=2, embedding_dim=300 + 1024 + 50,
                                    hidden_size=300, num_layers=1, bidir=True,
                                    dropout1=0.3, dropout2=0.2, dropout3=0.2)
    # Move the model to the GPU if available
    if using_GPU:
        rnn_clf = rnn_clf.cuda()
    # Set up criterion for calculating loss
    nll_criterion = nn.NLLLoss()
    # Set up an optimizer for updating the parameters of the rnn_clf
    rnn_clf_optimizer = optim.SGD(rnn_clf.parameters(), lr=0.01, momentum=0.9)
    # Number of epochs (passes through the dataset) to train the model for.
    num_epochs = 20

    # 3.2 train model
    training_loss = []
    val_loss = []
    training_f1 = []
    val_f1 = []
    # A counter for the number of gradient updates
    num_iter = 0
    for epoch in tqdm(range(num_epochs)):
        # print("Starting epoch {}".format(epoch + 1))
        for (example_text, example_lengths, labels) in train_dataloader_vua:
            example_text = Variable(example_text)
            example_lengths = Variable(example_lengths)
            labels = Variable(labels)
            if using_GPU:
                example_text = example_text.cuda()
                example_lengths = example_lengths.cuda()
                labels = labels.cuda()
            # predicted shape: (batch_size, 2)
            predicted = rnn_clf(example_text, example_lengths)
            batch_loss = nll_criterion(predicted, labels)
            rnn_clf_optimizer.zero_grad()
            batch_loss.backward()
            rnn_clf_optimizer.step()
            num_iter += 1
            # Calculate validation and training set loss and accuracy
            # every 200 gradient updates
            if num_iter % 200 == 0:
                avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1 = evaluate(
                    val_dataloader_vua, rnn_clf, nll_criterion, using_GPU)
                val_loss.append(avg_eval_loss)
                val_f1.append(f1)
                print("Iteration {}. Validation Loss {}. Accuracy {}. Precision {}. "
                      "Recall {}. F1 {}. class-wise F1 {}.".format(
                          num_iter, avg_eval_loss, eval_accuracy, precision,
                          recall, f1, fus_f1))
                filename = f'../models/classification/VUA_iter_{str(num_iter)}.pt'
                torch.save(rnn_clf.state_dict(), filename)
    # print("Training done!")
    return rnn_clf, nll_criterion
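# The evaluate() helper used above is not shown in this snippet. A minimal
# sketch of one way to produce the six reported values with scikit-learn,
# assuming the model returns log-probabilities of shape (batch, 2) and that
# fus_f1 ("class-wise F1") is the per-class F1 array:
import torch
from sklearn.metrics import accuracy_score, f1_score, precision_recall_fscore_support

def evaluate_sketch(dataloader, model, criterion, using_GPU):
    model.eval()
    preds, golds, losses = [], [], []
    with torch.no_grad():
        for text, lengths, labels in dataloader:
            if using_GPU:
                text, lengths, labels = text.cuda(), lengths.cuda(), labels.cuda()
            log_probs = model(text, lengths)
            losses.append(criterion(log_probs, labels).item())
            preds.extend(log_probs.argmax(dim=1).cpu().tolist())
            golds.extend(labels.cpu().tolist())
    model.train()
    accuracy = accuracy_score(golds, preds)
    precision, recall, f1, _ = precision_recall_fscore_support(
        golds, preds, average='binary')
    fus_f1 = f1_score(golds, preds, average=None)  # one F1 per class
    return sum(losses) / len(losses), accuracy, precision, recall, f1, fus_f1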
def evaluate_batch(model, num_batches, eval_file, sess, data_type, handle,
                   str_handle, config):
    answer_dict = {}
    # these must be three distinct lists; a chained assignment would alias them
    losses_esp = []
    losses_pr = []
    losses_ee = []
    outlier_count = 0
    for _ in tqdm(range(1, num_batches + 1)):
        if config.with_passage_ranking:
            qa_id, loss_esp, loss_pr, loss_ee, yp1, yp2 = sess.run(
                [model.qa_id, model.loss, model.pr_loss, model.e_loss,
                 model.yp1, model.yp2],
                feed_dict={handle: str_handle})
        else:
            qa_id, loss_esp, yp1, yp2 = sess.run(
                [model.qa_id, model.loss, model.yp1, model.yp2],
                feed_dict={handle: str_handle})
        answer_dict_, _, outlier = convert_tokens(
            config, eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
        if outlier:
            outlier_count += 1
            continue
        answer_dict.update(answer_dict_)
        if loss_esp < 100:
            losses_esp.append(loss_esp)
            print(loss_esp)
        if config.with_passage_ranking:
            losses_pr.append(loss_pr)
            losses_ee.append(loss_ee)
    #print("outlier_count:", outlier_count)
    loss_esp = np.mean(losses_esp)
    print("dev_loss:", loss_esp)
    if config.with_passage_ranking:
        loss_pr = np.mean(losses_pr)
        loss_ee = np.mean(losses_ee)
    metrics = evaluate(eval_file, answer_dict)
    metrics["loss_esp"] = loss_esp
    metrics["loss_ee"] = loss_esp  # default when passage ranking is off
    if config.with_passage_ranking:
        metrics["loss_pr"] = loss_pr
        metrics["loss_ee"] = loss_ee
    loss_sum1 = tf.Summary(value=[tf.Summary.Value(
        tag="{}/loss_esp".format(data_type), simple_value=metrics["loss_esp"]), ])
    if config.with_passage_ranking:
        loss_sum2 = tf.Summary(value=[tf.Summary.Value(
            tag="{}/loss_pr".format(data_type), simple_value=metrics["loss_pr"]), ])
        loss_sum3 = tf.Summary(value=[tf.Summary.Value(
            tag="{}/loss_ee".format(data_type), simple_value=metrics["loss_ee"]), ])
    f1_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/f1".format(data_type), simple_value=metrics["f1"]), ])
    em_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/em".format(data_type), simple_value=metrics["exact_match"]), ])
    rouge_l_f = tf.Summary(value=[tf.Summary.Value(
        tag="{}/ROUGE-L".format(data_type), simple_value=metrics["rouge-l-f"]), ])
    rouge_l_p = tf.Summary(value=[tf.Summary.Value(
        tag="{}/rouge-l-p".format(data_type), simple_value=metrics["rouge-l-p"]), ])
    rouge_l_r = tf.Summary(value=[tf.Summary.Value(
        tag="{}/rouge-l-r".format(data_type), simple_value=metrics["rouge-l-r"]), ])
    outlier_c = tf.Summary(value=[tf.Summary.Value(
        tag="{}/outlier_count".format(data_type), simple_value=outlier_count), ])
    if config.with_passage_ranking:
        return metrics, [loss_sum1, loss_sum2, loss_sum3, rouge_l_f]
    return metrics, [loss_sum1, rouge_l_f]
def main(args: argparse.Namespace) -> None:
    classifier = model.BorrowingsClassifier(args.modeltype)
    model_path = args.modelpath if args.modelpath else "model"
    if args.train:
        classifier.train(args.train)
        with open(model_path, "wb") as sink:
            pickle.dump(classifier, sink)
    else:
        with open(model_path, "rb") as source:
            classifier = pickle.load(source)
    if args.dev or args.test:
        eval_path = args.dev if args.dev else args.test
        predictions, gold = classifier.predict(eval_path)
        if args.dev:
            util.evaluate(gold, predictions)
        if args.test:
            util.write_file(predictions, args.test)
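# main() above expects an argparse.Namespace carrying modeltype, modelpath,
# train, dev, and test attributes. A minimal sketch of CLI wiring consistent
# with those accesses (flag names inferred from the function body, not from
# the original command-line interface):
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--modeltype", required=True)
    parser.add_argument("--modelpath", help="where to save/load the pickled model")
    parser.add_argument("--train", help="training data path")
    parser.add_argument("--dev", help="dev data path (prints evaluation)")
    parser.add_argument("--test", help="test data path (writes predictions)")
    main(parser.parse_args())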
def train(self, X, Y, use_attention, att_context, bidirectional, cv=True, folds=5):
    if cv:
        cv_folds = make_folds(X, Y, folds)
        accuracies = []
        fscores = []
        for fold_num, ((train_fold_X, train_fold_Y),
                       (test_fold_X, test_fold_Y)) in enumerate(cv_folds):
            tagger = self.fit_model(train_fold_X, train_fold_Y, use_attention,
                                    att_context, bidirectional)
            pred_probs, pred_label_seqs, x_lens = self.predict(
                test_fold_X, bidirectional, tagger=tagger)
            pred_inds = numpy.argmax(pred_probs, axis=2)
            flattened_preds = []
            flattened_targets = []
            for x_len, pred_ind, test_target in zip(x_lens, pred_inds, test_fold_Y):
                flattened_preds.extend(pred_ind[-x_len:])
                flattened_targets.extend(
                    [list(tt).index(1) for tt in test_target[-x_len:]])
            assert len(flattened_preds) == len(flattened_targets)
            accuracy, weighted_fscore, all_fscores = evaluate(
                flattened_targets, flattened_preds)
            print >> sys.stderr, "Finished fold %d. Accuracy: %f, Weighted F-score: %f" % (
                fold_num, accuracy, weighted_fscore)
            print >> sys.stderr, "Individual f-scores:"
            for cat in all_fscores:
                print >> sys.stderr, "%s: %f" % (self.rev_label_ind[cat], all_fscores[cat])
            accuracies.append(accuracy)
            fscores.append(weighted_fscore)
        accuracies = numpy.asarray(accuracies)
        fscores = numpy.asarray(fscores)
        print >> sys.stderr, "Accuracies:", accuracies
        print >> sys.stderr, "Average: %0.4f (+/- %0.4f)" % (
            accuracies.mean(), accuracies.std() * 2)
        print >> sys.stderr, "Fscores:", fscores
        print >> sys.stderr, "Average: %0.4f (+/- %0.4f)" % (
            fscores.mean(), fscores.std() * 2)
    self.tagger = self.fit_model(X, Y, use_attention, att_context, bidirectional)
    model_ext = "att=%s_cont=%s_bi=%s" % (str(use_attention), att_context,
                                          str(bidirectional))
    model_config_file = open("model_%s_config.json" % model_ext, "w")
    model_weights_file_name = "model_%s_weights" % model_ext
    model_label_ind = "model_%s_label_ind.json" % model_ext
    model_rep_reader = "model_%s_rep_reader.pkl" % model_ext
    print >> model_config_file, self.tagger.to_json()
    self.tagger.save_weights(model_weights_file_name, overwrite=True)
    json.dump(self.label_ind, open(model_label_ind, "w"))
    pickle.dump(self.rep_reader, open(model_rep_reader, "wb"))
def client_update(self, global_model, global_init_model, round_index):
    self.elapsed_comm_rounds += 1
    print(f'***** Client #{self.client_id} *****', flush=True)
    self.model = copy_model(global_model, self.args.dataset, self.args.arch,
                            dict(self.model.named_buffers()))
    num_pruned, num_params = get_prune_summary(self.model)
    cur_prune_rate = num_pruned / num_params
    #prune_step = math.floor(num_params * self.args.prune_step)
    eval_score = evaluate(self.model, self.test_loader,
                          verbose=self.args.test_verbosity)
    if eval_score['Accuracy'][0] > self.args.acc_thresh and \
            cur_prune_rate < self.args.prune_percent:
        # The extra 0.001 ensures we clear the target prune_percent;
        # it may not be needed.
        prune_fraction = min(self.args.prune_step,
                             0.001 + self.args.prune_percent - cur_prune_rate)
        prune_fixed_amount(self.model, prune_fraction,
                           verbose=self.args.prune_verbosity, glob=True)
        self.model = copy_model(global_init_model, self.args.dataset,
                                self.args.arch, dict(self.model.named_buffers()))
    losses = []
    accuracies = []
    for i in range(self.args.client_epoch):
        train_score = train(round_index, self.client_id, i, self.model,
                            self.train_loader, lr=self.args.lr,
                            verbose=self.args.train_verbosity)
        losses.append(train_score['Loss'][-1].data.item())
        accuracies.append(train_score['Accuracy'][-1])
    mask_log_path = f'{self.args.log_folder}/round{round_index}/c{self.client_id}.mask'
    client_mask = dict(self.model.named_buffers())
    log_obj(mask_log_path, client_mask)
    num_pruned, num_params = get_prune_summary(self.model)
    cur_prune_rate = num_pruned / num_params
    prune_step = math.floor(num_params * self.args.prune_step)
    print(f"num_pruned {num_pruned}, num_params {num_params}, "
          f"cur_prune_rate {cur_prune_rate}, prune_step: {prune_step}")
    self.losses[round_index:] = np.array(losses)
    self.accuracies[round_index:] = np.array(accuracies)
    self.prune_rates[round_index:] = cur_prune_rate
    return copy_model(self.model, self.args.dataset, self.args.arch)
def evaluate(lang='pt'):
    X, Y = util.get_X_Y(data_type='keras_tokenized_tri', lang=lang, file_type="dump")
    X = np.asarray(X)
    data_generator = DataGenerator(X, Y, lang=lang, process_x=process_x,
                                   batch_size=PARAMS['batch_size'])
    model, epoch = load_lastest(lang=lang)
    x_val, y_val = data_generator.get_validation_data()
    y_pred = model.predict(x_val)
    y_pred = y_pred.argmax(axis=-1)
    print('Model ' + NAME + ' val score on ' + lang + ': ',
          util.evaluate(y_val, y_pred))
def evaluate_batch(config, model, num_batches, eval_file, sess, data_type,
                   handle, str_handle):
    answer_dict = {}
    remapped_dict = {}
    losses = []
    for _ in tqdm(range(1, num_batches + 1)):
        try:
            qa_id, loss, yp1, yp2, y1, y2, is_select_p, is_select = sess.run(
                [model.qa_id, model.loss, model.yp1, model.yp2,
                 model.y1, model.y2, model.is_select_p, model.is_select],
                feed_dict={handle: str_handle})
        except tf.errors.OutOfRangeError:
            break
        y1 = np.argmax(y1, axis=-1)
        y2 = np.argmax(y2, axis=-1)
        sp = np.argmax(is_select_p, axis=-1)
        s = np.argmax(is_select, axis=-1)
        sp = [n + i * config.passage_num for i, n in enumerate(sp.tolist())]
        s = [m + i * config.passage_num for i, m in enumerate(s.tolist())]
        answer_dict_, remapped_dict_ = convert_tokens(
            eval_file,
            [qa_id[n] for n in sp], [yp1[n] for n in sp], [yp2[n] for n in sp],
            [y1[n] for n in sp], [y2[n] for n in sp], sp, s)
        answer_dict.update(answer_dict_)
        remapped_dict.update(remapped_dict_)
        losses.append(loss)
    loss = np.mean(losses)
    metrics = evaluate(eval_file, answer_dict, filter=False)
    sp_metrics = evaluate(eval_file, remapped_dict, filter=False)
    metrics["loss"] = loss
    loss_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/loss".format(data_type), simple_value=metrics["loss"]), ])
    f1_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/f1".format(data_type), simple_value=metrics["f1"]), ])
    em_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/em".format(data_type), simple_value=metrics["exact_match"]), ])
    sp_f1_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/sp_f1".format(data_type), simple_value=sp_metrics["f1"]), ])
    sp_em_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/sp_em".format(data_type), simple_value=sp_metrics["exact_match"]), ])
    return metrics, [loss_sum, f1_sum, em_sum, sp_f1_sum, sp_em_sum]
def evaluate_batch(data_source, model, max_batches, eval_file, config):
    answer_dict = {}
    sp_dict = {}
    total_loss, step_cnt = 0, 0
    iter = data_source
    for step, data in enumerate(iter):
        if step >= max_batches and max_batches > 0:
            break
        with torch.no_grad():
            if config.cuda:
                data = {k: (data[k].cuda() if k != 'ids' else data[k]) for k in data}
            context_idxs = data['context_idxs']
            ques_idxs = data['ques_idxs']
            context_char_idxs = data['context_char_idxs']
            ques_char_idxs = data['ques_char_idxs']
            context_lens = data['context_lens']
            y1 = data['y1']
            y2 = data['y2']
            q_type = data['q_type']
            is_support = data['is_support']
            start_mapping = data['start_mapping']
            end_mapping = data['end_mapping']
            all_mapping = data['all_mapping']

            logit1, logit2, predict_type, predict_support, yp1, yp2 = model(
                context_idxs, ques_idxs, context_char_idxs, ques_char_idxs,
                context_lens, start_mapping, end_mapping, all_mapping,
                context_lens.sum(1).max().item(), return_yp=True)

            loss = (nll_sum(predict_type, q_type) + nll_sum(logit1, y1) +
                    nll_sum(logit2, y2)) / context_idxs.size(0) + \
                config.sp_lambda * nll_average(predict_support.view(-1, 2),
                                               is_support.view(-1))

            answer_dict_ = convert_tokens(
                eval_file, data['ids'],
                yp1.data.cpu().numpy().tolist(),
                yp2.data.cpu().numpy().tolist(),
                np.argmax(predict_type.data.cpu().numpy(), 1))
            answer_dict.update(answer_dict_)

            total_loss += loss.item()
            step_cnt += 1
    loss = total_loss / step_cnt
    metrics = evaluate(eval_file, answer_dict)
    metrics['loss'] = loss
    return metrics
def pipeline(self, df):
    df_preprocess = self.preprocess(df)
    df_train = df_preprocess.groupby('id').apply(
        util.get_trainset).reset_index().drop('level_1', axis=1)
    df_test = df_preprocess.groupby('id').apply(
        util.get_testset).reset_index().drop('level_1', axis=1)
    model = self.model(df_train)
    df_pred = self.prediction(model, df_test)
    eval_scores = util.evaluate(df_pred)
    return df_pred, eval_scores
def test_epoch_end(self, test_step_outputs):
    pred_tags, tags = zip(*test_step_outputs)
    result = evaluate(self._to_ix, self.test_data, list(chain(*pred_tags)), self.m_type)
    self.log('char precision', result['char_level']['precision'])
    self.log('char recall', result['char_level']['recall'])
    self.log('char f1', result['char_level']['f1'])
    self.log('word precision', result['word_level']['precision'])
    self.log('word recall', result['word_level']['recall'])
    self.log('word f1', result['word_level']['f1'])
def evaluate_batch(data_source, model, max_batches, eval_file, config):
    answer_dict = {}
    sp_dict = {}
    total_loss, total_ans_loss, total_sp_loss, step_cnt = 0, 0, 0, 0
    iter = data_source
    for step, data in enumerate(iter):
        if step >= max_batches and max_batches > 0:
            break
        context_idxs = Variable(data['context_idxs'], volatile=True)
        ques_idxs = Variable(data['ques_idxs'], volatile=True)
        context_char_idxs = Variable(data['context_char_idxs'], volatile=True)
        ques_char_idxs = Variable(data['ques_char_idxs'], volatile=True)
        context_lens = Variable(data['context_lens'], volatile=True)
        y1 = Variable(data['y1'], volatile=True)
        y2 = Variable(data['y2'], volatile=True)
        q_type = Variable(data['q_type'], volatile=True)
        is_support = Variable(data['is_support'], volatile=True)
        start_mapping = Variable(data['start_mapping'], volatile=True)
        end_mapping = Variable(data['end_mapping'], volatile=True)
        all_mapping = Variable(data['all_mapping'], volatile=True)

        logit1, logit2, predict_type, predict_support, yp1, yp2 = model(
            context_idxs, ques_idxs, context_char_idxs, ques_char_idxs,
            context_lens, start_mapping, end_mapping, all_mapping, return_yp=True)

        loss_1 = (nll_sum(predict_type, q_type) + nll_sum(logit1, y1) +
                  nll_sum(logit2, y2)) / context_idxs.size(0)
        loss_2 = nll_average(predict_support.view(-1, 2), is_support.view(-1))
        loss = loss_1 + config.sp_lambda * loss_2

        answer_dict_ = convert_tokens(
            eval_file, data['ids'],
            yp1.data.cpu().numpy().tolist(),
            yp2.data.cpu().numpy().tolist(),
            np.argmax(predict_type.data.cpu().numpy(), 1))
        answer_dict.update(answer_dict_)

        total_loss += loss.data[0]
        total_ans_loss += loss_1.data[0]
        total_sp_loss += loss_2.data[0]
        step_cnt += 1
    loss = total_loss / step_cnt
    ans_loss = total_ans_loss / step_cnt
    sp_loss = total_sp_loss / step_cnt
    metrics = evaluate(eval_file, answer_dict)
    metrics['loss'] = loss
    metrics['ans_loss'] = ans_loss
    metrics['sp_loss'] = sp_loss
    return metrics
def test(config):
    os.environ["CUDA_VISIBLE_DEVICES"] = config.choose_gpu
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    graph = tf.Graph()
    print("Loading model...")
    with graph.as_default() as g:
        test_batch = get_dataset(config.test_record_file,
                                 get_record_parser(config, is_test=True),
                                 config).make_one_shot_iterator()
        model = QANet(config, test_batch, word_mat, char_mat, trainable=False, graph=g)

        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True
        sess_config.gpu_options.per_process_gpu_memory_fraction = config.gpu_memory_fraction

        with tf.Session(config=sess_config) as sess:
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
            if config.decay < 1.0:
                sess.run(model.assign_vars)
            losses = []
            answer_dict = {}
            remapped_dict = {}
            for step in tqdm(range(total // config.batch_size + 1)):
                qa_id, loss, yp1, yp2 = sess.run(
                    [model.qa_id, model.loss, model.yp1, model.yp2])
                answer_dict_, remapped_dict_ = convert_tokens(
                    eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
                answer_dict.update(answer_dict_)
                remapped_dict.update(remapped_dict_)
                losses.append(loss)
            loss = np.mean(losses)
            metrics = evaluate(eval_file, answer_dict)
            with open(config.answer_file, "w") as fh:
                json.dump(remapped_dict, fh)
            print("Exact Match: {}, F1: {}".format(metrics['exact_match'],
                                                   metrics['f1']))
def SelfEvaluate(self, batches, eval_file=None, answer_file=None, drop_file=None, dev=None):
    print('Starting evaluation')
    with open(eval_file, 'r', encoding='utf-8') as f:
        eval_file = json.load(f)
    with open(dev, 'r', encoding='utf-8') as f:
        dev = json.load(f)
    answer_dict = {}
    mapped_dict = {}
    for batch in batches:
        data = prepare_data(batch)
        full_p_states, p_mask, full_q_states, q_mask = self.encode(data)
        logits1, logits2, ans_log = self.decode(full_p_states, p_mask,
                                                full_q_states, q_mask)
        y1, y2, has_ans = get_predictions(logits1, logits2, ans_log)
        qa_id = data['id']
        answer_dict_, mapped_dict_ = convert_tokens(eval_file, qa_id, y1, y2, has_ans)
        answer_dict.update(answer_dict_)
        mapped_dict.update(mapped_dict_)
        del full_p_states, p_mask, full_q_states, q_mask, y1, y2, \
            answer_dict_, mapped_dict_, has_ans, ans_log, logits1, logits2
    with open(drop_file, 'r', encoding='utf-8') as f:
        drop = json.load(f)
    # dropped examples are scored as empty answers
    for i in drop['drop_ids']:
        uuid = eval_file[str(i)]["uuid"]
        answer_dict[str(i)] = ''
        mapped_dict[uuid] = ''
    with open(answer_file, 'w', encoding='utf-8') as f:
        json.dump(mapped_dict, f)
    metrics = evaluate(dev, mapped_dict)
    # sub_path = join('./result/', "submit.csv")
    # #log.info('Writing submission file to {}...'.format(sub_path))
    # with open(sub_path, 'w') as csv_fh:
    #     csv_writer = csv.writer(csv_fh, delimiter=',')
    #     csv_writer.writerow(['Id', 'Predicted'])
    #     for uuid in sorted(mapped_dict):
    #         csv_writer.writerow([uuid, mapped_dict[uuid]])
    print("EM: {}, F1: {}, Has answer: {}, No answer: {}".format(
        metrics['exact'], metrics['f1'], metrics['HasAns_f1'], metrics['NoAns_f1']))
    return metrics['exact'], metrics['f1']
def run_itemknn(X_train, X_test, test_dict):
    # Compute item-item matrix with cosine similarities
    S_cosine = util.compute_cosine(X_train)
    # Compute prediction scores for all test users; the large constant pushes
    # already-seen items to the bottom of the ranking
    test_users = list(test_dict.keys())
    test_scores = X_test[test_users, :] @ S_cosine - 987654321 * X_test[test_users, :]
    # Evaluate and pretty print
    results_cosine = util.evaluate(X_test, test_scores, test_dict)
    return results_cosine
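# util.compute_cosine is not shown here. A minimal sketch of a common
# implementation: L2-normalise the item columns of the user-item matrix, so
# that X_norm.T @ X_norm holds the pairwise item cosine similarities.
from sklearn.preprocessing import normalize

def compute_cosine_sketch(X):
    X_norm = normalize(X, norm='l2', axis=0)  # unit-length item columns
    return X_norm.T @ X_norm                  # S[i, j] = cos(item_i, item_j)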
def test_epoch(model, loader, criterion):
    model.eval()
    test_losses = []
    outs = []
    gts = []
    for data in loader:
        for label in data[1].numpy().tolist():
            gts.append(label)
        inputs, labels = transform_data(data, True)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        test_losses.append(loss.data[0])
        out = torch.sigmoid(outputs).data.cpu().numpy()
        outs.extend(out)
    avg_loss = np.mean(test_losses)
    print('Validation Loss: {:.6f}'.format(avg_loss))
    outs = np.array(outs)
    gts = np.array(gts)
    util.evaluate(gts, outs)
    return avg_loss
def evaluate_batch(model, num_batches, eval_file, sess, data_type, handle, str_handle):
    answer_dict = {}
    losses = []
    outlier_count = 0
    for _ in tqdm(range(1, num_batches + 1)):
        qa_id, loss, yp1, yp2, is_selected = sess.run(
            [model.qa_id, model.loss, model.yp1, model.yp2, model.logits_s],
            feed_dict={handle: str_handle})
        if is_selected > 0.5:
            answer_dict_, _, outlier = convert_tokens(
                eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            if outlier:
                outlier_count += 1
                continue
            # keep answers only for passages the selector accepts
            answer_dict.update(answer_dict_)
        losses.append(loss)
    loss = np.mean(losses)
    metrics = evaluate(eval_file, answer_dict)
    metrics["loss"] = loss
    loss_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/loss".format(data_type), simple_value=metrics["loss"]),
    ])
    f1_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/f1".format(data_type), simple_value=metrics["f1"]),
    ])
    em_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/em".format(data_type), simple_value=metrics["exact_match"]),
    ])
    rouge_l_f = tf.Summary(value=[
        tf.Summary.Value(tag="{}/rouge-l-f".format(data_type),
                         simple_value=metrics["rouge-l-f"]),
    ])
    rouge_l_p = tf.Summary(value=[
        tf.Summary.Value(tag="{}/rouge-l-p".format(data_type),
                         simple_value=metrics["rouge-l-p"]),
    ])
    rouge_l_r = tf.Summary(value=[
        tf.Summary.Value(tag="{}/rouge-l-r".format(data_type),
                         simple_value=metrics["rouge-l-r"]),
    ])
    outlier_c = tf.Summary(value=[
        tf.Summary.Value(tag="{}/outlier_count".format(data_type),
                         simple_value=outlier_count),
    ])
    return metrics, [
        loss_sum, f1_sum, em_sum, rouge_l_f, rouge_l_p, rouge_l_r, outlier_c
    ]
def end_of_epoch_hook(trainer):
    nonlocal i_epoch, best_dev_eer
    logger.info(f"EPOCH\t{i_epoch}")
    if i_epoch % args.eval_freq == 0:
        train_eer, train_eer_std = evaluate(args, trainer.models["trunk"],
                                            trainer.models["embedder"],
                                            eval_train_dataloaders)
        dev_eer, dev_eer_std = evaluate(args, trainer.models["trunk"],
                                        trainer.models["embedder"],
                                        eval_dev_dataloaders)
        logger.info("Train EER (mean, std):\t{}\t{}".format(train_eer, train_eer_std))
        logger.info("Dev EER (mean, std):\t{}\t{}".format(dev_eer, dev_eer_std))
        if dev_eer < best_dev_eer:
            logger.info("New best model!")
            best_dev_eer = dev_eer
    i_epoch += 1
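# The hook above logs "New best model!" but nothing is persisted in this
# excerpt. A minimal sketch of checkpointing that could run at that point;
# the output directory, file names, and helper name are illustrative.
import os
import torch

def save_best_models(trainer, out_dir="checkpoints"):
    os.makedirs(out_dir, exist_ok=True)
    torch.save(trainer.models["trunk"].state_dict(),
               os.path.join(out_dir, "best_trunk.pt"))
    torch.save(trainer.models["embedder"].state_dict(),
               os.path.join(out_dir, "best_embedder.pt"))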
def test(config):
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["num_batches"]

    print("Loading model...")
    test_batch = get_batch_dataset(config.test_record_file,
                                   get_record_parser(config, is_test=True),
                                   config, is_test=True).make_one_shot_iterator()

    model = Model(config, test_batch, word_mat, char_mat, trainable=False)

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        losses = []
        answer_dict = {}
        select_right = []
        for step in tqdm(range(1, total + 1)):
            qa_id, loss, yp1, yp2, y1, y2, is_select_p, is_select = sess.run(
                [model.qa_id, model.loss, model.yp1, model.yp2,
                 model.y1, model.y2, model.is_select_p, model.is_select])
            y1 = np.argmax(y1, axis=-1)
            y2 = np.argmax(y2, axis=-1)
            sp = np.argmax(is_select_p, axis=-1)
            s = np.argmax(is_select, axis=-1)
            sp = [n + i * config.passage_num for i, n in enumerate(sp.tolist())]
            s = [m + i * config.passage_num for i, m in enumerate(s.tolist())]
            select_right.append(len(set(s).intersection(set(sp))))
            answer_dict_, _ = convert_tokens(
                eval_file,
                [qa_id[n] for n in sp], [yp1[n] for n in sp], [yp2[n] for n in sp],
                [y1[n] for n in sp], [y2[n] for n in sp], sp, s)
            answer_dict.update(answer_dict_)
            losses.append(loss)
        loss = np.mean(losses)
        select_accu = sum(select_right) / (len(select_right) *
                                           (config.batch_size / config.passage_num))
        write_prediction(eval_file, answer_dict, 'answer_for_evl.json', config)
        metrics = evaluate(eval_file, answer_dict, filter=False)
        metrics['Selection Accuracy'] = select_accu
        print("Exact Match: {}, F1: {}, selection accuracy: {}".format(
            metrics['exact_match'], metrics['f1'], metrics['Selection Accuracy']))
def prediction(features_test, labels_test, model_prediction):
    # print("features test are", features_test)
    predictions = np.array([])
    # load the model from disk
    clf = joblib.load(model_prediction)
    for i in range(features_test.shape[0]):
        # X_test = scalingFactor.transform(featureMatrix[i, :])
        X_test = [features_test[i, :]]
        # print("x test is", X_test)
        predictions = np.append(predictions, clf.predict(X_test))
    # print("prediction is", predictions)
    # print("y_test is", labels_test)
    # print("Classification Report:")
    # print(metrics.classification_report(labels_test, predictions))
    # print("Confusion Matrix:")
    # print(metrics.confusion_matrix(labels_test, predictions))
    util.evaluate(labels_test, predictions)
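# scikit-learn estimators predict on a whole 2-D array at once, so the per-row
# loop above can be collapsed into a single call. A sketch keeping the same
# interface (util.evaluate as used throughout this file):
import joblib

def prediction_vectorised(features_test, labels_test, model_prediction):
    clf = joblib.load(model_prediction)
    predictions = clf.predict(features_test)  # one call over all rows
    util.evaluate(labels_test, predictions)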
def predict():
    if request.method == 'POST':
        ocr_text = request.get_json()
        print(ocr_text)
        original_sent = ocr_text['ocr_text']
        output_sent = decode(original_sent)[0][0]
        evals = evaluate(original_sent, output_sent)
        result = {'predicted': output_sent, 'evaluated': evals}
        print(result)
        return jsonify(result)
    else:
        return '<h1>Error</h1>'
def train(show_baseline=False, continue_train=False,
          learn_freq=5, memory_size=20000, total_time=20,
          memory_warmup_size=2000, batch_size=32, critic_lr=0.001,
          encoder_lr=0.0001, gamma=0.9, alpha=0.9, max_episode=1000,
          critic_path='dqn_critic', encoder_path='dqn_encoder',
          evaluate_env_list_path='env_list_set1'):
    if show_baseline:
        print(evaluate_reject_when_full(evaluate_env_list_path))
        print(evaluate_totally_random(evaluate_env_list_path))
    env = produce_env(total_time=total_time)
    action_dim = 4
    obs_dim_1 = 45
    request_dim = 17
    obs_dim_2 = 10
    obs_dim = obs_dim_1 + obs_dim_2 * 7
    encoder = Encoder(input_size=request_dim, output_size=obs_dim_2,
                      use_rnn=False, use_gru=True, use_lstm=False)
    rpm = ReplayMemory(memory_size)  # experience replay buffer for DQN
    critic = Critic(obs_dim=obs_dim, action_dim=action_dim)
    critic.to(device)
    agent = Agent(critic=critic, encoder=encoder, obs_dim=obs_dim,
                  action_dim=action_dim, critic_lr=critic_lr,
                  encoder_lr=encoder_lr, gamma=gamma, alpha=alpha)
    if continue_train:
        agent.load(critic_path=critic_path, encoder_path=encoder_path)
    # Pre-fill the replay buffer so the first training batches are drawn
    # from a reasonably diverse sample.
    while len(rpm) < memory_warmup_size:
        run_episode(env, agent, rpm, memory_warmup_size, learn_freq, batch_size)
    # start training
    episode = 0
    while episode < max_episode:
        # train for max_episode episodes; the evaluation runs below do not
        # count toward the episode total
        for i in range(0, 100):
            total_reward = run_episode(env, agent, rpm, memory_warmup_size,
                                       learn_freq, batch_size)
            episode += 1
        eval_reward = evaluate(evaluate_env_list_path, agent, render=False)
        print('episode:{} Test reward:{}'.format(episode, eval_reward))
        agent.save(critic_path=critic_path, encoder_path=encoder_path)
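# A minimal sketch of the ReplayMemory used above, assuming the usual
# (obs, action, reward, next_obs, done) tuple interface; the real class in
# this codebase may differ.
import random
from collections import deque

class ReplayMemorySketch(object):
    def __init__(self, max_size):
        self.buffer = deque(maxlen=max_size)  # old experience falls off the left

    def append(self, exp):
        self.buffer.append(exp)

    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)  # uniform, no duplicates

    def __len__(self):
        return len(self.buffer)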
def model_evaluation(model, dataset, device):
    examples, predicted_s, predicted_e, token_to_orig_maps = list(), list(), list(), list()
    for batch in dataset.get_tqdm(device, shuffle=False):
        input_ids, input_mask, segment_ids, _, _, token_to_orig_map, example = batch
        inputs = {
            'input_ids': input_ids,
            'attention_mask': input_mask,
            'token_type_ids': segment_ids
        }
        outputs = model(**inputs)
        examples.extend(example)
        predicted_s.extend(outputs[0].cpu().numpy())
        predicted_e.extend(outputs[1].cpu().numpy())
        token_to_orig_maps.extend(token_to_orig_map)
    evaluate(examples, predicted_s, predicted_e, token_to_orig_maps, entity_refine=False)
    evaluate(examples, predicted_s, predicted_e, token_to_orig_maps)
def main():
    smooth_rate = 0.0001
    '''
    #------------------------------For Feature Extractor------------------------------
    extractor = Extractor()
    extractor.extract("../corpus/train_corpus.xml", "../train/", "train", 7, 3, 2, " | ")
    extractor.extract("../corpus/test_corpus.xml", "../test/", "test", 7, 3, 2, " | ")
    '''
    names = util.readNames("../test/namefile")
    '''
    #------------------------------For Random Validation-------------------------------
    fout = open("../result/Tune_Result "+str(time.ctime())+".csv", "a")
    results = []
    for name in names:
        infile = "../train/"+name
        nb = NaiveBayes(infile)
        print "---------", name, "----------"
        result = nb.Random_Cross_Validation(20, 4, smooth_rate)
        results.append(result)
        reStr = name + "," + str(result) + "\n"
        fout.write(reStr)
    fout.close()
    print "Macro AVG:", sum(results)/len(results)
    '''
    #------------------------------------For Test--------------------------------------
    for name in names:
        print name
    nb2 = NaiveBayes()
    resultfile = nb2.predict("../train/", "../test/", "../result/", smooth_rate)
    util.evaluate(resultfile, "../result/test_answer")
def evaluate_batch(model, num_batches, eval_file, sess, data_type, handle, str_handle):
    """Evaluate a batch of examples and build TensorBoard summaries."""
    all_yp3 = []
    conter_high = 0
    answer_dict = {}
    losses = []
    for numb_b in range(1, num_batches + 1):
        qa_id, loss, yp1, yp2, yp3, y1, y2, y3, logging, logging2, q = sess.run(
            [model.qa_id, model.loss, model.yp1, model.yp2, model.yp3,
             model.y1, model.y2, model.y3, model.logging, model.logging2, model.q],
            feed_dict={handle: str_handle})
        answer_dict_, _ = convert_tokens(eval_file, qa_id.tolist(), yp1.tolist(),
                                         yp2.tolist(), yp3.tolist())
        answer_dict.update(answer_dict_)
        losses.append(loss)
    loss = np.mean(losses)
    metrics = evaluate(eval_file, answer_dict)
    print(metrics)
    metrics["loss"] = loss
    loss_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/loss".format(data_type), simple_value=metrics["loss"]),
    ])
    f1_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/f1".format(data_type), simple_value=metrics["f1"]),
    ])
    em_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/em".format(data_type), simple_value=metrics["exact_match"]),
    ])
    return metrics, [loss_sum, f1_sum, em_sum]
def train(self, train_data, valid_data):
    for epoch in range(self.epoch_num):
        self.model.train()
        train_loss = 0
        train_wmae, train_nae = 0, 0
        for step, (train_x, train_y) in enumerate(train_data):
            train_x = train_x.to(self.device)
            train_y = train_y.to(self.device)
            out = self.model(train_x)
            self.optimizer.zero_grad()
            loss = self.loss_func(out, train_y)
            loss.backward()
            self.optimizer.step()
            train_loss += loss.item()
            out = out.detach().cpu().numpy()
            train_y = train_y.detach().cpu().numpy()
            wmae, nae = evaluate(out, train_y)
            train_wmae += wmae
            train_nae += nae
        train_loss /= (step + 1)
        train_wmae /= (step + 1)
        train_nae /= (step + 1)
        valid_loss, valid_wmae, valid_nae = self.validate(valid_data)
        best_info = ''
        if valid_loss < self.best['loss']:
            self.best['loss'] = valid_loss
            best_info += ' Loss '
        if valid_wmae < self.best['wmae']:
            self.best['wmae'] = valid_wmae
            best_info += ' WMAE '
        if valid_nae < self.best['nae']:
            self.best['nae'] = valid_nae
            best_info += ' NAE '
        self.record('\n------------ Epoch {} ----------- Best: {}'.format(epoch, best_info))
        self.record('Train => Loss: {:.5f} | WMAE: {:.5f} | NAE: {:.5f}'.format(
            train_loss, train_wmae, train_nae))
        self.record('Valid => Loss: {:.5f} | WMAE: {:.5f} | NAE: {:.5f}'.format(
            valid_loss, valid_wmae, valid_nae))
        if self.save and 'NAE' in best_info:
            torch.save(self.model.state_dict(), self.save)
    self.record('\n========== Best record ==========')
    self.record('Loss: {:.5f} | WMAE: {:.5f} | NAE: {:.5f}'.format(
        self.best['loss'], self.best['wmae'], self.best['nae']))
def test(config):
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    graph = tf.Graph()
    print("Loading model...")
    with graph.as_default() as g:
        test_batch = get_dataset(config.test_record_file,
                                 get_record_parser(config, is_test=True),
                                 config).make_one_shot_iterator()
        model = Model(config, test_batch, word_mat, char_mat, trainable=False, graph=g)

        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True

        with tf.Session(config=sess_config) as sess:
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
            if config.decay < 1.0:
                sess.run(model.assign_vars)
            losses = []
            answer_dict = {}
            remapped_dict = {}
            for step in tqdm(range(total // config.batch_size + 1)):
                qa_id, loss, yp1, yp2 = sess.run(
                    [model.qa_id, model.loss, model.yp1, model.yp2])
                answer_dict_, remapped_dict_ = convert_tokens(
                    eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
                answer_dict.update(answer_dict_)
                remapped_dict.update(remapped_dict_)
                losses.append(loss)
            loss = np.mean(losses)
            metrics = evaluate(eval_file, answer_dict)
            with open(config.answer_file, "w") as fh:
                json.dump(remapped_dict, fh)
            print("Exact Match: {}, F1: {}".format(metrics['exact_match'],
                                                   metrics['f1']))
def test(prototypes, network_path='network_eyes.np'):
    """
    Tests a saved network
    :param prototypes:
    :param network_path:
    :return:
    """
    network = load_network(prototypes)
    input_var = T.fmatrix()
    target_var = T.fmatrix()
    val_prediction = layers.get_output(network, inputs=input_var, deterministic=True)
    val_loss = lasagne.objectives.squared_error(val_prediction, target_var)
    val_loss = val_loss.mean()
    val_fn = theano.function([input_var, target_var], [val_prediction, val_loss])
    total_mean = 0
    total_std = 0
    n = 0
    print "validation data in test"
    for inp_val, out_val in tqdm(iterate_data(data_file='validation_data_eyes.p')):
        predictions, loss = val_fn(inp_val, out_val)
        dist, mean, std = evaluate(predictions, out_val)
        n += 1
        total_mean += mean
        total_std += std
        #eyes = Eyes(origin=0, visualize=True)
        #eyes.set_dominance(0)
        for i, [left, right] in enumerate(predictions):
            #print left, right
            x, y = calc_intersect(left, right)
            print "predicted \t x: {} y: {}".format(x, y)
            #eyes.redraw()
            #point_target = eyes.move_eyes(out_val[i][0], out_val[i][1])
            x1, y1 = calc_intersect(out_val[i][0], out_val[i][1])
            print "target \t\t x: {} y: {}".format(x1, y1)
            print "should be \t x: {} y: {}".format(inp_val[i][0], inp_val[i][1])
            #eyes.redraw()
    embed()
def train(self, trainfile_name):
    train_X, train_Y, num_classes = self.make_data(trainfile_name)
    accuracies = []
    fscores = []
    if self.cv:
        cv_folds = make_folds(train_X, train_Y, self.folds)
        for i, ((train_fold_X, train_fold_Y),
                (test_fold_X, test_fold_Y)) in enumerate(cv_folds):
            classifier = self.fit_model(train_fold_X, train_fold_Y, num_classes)
            predictions = self.classify(classifier, test_fold_X)
            accuracy, weighted_fscore, _ = evaluate(test_fold_Y, predictions)
            print >> sys.stderr, "Finished fold %d. Accuracy: %f, F-score: %f" % (
                i, accuracy, weighted_fscore)
            accuracies.append(accuracy)
            fscores.append(weighted_fscore)
        accuracies = numpy.asarray(accuracies)
        fscores = numpy.asarray(fscores)
        print >> sys.stderr, "Accuracies:", accuracies
        print >> sys.stderr, "Average: %0.4f (+/- %0.4f)" % (
            accuracies.mean(), accuracies.std() * 2)
        print >> sys.stderr, "Fscores:", fscores
        print >> sys.stderr, "Average: %0.4f (+/- %0.4f)" % (
            fscores.mean(), fscores.std() * 2)
    #self.classifier = self.fit_model(train_X, train_Y, num_classes)
    #cPickle.dump(classifier, open(self.trained_model_name, "wb"))
    #pickle.dump(tagset, open(self.stored_tagset, "wb"))
    print >> sys.stderr, "Done"
def test(self, shared_x, data_y, out_path=None):
    pred_y = self.predict(shared_x)
    if out_path:
        with codecs.open(out_path, 'wb') as f:
            f.writelines(['%s\t%s\n' % (x, y) for x, y in zip(data_y, pred_y)])
    return evaluate(data_y, pred_y)
def train_network(prototypes, train_data='train_data.p', val_data='validation_data.p'):
    """
    Legacy code, doesn't work anymore.
    Trains a single network (either arm or eye model).
    Also plots some information about loss and accuracy.
    Input: prototypes of the model, train data, validation data
    Output: -
    """
    network, train_fn, val_fn = create_network(prototypes)
    epochs = 150
    means = np.zeros(epochs)
    stds = np.zeros(epochs)
    train_losses = np.zeros(epochs)
    val_losses = np.zeros(epochs)
    dists = np.zeros(epochs)
    arm = Arm(origin=0, visualize=False)
    print "Train network"
    for e in tqdm(range(epochs)):
        # Train epoch
        for input_batch, output_batch in iterate_data(data_file=train_data):
            pred, train_loss = train_fn(input_batch, output_batch)
        total_mean = 0
        total_std = 0
        total_dist = 0
        n = 0
        # Validation epoch
        for inp_val, out_val in iterate_data(data_file=val_data):
            predictions, loss = val_fn(inp_val, out_val)
            dist, mean, std = evaluate(predictions, out_val)
            arm_positions = np.array([arm.move_arm(shoulder, elbow)
                                      for [shoulder, elbow] in predictions])
            eye_error_dist, mean_eye_error, std_eye_error = evaluate(arm_positions, inp_val)
            n += 1
            total_mean += mean
            total_std += std
            total_dist += mean_eye_error
        means[e] = total_mean / n
        stds[e] = total_std / n
        train_losses[e] = train_loss
        val_losses[e] = loss
        dists[e] = total_dist / n
        np.save('network_epoch' + str(e), layers.get_all_param_values(network))

    # Plots
    plt.figure()
    distplot, = plt.plot(dists, label='arm distance error')
    plt.legend(handles=[distplot])
    plt.savefig('../images/arm_error.png')
    plt.show()

    plt.figure()
    meanplot, = plt.plot(means, label='mean')
    stdplot, = plt.plot(stds, label='std')
    plt.legend(handles=[meanplot, stdplot])
    plt.savefig('../images/arm_angles.png')
    plt.show()

    plt.figure()
    trainplot, = plt.plot(train_losses, label='train loss')
    valplot, = plt.plot(val_losses, label='val loss')
    plt.legend(handles=[trainplot, valplot])
    plt.savefig('../images/arm_losses.png')
    plt.show()

    print "saving network"
    np.save('network_arm', layers.get_all_param_values(network))
    print "done saving"
    return network, predictions
def train_network_scenario1(prototypes1, prototypes2, origin,
                            train_data='train_data.p', val_data='validation_data.p'):
    """
    Combines the networks for the arm and the eye. Arm is dominant over the
    eye, as in scenario 2, so the eye receives its input from the arm and its
    target. Also plots a lot of information about loss and accuracy.
    Saves the weights of the network.
    Input: prototypes for the arm, prototypes for the eye, point of origin of
    both models, training data for the arm, validation data for the eye
    Output: -
    """
    epochs = 150  # number of epochs
    print 'network1'
    network1, train_fn1, val_fn1 = create_network(prototypes1)
    print 'network2'
    network2, train_fn2, val_fn2 = create_network(prototypes2, n_inputs=4)
    print 'Networks done'
    eyes = Eyes(origin=origin, visualize=False)
    arm = Arm(origin=origin, visualize=False)

    # Arrays for saving performance after each epoch
    means_arm = np.zeros(epochs)
    stds_arm = np.zeros(epochs)
    train_losses_arm = np.zeros(epochs)
    val_losses_arm = np.zeros(epochs)
    means_eye = np.zeros(epochs)
    stds_eye = np.zeros(epochs)
    train_losses_eye = np.zeros(epochs)
    val_losses_eye = np.zeros(epochs)
    dists_eye = np.zeros(epochs)
    dists_arm = np.zeros(epochs)

    print "Train network"
    for e in tqdm(range(epochs)):
        total_mean_arm = 0
        total_std_arm = 0
        total_mean_eye = 0
        total_std_eye = 0
        total_error_arm = 0
        total_error_eye = 0
        train_loss_arm = 0
        val_loss_arm = 0
        train_loss_eye = 0
        val_loss_eye = 0

        # training epoch
        i = 0
        for input_batch, output_batch in iterate_data(data_file=train_data):
            pred1, train_loss1 = train_fn1(input_batch, output_batch)
            # same targets as arm
            arm_angles = np.array([arm.calculate_angles(x, y) for [x, y] in input_batch],
                                  dtype='float32')
            # get x, y from the predicted eye angles
            eye_positions = [calc_intersect(left, right) for [left, right] in pred1]
            # first the eye coordinates; take care when combining prototypes
            arm_input = np.hstack((input_batch, eye_positions)).astype('float32')
            pred2, train_loss2 = train_fn2(arm_input, arm_angles)
            train_loss_arm += train_loss2
            train_loss_eye += train_loss1
            i += 1
        # Take average loss of this epoch
        train_loss_arm = train_loss_arm / i
        train_loss_eye = train_loss_eye / i

        n = 0
        # Validation epoch
        for inp_val, out_val in iterate_data(data_file=val_data):
            predictions_eye, loss_eye = val_fn1(inp_val, out_val)
            dist_eye, mean_eye, std_eye = evaluate(predictions_eye, out_val)
            # dist_arm is for debugging
            arm_angles = np.array([arm.calculate_angles(x, y) for [x, y] in inp_val],
                                  dtype='float32')
            eye_positions = [calc_intersect(left, right)
                             for [left, right] in predictions_eye]
            arm_input = np.hstack((inp_val, eye_positions)).astype('float32')
            prediction_arm, loss_arm = val_fn2(arm_input, arm_angles)
            dist_arm, mean_arm, std_arm = evaluate(prediction_arm, inp_val)
            arm_positions = np.array([arm.move_arm(shoulder, elbow)
                                      for [shoulder, elbow] in prediction_arm])
            arm_error_dist, mean_arm_error, std_arm_error = evaluate(arm_positions, inp_val)
            eye_error_dist, mean_eye_error, std_eye_error = evaluate(eye_positions, inp_val)
            total_error_arm += mean_arm_error
            total_error_eye += mean_eye_error
            n += 1
            total_mean_arm += mean_arm
            total_std_arm += std_arm
            total_mean_eye += mean_eye
            total_std_eye += std_eye
            val_loss_arm += loss_arm
            val_loss_eye += loss_eye

        # Save epoch data
        means_arm[e] = total_mean_arm / n
        stds_arm[e] = total_std_arm / n
        train_losses_arm[e] = train_loss_arm
        val_losses_arm[e] = val_loss_arm / n
        means_eye[e] = total_mean_eye / n
        stds_eye[e] = total_std_eye / n
        train_losses_eye[e] = train_loss_eye
        val_losses_eye[e] = val_loss_eye / n
        dists_eye[e] = total_error_eye / n
        dists_arm[e] = total_error_arm / n

    # Plots
    # Plot mean and std
    plt.figure()
    meanplot_arm, = plt.plot(means_arm, label='mean arm')
    stdplot_arm, = plt.plot(stds_arm, label='std arm')
    meanplot_eye, = plt.plot(means_eye, label='mean eye')
    stdplot_eye, = plt.plot(stds_eye, label='std eye')
    plt.legend(handles=[meanplot_arm, stdplot_arm, meanplot_eye, stdplot_eye])
    plt.savefig('../images/scenario1/accuracy_combined.png')
    plt.show()

    # Plot just the means
    plt.figure()
    meanplot_arm, = plt.plot(means_arm, label='mean arm')
    meanplot_eye, = plt.plot(means_eye, label='mean eye')
    plt.legend(handles=[meanplot_arm, meanplot_eye])
    plt.savefig('../images/scenario1/accuracy_combined_arm.png')

    # Plot the train and validation losses
    plt.figure()
    trainplot_arm, = plt.plot(train_losses_arm, label='train loss arm')
    valplot_arm, = plt.plot(val_losses_arm, label='val loss arm')
    trainplot_eye, = plt.plot(train_losses_eye, label='train loss eye')
    valplot_eye, = plt.plot(val_losses_eye, label='val loss eye')
    plt.legend(handles=[trainplot_arm, valplot_arm, trainplot_eye, valplot_eye])
    plt.savefig('../images/scenario1/loss_combined.png')
    plt.show()

    # Plot distance error of the arm
    plt.figure()
    distsplot_arm, = plt.plot(dists_arm, label='Distance Error arm')
    plt.legend(handles=[distsplot_arm])
    plt.savefig('../images/scenario1/distance_error_arm.png')
    plt.show()
    np.save('../images/scenario1/distance_arm', dists_arm)

    # Plot distance error of the eye
    plt.figure()
    distsplot_eye, = plt.plot(dists_eye, label='Distance Error eye')
    plt.legend(handles=[distsplot_eye])
    plt.savefig('../images/scenario1/distance_error_eye.png')
    plt.show()
    np.save('../images/scenario1/distance_eye', dists_eye)

    # Save the weights
    np.save('network_arm_s1', layers.get_all_param_values(network1))
    np.save('network_eye_s1', layers.get_all_param_values(network2))
    return  # network, predictions
        denominator = freqs[n-1][history] + absSigma * Lambda
    else:
        denominator = absSigma * Lambda
    return float(numerator) / denominator


## --- EVALUATION --- ##
# Evaluating unsmoothed maximum likelihood estimates will show you instances
# in which the model fails due to data sparseness. We don't want to see those
# anymore since we have already fixed this by adding smoothing.
# evaluate(MLE, POSfreqs[0:4], NEGfreqs[0:4], POStest, NEGtest, "MLE with 3-grams", validate=True)

evaluate(AddOne, POSfreqs[0:2], NEGfreqs[0:2], POStest, NEGtest, "1-grams with add-1 smoothing", validate=True)
evaluate(AddOne, POSfreqs[0:3], NEGfreqs[0:3], POStest, NEGtest, "2-grams with add-1 smoothing", validate=True)
evaluate(AddOne, POSfreqs[0:4], NEGfreqs[0:4], POStest, NEGtest, "3-grams with add-1 smoothing", validate=True)
evaluate(AddOne, POSfreqs[0:5], NEGfreqs[0:5], POStest, NEGtest, "4-grams with add-1 smoothing", validate=True)
evaluate(AddOne, POSfreqs[0:6], NEGfreqs[0:6], POStest, NEGtest, "5-grams with add-1 smoothing", validate=True)

Lambda = 6
evaluate(AddLambda, POSfreqs[0:6], NEGfreqs[0:6], POStest, NEGtest, "5-grams with add-lambda smoothing", validate=True)
Lambda = 2
evaluate(AddLambda, POSfreqs[0:6], NEGfreqs[0:6], POStest, NEGtest, "5-grams with add-lambda smoothing", validate=True)
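# The fragment above is the tail of an add-lambda estimator, i.e.
#   P(w | h) = (c(hw) + Lambda) / (c(h) + |Sigma| * Lambda).
# A minimal sketch of the full function under that reading; the original
# signature is not shown, so the argument list here is an assumption.
def AddLambdaSketch(ngram, freqs, absSigma):
    n = len(ngram)
    history = ngram[:-1]
    numerator = freqs[n].get(ngram, 0) + Lambda  # Lambda is set at module level, as above
    if history in freqs[n-1]:
        denominator = freqs[n-1][history] + absSigma * Lambda
    else:
        denominator = absSigma * Lambda
    return float(numerator) / denominator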
import util
from rbfn import RBFN

if __name__ == '__main__':
    # dataset_file = raw_input('Training dataset location: ')
    dataset_file = 'dataset.csv'
    training_dataset, test_dataset = util.split_dataset(dataset_file)
    training_inputs, training_outputs = util.separate_dataset_io(
        training_dataset, is_training=True)
    rbfn = RBFN(n_centroids=8)
    rbfn.train(training_inputs, training_outputs)
    test_inputs, test_outputs = util.separate_dataset_io(
        test_dataset, is_training=True)
    results = rbfn.predict(test_inputs)
    print util.evaluate(test_outputs, results)
def main():
    result, answer = parse_cmd_args()
    util.evaluate(result, answer)
    return None
def main():
    wsd = NaiveBayesWSD()
    wsd.run()
    util.evaluate("../result/NaiveBayesWSD_result.txt", "../result/test_answer")
def main():
    hl = HingeLossClassifier()
    hl.learn_boundary(pickled_training_file)
    util.evaluate(pickled_testing_file, hl.predict)
    print hl.weights
# we want to weight the distances appropriately
# See time to predict for 1 element in the testing set.
# Then, take a call on speeding it up.
# Options include:
# 1. k-d trees (difficult because of the high possible dimensionality,
#    but low real dimensionality)
# 2. cache the test set and, if a new test element is within some small
#    distance of a cached one, reuse the previous prediction
# 3. run the test queries in parallel

#def evaluate(pickled_test_file, predictor):
#    test_examples = pickle.load(open(pickled_test_file, 'rb'))
#    errors = 0
#    false_positive = 0
#    false_negative = 0
#    N = len(test_examples)
#    for feature, label in test_examples:
#        print "about to begin running the predictor"
#        result = predictor(feature)
#        print "finished"
#        if result != label:
#            errors += 1
#        if result == 1 and label == -1:
#            false_positive += 1
#        if result == -1 and label == 1:
#            false_negative += 1
#        print "label = %d and result = %d" % (label, result)
#        break

if __name__ == "__main__":
    nn = NearestNeighbors(5, "trainingSet.p")
    util.evaluate("testingSet.p", nn.predict)
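# The commented-out evaluate() above stops after the first example (the stray
# break) and never reports totals. A corrected sketch keeping its interface:
import pickle

def evaluate_sketch(pickled_test_file, predictor):
    test_examples = pickle.load(open(pickled_test_file, 'rb'))
    errors = 0
    false_positive = 0
    false_negative = 0
    for feature, label in test_examples:
        result = predictor(feature)
        if result != label:
            errors += 1
        if result == 1 and label == -1:
            false_positive += 1
        if result == -1 and label == 1:
            false_negative += 1
    N = len(test_examples)
    print "errors: %d/%d, false positives: %d, false negatives: %d" % (
        errors, N, false_positive, false_negative)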