def test_evaluate_results1(self):
    kw_mask = [0.5, 0.3, 0.8, 0.6]
    kw_vector = [
        (1, "supersymmetry"),
        (2, "numerical calculations"),
        (2, "quantum chromodynamics"),
        (3, "bibliography"),
    ]
    gt_answers = {
        1: {"supersymmetry", "string model"},
        2: {"numerical calculations"},
        3: {"duality", "membrane model"},
        4: {"lattice field theory"},
    }

    metrics = evaluate_results(kw_mask, kw_vector, gt_answers)

    self.assertAlmostEqual(metrics['p_at_3'], 1 / 6)
    self.assertAlmostEqual(metrics['p_at_5'], 0.1)
    self.assertAlmostEqual(metrics['mrr'], 557 / 1440)
    self.assertAlmostEqual(metrics['map'], 28877 / 106560)
    self.assertAlmostEqual(metrics['r_prec'], 689 / 4440)
def test_evaluate_results2(self):
    kw_mask = [0.8, 0.7, 0.3, 0.5, 0.1, 0.6, 0.9]
    kw_vector = [
        (1, "supersymmetry"),
        (1, "experimental results"),
        (2, "numerical calculations"),
        (2, "quantum chromodynamics"),
        (3, "bibliography"),
        (4, "boundary condition"),
        (4, "critical phenomena"),
    ]
    gt_answers = {
        1: {"supersymmetry", "string model", "experimental results",
            "cosmological model"},
        2: {"numerical calculations", "quantum chromodynamics"},
        3: {"duality", "membrane model"},
        4: {"critical phenomena"},
        5: {"lattice field theory", "CERN LHC Coll"},
    }

    metrics = evaluate_results(kw_mask, kw_vector, gt_answers)

    self.assertAlmostEqual(metrics['p_at_3'], 1 / 3)
    self.assertAlmostEqual(metrics['p_at_5'], 0.2)
    self.assertAlmostEqual(metrics['mrr'], 367 / 600)
    self.assertAlmostEqual(metrics['map'], 0.52312410236323)
    self.assertAlmostEqual(metrics['r_prec'], 2252 / 5175)
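# A minimal reference sketch of the precision-at-k convention the two tests
# above appear to assume: candidates are ranked per document by their score
# in kw_mask, precision@k is (# correct in the top k) / k, and the final
# value is the mean over every document in gt_answers (documents with no
# candidates contribute 0). This is an illustration of the metric, not the
# actual evaluate_results implementation.
def _precision_at_k_sketch(kw_mask, kw_vector, gt_answers, k):
    # Group scored candidates by document id
    ranked = {}
    for score, (doc_id, keyword) in zip(kw_mask, kw_vector):
        ranked.setdefault(doc_id, []).append((score, keyword))

    total = 0.0
    for doc_id, answers in gt_answers.items():
        # Take the top-k candidates by descending score
        top_k = sorted(ranked.get(doc_id, []), reverse=True)[:k]
        hits = sum(1 for _, kw in top_k if kw in answers)
        total += hits / float(k)
    return total / len(gt_answers)
# E.g. for test_evaluate_results1 with k=3 this yields
# (1/3 + 1/3 + 0 + 0) / 4 = 1/6, matching the asserted 'p_at_3' value.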
def test(
    testset_path=HEP_TEST_PATH,
    ontology=HEP_ONTOLOGY,
    model=MODEL_PATH,
    recreate_ontology=False,
    verbose=True,
):
    """
    Test the trained model on the test set under a given path.
    :param testset_path: path to the directory with the test set
    :param ontology: path to the ontology
    :param model: path to the pickled model, or the model object itself
    :param recreate_ontology: boolean flag whether to recreate the ontology
    :param verbose: whether to print computation times
    :return: dict of evaluation metrics, as produced by evaluate_results
    """
    if type(model) in [str, unicode]:
        model = load_from_disk(model)

    if type(ontology) in [str, unicode]:
        ontology = get_ontology(path=ontology, recreate=recreate_ontology)

    tick = time.clock()

    x, answers, kw_vector = build_test_matrices(
        get_documents(testset_path),
        model,
        testset_path,
        ontology,
    )

    if verbose:
        print("Matrices built in: {0:.2f}s".format(time.clock() - tick))

    # Predict confidence scores for the candidate keywords
    y_pred = model.scale_and_predict_confidence(x)

    # Evaluate the results
    return evaluate_results(y_pred, kw_vector, answers)
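# A hedged usage sketch, not part of the original module: with the default
# arguments, test() loads the pickled model from MODEL_PATH and evaluates it
# on the HEP test set. The metric keys follow the unit tests above; the
# function name here is hypothetical.
def example_single_run():
    metrics = test(verbose=True)
    print("MAP: {0:.4f}, MRR: {1:.4f}, P@5: {2:.4f}".format(
        metrics['map'], metrics['mrr'], metrics['p_at_5']))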
def batch_test(
    testset_path=HEP_TEST_PATH,
    batch_size=BATCH_SIZE,
    ontology=HEP_ONTOLOGY,
    model=MODEL_PATH,
    recreate_ontology=False,
    verbose=True,
):
    """
    Test the trained model on the test set under a given path, batch by batch.
    :param testset_path: path to the directory with the test set
    :param batch_size: size of the testing batch
    :param ontology: path to the ontology
    :param model: path to the pickled model, or the model object itself
    :param recreate_ontology: boolean flag whether to recreate the ontology
    :param verbose: whether to print computation times
    :return: dict of evaluation metrics, averaged over all batches
    """
    if type(model) in [str, unicode]:
        model = load_from_disk(model)

    if type(ontology) in [str, unicode]:
        ontology = get_ontology(path=ontology, recreate=recreate_ontology)

    doc_generator = get_documents(testset_path, as_generator=True)
    start_time = time.clock()

    all_metrics = ['map', 'mrr', 'ndcg', 'r_prec', 'p_at_3', 'p_at_5']
    metrics_agg = {m: [] for m in all_metrics}

    if verbose:
        print("Batches:", end=' ')

    no_more_samples = False
    batch_number = 0
    while not no_more_samples:
        batch_number += 1

        # Draw the next batch of documents from the generator
        batch = []
        for _ in xrange(batch_size):
            try:
                batch.append(next(doc_generator))
            except StopIteration:
                no_more_samples = True
                break

        if not batch:
            break

        X, answers, kw_vector = build_test_matrices(
            batch,
            model,
            testset_path,
            ontology,
        )

        # Predict confidence scores for the candidate keywords
        y_pred = model.scale_and_predict_confidence(X)

        # Evaluate the results on this batch
        metrics = evaluate_results(y_pred, kw_vector, answers)
        for k, v in metrics.iteritems():
            metrics_agg[k].append(v)

        if verbose:
            sys.stdout.write(b'.')
            sys.stdout.flush()

    if verbose:
        print()
        print("Testing finished in: {0:.2f}s".format(time.clock() - start_time))

    return {k: np.mean(v) for k, v in metrics_agg.iteritems()}
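# A hedged usage sketch, not part of the original module; the function name
# is hypothetical. One design note on the aggregation above: each metric is
# appended once per batch and then averaged with np.mean, so a smaller final
# batch carries the same weight as a full one, and the result approximates,
# but need not exactly equal, the single-pass test() value.
def example_batched_run():
    batch_metrics = batch_test(batch_size=BATCH_SIZE, verbose=True)
    for name in ['map', 'mrr', 'ndcg', 'r_prec', 'p_at_3', 'p_at_5']:
        print("{0}: {1:.4f}".format(name, batch_metrics[name]))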