def test_query_addition(self):
    """Check the statistics of the sum of two correctness getters.

    Two stub result databases are built from the same query metadata and
    differ only in the returned record hashes: database "a" returns
    "badhash3" where "hash3" is the matching hash, while database "b"
    returns only matching hashes. The getters built on them are added
    and the aggregate statistics of the sum are asserted.
    """
    constraint_list = None

    # Columns shared (as the very same list objects) by both databases.
    leading_columns = [
        [1, 2, 3],                                        # performer query ids
        [23, 24, 25],                                     # full query ids
        ["EQ", "EQ", "P2"],                               # query categories
        ["TRUE", "", ""],                                 # is-correct flags
        ["*", "*", "*"],                                  # selection columns
        [["1", "2", "3"], ["4"], ["5"]],                  # matching record ids
        [["hash1", "hash2", "hash3"], ["hash4"], ["hash5"]],  # matching hashes
    ]
    trailing_columns = [
        [None, None, None],                               # p9 matching counts
        [[] for _ in range(3)],                           # policies
        [None, None, None],                               # statuses
    ]

    # Per-database returned ids and hashes.
    a_returned = [
        [["3", "2", "1"], ["4"], ["5"]],
        [["badhash3", "hash2", "hash1"], ["hash4"], ["hash5"]],
    ]
    b_returned = [
        [["3", "2", "1"], ["4"], ["5"]],
        [["hash3", "hash2", "hash1"], ["hash4"], ["hash5"]],
    ]

    a_results_db = StubResultsDatabase(
        leading_columns + a_returned + trailing_columns)
    b_results_db = StubResultsDatabase(
        leading_columns + b_returned + trailing_columns)

    a_cg = correctness.QueryCorrectnessGetter(a_results_db, constraint_list)
    b_cg = correctness.QueryCorrectnessGetter(b_results_db, constraint_list)
    sum_cg = a_cg + b_cg

    # NOTE(review): this expectation is declared but never asserted --
    # confirm whether get_num_correct() should be checked here as well.
    expected_num_correct = 5

    # (expected value, accessor) pairs, evaluated and asserted in order.
    expectations = (
        (6, sum_cg.get_count),
        (1.0, sum_cg.get_precision),
        (1.0, sum_cg.get_recall),
        (0.1, sum_cg.get_badhash_fraction),
        (0, sum_cg.get_num_bad_rankings),
        (0, sum_cg.get_num_failed),
    )
    for expected, accessor in expectations:
        self.assertEqual(expected, accessor())
    self.assertFalse(sum_cg.is_perfect())
def get_correctness_getter(self, cat=None, subcat=None, subsubcat=None,
                           dbnr=None, dbrs=None, fieldtype=None):
    """Returns the desired correctness getter.

    Starts from an empty QueryCorrectnessGetter and adds every stored
    getter whose key agrees with all of the supplied filters. A filter
    left as None matches any value in that key position.

    Args:
        cat: category number, or None for any category.
        subcat: subcategory number; requires cat to be given.
        subsubcat: subsubcategory number; requires cat and subcat.
        dbnr: compared against the fourth element of each stored key.
        dbrs: compared against the fifth element of each stored key.
        fieldtype: when given, the per-field-type (atomic) getters are
            searched instead of the regular ones; cat must then be in
            t1s.ATOMIC_CATEGORIES.

    Returns:
        The sum of all matching correctness getters (an empty getter if
        nothing matches).

    Raises:
        AssertionError: if a filter value is invalid or is supplied
            without the parent filter it depends on.
    """
    correctness_getter = correctness.QueryCorrectnessGetter()

    # Presence of a parent filter is tested with "is not None" rather than
    # truthiness, so that a category/subcategory number of 0 is not
    # mistaken for a missing value.
    if cat is not None:
        assert cat in t1s.CATEGORIES.numbers_list(), (
            "invalid cat number %s" % str(cat))
    if subcat is not None:
        assert cat is not None, (
            "invalid subcat %s without cat" % str(subcat))
        assert subcat in t1s.SUBCATEGORIES[cat].numbers_list(), (
            "invalid subcat number %s" % str(subcat))
    if subsubcat is not None:
        assert cat is not None, (
            "invalid subsubcat %s without cat" % str(subsubcat))
        assert subcat is not None, (
            "invalid subsubcat %s without subcat" % str(subsubcat))
        assert subsubcat in t1s.SUBSUBCATEGORIES[
            (cat, subcat)].numbers_list(), (
                "invalid subsubcat number %s" % str(subsubcat))

    if fieldtype is not None:
        assert cat in t1s.ATOMIC_CATEGORIES, (
            "cannot obtain correctness getters by field type for composite "
            "query category %s" % t1s.CATEGORIES.to_string(cat))
        correctness_getters = self._atomic_correctness_getters
        comparison_bases = [cat, subcat, subsubcat, dbnr, dbrs, fieldtype]
    else:
        correctness_getters = self._correctness_getters
        comparison_bases = [cat, subcat, subsubcat, dbnr, dbrs]

    # Accumulate every stored getter whose key matches all non-None filters.
    for comparison_objects, stored_getter in correctness_getters.items():
        if all(base in (None, obj)
               for (base, obj) in zip(comparison_bases, comparison_objects)):
            correctness_getter += stored_getter
    return correctness_getter
def _check_baseline_correctness(self):
    """Checks and populates the baseline correctness.

    Builds a correctness getter restricted to the configured baseline
    performer name (with update_db=True) and logs an error when that
    getter does not report itself as perfect.
    """
    baseline_filter = (
        t1s.DBP_TABLENAME, t1s.DBP_PERFORMERNAME, self.config.baselinename)
    baseline_getter = correctness.QueryCorrectnessGetter(
        self.config.results_db, [baseline_filter], update_db=True)
    if baseline_getter.is_perfect():
        return
    LOGGER.error("Baseline is not perfectly correct")
def _discover_correctness(self):
    """Populates the correctness_getters attribute.

    For every (cat, subcat, subsubcat, dbnr, dbrs) entry in
    self.present_cats, a correctness getter is stored in
    self._correctness_getters under that tuple. For categories in
    t1s.ATOMIC_CATEGORIES, one getter per test field type is first
    stored in self._atomic_correctness_getters (keyed by the tuple
    extended with the field type), and the stored category getter is
    the sum of those per-field-type getters.
    """
    for present_cat in self.present_cats:
        (cat, subcat, subsubcat, dbnr, dbrs) = present_cat
        category = (cat, subcat, subsubcat, dbnr, dbrs)

        # Null subcategory / subsubcategory values are queried as "".
        cat_string = t1s.CATEGORIES.to_string(cat)
        subcat_string = (
            "" if subcat in results_schema.NULL_VALUES
            else t1s.SUBCATEGORIES[cat].to_string(subcat))
        subsubcat_string = (
            "" if subsubcat in results_schema.NULL_VALUES
            else str(subsubcat))

        base_constraints = [
            (t1s.DBP_TABLENAME, t1s.DBP_PERFORMERNAME,
             self.config.performername),
            (t1s.DBF_TABLENAME, t1s.DBF_CAT, cat_string),
            (t1s.DBF_TABLENAME, t1s.DBF_SUBCAT, subcat_string),
            (t1s.DBF_TABLENAME, t1s.DBF_SUBSUBCAT, subsubcat_string),
            (t1s.DBF_TABLENAME, t1s.DBF_NUMRECORDS, dbnr),
            (t1s.DBF_TABLENAME, t1s.DBF_RECORDSIZE, dbrs)]

        if cat not in t1s.ATOMIC_CATEGORIES:
            category_getter = correctness.QueryCorrectnessGetter(
                self.config.results_db,
                constraint_list=base_constraints,
                update_db=True)
        else:
            # First pass: build and store one getter per field type.
            for fieldtype in t1s.TEST_FIELD_TYPES.numbers_list():
                fieldtype_constraint = (
                    t1s.DBA_TABLENAME, t1s.DBA_FIELDTYPE,
                    t1s.TEST_FIELD_TYPES.to_string(fieldtype))
                per_fieldtype_getter = correctness.QueryCorrectnessGetter(
                    self.config.results_db,
                    constraint_list=base_constraints + [fieldtype_constraint],
                    update_db=True)
                self._atomic_correctness_getters[
                    category + (fieldtype,)] = per_fieldtype_getter
            # Second pass: the category getter is the sum of the stored
            # per-field-type getters, starting from an empty getter.
            stored_getters = [
                self._atomic_correctness_getters[category + (fieldtype,)]
                for fieldtype in t1s.TEST_FIELD_TYPES.numbers_list()]
            category_getter = sum(
                stored_getters, correctness.QueryCorrectnessGetter())

        self._correctness_getters[category] = category_getter
def test_integration_with_db(self):
    """Check that building a getter with update_db=True fills in the
    is-correct column of an in-memory results database.

    Before the getter is constructed the column reads ["", ""]; after
    construction it is expected to read [True, False].
    """
    results_db = t1d.Ta1ResultsDB(":memory:")
    # Close the database even when setup or an assertion fails, so a
    # failing test does not leave the connection open.
    try:
        set_up_static_db(results_db)
        is_correct_field = (t1s.DBP_TABLENAME, t1s.DBP_ISCORRECT)
        self.assertEqual(
            results_db.get_query_values([is_correct_field])[0],
            ["", ""])
        # Constructed only for its side effect of updating the database;
        # the getter object itself is not inspected.
        correctness.QueryCorrectnessGetter(results_db, update_db=True)
        self.assertEqual(
            results_db.get_query_values([is_correct_field])[0],
            [True, False])
    finally:
        results_db.close()
def test_query_select_id(self):
    """Check getter statistics for queries whose selection column is "id".

    The first query returns "badhash3" where the matching hash is
    "hash3"; the test nevertheless expects a bad-hash fraction of 0,
    three correct queries and a perfect getter.
    """
    constraint_list = None
    stub_columns = [
        [1, 2, 3],                                        # performer query ids
        [23, 24, 25],                                     # full query ids
        ["EQ", "EQ", "EQ"],                               # query categories
        ["TRUE", "", ""],                                 # is-correct flags
        ["id", "id", "id"],                               # selection columns
        [["1", "2", "3"], ["4"], ["5"]],                  # matching record ids
        [["hash1", "hash2", "hash3"], ["hash4"], ["hash5"]],  # matching hashes
        [["1", "2", "3"], ["4"], ["5"]],                  # returned record ids
        [["hash1", "hash2", "badhash3"], ["hash4"], ["hash5"]],  # returned hashes
        [None, None, None],                               # p9 matching counts
        [[] for _ in range(3)],                           # policies
        [None, None, None],                               # statuses
    ]
    results_db = StubResultsDatabase(stub_columns)
    cg = correctness.QueryCorrectnessGetter(results_db, constraint_list)

    # (expected value, accessor) pairs, evaluated and asserted in order.
    expectations = (
        (3, cg.get_count),
        (1.0, cg.get_precision),
        (1.0, cg.get_recall),
        (0, cg.get_badhash_fraction),
        (0, cg.get_num_bad_rankings),
        (0, cg.get_num_failed),
    )
    for expected, accessor in expectations:
        self.assertEqual(expected, accessor())
    self.assertTrue(cg.is_perfect())
    self.assertEqual(3, cg.get_num_correct())
def test_no_queries_found(self):
    """Check getter statistics when the results database has no queries.

    With every column empty, the test expects zero counts, precision and
    recall of 1.0, a bad-hash fraction of 0.0 and a perfect getter.
    """
    constraint_list = None

    # The returned ids/hashes are the very same (empty) list objects as
    # the matching ids/hashes.
    matching_record_ids = []
    matching_record_hashes = []
    stub_columns = [
        [],                      # performer query ids
        [],                      # full query ids
        [],                      # query categories
        [],                      # is-correct flags
        [],                      # selection columns
        matching_record_ids,
        matching_record_hashes,
        matching_record_ids,     # returned record ids (aliased)
        matching_record_hashes,  # returned record hashes (aliased)
        [],                      # p9 matching counts
        [],                      # policies
        [],                      # statuses
    ]
    results_db = StubResultsDatabase(stub_columns)
    cg = correctness.QueryCorrectnessGetter(results_db, constraint_list)

    # (expected value, accessor) pairs, evaluated and asserted in order.
    expectations = (
        (0, cg.get_count),
        (1.0, cg.get_precision),
        (1.0, cg.get_recall),
        (0.0, cg.get_badhash_fraction),
        (0, cg.get_num_bad_rankings),
        (0, cg.get_num_failed),
    )
    for expected, accessor in expectations:
        self.assertEqual(expected, accessor())
    self.assertTrue(cg.is_perfect())
    self.assertEqual(0, cg.get_num_correct())