def test_min_rank(self):
    """Check retrieval on an inverted index built with ``min_rank=1``.

    Two tables titled "Table A" and "Table B" are indexed; querying for
    the token that distinguishes each title must return exactly that
    table with a score of 1.0.
    """
    tables = [
        interaction_pb2.Table(table_id="table_0", document_title="Table A"),
        interaction_pb2.Table(table_id="table_1", document_title="Table B"),
    ]
    index = tfidf_baseline_utils.create_inverted_index(tables, min_rank=1)

    # (query, expected hits) pairs, checked in the same order as before.
    expectations = (
        ("A", [("table_0", 1.0)]),
        ("B", [("table_1", 1.0)]),
    )
    for query, want in expectations:
        self.assertEqual(index.retrieve(query), want)
def test_simple(self, drop_term_frequency, expected):
    """Check retrieval results on a small two-table inverted index.

    Args:
        drop_term_frequency: Forwarded unchanged to
            ``create_inverted_index``.
        expected: Iterable of ``(query, results)`` pairs; for each pair,
            ``index.retrieve(query)`` must equal ``results``.
    """
    tables = [
        interaction_pb2.Table(table_id="table_0", document_title="a a c"),
        interaction_pb2.Table(table_id="table_1", document_title="b c"),
    ]
    index = tfidf_baseline_utils.create_inverted_index(
        tables,
        drop_term_frequency=drop_term_frequency,
    )
    for query, want in expected:
        got = index.retrieve(query)
        self.assertEqual(got, want)
def create_index(tables, title_multiplicator, use_bm25):
    """Build a retrieval index over ``tables``.

    Args:
        tables: Tables to index; passed through to the index factory.
        title_multiplicator: Forwarded to whichever factory is used.
        use_bm25: When truthy, build the index with
            ``create_bm25_index``; otherwise build it with
            ``create_inverted_index``.

    Returns:
        The index object produced by the selected
        ``tfidf_baseline_utils`` factory.
    """
    if not use_bm25:
        # Only the inverted-index path is configured from command-line flags.
        return tfidf_baseline_utils.create_inverted_index(
            tables=tables,
            min_rank=FLAGS.min_term_rank,
            drop_term_frequency=FLAGS.drop_term_frequency,
            title_multiplicator=title_multiplicator,
        )
    return tfidf_baseline_utils.create_bm25_index(
        tables,
        title_multiplicator=title_multiplicator,
    )