def test_get_cluster_queries(self):
    # Feed three queries with overlapping article ids through get_clusters
    # and get_cluster_queries, then check that every expected query string
    # is present in one of its acceptable spellings.
    articles_per_query = {
        SearchQuery("a"): [1, 2, 3],
        SearchQuery("b"): [1, 4],
        SearchQuery("c"): [1],
    }
    cluster_keys = get_clusters(articles_per_query).keys()
    generated = set(get_cluster_queries(cluster_keys))

    # get_cluster_queries generates queries non-deterministically, so each
    # tuple below lists all operand orderings accepted for one query.
    acceptable_spellings = [
        (
            '((a) AND (b) AND (c))',
            '((a) AND (c) AND (b))',
            '((b) AND (a) AND (c))',
            '((b) AND (c) AND (a))',
            '((c) AND (a) AND (b))',
            '((c) AND (b) AND (a))',
        ),
        (
            '((a)) NOT ((b) OR (c))',
            '((a)) NOT ((c) OR (b))',
        ),
        (
            '((b)) NOT ((c) OR (a))',
            '((b)) NOT ((a) OR (c))',
        ),
    ]

    for spellings in acceptable_spellings:
        # One matching spelling per group is enough.
        found = any(candidate in generated for candidate in spellings)
        self.assertTrue(found)
def test_no_objects(self):
    # Aggregate over a two-term category with objects=False; the expected
    # tuples hold the raw query strings instead of SearchQuery instances.
    self.set_up()
    aap = SearchQuery("aap")
    noot = SearchQuery("noot")
    term_category = TermCategory([aap, noot])

    outcome = self.aggregate(categories=[term_category], objects=False)

    expected = {("aap", 2), ("noot", 2)}
    self.assertEqual(outcome, expected)
def test_term_category(self):
    # Aggregate over a single TermCategory built from three search terms and
    # compare against the expected (term, count) pairs.
    self.set_up()
    aap = SearchQuery("aap")
    noot = SearchQuery("noot")
    lamp = SearchQuery("lamp")
    term_category = TermCategory([aap, noot, lamp])

    outcome = self.aggregate(categories=[term_category])

    expected = {(aap, 2), (noot, 2), (lamp, 1)}
    self.assertEqual(outcome, expected)
def test_get_clustermap_table(self):
    # Build the clustermap table for three overlapping queries and verify
    # both the header row and the (sorted) data rows.
    articles_per_query = {
        SearchQuery("a"): [1, 2, 3],
        SearchQuery("b"): [1, 4],
        SearchQuery("c"): [1],
    }

    headers, rows = get_clustermap_table(articles_per_query)

    self.assertEqual(['a', 'b', 'c', 'Total'], headers)

    # Rows are sorted before comparing, so the order in which
    # get_clustermap_table returns them does not matter here.
    expected_rows = [
        (0, 1, 0, 1),  # article 4
        (1, 0, 0, 2),  # articles 2 and 3
        (1, 1, 1, 1),  # article 1
    ]
    self.assertEqual(sorted(rows), expected_rows)
def test_resolve_queries_recursive(self):
    # Resolve the "<root+>" query and check that the resulting query string
    # contains exactly the words found in the second field of self.codes.
    query = SearchQuery("<root+>")

    # Every " OR "-separated word from the second field of each code entry.
    expected_words = {
        word
        for entry in self.codes
        for word in entry[1].split(" OR ")
    }

    resolved = list(
        resolve_queries([query], self.codebook, self.l_lang, self.r_lang))
    resolved_text = resolved[0].query

    # Drop the first and last character (presumably the enclosing
    # parentheses -- confirm against resolve_queries) before splitting.
    inner_text = resolved_text[1:-1]
    actual_words = set(inner_text.split(" OR "))

    self.assertSetEqual(actual_words, expected_words)
def test_resolve_queries(self):
    # For every entry in self.codes, resolve the query "<label>" and check
    # that the resolved query string is the expected text in parentheses.
    expected_by_query = {}
    for label, expected_text, _ in self.codes:
        expected_by_query[SearchQuery(u"<{}>".format(label))] = expected_text

    for query, expected_text in expected_by_query.items():
        resolved = list(
            resolve_queries([query], self.codebook, self.l_lang, self.r_lang))
        resolved_text = resolved[0].query
        self.assertEqual(resolved_text, u"({})".format(expected_text))
def test_multiple_categories(self):
    # Aggregate over a term category combined with a daily interval
    # category, in both category orders, and compare the result sets.
    self.set_up()
    aap = SearchQuery("aap")
    noot = SearchQuery("noot")
    lamp = SearchQuery("lamp")
    by_term = TermCategory([aap, noot, lamp])
    by_day = IntervalCategory("day", fill_zeros=False)

    # ISO-formatted dates used in the expected tuples.
    first_day = datetime.date(2010, 1, 1).isoformat()
    second_day = datetime.date(2010, 1, 2).isoformat()

    # Term first, day second: only non-zero combinations are expected.
    outcome = self.aggregate(categories=[by_term, by_day])
    expected_term_then_day = {
        (aap, first_day, 2),
        (noot, first_day, 2),
        (lamp, second_day, 1),
    }
    self.assertEqual(outcome, expected_term_then_day)

    # Day first, term second: every day/term combination is expected,
    # including those with a count of zero.
    outcome = self.aggregate(categories=[by_day, by_term])
    expected_day_then_term = {
        (first_day, aap, 2),
        (first_day, noot, 2),
        (first_day, lamp, 0),
        (second_day, aap, 0),
        (second_day, noot, 0),
        (second_day, lamp, 1),
    }
    self.assertEqual(outcome, expected_day_then_term)