Exemplo n.º 1
0
    def test_get_cluster_queries(self):
        """Check the query strings generated for the clusters of a, b and c.

        The clusters come from the keys of ``get_clusters``; for each
        expected query at least one accepted spelling must be present in
        the output of ``get_cluster_queries``.
        """
        hits_by_query = {
            SearchQuery("a"): [1, 2, 3],
            SearchQuery("b"): [1, 4],
            SearchQuery("c"): [1]
        }

        cluster_keys = get_clusters(hits_by_query).keys()
        generated = set(get_cluster_queries(cluster_keys))

        # get_cluster_queries generates queries non-deterministically, so
        # every acceptable ordering of the operands is listed.
        all_three = (
            '((a) AND (b) AND (c))',
            '((a) AND (c) AND (b))',
            '((b) AND (a) AND (c))',
            '((b) AND (c) AND (a))',
            '((c) AND (a) AND (b))',
            '((c) AND (b) AND (a))',
        )
        a_without_others = (
            '((a)) NOT ((b) OR (c))',
            '((a)) NOT ((c) OR (b))',
        )
        b_without_others = (
            '((b)) NOT ((c) OR (a))',
            '((b)) NOT ((a) OR (c))',
        )

        for alternatives in (all_three, a_without_others, b_without_others):
            self.assertTrue(
                any(candidate in generated for candidate in alternatives))
Exemplo n.º 2
0
    def test_no_objects(self):
        """Aggregate a term category with ``objects=False``.

        The expected rows carry the plain term strings rather than the
        ``SearchQuery`` instances that were passed in.
        """
        self.set_up()

        terms = [SearchQuery("aap"), SearchQuery("noot")]
        category = TermCategory(terms)

        outcome = self.aggregate(categories=[category], objects=False)

        expected = {("aap", 2), ("noot", 2)}
        self.assertEqual(outcome, expected)
Exemplo n.º 3
0
    def test_term_category(self):
        """Aggregate over a single term category built from three queries.

        The expected rows pair each ``SearchQuery`` object with its value.
        """
        self.set_up()

        aap = SearchQuery("aap")
        noot = SearchQuery("noot")
        lamp = SearchQuery("lamp")
        category = TermCategory([aap, noot, lamp])

        outcome = self.aggregate(categories=[category])

        expected = {(aap, 2), (noot, 2), (lamp, 1)}
        self.assertEqual(outcome, expected)
Exemplo n.º 4
0
    def test_get_clustermap_table(self):
        """Check the headers and the (sorted) rows of the clustermap table."""
        hits_by_query = {
            SearchQuery("a"): [1, 2, 3],
            SearchQuery("b"): [1, 4],
            SearchQuery("c"): [1]
        }
        table_headers, table_rows = get_clustermap_table(hits_by_query)

        self.assertEqual(['a', 'b', 'c', 'Total'], table_headers)

        # Rows are compared in sorted order, so the order in which the
        # table emits them does not matter.
        expected_rows = [
            (0, 1, 0, 1),  # article 4
            (1, 0, 0, 2),  # articles 2 and 3
            (1, 1, 1, 1),  # article 1
        ]
        self.assertEqual(sorted(table_rows), expected_rows)
Exemplo n.º 5
0
    def test_resolve_queries_recursive(self):
        """Resolve ``<root+>`` and compare its terms with all code terms.

        The expected set is every " OR "-separated word found in the
        second field of each entry in ``self.codes``.
        """
        root_query = SearchQuery("<root+>")

        wanted_words = {
            word
            for entry in self.codes
            for word in entry[1].split(" OR ")
        }

        resolved = list(
            resolve_queries([root_query], self.codebook, self.l_lang,
                            self.r_lang))
        query_text = resolved[0].query

        # The first and last characters are dropped before splitting
        # (presumably the enclosing parentheses -- verify against
        # resolve_queries).
        found_words = set(query_text[1:-1].split(" OR "))
        self.assertSetEqual(found_words, wanted_words)
Exemplo n.º 6
0
    def test_resolve_queries(self):
        """Resolve each ``<label>`` query and compare it with its code text.

        Every resolved query must equal the corresponding entry of
        ``self.codes`` wrapped in parentheses.
        """
        expected_by_query = {}
        for label, expansion, _ in self.codes:
            expected_by_query[SearchQuery(u"<{}>".format(label))] = expansion

        for label_query, expansion in expected_by_query.items():
            resolved = list(
                resolve_queries([label_query], self.codebook, self.l_lang,
                                self.r_lang))
            wanted_text = u"({})".format(expansion)
            self.assertEqual(resolved[0].query, wanted_text)
Exemplo n.º 7
0
    def test_multiple_categories(self):
        """Aggregate over a term category and a day interval, in both orders.

        With the terms first only three rows are expected; with the
        interval first every (day, term) pair is expected, including rows
        whose value is 0.
        """
        self.set_up()

        aap = SearchQuery("aap")
        noot = SearchQuery("noot")
        lamp = SearchQuery("lamp")

        by_term = TermCategory([aap, noot, lamp])
        by_day = IntervalCategory("day", fill_zeros=False)

        # ISO strings for the two days that occur in the expected rows.
        jan_first = datetime.date(2010, 1, 1).isoformat()
        jan_second = datetime.date(2010, 1, 2).isoformat()

        term_then_day = self.aggregate(categories=[by_term, by_day])
        self.assertEqual(
            term_then_day,
            {
                (aap, jan_first, 2),
                (noot, jan_first, 2),
                (lamp, jan_second, 1),
            })

        day_then_term = self.aggregate(categories=[by_day, by_term])
        self.assertEqual(
            day_then_term,
            {
                (jan_first, aap, 2),
                (jan_first, noot, 2),
                (jan_first, lamp, 0),
                (jan_second, aap, 0),
                (jan_second, noot, 0),
                (jan_second, lamp, 1),
            })