Exemplo n.º 1
0
    def test_get_cluster_queries(self):
        """Check that the expected cluster queries are among those generated.

        Builds clusters from three queries and their article ids, then
        verifies that for each expected query at least one of its
        acceptable spellings appears in the generated set.
        """
        articles_per_query = {
            SearchQuery("a"): [1, 2, 3],
            SearchQuery("b"): [1, 4],
            SearchQuery("c"): [1]
        }

        cluster_keys = get_clusters(articles_per_query).keys()
        generated = set(get_cluster_queries(cluster_keys))

        # get_cluster_queries generates queries non-deterministically, so
        # each tuple lists every accepted spelling of one expected query.
        accepted_spellings = [
            (
                '((a) AND (b) AND (c))',
                '((a) AND (c) AND (b))',
                '((b) AND (a) AND (c))',
                '((b) AND (c) AND (a))',
                '((c) AND (a) AND (b))',
                '((c) AND (b) AND (a))',
            ),
            ('((a)) NOT ((b) OR (c))', '((a)) NOT ((c) OR (b))'),
            ('((b)) NOT ((c) OR (a))', '((b)) NOT ((a) OR (c))')
        ]

        for spellings in accepted_spellings:
            found = any(candidate in generated for candidate in spellings)
            self.assertTrue(found)
Exemplo n.º 2
0
    def test_no_objects(self):
        """Aggregate over a term category with ``objects=False``.

        The expected result is keyed by the plain strings "aap" and "noot",
        each with a count of 2.
        """
        self.set_up()

        terms = [SearchQuery("aap"), SearchQuery("noot")]
        category = TermCategory(terms)

        counts = self.aggregate(categories=[category], objects=False)
        self.assertEqual(counts, {("aap", 2), ("noot", 2)})
Exemplo n.º 3
0
    def test_term_category(self):
        """Aggregate over a single term category built from three queries.

        The expected result pairs each SearchQuery with its count:
        "aap" and "noot" with 2, "lamp" with 1.
        """
        self.set_up()

        aap = SearchQuery("aap")
        noot = SearchQuery("noot")
        lamp = SearchQuery("lamp")
        category = TermCategory([aap, noot, lamp])

        counts = self.aggregate(categories=[category])
        self.assertEqual(counts, {(aap, 2), (noot, 2), (lamp, 1)})
Exemplo n.º 4
0
    def test_terms_aggregate(self):
        """Check aggregation when grouping by ``terms``.

        Grouping by ``terms`` without passing ``terms`` must raise
        ``ValueError``; with terms supplied, the aggregation is expected
        to contain one ``(query, count)`` pair per term.
        """
        # Only s1 is used below; the remaining fixtures are unpacked as-is.
        m1, m2, m3, s1, s2, a, b, c, d, e = self.setup()
        q1 = SearchQuery.from_string("noot")
        q2 = SearchQuery.from_string("bla")

        def query(**kw):
            """Run ``aggregate_query`` filtered on ``sets=s1.id``."""
            return ES().aggregate_query(filters={"sets": s1.id}, **kw)

        # Should raise an error if no terms are supplied
        self.assertRaises(ValueError, query, group_by=["terms"])

        # Should convert terms to 'buckets'
        aggr = query(group_by=["terms"], terms=[q1, q2])
        self.assertEqual(set(aggr), {(q1, 3), (q2, 1)})
Exemplo n.º 5
0
    def test_terms_aggregate(self):
        """Check aggregation when grouping by ``terms``.

        Without a ``terms`` argument the query must raise ``ValueError``.
        With two term queries, the result is expected to hold one
        ``(query, count)`` pair for each of them.
        """
        # Only s1 is used below; the remaining fixtures are unpacked as-is.
        m1, m2, m3, s1, s2, a, b, c, d, e = self.setup()
        q1 = SearchQuery.from_string("noot")
        q2 = SearchQuery.from_string("bla")

        def query(**kw):
            """Run ``aggregate_query`` filtered on ``sets=s1.id``."""
            return ES().aggregate_query(filters={"sets": s1.id}, **kw)

        # Should raise an error if no terms are supplied
        self.assertRaises(ValueError, query, group_by=["terms"])

        # Should convert terms to 'buckets'
        buckets = query(group_by=["terms"], terms=[q1, q2])
        self.assertEqual(set(buckets), {(q1, 3), (q2, 1)})
Exemplo n.º 6
0
    def set_up(self):
        """Create the fixtures shared by the tests.

        Stores on ``self``: a test article set (``aset``), three articles
        in it (``a1``..``a3``), three parsed queries (``de``, ``het``,
        ``aap``), a filter dict restricted to the set, and an
        ``Association`` over the ``de`` and ``het`` queries.
        """
        articleset = amcattest.create_test_set()
        new_article = amcattest.create_test_article
        parse = SearchQuery.from_string

        self.aset = articleset
        self.a1 = new_article(text="de de het", articleset=articleset)
        self.a2 = new_article(text="de", articleset=articleset)
        self.a3 = new_article(text="een", articleset=articleset)

        self.de = parse("de")
        self.het = parse("het")
        self.aap = parse("aap")

        self.filters = {"sets": [articleset.id]}
        # NOTE(review): presumably makes the new articles searchable before
        # the association is built -- confirm against amcates.ES.flush.
        amcates.ES().flush()

        self.ass = Association([self.de, self.het], self.filters)
Exemplo n.º 7
0
    def test_get_clustermap_table(self):
        """Check the headers and rows returned by ``get_clustermap_table``.

        The headers are expected to be the query labels followed by
        'Total'; the rows are compared after sorting.
        """
        headers, rows = get_clustermap_table({
            SearchQuery("a"): [1, 2, 3],
            SearchQuery("b"): [1, 4],
            SearchQuery("c"): [1]
        })

        expected_rows = [
            (0, 1, 0, 1),  # article 4
            (1, 0, 0, 2),  # articles 2 and 3
            (1, 1, 1, 1),  # article 1
        ]

        self.assertEqual(['a', 'b', 'c', 'Total'], headers)
        self.assertEqual(sorted(rows), expected_rows)
Exemplo n.º 8
0
    def set_up(self):
        """Build the article set, articles, queries and association fixtures.

        Everything is stored on ``self`` (``aset``, ``a1``..``a3``, ``de``,
        ``het``, ``aap``, ``filters``, ``ass``) for use by the tests.
        """
        self.aset = amcattest.create_test_set()

        # One article per text, created in this order within the same set.
        texts = ("de de het", "de", "een")
        self.a1, self.a2, self.a3 = [
            amcattest.create_test_article(text=text, articleset=self.aset)
            for text in texts
        ]

        self.de, self.het, self.aap = [
            SearchQuery.from_string(word) for word in ("de", "het", "aap")
        ]

        self.filters = {"sets": [self.aset.id]}
        # NOTE(review): presumably needed so the articles above are indexed
        # before they are queried -- confirm against amcates.ES.flush.
        amcates.ES().flush()

        self.ass = Association([self.de, self.het], self.filters)
Exemplo n.º 9
0
    def test_resolve_queries_recursive(self):
        """Resolve ``<root+>`` and compare its words with all code words.

        The expected words are every " OR "-separated word found in the
        second element of each entry of ``self.codes``.
        """
        query = SearchQuery("<root+>")

        expected_words = set()
        for code in self.codes:
            expected_words.update(code[1].split(" OR "))

        resolved = list(
            resolve_queries([query], self.codebook, self.l_lang, self.r_lang))
        resolved_text = resolved[0].query

        # Drop the first and last character before splitting on " OR ".
        actual_words = set(resolved_text[1:-1].split(" OR "))
        self.assertSetEqual(actual_words, expected_words)
Exemplo n.º 10
0
    def test_resolve_queries(self):
        """Resolve each ``<label>`` query and compare with its expected text.

        For every entry of ``self.codes`` a query ``<label>`` is built; the
        resolved query text must equal the entry's result wrapped in
        parentheses.
        """
        expected_by_query = {}
        for label, result, _ in self.codes:
            expected_by_query[SearchQuery(u"<{}>".format(label))] = result

        for query, expected in expected_by_query.items():
            resolved = list(
                resolve_queries([query], self.codebook, self.l_lang,
                                self.r_lang))
            resolved_text = resolved[0].query
            self.assertEqual(resolved_text, u"({})".format(expected))
Exemplo n.º 11
0
    def test_multiple_categories(self):
        """Aggregate over a term category and a day interval, in both orders.

        With the term category first, only three non-zero combinations are
        expected. With the interval first, every (day, term) combination is
        expected, including those with a count of 0.
        """
        self.set_up()

        aap = SearchQuery("aap")
        noot = SearchQuery("noot")
        lamp = SearchQuery("lamp")

        terms = TermCategory([aap, noot, lamp])
        days = IntervalCategory("day", fill_zeros=False)

        # ISO-formatted dates used as the interval values in the results.
        day1 = datetime.date(2010, 1, 1).isoformat()
        day2 = datetime.date(2010, 1, 2).isoformat()

        by_term_then_day = self.aggregate(categories=[terms, days])
        self.assertEqual(
            by_term_then_day,
            {(aap, day1, 2), (noot, day1, 2), (lamp, day2, 1)})

        by_day_then_term = self.aggregate(categories=[days, terms])
        self.assertEqual(
            by_day_then_term,
            {(day1, aap, 2), (day1, noot, 2), (day1, lamp, 0),
             (day2, aap, 0), (day2, noot, 0), (day2, lamp, 1)})
Exemplo n.º 12
0
 def test_get_label_delimiter(self):
     """Check ``SearchQuery._get_label_delimiter`` on a few inputs.

     The cases suggest that the first candidate character (in the order
     given by the second argument) that occurs in the string is returned,
     and ``None`` when no candidate occurs.
     """
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(SearchQuery._get_label_delimiter("abc", "a"), "a")
     self.assertEqual(SearchQuery._get_label_delimiter("abc", "ab"), "a")
     self.assertEqual(SearchQuery._get_label_delimiter("abc", "ba"), "b")
     self.assertIsNone(SearchQuery._get_label_delimiter("abc", "d"))
Exemplo n.º 13
0
 def test_from_string(self):
     """Parse a ``label#query`` string containing non-ASCII text.

     Both the parsed label and the parsed query are expected to equal
     the ``strip_accents`` version of the corresponding input part.
     """
     raw_label = u'\u5728\u8377\u5170\u98ce\u8f66'
     raw_query = u'\xda\xd1\xcd\xa2\xd3\xd0\xc9 \xde\xc9X\xde'

     parsed = SearchQuery.from_string(u'{}#{}'.format(raw_label, raw_query))

     self.assertEqual(parsed.label, strip_accents(raw_label))
     self.assertEqual(parsed.query, strip_accents(raw_query))
Exemplo n.º 14
0
 def test_from_string(self):
     """Check ``SearchQuery.from_string`` on a non-ASCII ``label#query``.

     The label and query of the result are compared against the
     accent-stripped forms of the two input parts.
     """
     label_part = u'\u5728\u8377\u5170\u98ce\u8f66'
     query_part = u'\xda\xd1\xcd\xa2\xd3\xd0\xc9 \xde\xc9X\xde'
     source = u'{}#{}'.format(label_part, query_part)

     result = SearchQuery.from_string(source)

     expected_label = strip_accents(label_part)
     self.assertEqual(result.label, expected_label)
     expected_query = strip_accents(query_part)
     self.assertEqual(result.query, expected_query)