Пример #1
0
    def test_get_cluster_queries(self):
        # Build clusters from three overlapping queries and verify that the
        # generated cluster queries include one accepted spelling for each
        # expected cluster.
        hits_per_query = {
            SearchQuery("a"): [1, 2, 3],
            SearchQuery("b"): [1, 4],
            SearchQuery("c"): [1]
        }

        cluster_keys = get_clusters(hits_per_query).keys()
        generated = set(get_cluster_queries(cluster_keys))

        # get_cluster_queries generates queries non-deterministically, so
        # each entry lists every operand ordering that is acceptable.
        accepted_spellings = [
            (
                '((a) AND (b) AND (c))',
                '((a) AND (c) AND (b))',
                '((b) AND (a) AND (c))',
                '((b) AND (c) AND (a))',
                '((c) AND (a) AND (b))',
                '((c) AND (b) AND (a))',
            ),
            ('((a)) NOT ((b) OR (c))', '((a)) NOT ((c) OR (b))'),
            ('((b)) NOT ((c) OR (a))', '((b)) NOT ((a) OR (c))')
        ]

        # At least one spelling per group must have been generated.
        for spellings in accepted_spellings:
            self.assertFalse(generated.isdisjoint(spellings))
Пример #2
0
    def test_no_objects(self):
        # With objects=False the aggregation result is compared against plain
        # label strings instead of SearchQuery instances.
        self.set_up()

        terms = [SearchQuery("aap"), SearchQuery("noot")]
        category = TermCategory(terms)

        outcome = self.aggregate(categories=[category], objects=False)
        expected = {("aap", 2), ("noot", 2)}
        self.assertEqual(outcome, expected)
Пример #3
0
    def test_term_category(self):
        # Aggregating on a single TermCategory should pair every term object
        # with its count.
        self.set_up()

        aap = SearchQuery("aap")
        noot = SearchQuery("noot")
        lamp = SearchQuery("lamp")
        category = TermCategory([aap, noot, lamp])

        outcome = self.aggregate(categories=[category])
        expected = {(aap, 2), (noot, 2), (lamp, 1)}
        self.assertEqual(outcome, expected)
Пример #4
0
    def test_terms_aggregate(self):
        # Grouping by "terms" requires explicit term queries; when supplied,
        # each term is reported as its own bucket.
        # Only the first set is used, but all ten values are unpacked so the
        # shape of the setup() result is still checked.
        _m1, _m2, _m3, first_set, _s2, _a, _b, _c, _d, _e = self.setup()
        noot = SearchQuery.from_string("noot")
        bla = SearchQuery.from_string("bla")

        def run_aggregate(**kwargs):
            # Aggregate within the first set, forwarding all other options.
            return ES().aggregate_query(filters={"sets": first_set.id}, **kwargs)

        # Should raise an error if no terms are supplied
        self.assertRaises(ValueError, run_aggregate, group_by=["terms"])

        # Should convert terms to 'buckets'
        buckets = run_aggregate(group_by=["terms"], terms=[noot, bla])
        self.assertEqual(set(buckets), {(noot, 3), (bla, 1)})
Пример #5
0
    def test_terms_aggregate(self):
        # A "terms" grouping without terms must fail with ValueError; with
        # terms, the result holds one (term, count) bucket per query.
        # All ten setup() values are unpacked even though only the first set
        # is used, which keeps the check on the result's length.
        _m1, _m2, _m3, article_set, _s2, _a, _b, _c, _d, _e = self.setup()
        term_noot = SearchQuery.from_string("noot")
        term_bla = SearchQuery.from_string("bla")
        set_filter = {"sets": article_set.id}

        def aggregate_in_set(**options):
            # A fresh ES() is created per call, as in the original helper.
            return ES().aggregate_query(filters=set_filter, **options)

        # Should raise an error if no terms are supplied
        self.assertRaises(ValueError, aggregate_in_set, group_by=["terms"])

        # Should convert terms to 'buckets'
        result = aggregate_in_set(group_by=["terms"], terms=[term_noot, term_bla])
        self.assertEqual(set(result), {(term_noot, 3), (term_bla, 1)})
Пример #6
0
    def set_up(self):
        # Fixture: one article set with three short articles, three term
        # queries, a set filter and an Association over "de" and "het".
        self.aset = amcattest.create_test_set()

        def make_article(text):
            # Every fixture article lives in the same test set.
            return amcattest.create_test_article(text=text, articleset=self.aset)

        self.a1 = make_article("de de het")
        self.a2 = make_article("de")
        self.a3 = make_article("een")

        parse = SearchQuery.from_string
        self.de = parse("de")
        self.het = parse("het")
        self.aap = parse("aap")

        self.filters = {"sets": [self.aset.id]}
        # Flush before building the Association -- presumably so the new
        # articles are visible to searches (TODO confirm).
        amcates.ES().flush()

        self.ass = Association([self.de, self.het], self.filters)
Пример #7
0
    def test_get_clustermap_table(self):
        # The clustermap table should have one column per query plus a
        # 'Total' column, and one row per distinct membership pattern.
        hits_per_query = {
            SearchQuery("a"): [1, 2, 3],
            SearchQuery("b"): [1, 4],
            SearchQuery("c"): [1]
        }
        headers, rows = get_clustermap_table(hits_per_query)

        expected_headers = ['a', 'b', 'c', 'Total']
        expected_rows = [
            (0, 1, 0, 1),  # article 4
            (1, 0, 0, 2),  # articles 2 and 3
            (1, 1, 1, 1),  # article 1
        ]

        self.assertEqual(expected_headers, headers)
        # Rows are sorted first, so their order in the table is not asserted.
        self.assertEqual(sorted(rows), expected_rows)
Пример #8
0
    def set_up(self):
        # Prepare the shared fixture: a test set containing three articles,
        # the "de"/"het"/"aap" queries, a filter on that set, and an
        # Association between "de" and "het".
        self.aset = amcattest.create_test_set()

        def add_article(text):
            # Create an article with the given text inside the fixture set.
            return amcattest.create_test_article(
                text=text, articleset=self.aset)

        self.a1 = add_article("de de het")
        self.a2 = add_article("de")
        self.a3 = add_article("een")

        to_query = SearchQuery.from_string
        self.de = to_query("de")
        self.het = to_query("het")
        self.aap = to_query("aap")

        self.filters = {"sets": [self.aset.id]}
        # NOTE(review): the flush presumably makes the articles searchable
        # before the Association is created -- confirm.
        amcates.ES().flush()

        self.ass = Association([self.de, self.het], self.filters)
Пример #9
0
    def test_resolve_queries_recursive(self):
        # Resolving "<root+>" should produce a query whose OR-separated words
        # are exactly the words of all codes in self.codes.
        root_query = SearchQuery("<root+>")

        expected_words = {
            word
            for code in self.codes
            for word in code[1].split(" OR ")
        }

        resolved = list(resolve_queries(
            [root_query], self.codebook, self.l_lang, self.r_lang))
        resolved_text = resolved[0].query

        # Drop the first and last character before splitting; they are
        # presumably the surrounding parentheses (TODO confirm).
        actual_words = set(resolved_text[1:-1].split(" OR "))
        self.assertSetEqual(actual_words, expected_words)
Пример #10
0
    def test_resolve_queries(self):
        # Each code label, referenced as "<label>", should resolve to that
        # code's query text wrapped in parentheses.
        expected_by_query = {}
        for label, expected, _ in self.codes:
            expected_by_query[SearchQuery(u"<{}>".format(label))] = expected

        for query, expected in expected_by_query.items():
            resolved = list(resolve_queries(
                [query], self.codebook, self.l_lang, self.r_lang))
            resolved_text = resolved[0].query
            self.assertEqual(resolved_text, u"({})".format(expected))
Пример #11
0
    def test_multiple_categories(self):
        # Aggregate over a term category and a daily interval category in
        # both orders; the expected tuples follow the category order.
        self.set_up()

        aap = SearchQuery("aap")
        noot = SearchQuery("noot")
        lamp = SearchQuery("lamp")

        by_term = TermCategory([aap, noot, lamp])
        by_day = IntervalCategory("day", fill_zeros=False)

        first_day = datetime.date(2010, 1, 1).isoformat()
        second_day = datetime.date(2010, 1, 2).isoformat()

        # Terms first: only the non-zero combinations are expected.
        outcome = self.aggregate(categories=[by_term, by_day])
        expected = {
            (aap, first_day, 2),
            (noot, first_day, 2),
            (lamp, second_day, 1),
        }
        self.assertEqual(outcome, expected)

        # Days first: every (day, term) pair is expected, zeros included.
        outcome = self.aggregate(categories=[by_day, by_term])
        expected = {
            (first_day, aap, 2),
            (first_day, noot, 2),
            (first_day, lamp, 0),
            (second_day, aap, 0),
            (second_day, noot, 0),
            (second_day, lamp, 1),
        }
        self.assertEqual(outcome, expected)
Пример #12
0
 def test_get_label_delimiter(self):
     # Check which delimiter SearchQuery._get_label_delimiter reports for a
     # string and a sequence of candidate delimiter characters.
     # Judging by the cases below, it returns the first candidate (in
     # candidate order) that occurs in the string, or None if none occurs.
     #
     # assertEqual / assertIsNone are used instead of the assertEquals
     # alias, which is deprecated and was removed in Python 3.12.
     get_delimiter = SearchQuery._get_label_delimiter
     self.assertEqual(get_delimiter("abc", "a"), "a")
     self.assertEqual(get_delimiter("abc", "ab"), "a")
     self.assertEqual(get_delimiter("abc", "ba"), "b")
     self.assertIsNone(get_delimiter("abc", "d"))
Пример #13
0
 def test_from_string(self):
     # Parsing "<label>#<query>" should give a label and a query that both
     # equal the accent-stripped form of the corresponding input part.
     raw_label = u'\u5728\u8377\u5170\u98ce\u8f66'
     raw_query = u'\xda\xd1\xcd\xa2\xd3\xd0\xc9 \xde\xc9X\xde'

     combined = u'{}#{}'.format(raw_label, raw_query)
     parsed = SearchQuery.from_string(combined)

     self.assertEqual(parsed.label, strip_accents(raw_label))
     self.assertEqual(parsed.query, strip_accents(raw_query))
Пример #14
0
 def test_from_string(self):
     # A non-ASCII label and query joined by '#' are parsed by from_string;
     # both resulting fields must match strip_accents() of their source text.
     label_text = u'\u5728\u8377\u5170\u98ce\u8f66'
     query_body = u'\xda\xd1\xcd\xa2\xd3\xd0\xc9 \xde\xc9X\xde'

     search_query = SearchQuery.from_string(
         u'{}#{}'.format(label_text, query_body))

     expected_label = strip_accents(label_text)
     self.assertEqual(search_query.label, expected_label)
     expected_query = strip_accents(query_body)
     self.assertEqual(search_query.query, expected_query)