Пример #1
0
    def test_get_cluster_queries(self):
        """Every expected cluster must produce one of its accepted query strings."""
        article_ids = {
            SearchQuery("a"): [1, 2, 3],
            SearchQuery("b"): [1, 4],
            SearchQuery("c"): [1],
        }
        generated = set(get_cluster_queries(get_clusters(article_ids).keys()))

        # get_cluster_queries orders the terms of a query non-deterministically,
        # so each cluster lists every ordering that counts as correct.
        all_three = (
            '((a) AND (b) AND (c))',
            '((a) AND (c) AND (b))',
            '((b) AND (a) AND (c))',
            '((b) AND (c) AND (a))',
            '((c) AND (a) AND (b))',
            '((c) AND (b) AND (a))',
        )
        only_a = ('((a)) NOT ((b) OR (c))', '((a)) NOT ((c) OR (b))')
        only_b = ('((b)) NOT ((c) OR (a))', '((b)) NOT ((a) OR (c))')

        for alternatives in (all_three, only_a, only_b):
            # At least one accepted spelling has to be among the generated ones.
            self.assertTrue(not generated.isdisjoint(alternatives))
Пример #2
0
    def run(self, form):
        """Render the clustermap of the form's selection in the requested format.

        The per-query article ids are obtained through ``SelectionSearch`` and
        rendered according to ``form.cleaned_data["output_type"]``:

        * ``application/json+clustermap``: JSON string with the image-map
          coordinates, the base64-encoded image and, per cluster, its query
          and articles.
        * ``application/spss-sav``: the result of ``table2sav`` for the
          clustermap table.
        * ``application/json+clustermap+table``: JSON string with the CSV text
          and a label -> query mapping.
        * ``text/csv+tab``: the table as tab-separated text.
        * any other value: the table as comma-separated text.

        Raises:
            ValueError: if a clustermap is requested but there are no clusters
                to draw.
        """
        selection = SelectionSearch(form)
        queries = selection.get_article_ids_per_query()
        output_type = form.cleaned_data["output_type"]

        if output_type == "application/json+clustermap":
            try:
                # zip(*...) yields nothing for an empty cluster mapping, which
                # makes the two-name unpacking fail with an opaque ValueError.
                clusters, articles = zip(*get_clusters(queries).items())
            except ValueError as e:
                raise ValueError(
                    "Cannot build clustermap of empty query result.") from e

            cluster_queries = get_cluster_queries(clusters)
            image, html = get_clustermap_image(queries)
            coords = tuple(clustermap_html_to_coords(html))

            return json.dumps({
                "coords":
                coords,
                "image":
                b64encode(image).decode("ascii"),
                "clusters": [{
                    "query": q,
                    "articles": tuple(a)
                } for q, a in zip(cluster_queries, articles)]
            })

        headers, rows = get_clustermap_table(queries)

        if output_type == "application/spss-sav":
            # Map each header's string form to its column position so that
            # cellfunc can look a cell up by column name.
            column_index = {str(h): i for i, h in enumerate(headers)}

            return table2sav(
                Table(rows=list(rows),
                      columns=list(map(str, headers)),
                      columnTypes=[int] * len(headers),
                      cellfunc=lambda row, col: row[column_index[col]]))

        dialect = 'excel-tab' if output_type == "text/csv+tab" else 'excel'

        result = StringIO()
        csvf = csv.writer(result, dialect=dialect)
        csvf.writerow(list(map(str, headers)))
        csvf.writerows(sorted(rows))

        if output_type == "application/json+clustermap+table":
            return json.dumps({
                "csv": result.getvalue(),
                "queries": {q.label: q.query
                            for q in queries}
            })

        return result.getvalue()
Пример #3
0
    def run(self, form):
        """Render the clustermap of the form's selection in the requested format.

        The per-query article ids are obtained through
        ``SelectionSearch.get_instance`` and rendered according to
        ``form.cleaned_data["output_type"]``:

        * ``application/json+clustermap``: JSON string with the image-map
          coordinates, the base64-encoded image and, per cluster, its query
          and articles.
        * ``application/spss-sav``: the result of ``table2sav`` for the
          clustermap table.
        * ``application/json+clustermap+table``: JSON string with the CSV text
          and a label -> query mapping.
        * ``text/csv+tab``: the table as tab-separated text.
        * any other value: the table as comma-separated text.

        Raises:
            ValueError: if a clustermap is requested but building the clusters
                fails with a ValueError (e.g. nothing to unpack because the
                query result is empty); the original error is chained as the
                cause.
        """
        selection = SelectionSearch.get_instance(form)
        queries = selection.get_article_ids_per_query()
        output_type = form.cleaned_data["output_type"]

        if output_type == "application/json+clustermap":
            try:
                clusters, articles = zip(*get_clusters(queries).items())
            except ValueError as e:
                # Chain explicitly so the traceback shows the low-level error
                # as the direct cause of the user-facing one.
                raise ValueError("Cannot build clustermap of empty query result.") from e

            cluster_queries = get_cluster_queries(clusters)
            image, html = get_clustermap_image(queries)
            coords = tuple(clustermap_html_to_coords(html))

            return json.dumps(
                {"coords": coords, "image": b64encode(image).decode("ascii"),
                 "clusters": [
                     {"query": q, "articles": tuple(a)}
                     for q, a in zip(cluster_queries, articles)
                 ]}
            )

        headers, rows = get_clustermap_table(queries)

        if output_type == "application/spss-sav":
            # Map each header's string form to its column position so that
            # cellfunc can look a cell up by column name.
            column_index = {str(h): i for i, h in enumerate(headers)}

            return table2sav(Table(
                rows=list(rows),
                columns=list(map(str, headers)),
                columnTypes=[int]*len(headers),
                cellfunc=lambda row, col: row[column_index[col]]
            ))

        dialect = 'excel-tab' if output_type == "text/csv+tab" else 'excel'

        result = StringIO()
        csvf = csv.writer(result, dialect=dialect)
        csvf.writerow(list(map(str, headers)))
        csvf.writerows(sorted(rows))

        if output_type == "application/json+clustermap+table":
            return json.dumps({
                "csv": result.getvalue(),
                "queries": {q.label: q.query for q in queries}
            })

        return result.getvalue()
Пример #4
0
    def test_get_clusters(self):
        """Check which clusters get_clusters reports and what each one holds."""
        clusters = dict(get_clusters({"a": [1, 2, 3], "b": [1, 4], "c": [1]}))

        # Clusters that have to be reported.
        for present in ({'a', 'b', 'c'}, {'a'}, {'b'}):
            self.assertIn(frozenset(present), clusters)

        # Clusters that must not be reported.
        for absent in ({'c'}, {'a', 'b'}, {'c', 'a'}, {'c', 'b'}):
            self.assertNotIn(frozenset(absent), clusters)

        # Values stored for every reported cluster.
        expected_values = (
            ({'a', 'b', 'c'}, {1}),
            ({'b'}, {4}),
            ({'a'}, {2, 3}),
        )
        for members, value in expected_values:
            self.assertEqual(clusters[frozenset(members)], value)
Пример #5
0
    def test_get_clusters(self):
        """Verify the cluster keys and cluster values produced by get_clusters."""
        sample = {"a": [1, 2, 3], "b": [1, 4], "c": [1]}
        found = dict(get_clusters(sample))

        everything = frozenset({'a', 'b', 'c'})
        just_a = frozenset({'a'})
        just_b = frozenset({'b'})

        # Keys: the full combination plus the 'a' and 'b' singletons are
        # present; the remaining combinations are not.
        self.assertIn(everything, found)
        self.assertIn(just_a, found)
        self.assertIn(just_b, found)
        self.assertNotIn(frozenset({'c'}), found)
        self.assertNotIn(frozenset({'a', 'b'}), found)
        self.assertNotIn(frozenset({'c', 'a'}), found)
        self.assertNotIn(frozenset({'c', 'b'}), found)

        # Values stored under each present key.
        self.assertEqual(found[everything], {1})
        self.assertEqual(found[just_b], {4})
        self.assertEqual(found[just_a], {2, 3})
Пример #6
0
    def test_get_cluster_queries(self):
        """One accepted spelling per expected cluster must be generated."""
        clusters = get_clusters({
            SearchQuery("a"): [1, 2, 3],
            SearchQuery("b"): [1, 4],
            SearchQuery("c"): [1],
        }).keys()
        produced = set(get_cluster_queries(clusters))

        # The order of terms in a generated query is non-deterministic, hence
        # every group enumerates all spellings that are considered correct.
        accepted_spellings = [
            (
                '((a) AND (b) AND (c))', '((a) AND (c) AND (b))',
                '((b) AND (a) AND (c))', '((b) AND (c) AND (a))',
                '((c) AND (a) AND (b))', '((c) AND (b) AND (a))',
            ),
            ('((a)) NOT ((b) OR (c))', '((a)) NOT ((c) OR (b))'),
            ('((b)) NOT ((c) OR (a))', '((b)) NOT ((a) OR (c))'),
        ]

        for spellings in accepted_spellings:
            matched = any(spelling in produced for spelling in spellings)
            self.assertTrue(matched)
Пример #7
0
 def test_get_table(self):
     """Build a sample query mapping and cluster it with get_clusters.

     NOTE(review): no assertions are visible in this excerpt; the test
     looks truncated -- confirm against the full test file.
     """
     # Sample mapping of query label -> ids (presumably article ids; verify).
     queries = {"a": [1, 2, 3], "b": [1, 4], "c": [1]}
     # Materialise whatever get_clusters returns as a plain dict.
     clusters = dict(get_clusters(queries))
Пример #8
0
 def test_get_table(self):
     """Build a sample query mapping and cluster it with get_clusters.

     NOTE(review): no assertions are visible in this excerpt; the test
     looks truncated -- confirm against the full test file.
     """
     # Sample mapping of query label -> ids (presumably article ids; verify).
     queries = {"a": [1, 2, 3], "b": [1, 4], "c": [1]}
     # Materialise whatever get_clusters returns as a plain dict.
     clusters = dict(get_clusters(queries))