Exemplo n.º 1
0
    def test_get_cluster_queries(self):
        """Check that every expected cluster query is generated.

        The operand order inside a generated query is not deterministic, so
        each expected query is listed with all of its acceptable spellings;
        finding any one spelling of a group is sufficient.
        """
        article_ids = {
            SearchQuery("a"): [1, 2, 3],
            SearchQuery("b"): [1, 4],
            SearchQuery("c"): [1]
        }

        cluster_keys = get_clusters(article_ids).keys()
        generated = set(get_cluster_queries(cluster_keys))

        # Articles matched by all three queries.
        all_three = (
            '((a) AND (b) AND (c))',
            '((a) AND (c) AND (b))',
            '((b) AND (a) AND (c))',
            '((b) AND (c) AND (a))',
            '((c) AND (a) AND (b))',
            '((c) AND (b) AND (a))',
        )
        # Articles matched by "a" only.
        only_a = ('((a)) NOT ((b) OR (c))', '((a)) NOT ((c) OR (b))')
        # Articles matched by "b" only.
        only_b = ('((b)) NOT ((c) OR (a))', '((b)) NOT ((a) OR (c))')

        for spellings in (all_three, only_a, only_b):
            # At least one spelling of the group must have been generated.
            self.assertTrue(not generated.isdisjoint(spellings))
Exemplo n.º 2
0
    def run(self, form):
        """Produce the clustermap output for the selection described by *form*.

        The representation depends on ``form.cleaned_data["output_type"]``:

        * ``application/json+clustermap``: JSON string with the image map
          coordinates, the base64-encoded image and, per cluster, its query
          and article ids.
        * ``application/spss-sav``: whatever ``table2sav`` returns for the
          clustermap table.
        * ``application/json+clustermap+table``: JSON string holding the CSV
          text and a label -> query mapping.
        * ``text/csv+tab``: tab-separated CSV text.
        * anything else: comma-separated (``excel`` dialect) CSV text.

        Raises:
            ValueError: if a clustermap is requested but there are no
                clusters to draw.
        """
        selection = SelectionSearch(form)
        queries = selection.get_article_ids_per_query()
        output_type = form.cleaned_data["output_type"]

        if output_type == "application/json+clustermap":
            cluster_items = tuple(get_clusters(queries).items())
            if not cluster_items:
                # zip(*()) yields nothing, so the two-name unpacking below
                # would otherwise fail with an opaque "not enough values to
                # unpack" error.
                raise ValueError(
                    "Cannot build clustermap of empty query result.")

            clusters, articles = zip(*cluster_items)
            cluster_queries = get_cluster_queries(clusters)
            image, html = get_clustermap_image(queries)
            coords = tuple(clustermap_html_to_coords(html))

            return json.dumps({
                "coords": coords,
                "image": b64encode(image).decode("ascii"),
                "clusters": [
                    {"query": q, "articles": tuple(a)}
                    for q, a in zip(cluster_queries, articles)
                ],
            })

        headers, rows = get_clustermap_table(queries)

        if output_type == "application/spss-sav":
            # Table addresses cells by column name while each row is indexed
            # by position, so map the stringified header to its index.
            _headers = {str(h): i for i, h in enumerate(headers)}

            return table2sav(
                Table(rows=list(rows),
                      columns=list(map(str, headers)),
                      columnTypes=[int] * len(headers),
                      cellfunc=lambda row, col: row[_headers[col]]))

        dialect = 'excel-tab' if output_type == "text/csv+tab" else 'excel'

        result = StringIO()
        csvf = csv.writer(result, dialect=dialect)
        csvf.writerow(list(map(str, headers)))
        csvf.writerows(sorted(rows))

        if output_type == "application/json+clustermap+table":
            return json.dumps({
                "csv": result.getvalue(),
                "queries": {q.label: q.query for q in queries}
            })

        return result.getvalue()
Exemplo n.º 3
0
    def run(self, form):
        """Produce the clustermap output for the selection described by *form*.

        The representation depends on ``form.cleaned_data["output_type"]``:

        * ``application/json+clustermap``: JSON string with the image map
          coordinates, the base64-encoded image and, per cluster, its query
          and article ids.
        * ``application/spss-sav``: whatever ``table2sav`` returns for the
          clustermap table.
        * ``application/json+clustermap+table``: JSON string holding the CSV
          text and a label -> query mapping.
        * ``text/csv+tab``: tab-separated CSV text.
        * anything else: comma-separated (``excel`` dialect) CSV text.

        Raises:
            ValueError: if a clustermap is requested but there are no
                clusters to draw.
        """
        selection = SelectionSearch.get_instance(form)
        queries = selection.get_article_ids_per_query()
        output_type = form.cleaned_data["output_type"]

        if output_type == "application/json+clustermap":
            # Test for emptiness explicitly instead of catching ValueError
            # around the whole expression: a ValueError raised inside
            # get_clusters() would otherwise be misreported as an empty
            # result.
            cluster_items = tuple(get_clusters(queries).items())
            if not cluster_items:
                raise ValueError("Cannot build clustermap of empty query result.")

            clusters, articles = zip(*cluster_items)
            cluster_queries = get_cluster_queries(clusters)
            image, html = get_clustermap_image(queries)
            coords = tuple(clustermap_html_to_coords(html))

            return json.dumps(
                {"coords": coords, "image": b64encode(image).decode("ascii"),
                 "clusters": [
                     {"query": q, "articles": tuple(a)}
                     for q, a in zip(cluster_queries, articles)
                 ]}
            )

        headers, rows = get_clustermap_table(queries)

        if output_type == "application/spss-sav":
            # Table addresses cells by column name while each row is indexed
            # by position, so map the stringified header to its index.
            _headers = {str(h): i for i, h in enumerate(headers)}

            return table2sav(Table(
                rows=list(rows),
                columns=list(map(str, headers)),
                columnTypes=[int]*len(headers),
                cellfunc=lambda row, col: row[_headers[col]]
            ))

        dialect = 'excel-tab' if output_type == "text/csv+tab" else 'excel'

        result = StringIO()
        csvf = csv.writer(result, dialect=dialect)
        csvf.writerow(list(map(str, headers)))
        csvf.writerows(sorted(rows))

        if output_type == "application/json+clustermap+table":
            return json.dumps({
                "csv": result.getvalue(),
                "queries": {q.label: q.query for q in queries}
            })

        return result.getvalue()
Exemplo n.º 4
0
    def test_get_clusters(self):
        """Verify which label combinations become clusters and what they hold."""
        article_ids = {"a": [1, 2, 3], "b": [1, 4], "c": [1]}
        found = dict(get_clusters(article_ids))

        # Label combinations that must be reported, with their article ids.
        expected = (
            (frozenset(["a", "b", "c"]), {1}),
            (frozenset(["a"]), {2, 3}),
            (frozenset(["b"]), {4}),
        )
        # Label combinations that must not be reported as a cluster.
        absent = (
            frozenset(["c"]),
            frozenset(["a", "b"]),
            frozenset(["c", "a"]),
            frozenset(["c", "b"]),
        )

        # Cluster membership.
        for labels, _ in expected:
            self.assertIn(labels, found)
        for labels in absent:
            self.assertNotIn(labels, found)

        # Cluster contents.
        for labels, articles in expected:
            self.assertEqual(found[labels], articles)
Exemplo n.º 5
0
    def test_get_clusters(self):
        """Verify which label combinations become clusters and what they hold."""
        article_ids = {"a": [1, 2, 3], "b": [1, 4], "c": [1]}
        found = dict(get_clusters(article_ids))

        # Label combinations that must be reported, with their article ids.
        expected = (
            (frozenset(["a", "b", "c"]), {1}),
            (frozenset(["a"]), {2, 3}),
            (frozenset(["b"]), {4}),
        )
        # Label combinations that must not be reported as a cluster.
        absent = (
            frozenset(["c"]),
            frozenset(["a", "b"]),
            frozenset(["c", "a"]),
            frozenset(["c", "b"]),
        )

        # Cluster membership.
        for labels, _ in expected:
            self.assertIn(labels, found)
        for labels in absent:
            self.assertNotIn(labels, found)

        # Cluster contents.
        for labels, articles in expected:
            self.assertEqual(found[labels], articles)
Exemplo n.º 6
0
    def test_get_cluster_queries(self):
        """Check that every expected cluster query is generated.

        The operand order inside a generated query is not deterministic, so
        each expected query is listed with all of its acceptable spellings;
        finding any one spelling of a group is sufficient.
        """
        article_ids = {
            SearchQuery("a"): [1, 2, 3],
            SearchQuery("b"): [1, 4],
            SearchQuery("c"): [1]
        }

        cluster_keys = get_clusters(article_ids).keys()
        generated = set(get_cluster_queries(cluster_keys))

        # Articles matched by all three queries.
        all_three = (
            '((a) AND (b) AND (c))',
            '((a) AND (c) AND (b))',
            '((b) AND (a) AND (c))',
            '((b) AND (c) AND (a))',
            '((c) AND (a) AND (b))',
            '((c) AND (b) AND (a))',
        )
        # Articles matched by "a" only.
        only_a = ('((a)) NOT ((b) OR (c))', '((a)) NOT ((c) OR (b))')
        # Articles matched by "b" only.
        only_b = ('((b)) NOT ((c) OR (a))', '((b)) NOT ((a) OR (c))')

        for spellings in (all_three, only_a, only_b):
            # At least one spelling of the group must have been generated.
            self.assertTrue(not generated.isdisjoint(spellings))
Exemplo n.º 7
0
 def test_get_table(self):
     """Build the cluster mapping for a small three-query fixture.

     NOTE(review): no assertion and no table-building call is visible in
     this snippet; it looks truncated -- confirm against the full test.
     """
     # Ids listed per query label; id 1 appears under all three labels.
     queries = {"a": [1, 2, 3], "b": [1, 4], "c": [1]}
     # Materialise the result of get_clusters as a plain dict.
     clusters = dict(get_clusters(queries))
Exemplo n.º 8
0
 def test_get_table(self):
     queries = {"a": [1, 2, 3], "b": [1, 4], "c": [1]}
     clusters = dict(get_clusters(queries))