Пример #1
0
    def run(self, form):
        """Summarise the current selection as a JSON string.

        The payload maps query labels to query strings, medium ids to names
        and article set ids to names, and adds a ``statistics`` entry holding
        the start date, end date and number of articles.

        :param form: form whose ``cleaned_data`` holds ``articlesets`` and
            ``mediums``; it is also passed to ``SelectionSearch``.
        :return: JSON text encoded with ``DjangoJSONEncoder``.
        """
        search = SelectionSearch(form)
        query_list = search.get_queries()
        cleaned = form.cleaned_data
        sets = cleaned["articlesets"]
        media = cleaned["mediums"]
        stats = search.get_statistics()

        # Dates are only reported when the statistics object carries them.
        has_dates = hasattr(stats, "start_date")
        first_date = stats.start_date if has_dates else None
        last_date = stats.end_date if has_dates else None

        payload = {
            "queries": {query.label: query.query for query in query_list},
            "mediums": {medium.id: medium.name for medium in media},
            "articlesets": {aset.id: aset.name for aset in sets},
            "statistics": {
                "start_date": first_date,
                "end_date": last_date,
                "narticles": stats.n,
            },
        }
        return json.dumps(payload, cls=DjangoJSONEncoder)
Пример #2
0
    def run(self, form):
        """Aggregate the selected articles and serialise the result.

        Cached values are reused when available; otherwise the aggregation is
        computed over the form's ``primary``/``secondary`` categories and
        stored in the cache.

        :param form: form providing ``primary``, ``secondary`` and
            ``output_type`` in its ``cleaned_data``.
        :return: the result of ``aggregation_to_csv`` for ``text/csv``,
            otherwise a JSON string.
        """
        search = SelectionSearch(form)

        try:
            # Reuse a previously stored result when there is one.
            primary, secondary, categories, aggregation = self.get_cache()
        except NotInCacheError:
            self.monitor.update(message="Executing query..")
            narticles = search.get_count()
            found_msg = "Found {narticles} articles. Aggregating..".format(narticles=narticles)
            self.monitor.update(message=found_msg)

            primary = form.cleaned_data["primary"]
            secondary = form.cleaned_data["secondary"]
            # Axes the user left empty are dropped.
            categories = [axis for axis in (primary, secondary) if axis]
            aggregation = list(search.get_aggregate(categories, flat=False))

            self.set_cache([primary, secondary, categories, aggregation])
        else:
            self.monitor.update(2)

        output_type = form.cleaned_data["output_type"]

        # Matrices are awkward to build in JavaScript (no hashtables), so for
        # table output the aggregation is converted to a matrix here.
        if output_type == "text/json+aggregation+table":
            aggregation = aggregation_to_matrix(aggregation, categories)

        if output_type == "text/csv":
            return aggregation_to_csv(aggregation, categories, [CountArticlesValue()])

        self.monitor.update(message="Serialising..")
        return json.dumps(aggregation, cls=AggregationEncoder, check_circular=False)
Пример #3
0
    def run(self, form):
        """Return a JSON description of the selection made in ``form``.

        Included are the queries (label -> query string), the chosen mediums
        and article sets (id -> name) and a ``statistics`` object with the
        date range and the number of articles.

        :param form: form whose ``cleaned_data`` holds ``articlesets`` and
            ``mediums``.
        :return: JSON text encoded with ``DjangoJSONEncoder``.
        """
        search = SelectionSearch(form)
        found_queries = search.get_queries()
        chosen_sets = form.cleaned_data["articlesets"]
        chosen_mediums = form.cleaned_data["mediums"]
        stats = search.get_statistics()

        # Without a start date on the statistics object no range is reported.
        date_range = (None, None)
        if hasattr(stats, "start_date"):
            date_range = (stats.start_date, stats.end_date)
        range_start, range_end = date_range

        summary = {}
        summary["queries"] = dict((q.label, q.query) for q in found_queries)
        summary["mediums"] = dict((m.id, m.name) for m in chosen_mediums)
        summary["articlesets"] = dict((a.id, a.name) for a in chosen_sets)
        summary["statistics"] = {
            "start_date": range_start,
            "end_date": range_end,
            "narticles": stats.n,
        }
        return json.dumps(summary, cls=DjangoJSONEncoder)
Пример #4
0
    def run(self, form):
        """Render the article table for the current selection.

        The request data is translated to API keywords, extended with the
        selection's queries (``label#query``) and article ids, and used both
        as the export URL query string and as arguments of the data table.

        :param form: the selection form; also handed to the template.
        :return: ``TABLE_TEMPLATE`` rendered with ``form``, ``url`` and
            ``table``.
        """
        assert isinstance(self.data, QueryDict), (
            "Class should have been instantiated with a django QueryDict as 'data'"
        )

        search = SelectionSearch(form)

        # Translate field names to API keywords; unmapped names pass through.
        api_args = {}
        for field, values in self.data.lists():
            api_args[API_KEYWORD_MAP.get(field, field)] = values
        api_args["q"] = [
            "{}#{}".format(query.label, query.query)
            for query in search.get_queries()
        ]
        # Ids from the request take precedence over ids from the filters.
        filter_ids = search.get_filters().get("ids", [])
        api_args["ids"] = api_args.get("ids", filter_ids)
        url = urlencode(api_args, doseq=True)

        project_url = reverse("navigator:project-details", args=[self.project.id])
        rowlink = ARTICLE_ROWLINK.format(project_url, "{id}")

        table_options = dict(
            url="/api/v4/search",
            rowlink=rowlink,
            rowlink_open_in="new",
            checkboxes=True,
            allow_export_via_post=True,
            allow_html_export=True,
        )
        table = Datatable(SearchResource, **table_options)
        table = table.add_arguments(minimal="1").add_arguments(project=str(self.project.id))

        # One table argument per (keyword, value) pair.
        for keyword, values in api_args.items():
            for value in values:
                table = table.add_arguments(**{keyword: value})

        context = Context({"form": form, "url": url, "table": table})
        return TABLE_TEMPLATE.render(context)
Пример #5
0
    def run(self, form):
        """Aggregate the selected articles, sort and serialise the result.

        Cached values are reused when available; otherwise the aggregation is
        computed over the form's ``primary``/``secondary`` categories, sorted
        according to ``order_by`` and stored in the cache.

        :param form: form providing ``order_by``, ``primary``, ``secondary``
            and ``output_type`` in its ``cleaned_data``.
        :return: the result of ``aggregation_to_csv`` for ``text/csv``,
            otherwise a JSON string.
        """
        search = SelectionSearch(form)

        try:
            # Reuse a previously stored result when there is one.
            primary, secondary, categories, aggregation = self.get_cache()
        except NotInCacheError:
            self.monitor.update(message="Executing query..")
            narticles = search.get_count()
            found_msg = "Found {narticles} articles. Aggregating..".format(narticles=narticles)
            self.monitor.update(message=found_msg)

            options = form.cleaned_data
            order_by = options["order_by"]
            primary = options["primary"]
            secondary = options["secondary"]
            # Axes the user left empty are dropped.
            categories = [axis for axis in (primary, secondary) if axis]
            unsorted = list(search.get_aggregate(categories, flat=False))
            aggregation = sorted_aggregation(*order_by, unsorted)

            self.set_cache([primary, secondary, categories, aggregation])
        else:
            self.monitor.update(2)

        output_type = form.cleaned_data["output_type"]

        # Matrices are awkward to build in JavaScript (no hashtables), so for
        # table output the aggregation is converted to a matrix here.
        if output_type == "text/json+aggregation+table":
            aggregation = aggregation_to_matrix(aggregation, categories)

        if output_type == "text/csv":
            return aggregation_to_csv(aggregation, categories, [CountArticlesValue()])

        self.monitor.update(message="Serialising..")
        return json.dumps(aggregation, cls=AggregationEncoder, check_circular=False)
Пример #6
0
    def get_association(self, form):
        """Build an ``Association`` from the selection described by ``form``.

        :param form: form providing ``weigh`` and ``interval`` in its
            ``cleaned_data``.
        :return: ``Association`` over the selection's queries and filters.
        """
        search = SelectionSearch(form)
        selection_filters = search.get_filters()
        selection_queries = search.get_queries()

        options = form.cleaned_data
        use_weights = options["weigh"]
        time_interval = options["interval"]

        return Association(
            selection_queries,
            selection_filters,
            weighted=use_weights,
            interval=time_interval,
        )
Пример #7
0
    def get_association(self, form):
        """Create the ``Association`` object for the form's selection.

        The selection's filters and queries are combined with the ``weigh``
        and ``interval`` options taken from ``form.cleaned_data``.
        """
        search = SelectionSearch(form)
        active_filters = search.get_filters()
        active_queries = search.get_queries()

        association_options = {
            "weighted": form.cleaned_data["weigh"],
            "interval": form.cleaned_data["interval"],
        }
        return Association(active_queries, active_filters, **association_options)
Пример #8
0
    def run(self, form):
        """Produce the clustermap for the selection in the requested format.

        Depending on ``output_type`` this returns:

        * ``application/json+clustermap``: JSON with image coordinates, the
          base64-encoded image and the clusters with their articles;
        * ``application/spss-sav``: the result of ``table2sav`` on the
          clustermap table;
        * ``application/json+clustermap+table``: JSON with the table as CSV
          text plus the queries;
        * anything else: the table as CSV text (tab separated for
          ``text/csv+tab``).
        """
        search = SelectionSearch(form)
        queries = search.get_article_ids_per_query()
        output_type = form.cleaned_data["output_type"]

        if output_type == "application/json+clustermap":
            clusters, articles = zip(*get_clusters(queries).items())
            cluster_queries = get_cluster_queries(clusters)
            image, html = get_clustermap_image(queries)
            coords = tuple(clustermap_html_to_coords(html))
            encoded_image = b64encode(image).decode("ascii")

            cluster_list = []
            for cluster_query, cluster_articles in zip(cluster_queries, articles):
                cluster_list.append({
                    "query": cluster_query,
                    "articles": tuple(cluster_articles),
                })

            return json.dumps({
                "coords": coords,
                "image": encoded_image,
                "clusters": cluster_list,
            })

        headers, rows = get_clustermap_table(queries)

        if output_type == "application/spss-sav":
            # Map each column name to its position so cells can be looked up
            # by column name.
            column_index = {str(header): position for position, header in enumerate(headers)}

            def cell(row, col):
                return row[column_index[col]]

            sav_table = Table(
                rows=list(rows),
                columns=[str(header) for header in headers],
                columnTypes=[int] * len(headers),
                cellfunc=cell,
            )
            return table2sav(sav_table)

        dialect = "excel-tab" if output_type == "text/csv+tab" else "excel"

        buffer = StringIO()
        writer = csv.writer(buffer, dialect=dialect)
        writer.writerow([str(header) for header in headers])
        writer.writerows(sorted(rows))
        csv_text = buffer.getvalue()

        if output_type == "application/json+clustermap+table":
            return json.dumps({
                "csv": csv_text,
                "queries": {q.label: q.query for q in queries},
            })

        return csv_text
Пример #9
0
    def run(self, form):
        """Render the search result table for the current selection.

        The request data is translated to API keywords and extended with the
        selection's queries (``label#query``); the result is used as the URL
        query string and as arguments of the data table.

        :return: ``TABLE_TEMPLATE`` rendered with ``form``, ``url`` and
            ``table``.
        """
        search = SelectionSearch(form)

        # Translate field names to API keywords; unmapped names pass through.
        api_args = {}
        for field, values in self.data.iterlists():
            api_args[API_KEYWORD_MAP.get(field, field)] = values
        api_args["q"] = [
            "{}#{}".format(query.label, query.query)
            for query in search.get_queries()
        ]
        url = urllib.urlencode(api_args, doseq=True)

        table = Datatable(SearchResource, url="/api/v4/search")
        table = table.add_arguments(minimal="1").add_arguments(project=str(self.project.id))

        # One table argument per (keyword, value) pair.
        for keyword, values in api_args.items():
            for value in values:
                table = table.add_arguments(**{keyword: value})

        context = Context({"form": form, "url": url, "table": table})
        return TABLE_TEMPLATE.render(context)
Пример #10
0
    def run(self, form):
        """Store the selection's queries as labelled codes in a codebook.

        A new codebook is created when ``new_codebook`` is given; otherwise
        the form's ``existing_codebook`` is used. A query whose label equals
        the label of one of the codebook's roots updates that root; every
        other query becomes a new code that is added to the codebook.

        :return: message stating how many codes were updated and added.
        """
        options = form.cleaned_data

        codebook_name = options["new_codebook"]
        if not codebook_name:
            codebook = options["existing_codebook"]
            codebook.cache()
        else:
            codebook = Codebook(name=codebook_name, project=self.project)
            codebook.save()

        language = options["indicator_language"]
        root_by_label = {root.label: root for root in codebook.get_roots()}
        selection_queries = SelectionSearch.get_instance(form).get_queries()
        query_by_label = {q.label: q for q in selection_queries}

        n_updated = 0
        n_added = 0
        for label, query in query_by_label.items():
            if label in root_by_label:
                # Known label: overwrite the indicator on the existing code.
                root_by_label[label].add_label(language, query.query, replace=True)
                n_updated += 1
                continue

            # Unknown label: create a code and attach it to the codebook.
            code = Code(label=label)
            code.save()
            code.add_label(language, query.query, replace=True)
            codebook.add_code(code)
            n_added += 1

        return "Updated {} code(s), added {} new code(s).".format(n_updated, n_added)
Пример #11
0
    def _run_query(self, form_data, expected_indices=None, expected_count=None, msg=None):
        """Run a selection query and check its results against expectations.

        :param form_data: data used to instantiate ``SelectionForm``.
        :param expected_indices: indices into ``self.articles`` of the
            articles the query must return; ``None`` skips this check.
        :param expected_count: number of articles the query must report;
            ``None`` skips this check.
        :param msg: optional failure message forwarded to the assertions.
        """
        self._setUp()
        sets = ArticleSet.objects.filter(pk=self.articleset.pk)
        form = SelectionForm(articlesets=sets, project=self.articleset.project, data=form_data)
        form.full_clean()
        self.assertFalse(form.errors, "Form contains errors")

        search = SelectionSearch(form)

        # Compare against None rather than relying on truthiness, so that an
        # expected empty result ([] or 0) is actually verified.
        if expected_indices is not None:
            article_ids = search.get_article_ids()
            articles = Article.objects.filter(id__in=article_ids)
            expected = [self.articles[i] for i in expected_indices]
            self.assertSetEqual(set(articles), set(expected), msg=msg)

        if expected_count is not None:
            self.assertEqual(search.get_count(), expected_count, msg=msg)
Пример #12
0
    def run(self, form):
        """Create coding job(s) for the articles matched by the selection.

        The matched article ids are handed to ``_create_codingjob_batches``
        together with a template ``CodingJob`` and the requested ``job_size``;
        progress is reported on ``self.monitor`` for every batch.

        :param form: form providing ``job_size``, ``name``, ``unitschema``,
            ``articleschema`` and ``coder`` in its ``cleaned_data``.
        :return: a fixed confirmation message.
        """
        # Provenance is currently disabled, so the branch in the loop below
        # never runs. TODO: is this correct?
        provenance = None  # form.cleaned_data["provenance"]
        job_size = form.cleaned_data["job_size"]

        self.monitor.update(10, "Executing query..")
        article_ids = list(SelectionSearch.get_instance(form).get_article_ids())

        cj = CodingJob()
        cj.project = self.project
        cj.name = form.cleaned_data["name"]
        cj.unitschema = form.cleaned_data["unitschema"]
        cj.articleschema = form.cleaned_data["articleschema"]
        cj.coder = form.cleaned_data["coder"]
        cj.insertuser = self.user

        self.monitor.update(50, "Creating codingjobs..")

        # A job size of 0 means: put all articles in one job.
        if job_size == 0:
            job_size = len(article_ids)

        # NOTE(review): job_size is still 0 here when it was 0 and the
        # selection is empty, which makes the division below fail; decide how
        # an empty selection should be reported.
        # Number of batches, rounded up.
        n_batches = len(article_ids) // job_size
        n_batches += 1 if len(article_ids) % job_size else 0

        for i, cid in enumerate(_create_codingjob_batches(cj, article_ids, job_size)):
            # Batch creation covers the second half (50-100) of the progress.
            progress = int((i / float(n_batches)) * (100 // 2))
            msg = "Creating codingjob {} of {}..".format(i + 1, n_batches)
            self.monitor.update(50 + progress, msg)

            if provenance:
                cj = CodingJob.objects.get(id=cid)
                cj.provenance = provenance
                cj.save()

        return "Codingjob(s) created."
Пример #13
0
    def run(self, form):
        """Render the article table for the current selection.

        The request data is translated to API keywords, extended with the
        selection's queries (``label#query``) and article ids, and used both
        as the URL query string and as arguments of the data table.

        :param form: the selection form; also handed to the template.
        :return: ``TABLE_TEMPLATE`` rendered with ``form``, ``url`` and
            ``table``.
        """
        assert isinstance(self.data, QueryDict), \
            "Class should have been instantiated with a django QueryDict as 'data'"

        search = SelectionSearch.get_instance(form)

        # Translate field names to API keywords; unmapped names pass through.
        api_args = {}
        for field, values in self.data.lists():
            api_args[API_KEYWORD_MAP.get(field, field)] = values
        api_args["q"] = [
            "{}#{}".format(query.label, query.query)
            for query in search.get_queries()
        ]
        # Ids from the request take precedence over ids from the filters.
        filter_ids = search.get_filters().get("ids", [])
        api_args["ids"] = api_args.get("ids", filter_ids)
        url = urlencode(api_args, doseq=True)

        project_url = reverse("navigator:project-details", args=[self.project.id])
        rowlink = ARTICLE_ROWLINK.format(project_url, "{id}")

        table_options = {
            "url": "/api/v4/search",
            "rowlink": rowlink,
            "rowlink_open_in": "new",
            "checkboxes": True,
            "allow_export_via_post": True,
            "allow_html_export": True,
        }
        table = Datatable(SearchResource, **table_options)
        table = table.add_arguments(minimal="1").add_arguments(project=str(self.project.id))

        # One table argument per (keyword, value) pair.
        for keyword, values in api_args.items():
            for value in values:
                table = table.add_arguments(**{keyword: value})

        template_context = {"form": form, "url": url, "table": table}
        return TABLE_TEMPLATE.render(template_context)
Пример #14
0
    def run(self, form):
        """Summarise the current coding job selection as a JSON string.

        The payload maps query labels to query strings, article set ids and
        coding job ids to names, lists the ids returned by
        ``get_used_code_ids`` for the coding jobs, and adds a ``statistics``
        entry with the date range and the number of articles.

        :return: JSON text encoded with ``DjangoJSONEncoder``.
        """
        search = SelectionSearch.get_instance(form)
        query_list = search.get_queries()
        cleaned = form.cleaned_data
        sets = cleaned["articlesets"]
        jobs = cleaned["codingjobs"]
        stats = search.get_statistics()

        # Dates are only reported when the statistics object carries them.
        has_dates = hasattr(stats, "start_date")
        first_date = stats.start_date if has_dates else None
        last_date = stats.end_date if has_dates else None

        payload = {
            "queries": {query.label: query.query for query in query_list},
            "articlesets": {aset.id: aset.name for aset in sets},
            "codingjobs": {job.id: job.name for job in jobs},
            "codes_used": list(get_used_code_ids(jobs)),
            "statistics": {
                "start_date": first_date,
                "end_date": last_date,
                "narticles": stats.n,
            },
        }
        return json.dumps(payload, cls=DjangoJSONEncoder)
Пример #15
0
    def clean_relative_to(self):
        """Validate ``relative_to`` and resolve it to the object it names.

        How the value is interpreted depends on the cleaned ``y_axis``:

        * ``medium``: id of one of the selected mediums -> ``Medium``;
        * ``term``: label of one of the search queries -> that query;
        * ``set``: id of one of ``self.articlesets`` -> ``ArticleSet``.

        :return: ``None`` when no column was given, otherwise the resolved
            object.
        :raises ValidationError: when the value does not name an available
            medium, term or set (including non-numeric ids), or when
            ``y_axis`` is none of the values above.
        """
        column = self.cleaned_data['relative_to']
        y_axis = self.cleaned_data['y_axis']

        if not column:
            return None

        if y_axis == "medium":
            # The id comes from user input: a non-numeric value must be
            # reported as a form error, not surface as a ValueError.
            try:
                medium_id = int(column)
            except (TypeError, ValueError):
                raise ValidationError(MEDIUM_ERR.format(column=column))
            if medium_id not in (m.id for m in self.cleaned_data["mediums"]):
                raise ValidationError(MEDIUM_ERR.format(column=column))
            return Medium.objects.get(id=medium_id)

        if y_axis == "term":
            queries = SelectionSearch(self).get_queries()
            queries = {q.label: q for q in queries}
            if column not in queries:
                raise ValidationError(
                    "Term '{column}' not found in search terms.".format(
                        column=column))
            return queries[column]

        if y_axis == "set":
            set_error = "Set '{column}' not available.".format(column=column)
            try:
                set_id = int(column)
            except (TypeError, ValueError):
                raise ValidationError(set_error)
            if set_id not in (aset.id for aset in self.articlesets):
                raise ValidationError(set_error)
            return ArticleSet.objects.get(id=set_id)

        raise ValidationError("Not a valid column name.")
Пример #16
0
    def run(self, form):
        """Save the selection's queries as codes in a codebook.

        Uses a freshly created codebook when ``new_codebook`` is filled in and
        the form's ``existing_codebook`` otherwise. For each query label, the
        matching root of the codebook gets its label replaced; labels without
        a matching root result in a new code being added.

        :return: message stating how many codes were updated and added.
        """
        requested_name = form.cleaned_data["new_codebook"]
        if requested_name:
            codebook = Codebook(name=requested_name, project=self.project)
            codebook.save()
        else:
            codebook = form.cleaned_data["existing_codebook"]
            codebook.cache()

        language = form.cleaned_data["indicator_language"]
        existing_roots = dict((root.label, root) for root in codebook.get_roots())
        labelled_queries = dict((q.label, q) for q in SelectionSearch(form).get_queries())

        counts = {"updated": 0, "new": 0}
        for label, query in labelled_queries.items():
            if label not in existing_roots:
                # No root with this label yet: create one.
                code = Code(label=label)
                code.save()
                code.add_label(language, query.query, replace=True)
                codebook.add_code(code)
                counts["new"] += 1
            else:
                # Root exists: replace its label for this language.
                existing_roots[label].add_label(language, query.query, replace=True)
                counts["updated"] += 1

        return "Updated {} code(s), added {} new code(s).".format(counts["updated"], counts["new"])
Пример #17
0
    def run(self, form):
        """Return the clustermap of the selection in the requested format.

        ``output_type`` selects between a JSON clustermap (coordinates,
        base64 image and clusters), an SPSS file built with ``table2sav``, a
        JSON object wrapping the CSV table and the queries, or the plain CSV
        table (tab separated for ``text/csv+tab``).
        """
        queries = SelectionSearch(form).get_article_ids_per_query()
        wanted = form.cleaned_data["output_type"]

        if wanted == "application/json+clustermap":
            clusters, articles = zip(*get_clusters(queries).items())
            cluster_queries = get_cluster_queries(clusters)
            image, html = get_clustermap_image(queries)
            coords = tuple(clustermap_html_to_coords(html))

            result = {}
            result["coords"] = coords
            result["image"] = b64encode(image).decode("ascii")
            result["clusters"] = [
                {"query": cluster_query, "articles": tuple(members)}
                for cluster_query, members in zip(cluster_queries, articles)
            ]
            return json.dumps(result)

        headers, rows = get_clustermap_table(queries)

        if wanted == "application/spss-sav":
            # Position of every column, keyed by its name as a string.
            positions = {}
            for index, header in enumerate(headers):
                positions[str(header)] = index

            def lookup(row, col):
                return row[positions[col]]

            return table2sav(Table(
                rows=list(rows),
                columns=[str(header) for header in headers],
                columnTypes=[int] * len(headers),
                cellfunc=lookup,
            ))

        if wanted == "text/csv+tab":
            dialect = 'excel-tab'
        else:
            dialect = 'excel'

        output = StringIO()
        writer = csv.writer(output, dialect=dialect)
        writer.writerow([str(header) for header in headers])
        writer.writerows(sorted(rows))

        if wanted != "application/json+clustermap+table":
            return output.getvalue()

        return json.dumps({
            "csv": output.getvalue(),
            "queries": dict((q.label, q.query) for q in queries),
        })
Пример #18
0
    def run(self, form):
        """Render the summary page for the current selection.

        Counts the matching articles, fetches the mediums and one page of
        articles and, when ``aggregations`` is set, two aggregations.

        NOTE: the template context is built from ``locals()``, so every local
        variable name in this method is visible to ``TEMPLATE`` under that
        name; renaming or removing a local changes the context.

        :param form: form providing ``size``, ``offset`` and ``aggregations``
            in its ``cleaned_data``.
        :return: ``TEMPLATE`` rendered with all locals plus ``project`` and
            ``user``.
        """
        # The raw form data, serialised as JSON (reaches the template via locals()).
        form_data = json.dumps(dict(form.data._iterlists()))

        size = form.cleaned_data['size']
        offset = form.cleaned_data['offset']
        show_aggregation = form.cleaned_data['aggregations']

        # `timer` is exposed to the template as well.
        with Timer() as timer:
            selection = SelectionSearch(form)
            self.monitor.update(1, "Executing query..")
            narticles = selection.get_count()
            self.monitor.update(39, "Fetching mediums..".format(**locals()))
            mediums = selection.get_mediums()
            self.monitor.update(59, "Fetching articles..".format(**locals()))
            articles = selection.get_articles(size=size, offset=offset)

            # date_aggr and medium_aggr only exist (and are only passed to
            # the template) when aggregations were requested.
            if show_aggregation:
                self.monitor.update(69, "Aggregating..".format(**locals()))
                date_aggr = selection.get_aggregate(x_axis="date",
                                                    y_axis="total",
                                                    interval="day")
                medium_aggr = selection.get_aggregate(x_axis="medium",
                                                      y_axis="date",
                                                      interval="day")

            self.monitor.update(79, "Rendering results..".format(**locals()))

        # Context: all locals, extended with the project and the user.
        return TEMPLATE.render(
            Context(
                dict(locals(), **{
                    "project": self.project,
                    "user": self.user
                })))
Пример #19
0
    def run(self, form):
        """Add the articles matched by the selection to an existing set.

        Read access to the matched articles is checked with
        ``_check_read_access`` before they are added to the form's
        ``articleset``.

        :return: ``OK_TEMPLATE`` rendered with the project, the article set
            and the number of matched articles.
        """
        self.monitor.update(10, "Executing query..")
        search = SelectionSearch.get_instance(form)
        article_ids = list(search.get_article_ids())
        _check_read_access(self.user, article_ids)

        self.monitor.update(60, "Saving to set..")
        target_set = form.cleaned_data["articleset"]
        target_set.add_articles(article_ids)

        template_context = {
            "project": self.project,
            "aset": target_set,
            "len": len(article_ids),
        }
        return OK_TEMPLATE.render(template_context)
Пример #20
0
    def run(self, form):
        """Add the articles matched by the selection to an existing set.

        :param form: form providing the target ``articleset`` in its
            ``cleaned_data``.
        :return: ``OK_TEMPLATE`` rendered with the project, the article set
            and the number of matched articles.
        """
        self.monitor.update(10, "Executing query..")
        article_ids = list(SelectionSearch(form).get_article_ids())

        self.monitor.update(60, "Saving to set..")
        articleset = form.cleaned_data["articleset"]
        # The save call had been commented out, so the action reported
        # success without storing anything; it is restored here.
        articleset.add_articles(article_ids)

        return OK_TEMPLATE.render(
            Context({
                "project": self.project,
                "aset": articleset,
                "len": len(article_ids)
            }))
Пример #21
0
    def run(self, form):
        """Render the summary page for the current selection.

        Counts the matching articles, fetches the mediums and one page of
        articles and, when ``aggregations`` is set, two aggregations.

        NOTE: the template context is built from ``locals()``, so every local
        variable name in this method is visible to ``TEMPLATE`` under that
        name; renaming or removing a local changes the context.

        :param form: form providing ``size``, ``offset`` and ``aggregations``
            in its ``cleaned_data``.
        :return: ``TEMPLATE`` rendered with all locals plus ``project`` and
            ``user``.
        """
        # The raw form data, serialised as JSON (reaches the template via locals()).
        form_data = json.dumps(dict(form.data._iterlists()))

        size = form.cleaned_data['size']
        offset = form.cleaned_data['offset']
        show_aggregation = form.cleaned_data['aggregations']

        # `timer` is exposed to the template as well.
        with Timer() as timer:
            selection = SelectionSearch(form)
            self.monitor.update(1, "Executing query..")
            narticles = selection.get_count()
            self.monitor.update(39, "Fetching mediums..".format(**locals()))
            mediums = selection.get_mediums()
            self.monitor.update(59, "Fetching articles..".format(**locals()))
            articles = selection.get_articles(size=size, offset=offset)

            # date_aggr and medium_aggr only exist (and are only passed to
            # the template) when aggregations were requested.
            if show_aggregation:
                self.monitor.update(69, "Aggregating..".format(**locals()))
                date_aggr = selection.get_aggregate(x_axis="date", y_axis="total", interval="day")
                medium_aggr = selection.get_aggregate(x_axis="medium", y_axis="date", interval="day")

            self.monitor.update(79, "Rendering results..".format(**locals()))


        # Context: all locals, extended with the project and the user.
        return TEMPLATE.render(Context(dict(locals(), **{
            "project": self.project, "user": self.user
        })))
Пример #22
0
    def run(self, form):
        """Render the summary page for the current selection.

        Counts the matching articles, fetches one (optionally sorted) page of
        articles with text fragments and, when ``aggregations`` is set, an
        aggregation over a date interval chosen from the selection's date
        range.

        NOTE: the template context is built from ``locals()``, so every local
        variable name in this method is visible to ``TEMPLATE`` under that
        name; renaming or removing a local changes the context.

        :return: ``TEMPLATE`` rendered with all locals plus ``project`` and
            ``user``.
        """
        # Serialise the raw form data; values equal to [None] are emptied
        # in place first.
        form_data = dict(form.data.lists())
        for value in form_data.values():
            if value == [None]:
                value.pop()
        form_data = json.dumps(form_data, indent=4)

        size = form.cleaned_data['size']
        offset = form.cleaned_data['offset']
        number_of_fragments = form.cleaned_data['number_of_fragments']
        fragment_size = form.cleaned_data['fragment_size']
        show_fields = sorted(form.cleaned_data['show_fields'])
        show_aggregation = form.cleaned_data['aggregations']
        sort_by = form.cleaned_data.get('sort_by')
        sort_desc = "desc" if form.cleaned_data.get('sort_descending', False) else "asc"

        # Sort specification in the form "<field>:<asc|desc>", or no sorting.
        if sort_by:
            sort = [":".join([sort_by, sort_desc])]
        else:
            sort = []

        # `timer` is exposed to the template as well.
        with Timer() as timer:
            selection = SelectionSearch.get_instance(form)
            self.monitor.update(message="Executing query..")
            narticles = selection.get_count()
            self.monitor.update(message="Fetching articles..".format(**locals()))
            articles = selection.get_articles(size=size, offset=offset, sort=sort).as_dicts()
            # Replace the article dicts by the fragments for the same ids.
            articles = get_fragments(selection.get_query(), [a["id"] for a in articles], fragment_size, number_of_fragments)

            if show_aggregation:
                self.monitor.update(message="Aggregating..".format(**locals()))
                
                statistics = selection.get_statistics()
                try:
                    # Pick the first interval for which MAX_DATE_GROUPS
                    # groups span more than the selection's date range.
                    delta_start_end = statistics.end_date - statistics.start_date
                    interval = next(interval for (interval, delta) in TIMEDELTAS
                                    if MAX_DATE_GROUPS * delta > delta_start_end)
                except TypeError:
                    # Presumably raised when the dates are missing (None) — verify.
                    interval = "day"
                except StopIteration:
                    # No interval in TIMEDELTAS satisfies the condition.
                    interval = "year"

                date_aggr = selection.get_aggregate([IntervalCategory(interval)], objects=False)
            else:
                # Increase progress without doing anything (because we don't have to aggregate)
                self.monitor.update()

            self.monitor.update(message="Rendering results..".format(**locals()))

        # Context: all locals, extended with the project and the user.
        return TEMPLATE.render(dict(locals(), **{
            "project": self.project, "user": self.user
        }))
Пример #23
0
    def run(self, form):
        """Aggregate the selection and return the result as JSON.

        The aggregation is taken over the form's ``x_axis``, ``y_axis`` and
        ``interval``; when ``relative_to`` is not ``None`` the values are
        passed through ``get_relative`` with that column.

        :return: JSON string encoded with ``AggregationEncoder``.
        """
        self.monitor.update(1, "Executing query..")
        search = SelectionSearch(form)
        narticles = search.get_count()
        found_msg = "Found {narticles} articles. Aggregating..".format(narticles=narticles)
        self.monitor.update(10, found_msg)

        options = form.cleaned_data
        result = search.get_aggregate(options['x_axis'], options['y_axis'], options['interval'])

        self.monitor.update(20, "Calculating relative values..")
        reference_column = options['relative_to']
        if reference_column is not None:
            result = list(get_relative(result, reference_column))

        self.monitor.update(60, "Serialising..")
        return json.dumps(list(result), cls=AggregationEncoder, check_circular=False)
Пример #24
0
    def run(self, form):
        """Render the summary page for the current selection.

        Counts the matching articles, fetches one page of articles with text
        fragments and, when ``aggregations`` is set, an aggregation over a
        date interval chosen from the selection's date range.

        NOTE: the template context is built from ``locals()``, so every local
        variable name in this method is visible to ``TEMPLATE`` under that
        name; the names below are therefore kept as they are.

        :param form: form providing ``size``, ``offset``,
            ``number_of_fragments``, ``fragment_size``, ``show_fields`` and
            ``aggregations`` in its ``cleaned_data``.
        :return: ``TEMPLATE`` rendered with all locals plus ``project`` and
            ``user``.
        """
        # Serialise the raw form data; values equal to [None] are emptied
        # in place first.
        form_data = dict(form.data.lists())
        for value in form_data.values():
            if value == [None]:
                value.pop()
        form_data = json.dumps(form_data, indent=4)

        size = form.cleaned_data['size']
        offset = form.cleaned_data['offset']
        number_of_fragments = form.cleaned_data['number_of_fragments']
        fragment_size = form.cleaned_data['fragment_size']
        show_fields = sorted(form.cleaned_data['show_fields'])
        show_aggregation = form.cleaned_data['aggregations']

        # `timer` is exposed to the template as well.
        with Timer() as timer:
            selection = SelectionSearch(form)
            self.monitor.update(message="Executing query..")
            narticles = selection.get_count()
            self.monitor.update(message="Fetching articles..")

            articles = selection.get_articles(size=size,
                                              offset=offset).as_dicts()
            # Replace the article dicts by the fragments for the same ids.
            articles = get_fragments(selection.get_query(),
                                     [a["id"] for a in articles],
                                     fragment_size, number_of_fragments)

            if show_aggregation:
                self.monitor.update(message="Aggregating..")

                statistics = selection.get_statistics()
                try:
                    # Pick the first interval for which MAX_DATE_GROUPS
                    # groups span more than the selection's date range.
                    delta_start_end = statistics.end_date - statistics.start_date
                    interval = next(
                        interval for (interval, delta) in TIMEDELTAS
                        if MAX_DATE_GROUPS * delta > delta_start_end)
                except TypeError:
                    # Presumably raised when the dates are missing (None) — verify.
                    interval = "day"
                except StopIteration:
                    # The range is too long for every interval: fall back to
                    # the coarsest one instead of the finest.
                    # Assumes "year" is the coarsest interval in TIMEDELTAS —
                    # TODO confirm.
                    interval = "year"

                date_aggr = selection.get_aggregate(
                    [IntervalCategory(interval)], objects=False)
            else:
                # Increase progress without doing anything (because we don't have to aggregate)
                self.monitor.update()

            self.monitor.update(message="Rendering results..")

        # Context: all locals, extended with the project and the user.
        return TEMPLATE.render(
            Context(
                dict(locals(), **{
                    "project": self.project,
                    "user": self.user
                })))
Пример #25
0
    def run(self, form):
        """Render the search result table for the current selection.

        Request data is mapped onto API keywords, the selection's queries are
        added as ``label#query`` strings, and the outcome is used for both
        the URL query string and the data table arguments.

        :return: ``TABLE_TEMPLATE`` rendered with ``form``, ``url`` and
            ``table``.
        """
        search = SelectionSearch(form)

        # Unmapped field names are used as API keywords unchanged.
        renamed = ((API_KEYWORD_MAP.get(name, name), values)
                   for name, values in self.data.iterlists())
        request_args = dict(renamed)
        request_args["q"] = [
            "{}#{}".format(query.label, query.query)
            for query in search.get_queries()
        ]
        url = urllib.urlencode(request_args, doseq=True)

        table = Datatable(SearchResource, url="/api/v4/search")
        table = table.add_arguments(minimal="1")
        table = table.add_arguments(project=str(self.project.id))

        # One table argument per (keyword, value) pair.
        for keyword, values in request_args.items():
            for value in values:
                table = table.add_arguments(**{keyword: value})

        template_context = Context({"form": form, "url": url, "table": table})
        return TABLE_TEMPLATE.render(template_context)
Пример #26
0
    def run(self, form):
        """Compute the requested aggregation and serialise it to JSON.

        Aggregates over ``x_axis``, ``y_axis`` and ``interval`` from the form
        and, if ``relative_to`` is set (not ``None``), converts the values
        with ``get_relative``.

        :return: JSON string encoded with ``AggregationEncoder``.
        """
        self.monitor.update(1, "Executing query..")
        search = SelectionSearch(form)
        narticles = search.get_count()
        self.monitor.update(
            10,
            "Found {narticles} articles. Aggregating..".format(narticles=narticles),
        )

        axes = (
            form.cleaned_data['x_axis'],
            form.cleaned_data['y_axis'],
            form.cleaned_data['interval'],
        )
        aggregated = search.get_aggregate(*axes)

        self.monitor.update(20, "Calculating relative values..")
        relative_to = form.cleaned_data['relative_to']
        if relative_to is not None:
            aggregated = list(get_relative(aggregated, relative_to))

        self.monitor.update(60, "Serialising..")
        rows = list(aggregated)
        return json.dumps(rows, cls=AggregationEncoder, check_circular=False)
Пример #27
0
    def run(self, form):
        """Create a new article set holding the articles of the selection.

        The set is created with the form's ``name`` and ``project`` before
        the query is executed.

        :return: ``OK_TEMPLATE`` rendered with the project, the new set and
            the number of matched articles.
        """
        options = form.cleaned_data
        set_name = options["name"]
        target_project = options["project"]
        new_set = ArticleSet.objects.create(name=set_name, project=target_project)

        self.monitor.update(10, "Executing query..")
        matched_ids = list(SelectionSearch(form).get_article_ids())

        self.monitor.update(60, "Saving to set..")
        new_set.add_articles(matched_ids)

        template_context = Context({
            "project": target_project,
            "aset": new_set,
            "len": len(matched_ids),
        })
        return OK_TEMPLATE.render(template_context)
Пример #28
0
    def run(self, form):
        """Aggregate the codings of the selected articles and serialise them.

        Cached values are reused when available. Otherwise the completed
        codings of the selected articles within the selection's coding jobs
        are aggregated over the form's ``primary``/``secondary`` categories
        and ``value1``/``value2`` values, sorted by ``order_by`` and cached.

        :param form: form providing ``primary``, ``secondary``, ``value1``,
            ``value2``, ``order_by`` and ``output_type`` in its
            ``cleaned_data``.
        :return: the result of ``aggregation_to_csv`` for ``text/csv``,
            otherwise a JSON string.
        """
        self.monitor.update(1, "Executing query..")
        selection = SelectionSearch.get_instance(form)
        try:
            aggregation, primary, secondary, categories, values = self.get_cache()
        except NotInCacheError:
            narticles = selection.get_count()
            self.monitor.update(
                10, "Found {narticles} articles. Aggregating..".format(narticles=narticles))

            # Aggregation settings.
            primary = form.cleaned_data['primary']
            secondary = form.cleaned_data['secondary']
            value1 = form.cleaned_data['value1']
            value2 = form.cleaned_data['value2']
            order_by = form.cleaned_data["order_by"]

            article_ids = list(selection.get_article_ids())

            # Only completed codings of the selected articles, restricted to
            # the coding jobs of the selection.
            codings = Coding.objects.filter(coded_article__article__id__in=article_ids,
                                            coded_article__codingjob__id__in=selection.data.codingjobs,
                                            coded_article__status=STATUS_COMPLETE)

            terms = selection.get_article_ids_per_query()
            orm_aggregate = ORMAggregate(codings, flat=False, terms=terms)
            # Categories and values the user left empty are dropped.
            categories = list(filter(None, [primary, secondary]))
            values = list(filter(None, [value1, value2]))
            aggregation = orm_aggregate.get_aggregate(categories, values)
            aggregation = sorted_aggregation(*order_by, aggregation)

            self.set_cache([aggregation, primary, secondary, categories, values])
        else:
            self.monitor.update(10, "Found in cache. Rendering..")

        # Matrices are very annoying to construct in javascript due to missing hashtables. If
        # the user requests a table, we thus first convert it to a different format which should
        # be easier to render.
        if form.cleaned_data["output_type"] == "text/json+aggregation+table":
            aggregation = aggregation_to_matrix(aggregation, categories)

        if form.cleaned_data["output_type"] == "text/csv":
            return aggregation_to_csv(aggregation, categories, values)

        self.monitor.update(60, "Serialising..")
        return json.dumps(aggregation, cls=AggregationEncoder, check_circular=False)
Пример #29
0
    def _clean_aggregation(self, field_name):
        """Turn the cleaned value of an aggregation field into a category object.

        Returns ``None`` for an empty value. ``"articleset"`` and ``"term"``
        map to their dedicated categories, values ending in one of
        ``INTERVALS`` become an ``IntervalCategory``, and anything else is
        resolved through ``FieldCategory.from_fieldname``.
        """
        raw = self.cleaned_data[field_name]

        if not raw:
            return None

        if raw == "articleset":
            return aggregate_es.ArticlesetCategory(self.articlesets)

        if raw == "term":
            queries = SelectionSearch(self).get_queries()
            return aggregate_es.TermCategory(queries)

        if raw.endswith(INTERVALS):
            # "<field>_<interval>": split on the last underscore only.
            parts = raw.rsplit("_", 1)
            fieldname, interval = parts
            fill = self.cleaned_data["fill_zeroes"]
            return aggregate_es.IntervalCategory(
                field=fieldname, interval=interval, fill_zeros=fill)

        suffix = "_str"
        if raw.endswith(suffix):
            # _str is added to disambiguate between fields and intervals
            raw = raw[:-len(suffix)]

        return FieldCategory.from_fieldname(raw)
Пример #30
0
    def run(self, form):
        """Create coding job(s) for the articles matched by the form's selection.

        The matched article ids are handed to ``_create_codingjob_batches``
        in batches of ``job_size`` articles; a ``job_size`` of 0 means a
        single batch holding all articles. Each id the helper yields is
        treated as one created coding job, and the monitor is advanced from
        50 towards 100 accordingly.

        :param form: validated form; ``cleaned_data`` must provide
            ``job_size``, ``name``, ``unitschema``, ``articleschema`` and
            ``coder``.
        :return: a short status message.
        """
        provenance = None  # form.cleaned_data["provenance"]  # TODO: is this correct?
        job_size = form.cleaned_data["job_size"]

        self.monitor.update(10, "Executing query..")
        article_ids = list(SelectionSearch(form).get_article_ids())

        if not article_ids:
            # An empty selection combined with job_size == 0 used to end in a
            # ZeroDivisionError below; there is nothing to create anyway.
            return "No articles found; no codingjobs created."

        cj = CodingJob()
        cj.project = self.project
        cj.name = form.cleaned_data["name"]
        cj.unitschema = form.cleaned_data["unitschema"]
        cj.articleschema = form.cleaned_data["articleschema"]
        cj.coder = form.cleaned_data["coder"]
        cj.insertuser = self.user

        self.monitor.update(50, "Creating codingjobs..")

        if job_size == 0:
            job_size = len(article_ids)

        # Number of batches, rounded up.
        n_batches, remainder = divmod(len(article_ids), job_size)
        if remainder:
            n_batches += 1

        for i, cid in enumerate(
                _create_codingjob_batches(cj, article_ids, job_size)):
            # The second half of the progress bar (50..100) covers job creation.
            progress = int(i / n_batches * 50)
            msg = "Creating codingjob {} of {}..".format(i + 1, n_batches)
            self.monitor.update(50 + progress, msg)

            # Currently never taken: provenance is hard-coded to None above.
            if provenance:
                cj = CodingJob.objects.get(id=cid)
                cj.provenance = provenance
                cj.save()

        return "Codingjob(s) created."
Пример #31
0
    def run(self, form):
        """Run the form's selection and render the results through ``TEMPLATE``.

        Fetches the article count, the mediums and one page of articles
        (``size``/``offset``) and, when ``cleaned_data['aggregations']`` is
        set, a per-date and a per-medium aggregation.

        NOTE: the template context is built from ``locals()``, so every local
        name in this method (including ``self`` and ``form``) is exposed to the
        template. Renaming or removing a local changes the template contract.

        :param form: validated form; ``cleaned_data`` must provide ``size``,
            ``offset`` and ``aggregations``.
        :return: the rendered ``TEMPLATE``.
        """
        # Submitted form data as JSON; only consumed via the template context.
        form_data = json.dumps(dict(form.data.lists()))

        size = form.cleaned_data['size']
        offset = form.cleaned_data['offset']
        show_aggregation = form.cleaned_data['aggregations']

        # `timer` is also exported to the template through locals().
        # NOTE(review): presumably it measures the elapsed query time - confirm against Timer.
        with Timer() as timer:
            selection = SelectionSearch(form)
            self.monitor.update(1, "Executing query..")
            narticles = selection.get_count()
            self.monitor.update(39, "Fetching mediums..".format(**locals()))
            mediums = selection.get_mediums()
            self.monitor.update(59, "Fetching articles..".format(**locals()))
            articles = [escape_article_result(art) for art in selection.get_articles(size=size, offset=offset)]

            # `statistics`, `date_aggr` and `medium_aggr` only exist (and thus only
            # reach the template) when aggregations were requested.
            if show_aggregation:
                self.monitor.update(69, "Aggregating..".format(**locals()))

                statistics = selection.get_statistics()
                try:
                    # Pick the first (interval, delta) pair in TIMEDELTAS for which
                    # MAX_DATE_GROUPS * delta exceeds the selection's date span.
                    delta_start_end = statistics.end_date - statistics.start_date
                    interval = next(interval for (interval, delta) in TIMEDELTAS
                                    if MAX_DATE_GROUPS * delta > delta_start_end)
                except (StopIteration, TypeError):
                    # StopIteration: no pair qualified. TypeError: the subtraction or
                    # comparison failed (presumably a missing date - confirm).
                    interval = "day"

                date_aggr = selection.get_nested_aggregate([IntervalCategory(interval)])
                # fill_zeroes is fed ((date,), (value,)) tuples, so each pair is wrapped first.
                date_aggr = fill_zeroes((((date,),(value,)) for date,value in date_aggr), IntervalCategory(interval))
                medium_aggr = selection.get_nested_aggregate([MediumCategory()])

            self.monitor.update(79, "Rendering results..".format(**locals()))

        # All locals plus the project and user form the template context.
        return TEMPLATE.render(Context(dict(locals(), **{
            "project": self.project, "user": self.user
        })))
Пример #32
0
 def clean(self):
     """Validate the query field after all other fields have been cleaned.

     This is a bit of a hack: the query can only be validated once the other
     fields are correctly validated, so the query is built here and any error
     it raises is left to propagate.
     """
     search = SelectionSearch(self)
     search.get_query()  # result is discarded; the call itself is the check
     return self.cleaned_data
Пример #33
0
    def run(self, form):
        """Aggregate codings of the selected articles, filtered on schema fields.

        On a cache miss the coded articles of the selected articles and
        coding jobs are narrowed down by up to three
        ``codingschemafield_<n>`` filters, their codings are aggregated over
        the chosen categories and values, sorted, and cached. The result is
        optionally zero-filled and converted to a matrix, then returned as
        CSV (``output_type == "text/csv"``) or as a JSON string.
        """
        self.monitor.update(1, "Executing query..")
        selection = SelectionSearch(form)
        try:
            aggregation, primary, secondary, categories, values = self.get_cache()
        except NotInCacheError:
            narticles = selection.get_count()
            found_msg = "Found {narticles} articles. Aggregating..".format(
                narticles=narticles)
            self.monitor.update(10, found_msg)

            # Aggregation settings
            cleaned = form.cleaned_data
            codingjobs = cleaned["codingjobs"]
            primary = cleaned['primary']
            secondary = cleaned['secondary']
            value1 = cleaned['value1']
            value2 = cleaned['value2']

            article_ids = selection.get_article_ids()

            # This should probably happen in SelectionForm?
            coded_articles = (
                CodedArticle.objects.all()
                .filter(article__id__in=article_ids)
                .filter(codingjob__id__in=codingjobs)
            )
            remaining_ids = set(coded_articles.values_list("id", flat=True))

            for suffix in ("1", "2", "3"):
                # Nothing left to narrow down: skip the remaining filters.
                if not remaining_ids:
                    break

                schemafield = cleaned["codingschemafield_{}".format(suffix)]
                wanted_values = cleaned[
                    "codingschemafield_value_{}".format(suffix)]
                include_descendants = cleaned[
                    "codingschemafield_include_descendants_{}".format(suffix)]

                # A filter only applies when both field and values are given.
                if not (schemafield and wanted_values):
                    continue

                code_ids = get_code_filter(
                    schemafield.codebook, wanted_values, include_descendants)
                matching_values = (
                    CodingValue.objects
                    .filter(coding__coded_article__id__in=remaining_ids)
                    .filter(field__id=schemafield.id)
                    .filter(intval__in=code_ids)
                )
                matching_ids = matching_values.values_list(
                    "coding__coded_article__id", flat=True)
                remaining_ids &= set(matching_ids)

            codings = Coding.objects.filter(coded_article__id__in=remaining_ids)

            terms = selection.get_article_ids_per_query()
            orm_aggregate = ORMAggregate(codings, flat=False, terms=terms)
            # Unset (falsy) categories and values are dropped.
            categories = [c for c in (primary, secondary) if c]
            values = [v for v in (value1, value2) if v]
            aggregation = orm_aggregate.get_aggregate(categories, values)
            aggregation = sorted(aggregation, key=to_sortable_tuple)

            self.set_cache(
                [aggregation, primary, secondary, categories, values])
        else:
            self.monitor.update(10, "Found in cache. Rendering..")

        fill_requested = form.cleaned_data.get("primary_fill_zeroes")
        if fill_requested and hasattr(primary, 'interval'):
            filled = aggregate_es.fill_zeroes(aggregation, primary, secondary)
            aggregation = list(filled)

        # Matrices are very annoying to construct in javascript due to missing hashtables. If
        # the user requests a table, we thus first convert it to a different format which should
        # be easier to render.
        if form.cleaned_data["output_type"] == "text/json+aggregation+table":
            aggregation = aggregation_to_matrix(aggregation, categories)

        if form.cleaned_data["output_type"] == "text/csv":
            return aggregation_to_csv(aggregation, categories, values)

        self.monitor.update(60, "Serialising..")
        return json.dumps(
            aggregation, cls=AggregationEncoder, check_circular=False)