Пример #1
0
def form_creation(request, context):
    """Populate *context* for the evaluation-form page.

    Fills criteria, topic groups, available topic modellings and source
    lists. Non-superusers are restricted to their user group's criteria
    and topic modellings.

    Raises:
        Forbidden: when a non-superuser has no group, or when no topic
            modellings are visible to the user.
    """
    user = request.user
    group = get_user_group(user)
    # All evaluation indices currently aliased in Elasticsearch.
    indices = ES_CLIENT.indices.get_alias(
        f"{ES_INDEX_DOCUMENT_EVAL}_*").keys()
    if user.is_superuser:
        context['criterions_list'] = EvalCriterion.objects.all()
    else:
        if not group:
            raise Forbidden("403")
        context['criterions_list'] = EvalCriterion.objects.filter(
            usergroup=group)
        # Keep only indices whose topic modelling is allowed for the group.
        allowed = group.topic_modelling_names.split(",")
        indices = [
            ind for ind in indices
            if parse_eval_index_name(ind)['topic_modelling'] in allowed
        ]
    context['public_groups'] = TopicGroup.objects.filter(is_public=True)
    context['my_groups'] = TopicGroup.objects.filter(owner=user)
    # Deduplicate (name, display-name) pairs, then sort for stable output.
    unique_tms = {
        (parse_eval_index_name(ind)['topic_modelling'],
         parse_eval_index_name(ind)['topic_modelling'].replace(
             "bigartm", "tm"))
        for ind in indices if not parse_eval_index_name(ind)['ignore']
    }
    context['topic_modellings'] = sorted(unique_tms)
    if not context['topic_modellings']:
        raise Forbidden("403")
    context['sources_list'] = Source.objects.filter(
        document__isnull=False).distinct()
    context['top_news_num'] = 1000
Пример #2
0
    def get_context_data(self, **kwargs):
        """Resolve the requested dashboard preset and gather widget contexts.

        Non-superusers only see presets attached to their user group; when
        the resolved preset is not among the allowed ones, the context is
        returned without any widget data.
        """
        context = super().get_context_data(**kwargs)
        dashboard_id = kwargs['dashboard_id']

        # Determine which presets this user may access.
        if self.request.user.is_superuser:
            dashboards = DashboardPreset.objects.all()
        else:
            group = get_user_group(self.request.user)
            if not group:
                return context
            dashboards = group.dashboard_presets.all()

        # dashboard_id == 0 selects the first allowed preset.
        if dashboard_id == 0:
            template = dashboards.first()
        else:
            template = get_object_or_None(DashboardPreset, id=dashboard_id)
        context['dashboard_template'] = template
        if template not in dashboards:
            return context

        def _widget_context(widget):
            # Skip the (potentially expensive) widget callable when its
            # rendered fragment is already in the template cache.
            key = make_template_fragment_key('widget', [widget.id])
            if cache.get(key):
                return {}
            return widget.callable(widget)

        widgets = context['dashboard_template'].widgets.all(
        ).order_by('index')
        context['widgets'] = widgets
        # Fill widget context
        for widget in widgets:
            context.update(_widget_context(widget))
        return context
Пример #3
0
    def get_context_data(self, **kwargs):
        """Build the topics-list context.

        Validates the filter form, loads topic-modelling metadata from
        Elasticsearch, and fills the context with sorted topics plus
        multi-corpus / resonance flags. Sets ``context['error']`` to
        "403 FORBIDDEN" when no topic modelling is available to the user.
        Returns early when the rendered fragment is already cached.
        """
        context = super().get_context_data(**kwargs)
        # Criteria are shown only to superusers and experts; non-superusers
        # are additionally restricted to their own group's criteria.
        if self.request.user.is_superuser or hasattr(self.request.user,
                                                     "expert"):
            context['criterions'] = EvalCriterion.objects.all()
            if not self.request.user.is_superuser:
                group = get_user_group(self.request.user)
                context['criterions'] = context['criterions'].filter(
                    usergroup=group)
        form = self.form_class(data=self.request.GET, user=self.request.user)
        # An empty choice list means the user may not see any modelling.
        if not form.fields['topic_modelling'].choices:
            context['error'] = "403 FORBIDDEN"
            return context
        context['form'] = form
        if form.is_valid():
            context['topic_modelling'] = form.cleaned_data['topic_modelling']
            context['topic_weight_threshold'] = form.cleaned_data[
                'topic_weight_threshold']
        else:
            # Fall back to the first available choice and a default threshold.
            context['topic_modelling'] = form.fields[
                'topic_modelling'].choices[0][0]
            context['topic_weight_threshold'] = 0.05  # Initial

        # Skip the expensive ES work when the template fragment is cached.
        key = make_template_fragment_key(
            'topics_list',
            [context['topic_modelling'], context['topic_weight_threshold']])
        if cache.get(key):
            return context

        tm_index = Search(using=ES_CLIENT, index=ES_INDEX_TOPIC_MODELLING) \
                .filter("term", name=context['topic_modelling']).execute()[0]
        is_multi_corpus = False
        if hasattr(tm_index, "is_multi_corpus"):
            is_multi_corpus = tm_index.is_multi_corpus
        topics = get_topics_with_meta(context['topic_modelling'],
                                      context['topic_weight_threshold'],
                                      is_multi_corpus)

        # Create context
        context['is_multi_corpus'] = is_multi_corpus
        context['has_resonance_score'] = any(
            (hasattr(topic, "high_resonance_score")
             and topic.high_resonance_score for topic in topics))
        if is_multi_corpus:
            # Collect the union of corpus names mentioned by any topic.
            corpuses = set()
            for topic in topics:
                if not hasattr(topic, "corpus_weights"):
                    continue
                for corpus in topic.corpus_weights:
                    corpuses.add(corpus)
            context['corpuses'] = list(corpuses)
        # Only topics with at least 5 words are listed, heaviest first.
        context['topics'] = sorted(
            [t for t in topics if len(t.topic_words) >= 5],
            key=lambda x: x.weight,
            reverse=True)
        # Combined weight of everything beyond the first 10 topics.
        context['rest_weight'] = sum([t.weight for t in topics[10:]])
        return context
Пример #4
0
def dashboard_list(request):
    """Return the dashboard presets visible to *request.user*.

    Superusers see every preset; other users see presets of their group,
    or an empty queryset when they have no group.
    """
    user = request.user
    if not user.is_superuser:
        group = get_user_group(user)
        dashboards = (DashboardPreset.objects.filter(usergroup=group)
                      if group else DashboardPreset.objects.none())
    else:
        dashboards = DashboardPreset.objects.all()
    return {"dashboards": dashboards}
Пример #5
0
 def __init__(self,
              data=None,
              files=None,
              auto_id='id_%s',
              prefix=None,
              initial=None,
              error_class=ErrorList,
              label_suffix=None,
              empty_permitted=False,
              field_order=None,
              use_required_attribute=None,
              renderer=None,
              user=None):
     """Form constructor that narrows corpus/source/author choices.

     Accepts the standard Django form arguments plus ``user``; for a
     non-superuser, each related queryset is restricted to the user's group.

     NOTE(review): ``user`` defaults to None but is dereferenced
     unconditionally below — callers appear to always pass it; confirm.
     """
     super().__init__(data, files, auto_id, prefix, initial, error_class,
                      label_suffix, empty_permitted, field_order,
                      use_required_attribute, renderer)
     if not user.is_superuser:
         group = get_user_group(user)
         # Restrict every selectable queryset to the user's group.
         restrictions = (
             ('corpuses', {'usergroup': group}),
             ('sources', {'corpus__usergroup': group}),
             ('authors', {'corpus__usergroup': group}),
         )
         for field_name, lookup in restrictions:
             field = self.fields[field_name]
             field.queryset = field.queryset.filter(**lookup)
Пример #6
0
    def __init__(self,
                 data=None,
                 files=None,
                 auto_id='id_%s',
                 prefix=None,
                 initial=None,
                 error_class=ErrorList,
                 label_suffix=None,
                 empty_permitted=False,
                 field_order=None,
                 use_required_attribute=None,
                 renderer=None,
                 user=None,
                 has_combo=False):
        """Form constructor that fills topic-modelling choices from ES.

        Besides the standard Django form arguments, accepts:
            user: the requesting user; non-superusers only get modellings
                listed in their group's ``topic_modelling_names``.
            has_combo: when True, keep only modellings that have a topic
                combos index and make the weight threshold optional.
        """
        super().__init__(data, files, auto_id, prefix, initial, error_class,
                         label_suffix, empty_permitted, field_order,
                         use_required_attribute, renderer)

        # Get topic_modellings
        # Up to 500 ready modellings with the metadata used for labels.
        s = Search(using=ES_CLIENT, index=ES_INDEX_TOPIC_MODELLING).filter('term', is_ready=True) \
                .source(['name', 'algorithm', 'number_of_topics', 'number_of_documents',
                         'source', 'datetime_from', 'datetime_to'
                         # 'perplexity', 'purity', 'contrast', 'coherence',
                         # 'tau_smooth_sparse_theta', 'tau_smooth_sparse_phi',
                         # 'tau_decorrelator_phi', 'tau_coherence_phi',
                         ])[:500]
        group = None
        if not user.is_superuser:
            group = get_user_group(user)
        topic_modellings = s.execute()
        # Largest corpora first.
        topic_modellings = sorted(topic_modellings,
                                  key=lambda x: x.number_of_documents,
                                  reverse=True)
        # Lazily build (value, human-readable label) choice pairs; the
        # label pieces are optional depending on the available metadata.
        # Access is filtered to the user's group unless superuser.
        topic_modellings = ((
            tm.name.lower(),
            f"{tm.name.replace('bigartm', 'tm')} - {tm.number_of_topics} топиков - {tm.number_of_documents} текстов - "
            + (f"{tm.source} - "
               if hasattr(tm, 'source') and tm.source else f"Все СМИ ") +
            (f"С {tm.datetime_from[:10]} - "
             if hasattr(tm, 'datetime_from') and tm.datetime_from else f"") +
            (f"По {tm.datetime_to[:10]} - "
             if hasattr(tm, 'datetime_to') and tm.datetime_to else f""))
                            for tm in topic_modellings
                            if user.is_superuser or (group and tm.name.lower(
                            ) in group.topic_modelling_names.split(",")))
        if has_combo:
            # Keep only modellings that have a corresponding combos index.
            combo_indices = ES_CLIENT.indices.get_alias(
                f"{ES_INDEX_TOPIC_COMBOS}_*").keys()
            tms_with_combo = [
                ind.replace(f"{ES_INDEX_TOPIC_COMBOS}_", "").lower()
                for ind in combo_indices
            ]
            topic_modellings = filter(lambda x: x[0] in tms_with_combo,
                                      topic_modellings)
            self.fields['topic_weight_threshold'].required = False
        self.fields['topic_modelling'].choices = topic_modellings

        # Get topic_weight_thresholds
        self.fields[
            'topic_weight_threshold'].choices = get_topic_weight_threshold_options(
                user.is_superuser or hasattr(user, "expert"))
Пример #7
0
    def get_context_data(self, **kwargs):
        """Build the topic-detail context.

        Checks that the user may access the requested topic modelling,
        runs the document/metric queries against Elasticsearch, optionally
        smooths the dynamics signals, and fills the context for rendering.
        Sets ``context['error']`` to "403 FORBIDDEN" on access failure and
        returns early on a template-fragment cache hit.

        Fix: removed the local ``is_too_many_groups`` which was computed
        from the requested topics but never used anywhere in the method.
        """
        context = super().get_context_data(**kwargs)

        # Non-superusers may only view modellings listed in their group.
        if not self.request.user.is_superuser:
            group = get_user_group(self.request.user)
            if not group or kwargs[
                    'topic_modelling'] not in group.topic_modelling_names.split(
                        ","):
                context['error'] = "403 FORBIDDEN"
                return context

        # Either a single topic from the URL, or a JSON list from the query.
        if 'topic_name' in kwargs:
            topics = [kwargs['topic_name']]
        else:
            topics = json.loads(self.request.GET['topics'])

        # Forms Management
        try:
            context = abstract_documents_list_form_management(
                context, self.request, kwargs)
        except CacheHit:
            return context

        # Skip the expensive ES queries when the fragment is cached.
        key = make_template_fragment_key('topic_detail',
                                         [kwargs, self.request.GET])
        if cache.get(key):
            return context

        # Total metrics
        total_metrics_dict = get_total_metrics(
            kwargs['topic_modelling'], context['granularity'],
            context['topic_weight_threshold'])

        # Current topic metrics
        topic_documents, number_of_documents = get_current_topics_metrics(
            kwargs['topic_modelling'],
            topics,
            context['granularity'],
            context['topic_weight_threshold'],
            intersection=self.request.GET.get("intersection", "") == "1")

        # Get documents, set weights
        documents = get_documents_with_weights(topic_documents)

        # Normalize
        normalize_topic_documnets(topic_documents, total_metrics_dict)

        # Separate signals
        absolute_power = [
            bucket.doc_count
            for bucket in topic_documents.aggregations.dynamics.buckets
        ]
        relative_power = [
            bucket.doc_count_normal
            for bucket in topic_documents.aggregations.dynamics.buckets
        ]
        relative_weight = [
            bucket.dynamics_weight.value
            for bucket in topic_documents.aggregations.dynamics.buckets
        ]

        # Smooth
        if context['smooth']:
            absolute_power = apply_fir_filter(
                absolute_power, granularity=context['granularity'])
            relative_power = apply_fir_filter(
                relative_power, granularity=context['granularity'])
            relative_weight = apply_fir_filter(
                relative_weight, granularity=context['granularity'])

        # Create context
        context['list_type'] = "topics"
        context['documents'] = unique_ize(documents, key=lambda x: x.id)
        context['number_of_documents'] = number_of_documents
        context['date_ticks'] = [
            bucket.key_as_string
            for bucket in topic_documents.aggregations.dynamics.buckets
        ]
        context['absolute_power'] = absolute_power
        context['relative_power'] = relative_power
        context['relative_weight'] = relative_weight
        # Sources ordered by their aggregated topic weight, heaviest first.
        context['source_weight'] = sorted(
            topic_documents.aggregations.source.buckets,
            key=lambda x: x.source_weight.value,
            reverse=True)
        return context
Пример #8
0
    def get_context_data(self, **kwargs):
        """Build the monitoring-object widget context.

        Loads the widget and monitoring object from the URL kwargs, checks
        group access to the widget's topic modelling, queries Elasticsearch
        for matching documents with date-histogram and source aggregations,
        and fills the context. Sets ``context['error']`` on access failure
        and returns early on a form-management cache hit.
        """
        context = super().get_context_data(**kwargs)
        widget = Widget.objects.get(id=kwargs['widget_id'])
        monitoring_object = MonitoringObject.objects.get(
            id=kwargs['object_id'])

        # Non-superusers may only view modellings listed in their group.
        if not self.request.user.is_superuser:
            group = get_user_group(self.request.user)
            if not group or widget.topic_modelling_name not in group.topic_modelling_names.split(
                    ","):
                context['error'] = "403 FORBIDDEN"
                return context

        context['monitoring_object'] = monitoring_object
        context['widget'] = widget

        s = es_widget_search_factory(widget, object_id=monitoring_object.id)
        number_of_documents = s.count()
        # Cap the fetched hits; only ids and dates are needed for the charts.
        s = s.source(("document_es_id", "datetime"))[:1000 * 300]

        context['number_of_documents'] = number_of_documents
        # Forms Management
        try:
            context = abstract_documents_list_form_management(
                context, self.request, kwargs)
        except CacheHit:
            return context

        # Aggregate the dynamics over time and the per-source counts.
        s.aggs.bucket("dynamics",
                      agg_type="date_histogram",
                      field="datetime",
                      calendar_interval=context['granularity'])
        s.aggs.bucket("source", agg_type="terms", field="document_source")
        r = s.execute()
        # Fetch display fields for the referenced documents (capped at 2000).
        documents = Search(using=ES_CLIENT, index=ES_INDEX_DOCUMENT).filter(
            "terms", _id=list(set([d.document_es_id for d in r])))
        documents = documents.source(
            ("id", "title", "datetime", "source", "url"))[:2000]
        documents = documents.execute()

        # Separate signals
        absolute_power = [
            bucket.doc_count for bucket in r.aggregations.dynamics.buckets
        ]

        # Smooth
        if context['smooth']:
            absolute_power = apply_fir_filter(
                absolute_power, granularity=context['granularity'])

        # Create context
        context['list_type'] = "monitoring_object"
        context['documents'] = unique_ize(documents, key=lambda x: x.id)
        context['date_ticks'] = [
            bucket.key_as_string for bucket in r.aggregations.dynamics.buckets
        ]
        context['absolute_power'] = absolute_power
        # Sources ordered by document count, largest first.
        context['source_weight'] = sorted(r.aggregations.source.buckets,
                                          key=lambda x: x.doc_count,
                                          reverse=True)
        return context