Exemplo n.º 1
0
def clusters():
    """
    Render the AJAX version of the clustering page.

    Intended to replace display_clusters() once testing is complete.

    The optional ``filter`` request value selects what is listed:

    * when set, clusters whose ``person`` starts with that prefix, sorted by
      descending ``face_count`` (up to 10000 results);
    * otherwise, up to 50 clusters without a ``person``, scored by a mix of
      ``random_score`` and the ``face_count`` field value.

    Returns the rendered ``clusters.html`` template, given the selected
    clusters, the list of person names found by a terms aggregation and a
    status line about how many faces are named.
    """
    person = request.values.get('filter')
    # Refresh the Face index before counting.
    Face._index.refresh()

    total = Face.search().count()
    named = Face.search().filter("exists", field="person").count()
    # An empty index must not crash the page with ZeroDivisionError.
    named_ratio = named / total if total else 0.0
    status = "{:.1%} ({} out of {}) faces are named. Clusters count: {}".format(
        named_ratio, named, total, Cluster.search().count())

    # Collect every distinct person name through a terms aggregation.
    ps = Search()
    ps.aggs.bucket("persons", A("terms", field="person.raw", size=10000))
    psr = ps.execute()
    persons = [bucket.key for bucket in psr.aggs['persons']]

    if person:
        s = Cluster.search().filter("prefix", person=person).sort("-face_count")
        results = s[0:10000].execute()
    else:
        s = Cluster.search().exclude("exists", field="person")
        # boost_mode="replace": the function scores replace the query score.
        s.query = FunctionScore(query=s.query,
                                functions=[SF('random_score', weight=100),
                                           SF('field_value_factor',
                                              field="face_count", weight=1)],
                                score_mode="avg", boost_mode="replace")
        results = s[0:50].execute()

    return render_template('clusters.html', clusters=results, persons=persons,
                           status=status)
Exemplo n.º 2
0
    def get_queryset_posts(self):
        """Build the scored query used to search in posts.

        Posts are kept only when their forum is in ``self.authorized_forums`` and they are visible; the search
        terms are matched against ``text_html``.

        Weights from ``settings.ZDS_APP['search']['boosts']['post']`` are applied (``boost_mode='multiply'``) when:

        + the post is the first one in its topic;
        + the post is marked as "useful";
        + the like/dislike ratio of the post is above 1.0 (more likes than dislikes) or below 1.0 (the reverse).
        """

        post_boosts = settings.ZDS_APP['search']['boosts']['post']

        is_post = Match(_type='post')
        in_authorized_forum = Terms(forum_pk=self.authorized_forums)
        is_visible = Term(is_visible=True)
        matches_text = MultiMatch(query=self.search_query, fields=['text_html'])

        # (filter, weight) pairs, turned into function-score entries below
        weighted_filters = (
            (Match(position=1), post_boosts['if_first']),
            (Match(is_useful=True), post_boosts['if_useful']),
            (Range(like_dislike_ratio={'gt': 1}), post_boosts['ld_ratio_above_1']),
            (Range(like_dislike_ratio={'lt': 1}), post_boosts['ld_ratio_below_1']),
        )

        return FunctionScore(
            query=is_post & in_authorized_forum & is_visible & matches_text,
            boost_mode='multiply',
            functions=[{'filter': condition, 'weight': weight} for condition, weight in weighted_filters],
        )
Exemplo n.º 3
0
    def get_queryset_posts(self):
        """Return the function-score query for posts.

        A post matches when it belongs to one of ``self.authorized_forums``, is visible, and its ``text_html``
        matches ``self.search_query``.

        The configured ``post`` boosts are applied with ``boost_mode="multiply"`` if:

        + the post is the first one in a topic;
        + the post is marked as "useful";
        + the post has a like/dislike ratio above 1.0 (more likes than dislikes) or below 1.0 (the other way around).
        """

        boosts = settings.ZDS_APP["search"]["boosts"]["post"]

        def boosted(condition, boost_name):
            # One entry of the function-score "functions" list.
            return {"filter": condition, "weight": boosts[boost_name]}

        searched_posts = Match(_type="post")
        searched_posts = searched_posts & Terms(forum_pk=self.authorized_forums)
        searched_posts = searched_posts & Term(is_visible=True)
        searched_posts = searched_posts & MultiMatch(query=self.search_query, fields=["text_html"])

        return FunctionScore(
            query=searched_posts,
            boost_mode="multiply",
            functions=[
                boosted(Match(position=1), "if_first"),
                boosted(Match(is_useful=True), "if_useful"),
                boosted(Range(like_dislike_ratio={"gt": 1}), "ld_ratio_above_1"),
                boosted(Range(like_dislike_ratio={"lt": 1}), "ld_ratio_below_1"),
            ],
        )
Exemplo n.º 4
0
def search_releases(term: str, user_id: str):
    """Search music releases by title, scored with the user's listen count.

    The title is matched with a ``bool_prefix`` multi-match over ``title`` and
    its ``_2gram`` / ``_3gram`` sub-fields. A field-value-factor function on
    ``user_listens.<user_id>`` (0 when the field is missing) is attached to
    the query.

    Returns a list of dicts holding the ``id``, ``title`` and ``listens`` of
    every hit; ``listens`` is 0 when the release has no entry for *user_id*.
    """
    title_query = MultiMatch(
        query=term,
        type="bool_prefix",
        fields=["title", "title._2gram", "title._3gram"],
    )
    # Weight results with higher listen count
    listen_count_factor = FieldValueFactor(
        field='user_listens.{}'.format(user_id),
        missing=0,
    )

    search = Release.search()
    search.query = FunctionScore(query=title_query, functions=[listen_count_factor])

    found = []
    for release in search.execute():
        found.append({
            "id": release.id,
            "title": release.title,
            "listens": release.user_listens.to_dict().get(user_id, 0),
        })
    return found
Exemplo n.º 5
0
    def get_queryset_topics(self):
        """Build the scored query used to search in topics.

        A topic matches when its forum is in ``self.authorized_forums`` and ``self.search_query`` matches its
        ``title``, ``subtitle`` or ``tags``.

        The configured ``topic`` boosts are applied with ``boost_mode="multiply"`` if:

        + the topic is solved;
        + the topic is sticky;
        + the topic is locked.
        """

        topic_boosts = settings.ZDS_APP["search"]["boosts"]["topic"]

        # (boolean field of the topic, name of the boost applied when it is true)
        boosted_flags = (
            ("is_solved", "if_solved"),
            ("is_sticky", "if_sticky"),
            ("is_locked", "if_locked"),
        )
        functions_score = [
            {"filter": Match(**{flag: True}), "weight": topic_boosts[boost_name]}
            for flag, boost_name in boosted_flags
        ]

        is_topic = Match(_type="topic")
        in_authorized_forum = Terms(forum_pk=self.authorized_forums)
        matches_text = MultiMatch(query=self.search_query, fields=["title", "subtitle", "tags"])

        return FunctionScore(
            query=is_topic & in_authorized_forum & matches_text,
            boost_mode="multiply",
            functions=functions_score,
        )
Exemplo n.º 6
0
    def query(self, search, query):
        """Return *search* extended with the page query.

        The text queries on ``self.fields`` and the nested queries on ``sections`` and ``domains`` are combined
        as ``should`` clauses, then wrapped in a function-score query using the script from
        ``self._get_script_score()``. Highlight options are applied first.
        """
        highlighted = search.highlight_options(**self._highlight_options)

        should_clauses = self._get_queries(query=query, fields=self.fields)
        in_sections = self._get_nested_query(query=query, path='sections', fields=self._section_fields)
        in_domains = self._get_nested_query(query=query, path='domains', fields=self._domain_fields)
        should_clauses.extend([in_sections, in_domains])

        ranked = FunctionScore(
            query=Bool(should=should_clauses),
            script_score=self._get_script_score(),
        )
        return highlighted.query(ranked)
Exemplo n.º 7
0
    def get_queryset_publishedcontents(self):
        """Build the scored query used to search in published contents.

        ``self.search_query`` is matched against ``title``, ``description``, ``categories``, ``tags`` and ``text``.
        Weights from ``settings.ZDS_APP['search']['boosts']['publishedcontent']`` are applied
        (``boost_mode='multiply'``) depending on the content type, with extra entries for tutorials that have
        chapters and for opinions that are not picked.
        """

        content_boosts = settings.ZDS_APP['search']['boosts']['publishedcontent']

        # (filter, name of the boost applied when the filter matches)
        boosted_filters = (
            (Match(content_type='TUTORIAL'), 'if_tutorial'),
            (Match(content_type='TUTORIAL') & Match(has_chapters=True), 'if_medium_or_big_tutorial'),
            (Match(content_type='ARTICLE'), 'if_article'),
            (Match(content_type='OPINION'), 'if_opinion'),
            (Match(content_type='OPINION') & Match(picked=False), 'if_opinion_not_picked'),
        )

        is_published_content = Match(_type='publishedcontent')
        matches_text = MultiMatch(
            query=self.search_query, fields=['title', 'description', 'categories', 'tags', 'text'])

        return FunctionScore(
            query=is_published_content & matches_text,
            boost_mode='multiply',
            functions=[
                {'filter': condition, 'weight': content_boosts[boost_name]}
                for condition, boost_name in boosted_filters
            ],
        )
Exemplo n.º 8
0
    def get(self, request, *args, **kwargs):
        """Answer a topic lookup with a JSON list of at most ten topics.

        The ``q`` GET parameter, when present, is stored in ``self.search_query``. If Elasticsearch is connected
        and the query is not empty, topics of the forums returned by ``get_authorized_forums`` are searched on
        ``title``, ``subtitle`` and ``tags``, with the configured weights for solved, sticky and locked topics.

        Returns an ``HttpResponse`` whose JSON body is ``{"results": [...]}``; each result has the keys ``id``,
        ``url`` and ``title``. The list is empty when no search was run.
        """
        if 'q' in request.GET:
            self.search_query = ''.join(request.GET['q'])

        results = []
        if self.index_manager.connected_to_es and self.search_query:
            self.authorized_forums = get_authorized_forums(self.request.user)
            topic_boosts = settings.ZDS_APP['search']['boosts']['topic']

            searched_topics = Match(_type='topic')
            searched_topics = searched_topics & Terms(forum_pk=self.authorized_forums)
            searched_topics = searched_topics & MultiMatch(
                query=self.search_query, fields=['title', 'subtitle', 'tags'])

            scored_query = FunctionScore(
                query=searched_topics,
                boost_mode='multiply',
                functions=[
                    {'filter': Match(is_solved=True), 'weight': topic_boosts['if_solved']},
                    {'filter': Match(is_sticky=True), 'weight': topic_boosts['if_sticky']},
                    {'filter': Match(is_locked=True), 'weight': topic_boosts['if_locked']},
                ],
            )

            # Build the result
            first_hits = Search().query(scored_query)[:10]
            results = [
                {'id': hit.pk, 'url': str(hit.get_absolute_url), 'title': str(hit.title)}
                for hit in first_hits.execute()
            ]

        return HttpResponse(json.dumps({'results': results}), content_type='application/json')
Exemplo n.º 9
0
    def get(self, request, *args, **kwargs):
        """Answer a published-content lookup with a JSON list of at most ten contents.

        GET parameters read here:

        + ``q``: the search terms, stored in ``self.search_query`` when present;
        + ``excluded``: comma-separated ``content_pk`` values to leave out of the results.

        When Elasticsearch is connected and the query is not empty, published contents are searched on
        ``title`` and ``description``, with the configured weights for tutorials, articles and opinions.

        Returns an ``HttpResponse`` whose JSON body is ``{"results": [...]}``; each result has the keys ``id``,
        ``pubdate``, ``title`` and ``description``.
        """
        if "q" in request.GET:
            self.search_query = "".join(request.GET["q"])
        excluded_content_ids = request.GET.get("excluded", "").split(",")

        results = []
        if self.index_manager.connected_to_es and self.search_query:
            self.authorized_forums = get_authorized_forums(self.request.user)
            content_boosts = settings.ZDS_APP["search"]["boosts"]["publishedcontent"]

            search_queryset = Search()
            # str.split() always yields at least one item, so [""] is the only "nothing excluded" value.
            if excluded_content_ids != [""]:
                search_queryset = search_queryset.exclude("terms", content_pk=excluded_content_ids)

            functions_score = [
                {"filter": Match(content_type=content_type), "weight": content_boosts[boost_name]}
                for content_type, boost_name in (
                    ("TUTORIAL", "if_tutorial"),
                    ("ARTICLE", "if_article"),
                    ("OPINION", "if_opinion"),
                )
            ]

            is_published_content = Match(_type="publishedcontent")
            matches_text = MultiMatch(query=self.search_query, fields=["title", "description"])
            scored_query = FunctionScore(
                query=is_published_content & matches_text,
                boost_mode="multiply",
                functions=functions_score,
            )

            # Build the result
            first_hits = search_queryset.query(scored_query)[:10]
            results = [
                {
                    "id": hit.content_pk,
                    "pubdate": hit.publication_date,
                    "title": str(hit.title),
                    "description": str(hit.description),
                }
                for hit in first_hits.execute()
            ]

        return HttpResponse(json_handler.dumps({"results": results}), content_type="application/json")
Exemplo n.º 10
0
    def get(self, request, *args, **kwargs):
        """Answer a topic lookup with a JSON list of at most ten detailed topics.

        The ``q`` GET parameter, when present, is stored in ``self.search_query``. If Elasticsearch is connected
        and the query is not empty, topics of the forums returned by ``get_authorized_forums`` are searched on
        ``title``, ``subtitle`` and ``tags``, with the configured weights for solved, sticky and locked topics.

        Returns an ``HttpResponse`` whose JSON body is ``{"results": [...]}``; each result has the keys ``id``,
        ``url``, ``title``, ``subtitle``, ``forumTitle``, ``forumUrl`` and ``pubdate``.
        """
        if "q" in request.GET:
            self.search_query = "".join(request.GET["q"])

        results = []
        if self.index_manager.connected_to_es and self.search_query:
            self.authorized_forums = get_authorized_forums(self.request.user)
            topic_boosts = settings.ZDS_APP["search"]["boosts"]["topic"]

            is_topic = Match(_type="topic")
            in_authorized_forum = Terms(forum_pk=self.authorized_forums)
            matches_text = MultiMatch(query=self.search_query, fields=["title", "subtitle", "tags"])

            # (filter, weight) pairs, turned into function-score entries below
            weighted_filters = (
                (Match(is_solved=True), topic_boosts["if_solved"]),
                (Match(is_sticky=True), topic_boosts["if_sticky"]),
                (Match(is_locked=True), topic_boosts["if_locked"]),
            )
            scored_query = FunctionScore(
                query=is_topic & in_authorized_forum & matches_text,
                boost_mode="multiply",
                functions=[{"filter": condition, "weight": weight} for condition, weight in weighted_filters],
            )

            # Build the result
            for hit in Search().query(scored_query)[:10].execute():
                results.append({
                    "id": hit.pk,
                    "url": str(hit.get_absolute_url),
                    "title": str(hit.title),
                    "subtitle": str(hit.subtitle),
                    "forumTitle": str(hit.forum_title),
                    "forumUrl": str(hit.forum_get_absolute_url),
                    "pubdate": str(hit.pubdate),
                })

        return HttpResponse(json_handler.dumps({"results": results}), content_type="application/json")
Exemplo n.º 11
0
def TagBoost(slugs, boost_mode="multiply", weight=5):
    """Return a function-score query weighting documents tagged with the included slugs.

    *slugs* is split by ``_parse_slugs`` into included and excluded slugs; only
    the included ones are used here. Documents with a nested ``tags`` entry
    whose ``tags.slug`` is one of them receive *weight*, combined according to
    *boost_mode*.
    """
    included_slugs, _excluded_slugs = _parse_slugs(slugs)
    has_included_tag = Nested(path="tags",
                              filter=Terms(**{"tags.slug": included_slugs}))
    tag_function = {"filter": has_included_tag, "weight": weight}
    return FunctionScore(boost_mode=boost_mode, functions=[tag_function])
Exemplo n.º 12
0
 def build_search(self):
     """Build the search of the parent class, adding the scoring functions.

     When ``self.boosters`` is not empty, the query is wrapped in a
     function-score query made of each booster's ``to_query()``. The search
     is returned with an empty ``fields`` list.
     """
     search = super(SearchQuery, self).build_search()
     # Handle scoring functions
     if self.boosters:
         scoring_functions = [booster.to_query() for booster in self.boosters]
         search.query = FunctionScore(query=search.query,
                                      functions=scoring_functions)
     # Until https://github.com/elastic/elasticsearch-dsl-py/pull/474
     # is merged and released
     return search.fields([])
Exemplo n.º 13
0
    def query(self, search, query):
        """Return *search* extended with the page query, nested queries included.

        One text query per entry of ``self.operators`` is built on ``self.fields``; nested queries on ``sections``
        and ``domains`` are added with highlighted inner hits. All of them become ``should`` clauses of a
        function-score query using the script from ``self._get_script_score()``.
        """
        highlighted = search.highlight_options(**self._highlight_options)

        def inner_hits_highlighting(*field_names):
            # Highlight options of the search, restricted to the given nested fields.
            return {
                'highlight': dict(
                    self._highlight_options,
                    fields={field_name: {} for field_name in field_names},
                )
            }

        # match query for the title (of the page) field.
        should_clauses = [
            self._get_text_query(query=query, fields=self.fields, operator=operator)
            for operator in self.operators
        ]

        # nested query for search in sections
        in_sections = self.generate_nested_query(
            query=query,
            path='sections',
            fields=self._section_fields,
            inner_hits=inner_hits_highlighting('sections.title', 'sections.content'),
        )

        # nested query for search in domains
        in_domains = self.generate_nested_query(
            query=query,
            path='domains',
            fields=self._domain_fields,
            inner_hits=inner_hits_highlighting('domains.name', 'domains.docstrings'),
        )

        should_clauses += [in_sections, in_domains]

        ranked = FunctionScore(
            query=Bool(should=should_clauses),
            script_score=self._get_script_score(),
        )
        return highlighted.query(ranked)
Exemplo n.º 14
0
    def get(self, request, *args, **kwargs):
        """Answer a published-content lookup with a JSON list of at most ten contents.

        GET parameters read here:

        + ``q``: the search terms, stored in ``self.search_query`` when present;
        + ``excluded``: comma-separated ``content_pk`` values to leave out of the results.

        When Elasticsearch is connected and the query is not empty, published contents are searched on
        ``title`` and ``description``, with the configured weights for tutorials, articles and opinions.

        Returns an ``HttpResponse`` whose JSON body is ``{"results": [...]}``; each result has the keys ``id``,
        ``pubdate``, ``title`` and ``description``.
        """
        if 'q' in request.GET:
            self.search_query = ''.join(request.GET['q'])
        excluded_content_ids = request.GET.get('excluded', '').split(',')

        results = []
        if self.index_manager.connected_to_es and self.search_query:
            self.authorized_forums = get_authorized_forums(self.request.user)
            content_boosts = settings.ZDS_APP['search']['boosts']['publishedcontent']

            def boost_for(content_type, boost_name):
                # One entry of the function-score "functions" list.
                return {'filter': Match(content_type=content_type), 'weight': content_boosts[boost_name]}

            search_queryset = Search()
            # str.split() always yields at least one item, so [''] is the only "nothing excluded" value.
            if excluded_content_ids != ['']:
                search_queryset = search_queryset.exclude('terms', content_pk=excluded_content_ids)

            searched_contents = Match(_type='publishedcontent') & MultiMatch(
                query=self.search_query, fields=['title', 'description'])
            scored_query = FunctionScore(
                query=searched_contents,
                boost_mode='multiply',
                functions=[
                    boost_for('TUTORIAL', 'if_tutorial'),
                    boost_for('ARTICLE', 'if_article'),
                    boost_for('OPINION', 'if_opinion'),
                ],
            )

            # Build the result
            for hit in search_queryset.query(scored_query)[:10].execute():
                results.append({
                    'id': hit.content_pk,
                    'pubdate': hit.publication_date,
                    'title': str(hit.title),
                    'description': str(hit.description),
                })

        return HttpResponse(json_handler.dumps({'results': results}), content_type='application/json')
Exemplo n.º 15
0
 def query(self, search, query):
     """Apply *query* to *search*, or a random ordering when it is empty.

     A non-empty *query* is run as a ``simple_query_string`` over
     ``self.fields`` with ``and`` as default operator. Otherwise every
     document is matched and scored with ``random_score``, seeded with the
     current time in whole seconds.
     """
     if not query:
         random_order = SF('random_score', seed=int(time.time()))
         search.query = FunctionScore(query=Q(), functions=[random_order])
         return search

     return search.query("simple_query_string",
                         fields=self.fields,
                         query=query,
                         default_operator='and')
Exemplo n.º 16
0
    def get_queryset(self):
        """Build the search for the current query, or return an empty list.

        An empty list is returned (after a warning message) when Elasticsearch is not connected, and also when
        there is no search query. Otherwise one partial query is built per searched model through the matching
        ``get_queryset_<model>s()`` method; the partial queries are OR-ed together, weighted with the ``global``
        boost of each searched type, and highlighting of ``text`` and ``text_html`` is configured. The result of
        ``self.index_manager.setup_search()`` on that search is returned.
        """
        if not self.index_manager.connected_to_es:
            messages.warning(self.request, _(u'Impossible de se connecter à Elasticsearch'))
            return []

        if not self.search_query:
            return []

        # find forums the user is allowed to visit
        self.authorized_forums = get_authorized_forums(self.request.user)

        # models to search in (according to the selected groups, if any)
        search_groups = settings.ZDS_APP['search']['search_groups']
        chosen_groups = self.search_form.cleaned_data['models']
        if chosen_groups:
            models = [search_groups[group][1] for group in chosen_groups if group in search_groups]
        else:
            models = [group_info[1] for _name, group_info in search_groups.iteritems()]

        # each group lists several models: flatten them
        models = reduce(operator.concat, models)

        # one partial query per model, combined with OR
        part_querysets = [getattr(self, 'get_queryset_{}s'.format(model))() for model in models]
        queryset = part_querysets[0]
        for part_queryset in part_querysets[1:]:
            queryset |= part_queryset

        # weighting:
        weight_functions = [
            {'filter': Match(_type=_type), 'weight': weights['global']}
            for _type, weights in settings.ZDS_APP['search']['boosts'].items()
            if _type in models
        ]
        scored_queryset = FunctionScore(query=queryset, boost_mode='multiply', functions=weight_functions)
        search_queryset = Search().query(scored_queryset)

        # highlighting:
        search_queryset = search_queryset.highlight_options(
            fragment_size=150, number_of_fragments=5, pre_tags=['[hl]'], post_tags=['[/hl]'])
        search_queryset = search_queryset.highlight('text').highlight('text_html')

        # executing:
        return self.index_manager.setup_search(search_queryset)
Exemplo n.º 17
0
def random(request):
    """Redirect to the ``frontend_url`` of a random case of at least 1,000 words."""
    long_cases = (
        CaseDocument.search()
        .source(['frontend_url'])
        .filter('range', analysis__word_count={'gte': 1000})
    )
    # The query= argument can be omitted when no filter was applied first.
    # To weight by pagerank, a second function could be added:
    # SF('field_value_factor', field='analysis.pagerank.percentile', modifier="ln1p", missing=0)
    long_cases.query = FunctionScore(
        query=long_cases.query,
        functions=[SF('random_score')],
        boost_mode='replace',
    )
    response = long_cases[0].execute()
    return HttpResponseRedirect(response[0].frontend_url)
Exemplo n.º 18
0
    def query(self, search, query):
        """
        Return *search* extended with the page query, nested queries included.

        The text queries on ``self.fields`` and the nested queries on ``sections``
        and ``domains`` are combined as ``should`` clauses and ranked with the
        script from ``self._get_script_score()``.

        If `self.projects` was given, only the documents matching one of its
        (project, version) pairs are kept.
        """
        prepared = search.highlight_options(**self._highlight_options)
        prepared = prepared.source(excludes=self.excludes)

        should_clauses = self._get_queries(query=query, fields=self.fields)
        in_sections = self._get_nested_query(query=query, path='sections', fields=self._section_fields)
        in_domains = self._get_nested_query(query=query, path='domains', fields=self._domain_fields)
        should_clauses.extend([in_sections, in_domains])

        text_query = Bool(should=should_clauses)

        if self.projects:
            # A document must belong to one of the requested (project, version) pairs.
            same_project_and_version = Bool(should=[
                Bool(must=[
                    Term(project={'value': project_name}),
                    Term(version={'value': version_name}),
                ])
                for project_name, version_name in self.projects.items()
            ])
            text_query = Bool(must=[text_query, same_project_and_version])

        ranked = FunctionScore(
            query=text_query,
            script_score=self._get_script_score(),
        )
        return prepared.query(ranked)
Exemplo n.º 19
0
    def search_keywords(self, words):
        """Search ``self.spec_fields`` for any of *words* and return the executed response.

        Each word becomes a multi-match query over ``self.spec_fields``; the queries are OR-ed together and
        wrapped in a function-score query whose script re-scores the hits from their
        ``info.opinion_stats`` values.

        Raises ``TypeError`` when *words* is empty, because ``reduce()`` has nothing to combine.
        """
        or_queries = [MultiMatch(query=word, fields=self.spec_fields) for word in words]
        any_word = reduce(operator.or_, or_queries)

        script = {
            'script': {
                'source':
                "_score * sigmoid(2.74, 1, Math.sqrt(doc['info.opinion_stats.positive'].value) - doc['info.opinion_stats.negative'].value) * (doc['info.opinion_stats.positive'].value + 1) / (doc['info.opinion_stats.total'].value + 1)"
            }
        }

        # The query is assigned once, already wrapped in the function score.
        s = Search(using=self.client)
        s.query = FunctionScore(query=any_word, script_score=script)

        return s.execute()
Exemplo n.º 20
0
    def get_queryset_publishedcontents(self):
        """Build the scored query used to search in PublishedContent objects.

        ``self.search_query`` is matched against ``title``, ``description``, ``categories``, ``subcategories``,
        ``tags`` and ``text``. The query is narrowed to tutorials and articles when ``self.from_library`` is set,
        and to ``self.content_category`` / ``self.content_subcategory`` when those are set.

        The configured ``publishedcontent`` boosts are applied with ``boost_mode="multiply"`` depending on the
        content type, with extra entries for tutorials that have chapters and for opinions that are not picked.
        """

        content_boosts = settings.ZDS_APP["search"]["boosts"]["publishedcontent"]

        searched_fields = ["title", "description", "categories", "subcategories", "tags", "text"]
        searched_contents = Match(_type="publishedcontent") & MultiMatch(
            query=self.search_query, fields=searched_fields
        )

        if self.from_library:
            tutorial_or_article = Match(content_type="TUTORIAL") | Match(content_type="ARTICLE")
            searched_contents &= tutorial_or_article

        if self.content_category:
            searched_contents &= Match(categories=self.content_category)

        if self.content_subcategory:
            searched_contents &= Match(subcategories=self.content_subcategory)

        # (filter, name of the boost applied when the filter matches)
        boosted_filters = (
            (Match(content_type="TUTORIAL"), "if_tutorial"),
            (Match(content_type="TUTORIAL") & Match(has_chapters=True), "if_medium_or_big_tutorial"),
            (Match(content_type="ARTICLE"), "if_article"),
            (Match(content_type="OPINION"), "if_opinion"),
            (Match(content_type="OPINION") & Match(picked=False), "if_opinion_not_picked"),
        )
        functions_score = [
            {"filter": condition, "weight": content_boosts[boost_name]}
            for condition, boost_name in boosted_filters
        ]

        return FunctionScore(query=searched_contents, boost_mode="multiply", functions=functions_score)
Exemplo n.º 21
0
 def run(self):
     """Check the contact emails of untagged documents and tag them.

     Documents that have a ``contact_email`` but no ``analysis.breached``
     field (restricted to ``self.source`` when set) are read in random
     order, at most ``self.limit`` of them. Each distinct email is checked
     once with ``self.is_breached()``; the updates returned by
     ``self.tag_by_email()`` are then sent with ``lib.bulk_update()``.
     Progress and indexing errors are printed.
     """
     breached_emails = set()
     unbreached_emails = set()

     # contact_email exists
     must = [Q('exists', field='contact_email')]
     # matches source if specified
     if self.source:
         must.append(Q({'term': {'analysis.source': self.source}}))
     # not already tagged with breached
     untagged = Q('bool',
                  must=must,
                  must_not=[Q('exists', field='analysis.breached')])
     in_random_order = FunctionScore(
         query=untagged,
         functions=[SF('random_score', seed=int(time.time()))])
     search = Search(using=self.es).query(in_random_order).source(
         ['contact_email'])

     print('%s breached: source=%s limit=%s' %
           (datetime.now().isoformat(), self.source, self.limit))
     print('query=\n%s' % json.dumps(search.to_dict()))

     for filing in search[:self.limit]:
         email = filing['contact_email']
         already_checked = (email in breached_emails
                            or email in unbreached_emails)
         if not email or already_checked:
             continue
         if self.is_breached(email):
             breached_emails.add(email)
         else:
             unbreached_emails.add(email)

     docs = []
     print('done source=%s' % self.source)
     if breached_emails:
         docs.extend(self.tag_by_email(list(breached_emails), True))
     if unbreached_emails:
         docs.extend(self.tag_by_email(list(unbreached_emails), False))

     # Best effort: an indexing failure is reported, not raised.
     try:
         lib.bulk_update(self.es, docs)
     except Exception as e:
         print('error indexing: %s' % e)
Exemplo n.º 22
0
    def get_queryset_topics(self):
        """Return the function-score query for topics.

        A topic matches when its forum is in ``self.authorized_forums`` and ``self.search_query`` matches its
        ``title``, ``subtitle`` or ``tags``.

        Weights from ``settings.ZDS_APP['search']['boosts']['topic']`` are applied (``boost_mode='multiply'``) if:

        + the topic is solved;
        + the topic is sticky;
        + the topic is locked.
        """

        topic_boosts = settings.ZDS_APP['search']['boosts']['topic']

        searched_topics = Match(_type='topic')
        searched_topics = searched_topics & Terms(forum_pk=self.authorized_forums)
        searched_topics = searched_topics & MultiMatch(
            query=self.search_query, fields=['title', 'subtitle', 'tags'])

        solved_boost = {'filter': Match(is_solved=True), 'weight': topic_boosts['if_solved']}
        sticky_boost = {'filter': Match(is_sticky=True), 'weight': topic_boosts['if_sticky']}
        locked_boost = {'filter': Match(is_locked=True), 'weight': topic_boosts['if_locked']}

        return FunctionScore(
            query=searched_topics,
            boost_mode='multiply',
            functions=[solved_boost, sticky_boost, locked_boost],
        )
Exemplo n.º 23
0
def browse(request):
    """Render a randomly ordered comment listing with aggregate statistics.

    The listing can be narrowed with the ``source`` or ``titleii`` GET
    parameters. The first 50 hits and the collected statistics are passed
    to the ``listing.html`` template.
    """
    search = Search(using=es)
    description = None

    # Wrap the base query so hits come back in a time-seeded random order.
    search.query = FunctionScore(
        query=search.query,
        functions=[SF('random_score', seed=int(time.time()))])

    params = request.GET
    if 'source' in params:
        source = params['source']
        search = search.filter('terms', **{'analysis.source': [source]})
        description = SOURCE_MAP.get(source, {}).get('name') or source
    elif 'titleii' in params:
        stance = params['titleii']
        if stance in ('pro', 'anti'):
            is_pro = stance == 'pro'
            search = search.filter('terms', **{'analysis.titleii': [is_pro]})
            description = "Pro Title II" if is_pro else 'Anti Title II'
        elif stance == 'unknown':
            search = search.exclude('exists', field='analysis.titleii')
            description = 'Uncategorized'

    search.aggs.bucket('address', A('terms', field='analysis.fulladdress'))
    search.aggs.bucket('site', A('terms', field='analysis.onsite'))
    confirmation_filters = {
        'true': {'term': {'emailConfirmation': 'true'}},
        'false': {'term': {'emailConfirmation': 'false'}},
    }
    search.aggs.bucket('email_confirmation',
                       A('filters', filters=confirmation_filters))
    search.aggs.bucket('unique_emails',
                       A('cardinality', field='contact_email.raw'))

    stats = OrderedDict()
    stats['Comment Form'] = {'On-site': 0, 'Off-site': 0}
    stats['Emails'] = {'Unique': 0}
    stats['Address'] = {'Full Address': 0, 'Partial Address': 0}
    stats['Email Confirmation'] = {'True': 0, 'False': 0, 'Missing': 0}

    def tally(buckets, section, label_if_one, label_if_zero):
        # Copy the doc counts of the buckets keyed 1 / 0 into stats[section].
        for bucket in buckets:
            if bucket.key == 1:
                stats[section][label_if_one] = bucket.doc_count
            elif bucket.key == 0:
                stats[section][label_if_zero] = bucket.doc_count

    response = search[:50].execute()
    total = search.count()

    tally(response.aggregations.address.buckets,
          'Address', 'Full Address', 'Partial Address')
    tally(response.aggregations.site.buckets,
          'Comment Form', 'On-site', 'Off-site')

    stats['Emails']['Unique'] = response.aggregations.unique_emails.value

    confirmation = stats['Email Confirmation']
    confirmation_buckets = response.aggs.email_confirmation.to_dict()['buckets']
    for bucket_name, label in (('true', 'True'), ('false', 'False')):
        if bucket_name in confirmation_buckets:
            confirmation[label] = confirmation_buckets[bucket_name]['doc_count']
    # Whatever is neither confirmed nor unconfirmed is counted as missing.
    confirmation['Missing'] = (
        total - confirmation['True'] - confirmation['False'])

    return render(request, 'listing.html', {
        'description': description,
        'stats': stats,
        'results': response,
        'comment_count': total,
    })
Exemplo n.º 24
0
    def get_sort_popularity(self, request):
        """Build the score query used to sort results by popularity.

        The base score combines a status/creation-date component with a
        contribution-activity component. For an authenticated user, extra
        components are OR-ed in for matching skills, favourite themes and
        proximity to the user's location (or place).
        """
        status_and_age = FunctionScore(
            score_mode='sum',
            functions=[
                SF('field_value_factor',
                   field='status_score', weight=10, factor=10),
                SF('gauss', weight=0.1, created={'scale': "365d"}),
            ]
        )
        contribution_activity = FunctionScore(
            score_mode='multiply',
            functions=[
                SF('field_value_factor',
                   field='contribution_count', missing=0),
                SF('gauss', weight=0.1, multi_value_mode='avg',
                   contributions={'scale': '5d'}),
            ]
        )
        score = status_and_age | contribution_activity

        user = request.user
        if not user.is_authenticated:
            return score

        if user.skills:
            skill_ids = [skill.pk for skill in user.skills.all()]
            skill_filter = Nested(
                path='expertise',
                query=Q('terms', expertise__id=skill_ids)
            )
            score = score | FunctionScore(
                score_mode='first',
                functions=[
                    SF({'filter': skill_filter, 'weight': 1}),
                    SF({'weight': 0}),
                ]
            )

        if user.favourite_themes:
            theme_ids = [theme.pk for theme in user.favourite_themes.all()]
            theme_filter = Nested(
                path='theme',
                query=Q('terms', theme__id=theme_ids)
            )
            score = score | FunctionScore(
                score_mode='first',
                functions=[
                    SF({'filter': theme_filter, 'weight': 1}),
                    SF({'weight': 0}),
                ]
            )

        # The user's location takes precedence over the user's place.
        anchor = None
        if user.location and user.location.position:
            anchor = user.location.position
        elif user.place and user.place.position:
            anchor = user.place.position

        if anchor is not None:
            origin = {'lat': anchor.latitude, 'lon': anchor.longitude}
            distance_decay = {
                'position': {'origin': origin, 'scale': "100km"},
                'multi_value_mode': 'max',
            }
            score = score | FunctionScore(
                score_mode='first',
                functions=[
                    SF({
                        'filter': {'exists': {'field': 'position'}},
                        'weight': 1,
                        'gauss': distance_decay,
                    }),
                    SF({'weight': 0}),
                ]
            )

        return score
Exemplo n.º 25
0
def SponsoredBoost(field_name, boost_mode="multiply", weight=5):
    """Return a FunctionScore that weights documents having ``field_name``.

    Documents in which ``field_name`` exists get ``weight`` applied, and the
    result is combined with the query score according to ``boost_mode``.
    """
    has_field = Exists(field=field_name)
    weighting = {"filter": has_field, "weight": weight}
    return FunctionScore(boost_mode=boost_mode, functions=[weighting])
Exemplo n.º 26
0
    def handle(self, *args, **options):
        """Match every corrected declaration against its likely original.

        Scans all ``NACPDeclaration`` documents flagged ``intro.corrected``,
        builds a name / post / family similarity query for each of them and
        runs it against non-corrected declarations of the same year. The
        candidates are handed to ``self.store_example``. Progress and the
        running success rate are written to stdout. When
        ``options["store_matches"]`` is truthy, highlights are requested and
        the matches are finally exported through ``self.save_to_excel``.

        Reads ``options["store_matches"]`` and ``options["debug"]``.
        """
        corrected = NACPDeclaration.search().filter("term",
                                                    intro__corrected=True)

        cntr = 0
        success_rate = 0
        for d in corrected.scan():
            # The full name must match; every clause has a fixed score.
            must = [
                ConstantScore(query=Q(
                    "multi_match",
                    query=d.general.full_name,
                    operator="and",
                    fields=[
                        "general.last_name",
                        "general.name",
                        "general.patronymic",
                        "general.full_name",
                    ],
                ),
                              boost=10)
            ]

            # Optional signals: post, office and region of the declarant.
            should = [
                ConstantScore(query=Q(
                    "match",
                    general__post__post={
                        "query": d.general.post.post,
                        "minimum_should_match": "50%"
                    },
                ),
                              boost=2),
                ConstantScore(query=Q(
                    "match",
                    general__post__office={
                        "query": d.general.post.office,
                        "minimum_should_match": "50%"
                    },
                ),
                              boost=2),
                ConstantScore(query=Q(
                    "match",
                    general__post__region={
                        "query": d.general.post.region.replace(" область", ""),
                        "minimum_should_match": "60%"
                    },
                ),
                              boost=1)
            ]

            # Each family member name adds one more optional clause.
            for fam in getattr(d.general, "family", []):
                should.append(
                    ConstantScore(query=Q(
                        "multi_match",
                        query=fam.family_name,
                        operator="and",
                        fields=["general.family.family_name"]),
                                  boost=2))

            # Same year, not the document itself, not corrected, same doc type
            # (the doc type clause has boost 0, so it adds nothing to the score).
            candidates = NACPDeclaration.search() \
                .query(
                    FunctionScore(
                        query=Q("bool", must=must, should=should),
                        score_mode="sum"
                    )
                ) \
                .filter("term",
                    intro__declaration_year=d.intro.declaration_year) \
                .query(~Q('term', _id=d.meta.id)) \
                .filter("term", intro__corrected=False) \
                .query(
                    ConstantScore(
                        query=Q("term", intro__doc_type=d.intro.doc_type),
                        boost=0
                    )
                )

            if options["store_matches"]:
                candidates = candidates \
                    .highlight_options(
                        order='score', fragment_size=500,
                        number_of_fragments=100, pre_tags=['||!'],
                        post_tags=["||"]) \
                    .highlight(
                        "general.full_name", "general.post.region",
                        "general.post.office", "general.post.post",
                        "general.family.family_name")

            candidates = candidates.execute()

            success = self.store_example(
                d,
                candidates,
                debug=options["debug"],
                store_matches=options["store_matches"])

            if success:
                success_rate += 1

            cntr += 1

            # cntr is at least 1 here, so the division below is safe.
            if cntr % 5000 == 0:
                self.stdout.write("%s declarations processed, SR: %s%%" %
                                  (cntr, success_rate / cntr * 100))

        # With no corrected declarations at all, report 0.0% instead of
        # failing with ZeroDivisionError.
        overall_rate = success_rate / cntr * 100 if cntr else 0.0
        self.stdout.write("%s declarations processed, SR: %s%%" %
                          (cntr, overall_rate))

        if options["store_matches"]:
            self.save_to_excel(options["store_matches"])
Exemplo n.º 27
0
def browse(request, sentiment=None, group=None):
    """Render the comment listing for an optional source group or sentiment.

    Builds a randomly ordered search over the ``fcc-comments`` index and
    narrows it by ``group`` (a source identifier) or, failing that, by
    ``sentiment`` ('pro', 'anti' or 'unknown' Title II stance). Aggregate
    statistics are collected and ``listing.html`` is rendered with the first
    50 hits.

    When neither ``group`` nor ``sentiment`` is given, the listing is
    unfiltered and ``details`` / ``url`` fall back to ``""`` / ``None``.
    """

    s = Search(using=es, index="fcc-comments")
    description = None
    # Defaults for the unfiltered listing; previously these names were only
    # bound inside the branches below, so a call without group or sentiment
    # raised UnboundLocalError when the context was built.
    details, url = "", None

    # Wrap the base query so hits come back in a time-seeded random order.
    s.query = FunctionScore(
        query=s.query, functions=[SF('random_score', seed=int(time.time()))]
    )

    if group:
        source = group
        s = s.filter('terms', **{'analysis.source.keyword': [source]})
        description = SOURCE_MAP.get(source, {}).get('name') or source
        details = SOURCE_MAP.get(source, {}).get('details') or ""
        url = SOURCE_MAP.get(source, {}).get('url') or ""

    elif sentiment:
        title_ii = sentiment
        if title_ii == 'pro':
            s = s.filter('terms', **{'analysis.titleii': [True]})
            description = "Pro Title II"
        elif title_ii == 'anti':
            description = 'Anti Title II'
            s = s.filter('terms', **{'analysis.titleii': [False]})
        elif title_ii == 'unknown':
            description = 'Uncategorized'
            s = s.exclude('exists', field='analysis.titleii')
        details, url = "", None

    s.aggs.bucket("date", A('date_histogram', field='date_submission', interval='month'))
    s.aggs.bucket('address', A('terms', field='analysis.fulladdress'))
    s.aggs.bucket('email_domain', A('terms', field='analysis.throwawayemail'))
    s.aggs.bucket('site', A('terms', field='analysis.onsite'))
    s.aggs.bucket('ingestion', A('terms', field='analysis.ingestion_method.keyword'))
    s.aggs.bucket('email_confirmation', A('filters', filters={
        'true': {'term': {'emailConfirmation': 'true'}},
        'false': {'term': {'emailConfirmation': 'false'}}
    }))

    # s.aggs.bucket('unique_emails', A('cardinality', field='contact_email.raw'))

    stats = OrderedDict({
        'Comment Form': {
            'On-site': 0,
            'Off-site': 0
        },
        'Throwaway Email': {
            'True': 0,
            'False': 0
        },
        'Address': {
            'Full Address': 0,
            'Partial Address': 0,
        },
        'Email Confirmation': {
            'True': 0,
            'False': 0,
            'Missing': 0
        },
        'Filing Method': {
            'API': 0,
            'Spreadsheet': 0,
            'Direct': 0
        },
        'Filing Dates': OrderedDict()
    })

    response = s[:50].execute()
    total = s.count()

    for bucket in response.aggregations.date.buckets:
        # bucket.key is divided by 1000 (milliseconds to seconds) and shifted
        # by 14400 s (4 h) before conversion.
        # NOTE(review): the shift presumably compensates for a timezone
        # offset, and the "/17" label hard-codes the year - confirm both.
        d = datetime.fromtimestamp((bucket.key/1000.) + 14400)
        title = "%s/17 - %s" % (d.strftime("%m"), d.strftime("%B"))
        stats['Filing Dates'][title] = bucket.doc_count

    for bucket in response.aggregations.address.buckets:
        if bucket.key == 1:
            stats['Address']['Full Address'] = bucket.doc_count
        elif bucket.key == 0:
            stats['Address']['Partial Address'] = bucket.doc_count

    for bucket in response.aggregations.email_domain.buckets:
        if bucket.key == 1:
            stats['Throwaway Email']['True'] = bucket.doc_count
        elif bucket.key == 0:
            stats['Throwaway Email']['False'] = bucket.doc_count

    for bucket in response.aggregations.ingestion.buckets:
        if bucket.key == "api":
            stats['Filing Method']['API'] = bucket.doc_count
        elif bucket.key == "csv":
            stats['Filing Method']['Spreadsheet'] = bucket.doc_count
        elif bucket.key == "direct":
            stats['Filing Method']['Direct'] = bucket.doc_count

    for bucket in response.aggregations.site.buckets:
        if bucket.key == 1:
            stats['Comment Form']['On-site'] = bucket.doc_count
        elif bucket.key == 0:
            stats['Comment Form']['Off-site'] = bucket.doc_count

    # stats['Emails']['Unique'] = response.aggregations.unique_emails.value

    for bucket, value in response.aggs.email_confirmation.to_dict()['buckets'].items():
        if bucket == 'true':
            stats['Email Confirmation']['True'] = value['doc_count']
        elif bucket == 'false':
            stats['Email Confirmation']['False'] = value['doc_count']
    # Whatever is neither confirmed nor unconfirmed is counted as missing.
    stats['Email Confirmation']['Missing'] = (
        total - stats['Email Confirmation']['True'] - stats['Email Confirmation']['False']
    )

    context = {
        'description': description,
        'details': details,
        'url': url,
        'stats': stats,
        'results': response,
        'comment_count': total
    }

    return render(request, 'listing.html', context)
Exemplo n.º 28
0
    def get_queryset(self):
        """Build and run the combined search over the selected content types.

        Returns an empty list when Elasticsearch is unreachable (a warning
        message is added to the request), when there is no search query, or
        when none of the selected groups resolves to a searchable model.
        Otherwise returns whatever ``self.index_manager.setup_search`` yields
        for the scored, highlighted search.
        """
        if not self.index_manager.connected_to_es:
            messages.warning(self.request,
                             _("Impossible de se connecter à Elasticsearch"))
            return []

        if not self.search_query:
            return []

        # Searches forums the user is allowed to visit
        self.authorized_forums = get_authorized_forums(self.request.user)

        search_queryset = Search()

        # Restrict (sub)category if any
        if self.search_form.cleaned_data["category"]:
            self.content_category = self.search_form.cleaned_data["category"]
        if self.search_form.cleaned_data["subcategory"]:
            self.content_subcategory = self.search_form.cleaned_data[
                "subcategory"]

        # Mark that contents must come from library if required
        self.from_library = (
            self.search_form.cleaned_data["from_library"] == "on")

        # Setting the different querysets (according to the selected models, if any)
        search_groups = settings.ZDS_APP["search"]["search_groups"]
        chosen_groups = self.search_form.cleaned_data["models"]

        if chosen_groups:
            # Unknown group names are silently ignored.
            models = [
                search_groups[group][1] for group in chosen_groups
                if group in search_groups
            ]
        else:
            models = [group[1] for group in search_groups.values()]

        # Flatten the per-group model lists. reduce() without an initial
        # value raises TypeError on an empty sequence, so guard that case.
        models = reduce(operator.concat, models) if models else []
        if not models:
            # Nothing searchable was selected: no results rather than a crash.
            return []

        part_querysets = [
            getattr(self, f"get_queryset_{model}s")() for model in models
        ]

        queryset = part_querysets[0]
        for query in part_querysets[1:]:
            queryset |= query

        # Weighting:
        weight_functions = [
            {"filter": Match(_type=_type), "weight": weights["global"]}
            for _type, weights in settings.ZDS_APP["search"]["boosts"].items()
            if _type in models
        ]

        scored_queryset = FunctionScore(query=queryset,
                                        boost_mode="multiply",
                                        functions=weight_functions)
        search_queryset = search_queryset.query(scored_queryset)

        # Highlighting:
        search_queryset = search_queryset.highlight_options(
            fragment_size=150,
            number_of_fragments=5,
            pre_tags=["[hl]"],
            post_tags=["[/hl]"])
        search_queryset = search_queryset.highlight("text").highlight(
            "text_html")

        # Executing:
        return self.index_manager.setup_search(search_queryset)