Exemplo n.º 1
0
    def suggest(self, **kwargs):
        """Return a "did you mean" phrase suggestion for the query, if any.

        Sends a suggest-only search (``size`` 0) that runs a phrase suggester
        named ``simple-phrase`` against the ``fancy`` field.

        Args:
            **kwargs: Only ``q`` is read. It is used as the suggest text and
                is looked up with ``.get``, so ``None`` is sent if absent.

        Returns:
            The ``text`` of the first suggestion option, or ``None`` when the
            response has no options or lacks the expected structure.
        """
        es_obj = self.es_obj or utils.get_elasticsearch_object()

        payload = {
            "index": utils.get_elasticsearch_tor_index(),
            "doc_type": utils.get_elasticsearch_type(),
            "size": 0,
            "body": {
                "suggest": {
                    "text": kwargs.get('q'),
                    "simple-phrase": {
                        "phrase": {
                            "field": "fancy",
                            "gram_size": 2   # todo make this applicable?
                        }
                    }
                }
            }
        }
        # Errors raised by the search call itself are deliberately not caught
        # here; only the parsing of the response is best-effort.
        resp = es_obj.search(**payload)

        try:
            options = resp['suggest']['simple-phrase'][0]['options']
            if options:
                return options[0]['text']
        except (KeyError, IndexError, TypeError, ValueError) as e:
            # KeyError/IndexError cover a response that is missing the
            # suggest section or carries an empty entry list; without them a
            # malformed response would crash the caller instead of simply
            # yielding "no suggestion".
            logger.exception(e)

        return None
Exemplo n.º 2
0
    def __init__(self):
        """Create the Elasticsearch client and remember the target index.

        The client is configured from ``settings.ELASTICSEARCH_SERVERS`` and
        ``settings.ELASTICSEARCH_TIMEOUT`` and uses ``RequestsHttpConnection``
        as its connection class.
        """
        # NOTE(review): super(BaseCommand, self) starts the MRO lookup *after*
        # BaseCommand; if this class derives from BaseCommand, that skips
        # BaseCommand.__init__ -- confirm this is intended.
        super(BaseCommand, self).__init__()

        client_options = dict(
            hosts=settings.ELASTICSEARCH_SERVERS,
            timeout=settings.ELASTICSEARCH_TIMEOUT,
            connection_class=RequestsHttpConnection,
        )
        self.es_obj = Elasticsearch(**client_options)
        self.es_index = utils.get_elasticsearch_tor_index()
Exemplo n.º 3
0
 def get_es_context(self, **kwargs):
     """Build the search request arguments for a domain-grouped query.

     The returned dict has the keys ``index``, ``doc_type``, ``body`` and
     ``size`` (presumably unpacked into an Elasticsearch ``search`` call --
     confirm against the caller). The body contains:

     * a ``multi_match`` (``most_fields``) query over weighted title,
       anchor, fancy and content fields;
     * ``must_not`` clauses excluding documents in which ``is_fake`` or
       ``is_banned`` exists;
     * a phrase suggester named ``simple-phrase`` on the ``fancy`` field;
     * a ``domains`` terms aggregation (up to 1000 buckets, ordered by the
       ``max_score`` sub-aggregation) carrying one top hit per domain.

     ``size`` is 0, so results come from the aggregation and the suggester
     rather than from the plain hit list.

     Args:
         **kwargs: Must contain ``q``, the user's query text.

     Returns:
         dict: The request arguments described above.

     Raises:
         KeyError: If ``q`` is not supplied.
     """
     query_text = kwargs['q']
     return {
         "index": utils.get_elasticsearch_tor_index(),
         "doc_type": utils.get_elasticsearch_type(),
         "body": {
             "query": {
                 "bool": {
                     "must": [{
                         "multi_match": {
                             "query": query_text,
                             "type": "most_fields",
                             "fields": [
                                 'title^6',
                                 'anchor^6',
                                 'fancy.shingles^3',
                                 'fancy.stemmed^3',
                                 'fancy^3',
                                 'content^1',
                             ],
                             "minimum_should_match": "75%",
                             "cutoff_frequency": 0.01
                         }
                     }],
                     # One "exists" clause per field: a dict literal keeps
                     # only the last of several identical keys, so putting
                     # both fields under a single "exists" silently dropped
                     # the is_fake check.
                     "must_not": [
                         {"exists": {"field": "is_fake"}},
                         {"exists": {"field": "is_banned"}},
                     ]
                 }
             },
             "suggest": {
                 "text": query_text,
                 "simple-phrase": {
                     "phrase": {
                         "field": "fancy",
                         "gram_size": 2  # todo make this applicable?
                     }
                 }
             },
             "aggregations": {
                 "domains": {
                     "terms": {
                         "size": 1000,
                         "field": "domain",
                         "order": {
                             "max_score": "desc"
                         }
                     },
                     "aggregations": {
                         # Best document per domain: highest authority
                         # first, relevance score as the tie-breaker.
                         "score": {
                             "top_hits": {
                                 "size": 1,
                                 "sort": [{
                                     "authority": {
                                         "order": "desc",
                                         "missing": 0.0000000001
                                     }
                                 }, {
                                     "_score": {
                                         "order": "desc"
                                     }
                                 }],
                                 "_source": {
                                     "include": [
                                         "title", "url", "meta",
                                         "updated_on", "domain",
                                         "authority", "anchors", "links"
                                     ]
                                 }
                             }
                         },
                         # Referenced by the terms "order" above.
                         "max_score": {
                             "max": {
                                 "script": "_score"
                             }
                         },
                     }
                 }
             }
         },
         "size": 0
     }
Exemplo n.º 4
0
 def get_es_context(self, **kwargs):
     """Assemble the arguments of the domain-grouped search request.

     The result is a dict with ``index``, ``doc_type``, ``body`` and
     ``size`` entries (presumably passed on to an Elasticsearch ``search``
     call -- confirm against the caller). Its body combines a weighted
     ``multi_match`` query, exclusion of documents that have an ``is_fake``
     or ``is_banned`` field, a ``simple-phrase`` suggester on ``fancy``,
     and a ``domains`` terms aggregation that keeps one top hit per domain.
     ``size`` is 0, so only aggregation and suggester output is requested.

     Args:
         **kwargs: Must contain ``q``, the user's query text.

     Returns:
         dict: The request arguments described above.

     Raises:
         KeyError: If ``q`` is not supplied.
     """
     query_text = kwargs['q']

     # Every flag needs its own "exists" clause. Writing both as "field"
     # keys of one dict literal keeps only the last key, which silently
     # removed the is_fake exclusion.
     excluded_flags = ("is_fake", "is_banned")
     must_not = [{"exists": {"field": flag}} for flag in excluded_flags]

     return {
         "index": utils.get_elasticsearch_tor_index(),
         "doc_type": utils.get_elasticsearch_type(),
         "body": {
             "query": {
                 "bool": {
                     "must": [
                         {
                             "multi_match": {
                                 "query": query_text,
                                 "type": "most_fields",
                                 "fields": [
                                     'title^6',
                                     'anchor^6',
                                     'fancy.shingles^3',
                                     'fancy.stemmed^3',
                                     'fancy^3',
                                     'content^1',
                                 ],
                                 "minimum_should_match": "75%",
                                 "cutoff_frequency": 0.01
                             }
                         }
                     ],
                     "must_not": must_not
                 }
             },
             "suggest": {
                 "text": query_text,
                 "simple-phrase": {
                     "phrase": {
                         "field": "fancy",
                         "gram_size": 2  # todo make this applicable?
                     }
                 }
             },
             "aggregations": {
                 "domains": {
                     "terms": {
                         "size": 1000,
                         "field": "domain",
                         "order": {"max_score": "desc"}
                     },
                     "aggregations": {
                         # One representative document per domain, chosen
                         # by authority and then by relevance score.
                         "score": {
                             "top_hits": {
                                 "size": 1,
                                 "sort": [
                                     {
                                         "authority": {
                                             "order": "desc",
                                             "missing": 0.0000000001
                                         }
                                     },
                                     {
                                         "_score": {
                                             "order": "desc"
                                         }
                                     }
                                 ],
                                 "_source": {
                                     "include": ["title", "url", "meta",
                                                 "updated_on", "domain",
                                                 "authority", "anchors",
                                                 "links"]
                                 }
                             }
                         },
                         # Referenced by the terms "order" above.
                         "max_score": {
                             "max": {
                                 "script": "_score"
                             }
                         },
                     }
                 }
             }
         },
         "size": 0
     }
Exemplo n.º 5
0
    def __init__(self):
        """Attach the shared Elasticsearch client and index name.

        Both values come from the project's ``utils`` helpers.
        """
        # NOTE(review): super(BaseCommand, self) starts the MRO lookup *after*
        # BaseCommand; if this class derives from BaseCommand, that skips
        # BaseCommand.__init__ -- confirm this is intended.
        super(BaseCommand, self).__init__()

        client = utils.get_elasticsearch_object()
        self.es_obj = client

        index_name = utils.get_elasticsearch_tor_index()
        self.es_index = index_name