Exemplo n.º 1
0
 def get_es_context(self, **kwargs):
     """ Build the search arguments that bucket banned documents by domain.

     The query keeps only documents whose ``is_banned`` term equals 1 and
     aggregates them on the ``domain`` field (bucket size 1000). ``size``
     is 0 at the top level.

     :param kwargs: accepted for interface compatibility; not read here.
     :return: dict with ``index``, ``doc_type``, ``size``, ``body`` and
         ``_source_include`` entries (presumably forwarded to the
         Elasticsearch client's search call -- confirm against the caller).
     """
     banned_filter = {"term": {"is_banned": 1}}
     domain_buckets = {"terms": {"field": "domain", "size": 1000}}
     source_fields = ["title", "url", "meta", "updated_on", "domain"]

     return {
         "index": utils.get_elasticsearch_index(),
         "doc_type": utils.get_elasticsearch_type(),
         "size": 0,
         "body": {
             "query": {"constant_score": {"filter": banned_filter}},
             "aggs": {"domains": domain_buckets},
         },
         "_source_include": source_fields,
     }
Exemplo n.º 2
0
    def suggest(self, **kwargs):
        """ Did you mean functionality

        Runs a phrase suggester on the ``fancy`` field for the text given
        as ``q`` and returns the text of the first suggested option.

        :param kwargs: ``q`` is the user-entered text; a missing ``q`` is
            sent as ``None``.
        :return: the first suggestion's text, or ``None`` when the response
            carries no options or does not have the expected structure.
        """
        suggestion = None
        es_obj = self.es_obj or utils.get_elasticsearch_object()

        payload = {
            "index": utils.get_elasticsearch_tor_index(),
            "doc_type": utils.get_elasticsearch_type(),
            "size": 0,
            "body": {
                "suggest": {
                    "text": kwargs.get('q'),
                    "simple-phrase": {
                        "phrase": {
                            "field": "fancy",
                            "gram_size": 2   # todo make this applicable?
                        }
                    }
                }
            }
        }
        resp = es_obj.search(**payload)

        try:
            options = resp['suggest']['simple-phrase'][0]['options']
        except (KeyError, IndexError, TypeError, ValueError) as e:
            # A response without the suggest section (KeyError) or with an
            # empty entry list (IndexError) is treated as "no suggestion"
            # and logged, rather than propagating to the caller.
            logger.exception(e)
        else:
            if options:
                suggestion = options[0]['text']

        return suggestion
Exemplo n.º 3
0
 def get_es_context(self, **kwargs):
     """ Build the search arguments that bucket all documents by domain.

     No query clause is set; the body only carries a ``domains`` terms
     aggregation on the ``domain`` field (bucket size 1000). ``size`` is 0
     at the top level.

     :param kwargs: accepted for interface compatibility; not read here.
     :return: dict with ``index``, ``doc_type``, ``size``, ``body`` and
         ``_source_include`` entries.
     """
     domain_terms = {"field": "domain", "size": 1000}
     aggregations = {"domains": {"terms": domain_terms}}
     source_fields = ["title", "url", "meta", "updated_on", "domain"]

     return {
         "index": utils.get_elasticsearch_index(),
         "doc_type": utils.get_elasticsearch_type(),
         "size": 0,
         "body": {"aggs": aggregations},
         "_source_include": source_fields,
     }
Exemplo n.º 4
0
 def get_es_context(self, **kwargs):
     """ Build the search arguments for a per-domain full-text search.

     The text in ``q`` is matched with a ``most_fields`` multi_match over
     the ``fancy`` field and its ``stemmed`` / ``shingles`` sub-fields.
     Documents that have an ``is_fake`` or an ``is_banned`` field are
     excluded. Results are bucketed by ``domain``; buckets are ordered by
     their ``max_score`` sub-aggregation and each carries its single top
     hit (sorted by ``authority``, then ``_score``).

     :param kwargs: must contain ``q``, the query text.
     :return: dict with ``index``, ``doc_type``, ``body`` and ``size``.
     :raises KeyError: if ``q`` is not supplied.
     """
     query = kwargs['q']
     return {
         "index": utils.get_elasticsearch_index(),
         "doc_type": utils.get_elasticsearch_type(),
         "body": {
             "query": {
                 "bool": {
                     "must": [{
                         "multi_match": {
                             "query": query,
                             "type": "most_fields",
                             "fields": [
                                 "fancy", "fancy.stemmed", "fancy.shingles"
                             ],
                             "minimum_should_match": "75%",
                             "cutoff_frequency": 0.01
                         }
                     }],
                     # One "exists" clause per flag: a single dict literal
                     # with two "field" keys keeps only the last one, which
                     # silently dropped the is_fake exclusion.
                     "must_not": [
                         {"exists": {"field": "is_fake"}},
                         {"exists": {"field": "is_banned"}}
                     ]
                 }
             },
             "aggregations": {
                 "domains": {
                     "terms": {
                         "size": 1000,
                         "field": "domain",
                         # Refers to the "max_score" sub-aggregation below.
                         "order": {
                             "max_score": "desc"
                         }
                     },
                     "aggregations": {
                         "score": {
                             "top_hits": {
                                 "size": 1,
                                 "sort": [{
                                     "authority": {
                                         "order": "desc",
                                         "missing": 0.0000000001
                                     }
                                 }, {
                                     "_score": {
                                         "order": "desc"
                                     }
                                 }],
                                 "_source": {
                                     "include": [
                                         "title", "url", "meta",
                                         "updated_on", "domain",
                                         "authority", "anchors"
                                     ]
                                 }
                             }
                         },
                         "max_score": {
                             "max": {
                                 "script": "_score"
                             }
                         }
                     }
                 }
             }
         },
         "size": 0
     }
Exemplo n.º 5
0
 def get_es_context(self, **kwargs):
     """ Build the search arguments for a per-domain search on the tor index.

     The text in ``q`` is matched with a ``most_fields`` multi_match over
     boosted title, anchor, fancy and content fields. Documents that have
     an ``is_fake`` or an ``is_banned`` field are excluded. The body also
     carries a phrase suggester on ``fancy`` for the same text. Results
     are bucketed by ``domain``; buckets are ordered by their
     ``max_score`` sub-aggregation and each carries its single top hit
     (sorted by ``authority``, then ``_score``).

     :param kwargs: must contain ``q``, the query text.
     :return: dict with ``index``, ``doc_type``, ``body`` and ``size``.
     :raises KeyError: if ``q`` is not supplied.
     """
     # Bound once so the query and the suggester always see the same text.
     query = kwargs['q']
     return {
         "index": utils.get_elasticsearch_tor_index(),
         "doc_type": utils.get_elasticsearch_type(),
         "body": {
             "query": {
                 "bool": {
                     "must": [{
                         "multi_match": {
                             "query": query,
                             "type": "most_fields",
                             "fields": [
                                 'title^6',
                                 'anchor^6',
                                 'fancy.shingles^3',
                                 'fancy.stemmed^3',
                                 'fancy^3',
                                 'content^1',
                             ],
                             "minimum_should_match": "75%",
                             "cutoff_frequency": 0.01
                         }
                     }],
                     # One "exists" clause per flag: a single dict literal
                     # with two "field" keys keeps only the last one, which
                     # silently dropped the is_fake exclusion.
                     "must_not": [
                         {"exists": {"field": "is_fake"}},
                         {"exists": {"field": "is_banned"}}
                     ]
                 }
             },
             "suggest": {
                 "text": query,
                 "simple-phrase": {
                     "phrase": {
                         "field": "fancy",
                         "gram_size": 2  # todo make this applicable?
                     }
                 }
             },
             "aggregations": {
                 "domains": {
                     "terms": {
                         "size": 1000,
                         "field": "domain",
                         # Refers to the "max_score" sub-aggregation below.
                         "order": {
                             "max_score": "desc"
                         }
                     },
                     "aggregations": {
                         "score": {
                             "top_hits": {
                                 "size": 1,
                                 "sort": [{
                                     "authority": {
                                         "order": "desc",
                                         "missing": 0.0000000001
                                     }
                                 }, {
                                     "_score": {
                                         "order": "desc"
                                     }
                                 }],
                                 "_source": {
                                     "include": [
                                         "title", "url", "meta",
                                         "updated_on", "domain",
                                         "authority", "anchors", "links"
                                     ]
                                 }
                             }
                         },
                         "max_score": {
                             "max": {
                                 "script": "_score"
                             }
                         },
                     }
                 }
             }
         },
         "size": 0
     }
Exemplo n.º 6
0
 def get_es_context(self, **kwargs):
     """ Build the search arguments for a per-domain search on the tor index.

     The text in ``q`` is matched with a ``most_fields`` multi_match over
     boosted title, anchor, fancy and content fields. Documents that have
     an ``is_fake`` or an ``is_banned`` field are excluded. The body also
     carries a phrase suggester on ``fancy`` for the same text. Results
     are bucketed by ``domain``; buckets are ordered by their
     ``max_score`` sub-aggregation and each carries its single top hit
     (sorted by ``authority``, then ``_score``).

     :param kwargs: must contain ``q``, the query text.
     :return: dict with ``index``, ``doc_type``, ``body`` and ``size``.
     :raises KeyError: if ``q`` is not supplied.
     """
     # Bound once so the query and the suggester always see the same text.
     query = kwargs['q']
     return {
         "index": utils.get_elasticsearch_tor_index(),
         "doc_type": utils.get_elasticsearch_type(),
         "body": {
             "query": {
                 "bool": {
                     "must": [
                         {
                             "multi_match": {
                                 "query": query,
                                 "type": "most_fields",
                                 "fields": [
                                     'title^6',
                                     'anchor^6',
                                     'fancy.shingles^3',
                                     'fancy.stemmed^3',
                                     'fancy^3',
                                     'content^1',
                                 ],
                                 "minimum_should_match": "75%",
                                 "cutoff_frequency": 0.01
                             }
                         }
                     ],
                     # One "exists" clause per flag: a single dict literal
                     # with two "field" keys keeps only the last one, which
                     # silently dropped the is_fake exclusion.
                     "must_not": [
                         {"exists": {"field": "is_fake"}},
                         {"exists": {"field": "is_banned"}}
                     ]
                 }
             },
             "suggest": {
                 "text": query,
                 "simple-phrase": {
                     "phrase": {
                         "field": "fancy",
                         "gram_size": 2  # todo make this applicable?
                     }
                 }
             },
             "aggregations": {
                 "domains": {
                     "terms": {
                         "size": 1000,
                         "field": "domain",
                         # Refers to the "max_score" sub-aggregation below.
                         "order": {"max_score": "desc"}
                     },
                     "aggregations": {
                         "score": {
                             "top_hits": {
                                 "size": 1,
                                 "sort": [
                                     {
                                         "authority": {
                                             "order": "desc",
                                             "missing": 0.0000000001
                                         }
                                     },
                                     {
                                         "_score": {
                                             "order": "desc"
                                         }
                                     }
                                 ],
                                 "_source": {
                                     "include": ["title", "url", "meta",
                                                 "updated_on", "domain",
                                                 "authority", "anchors",
                                                 "links"]
                                 }
                             }
                         },
                         "max_score": {
                             "max": {
                                 "script": "_score"
                             }
                         },
                     }
                 }
             }
         },
         "size": 0
     }
Exemplo n.º 7
0
    def get_es_context(self, **kwargs):
        """ Build the search arguments for a per-domain full-text search.

        The text in ``q`` is matched with a ``most_fields`` multi_match
        over ``fancy`` and its ``stemmed`` / ``shingles`` sub-fields, and
        filtered with ``missing`` clauses on ``is_fake`` and ``is_banned``.
        Results are bucketed by ``domain``; buckets are ordered by their
        ``max_score`` sub-aggregation and each carries its single top hit
        (sorted by ``authority``, then ``_score``).

        :param kwargs: must contain ``q``, the query text.
        :return: dict with ``index``, ``doc_type``, ``body`` and ``size``.
        """
        query = kwargs['q']

        text_clause = {
            "multi_match": {
                "query": query,
                "type": "most_fields",
                "fields": ["fancy", "fancy.stemmed", "fancy.shingles"],
                "minimum_should_match": "75%",
                "cutoff_frequency": 0.01,
            }
        }
        # One "missing" clause per flag field, in the original order.
        absent_flags = [
            {"missing": {"field": flag}} for flag in ("is_fake", "is_banned")
        ]

        best_hit = {
            "top_hits": {
                "size": 1,
                "sort": [
                    {"authority": {"order": "desc", "missing": 0.0000000001}},
                    {"_score": {"order": "desc"}},
                ],
                "_source": {
                    "include": [
                        "title", "url", "meta", "updated_on",
                        "domain", "authority", "anchors",
                    ]
                },
            }
        }
        domain_buckets = {
            "terms": {
                "size": 1000,
                "field": "domain",
                # Refers to the "max_score" sub-aggregation defined below.
                "order": {"max_score": "desc"},
            },
            "aggregations": {
                "score": best_hit,
                "max_score": {"max": {"script": "_score"}},
            },
        }

        return {
            "index": utils.get_elasticsearch_index(),
            "doc_type": utils.get_elasticsearch_type(),
            "body": {
                "query": {
                    "bool": {"must": [text_clause], "filter": absent_flags}
                },
                "aggregations": {"domains": domain_buckets},
            },
            "size": 0,
        }