def _run_cached(cls, query, bq, sort="relevance", rank_expressions=None,
                faceting=None, start=0, num=1000, _update=False):
    '''Send one search request to the cloudsearch API and wrap the reply.

    query, bq, sort, rank_expressions and faceting are forwarded to
    basic_query() unchanged (sort is passed as `rank`); start and num are
    passed as the `start` and `size` of the request.  The _update
    parameter is not read here; it exists so that memoization can
    supposedly be added easily at a later date.

    Any warning messages in the response's 'info' section are logged.
    SearchHTTPError and SearchError are logged and then re-raised.

    Returns a Results built from the list of hit ids, the total number
    of documents found, and a dict mapping each facet name directly to
    its list of constraints.

    Example result set:

    {u'facets': {u'reddit': {u'constraints':
        [{u'count': 114, u'value': u'politics'},
         {u'count': 42, u'value': u'atheism'},
         {u'count': 27, u'value': u'wtf'},
         {u'count': 19, u'value': u'gaming'},
         {u'count': 12, u'value': u'bestof'},
         {u'count': 12, u'value': u'tf2'},
         {u'count': 11, u'value': u'AdviceAnimals'},
         {u'count': 9, u'value': u'todayilearned'},
         {u'count': 9, u'value': u'pics'},
         {u'count': 9, u'value': u'funny'}]}},
     u'hits': {u'found': 399,
               u'hit': [{u'id': u't3_11111'},
                        {u'id': u't3_22222'},
                        {u'id': u't3_33333'},
                        {u'id': u't3_44444'},
                        ...
                        ],
               u'start': 0},
     u'info': {u'cpu-time-ms': 10,
               u'messages': [{u'code': u'CS-InvalidFieldOrRankAliasInRankParameter',
                              u'message': u"Unable to create score object for rank '-hot'",
                              u'severity': u'warning'}],
               u'rid': u'<hash>',
               u'time-ms': 9},
     u'match-expr': u"(label 'my query')",
     u'rank': u'-text_relevance'}

    '''
    try:
        payload = basic_query(query=query, bq=bq, size=num, start=start,
                              rank=sort, rank_expressions=rank_expressions,
                              search_api=cls.search_api, faceting=faceting,
                              record_stats=True)
    except (SearchHTTPError, SearchError) as err:
        # Record the failure, then let the caller deal with it.
        g.log.error("Search Error: %r", err)
        raise

    # Surface any non-fatal complaints the service attached to the reply.
    for notice in payload['info'].get('messages', []):
        g.log.warning("%(code)s (%(severity)s): %(message)s" % notice)

    hit_section = payload['hits']
    total_found = hit_section['found']
    doc_ids = [entry['id'] for entry in hit_section['hit']]

    # Collapse {'name': {'constraints': [...]}} to {'name': [...]}.  This
    # rewrites the response's own facet dict in place; the key list is
    # snapshotted first so the loop never iterates a dict it is changing.
    facet_map = payload.get('facets', {})
    for facet_name in list(facet_map):
        facet_map[facet_name] = facet_map[facet_name]['constraints']

    return Results(doc_ids, total_found, facet_map)
def run(self, after=None, reverse=False, num=1000, _update=False):
    '''Run the search and keep the requested window of results.

    self.bq is reset to an empty unicode string before the search is
    run through self._run().  The returned document list is then handed
    to r2utils.get_after() together with after, num and reverse, and the
    documents it returns are combined with the original hit count and
    facets into a new Results.

    The new Results is stored on self.results and also returned.
    '''
    self.bq = u''
    found = self._run(_update=_update)
    all_docs, total_hits, facet_map = found.docs, found.hits, found._facets

    windowed_docs = r2utils.get_after(all_docs, after, num, reverse=reverse)

    self.results = Results(windowed_docs, total_hits, facet_map)
    return self.results
def _run(self, _update=False):
    '''Parse self.query and execute the resulting search.

    If self._parse() raises InvalidQuery, an empty Results (no docs,
    zero hits, no facets) is returned and no request is made.  When
    g.sqlprinting is set, this query object is logged at info level
    before the request is issued.

    Returns whatever self._run_cached() returns.
    '''
    try:
        self._parse()
    except InvalidQuery:
        return Results([], 0, {})

    if g.sqlprinting:
        g.log.info("%s", self)

    # The boolean query is sent as UTF-8 encoded bytes.
    encoded_bq = self.bq.encode('utf-8')
    paging = dict(start=self.start, num=self.num, _update=_update)
    return self._run_cached(self.q, encoded_bq, self.sort,
                            self.rank_expressions, self.faceting, **paging)
def _run_cached(cls, query, bq, sort="score", faceting=None, start=0,
                num=1000, _update=False):
    '''Query the solr HOST.

    query, bq, sort and faceting are forwarded to basic_query() (sort is
    passed as `rank`); start and num are passed as the `start` and
    `size` of the request.  The _update parameter is not read here; it
    allows for supposed easy memoization at later date.

    An empty query short-circuits to an empty Results without issuing a
    request.  SearchHTTPError and SearchError are logged and re-raised.

    Returns a Results built from the list of document ids, the
    `numFound` total, and a dict mapping each facet field to a list of
    {'value': ..., 'count': ...} dicts.  Facets are only extracted when
    there is at least one hit and faceting was requested.

    The facet lists in the response are expected to be flat, alternating
    [value, count, value, count, ...].  They are read without being
    modified; a dangling value with no count at the end of a list is
    ignored.

    Example result set:

    {
      u'responseHeader':{
        u'status':0,
        u'QTime':2,
        u'params':{
          u'sort':u'activity desc',
          u'defType':u'edismax',
          u'q':u'coffee',
          u'start':u'0',
          u'wt':u'json',
          u'size':u'1000'
        }
      },
      u'response':{
        u'start':0,
        u'numFound':1,
        u'docs':[
          {
            u'_version_':1496564637825499136,
            u'type_id':5,
            u'reddit':u'coffee',
            u'fullname':u't5_3',
            u'author':u'grandpa',
            u'url':u'http://hamsandwich.com/sideoffries/?attachment_id=44',
            u'num_comments':0,
            u'downs':1,
            u'title':u'013',
            u'site':u"[u'reddit.com',u'hamsandwich.reddit.com']",
            u'author_s': u'grandpa',
            u'over18': False,
            u'timestamp': 1427180669,
            u'sr_id': 2,
            u'author_fullname': u't2_1',
            u'is_self': False,
            u'subreddit': u'coffee',
            u'ups': 0,
            u'id': u't5_3'
          }
        ]
      }
    }

    '''
    if not query:
        return Results([], 0, {})

    try:
        response = basic_query(query=query, bq=bq, size=num, start=start,
                               rank=sort, search_api=cls.search_api,
                               faceting=faceting, record_stats=True)
    except (SearchHTTPError, SearchError) as e:
        g.log.error("Search Error: %r", e)
        raise

    hits = response['response']['numFound']
    docs = [doc['id'] for doc in response['response']['docs']]

    facets = {}
    if hits and faceting:
        facet_fields = response['facet_counts'].get('facet_fields', {})
        for field, flat in facet_fields.items():
            # Pair even-indexed values with odd-indexed counts in a single
            # pass.  Slicing copies, so the response is left untouched, and
            # this avoids the quadratic cost of repeated list.pop(0).
            facets[field] = [
                dict(value=value, count=count)
                for value, count in zip(flat[::2], flat[1::2])
            ]

    return Results(docs, hits, facets)
def run(self, _update=False):
    '''Run the search and remember the outcome on self.results.

    The search is executed through self._run(); its docs, hits and
    facets are copied into a new Results, which is stored on
    self.results and returned.
    '''
    found = self._run(_update=_update)
    fresh = Results(found.docs, found.hits, found._facets)
    self.results = fresh
    return self.results