def get_es_feed_query(self, sq, region=mkt.regions.RESTOFWORLD.id,
                      carrier=None, original_region=None):
    """
    Compose the ES query that fetches the feed.

    Non-shelf feed items must match `region` and are scored by the
    reciprocal of their `order` field. When a carrier is supplied, shelves
    for that carrier are let through and boosted, both for `region` and
    for `original_region`.

    sq -- search object the query is attached to.
    region -- region ID (integer).
    carrier -- carrier ID (integer); None leaves shelves out entirely.
    original_region -- region from before we fell back, so that its shelf
                       stays atop the RoW feed.
    """
    in_region = es_filter.Term(region=region)
    is_shelf = es_filter.Term(item_type=feed.FEED_TYPE_SHELF)

    # Scores regular (non-shelf) items of the region by 1 / order.
    order_score = es_function.FieldValueFactor(
        field='order',
        modifier='reciprocal',
        filter=es_filter.Bool(must=[in_region], must_not=[is_shelf]))
    # Large constant boost applied to shelves.
    shelf_boost = es_function.BoostFactor(value=10000.0, filter=is_shelf)

    if carrier is not None:
        # Match the region, plus the original region when we are falling
        # back to RoW. The only feed item of the original region that can
        # show up is a shelf, otherwise there would have been no fallback.
        any_region = [in_region]
        if original_region:
            any_region.append(es_filter.Term(region=original_region))

        # Shelves belonging to a different carrier are dropped.
        foreign_shelf = es_filter.Bool(
            must=[is_shelf],
            must_not=[es_filter.Term(carrier=carrier)])

        return sq.query(
            'function_score',
            functions=[shelf_boost, order_score],
            filter=es_filter.Bool(should=any_region,
                                  must_not=[foreign_shelf]))

    # No carrier: match the region only and keep every shelf out.
    return sq.query(
        'function_score',
        functions=[order_score],
        filter=es_filter.Bool(must=[in_region], must_not=[is_shelf]))
def get_es_feed_element_query(self, sq, feed_items):
    """
    Build the ES query that fetches the feed element behind each feed item.

    feed_items -- list of FeedItems with normalized feed element IDs; each
                  one is indexed by 'item_type', and then by that item type
                  to obtain the element ID.

    Returns `sq` filtered to match any of the (id, item_type) pairs and
    sliced to the number of feed items.
    """
    def _element_match(item):
        # One element is pinned down by both its ID and its type.
        kind = item['item_type']
        return es_filter.Bool(must=[
            es_filter.Term(id=item[kind]),
            es_filter.Term(item_type=kind),
        ])

    any_element = es_filter.Bool(
        should=[_element_match(item) for item in feed_items])
    return sq.filter(any_element)[0:len(feed_items)]
def get_feed_element_filter(self, sq, item_type, slug):
    """
    Narrow `sq` down to one feed element.

    The element has to match both the given item type and the given slug
    (compared against the `slug.raw` field).
    """
    type_term = es_filter.Term(item_type=item_type)
    # 'slug.raw' is not a valid keyword name, hence the dict expansion.
    slug_term = es_filter.Term(**{'slug.raw': slug})
    return sq.filter(es_filter.Bool(must=[type_term, slug_term]))
def search(self):
    """
    Return a content search built from this object's stored query.

    A document matches when it is in the included IDs, the pinned IDs or
    any of the group filters, and is not in the excluded IDs.
    """
    stored = self.query

    # Alternatives: satisfying any one of these is enough.
    any_of = [
        es_filter.Terms(pk=stored.get("included_ids", [])),
        es_filter.Terms(pk=stored.get("pinned_ids", [])),
    ]
    any_of.extend(self.get_group_filters())

    # Requirements: every one of these has to hold.
    matches_any = es_filter.Bool(should=any_of)
    not_excluded = ~es_filter.Terms(pk=stored.get("excluded_ids", []))
    all_of = es_filter.Bool(must=[matches_any, not_excluded])

    return Content.search_objects.search().filter(all_of)
def get_apps(self, request, app_ids):
    """
    Fetch the apps for a list of app IDs and return them as an app map.

    request -- current request; its `filtering` query parameter selects
               between the raw lookup ('0') and the filtered lookup
               (anything else, the default).
    app_ids -- list of app IDs to fetch.

    Returns a dict mapping app ID to app hit, meant to be used as
    serializer context. An empty (or None) `app_ids` yields an empty dict
    without querying ES.
    """
    if not app_ids:
        # Nothing was asked for. Bail out before building a query: the
        # filtered lookup only constrains on IDs when some are given, so
        # it would otherwise return apps nobody requested.
        return {}

    if request.QUERY_PARAMS.get('filtering', '1') == '0':
        # Without filtering: plain lookup by ID.
        sq = WebappIndexer.search().filter(es_filter.Bool(
            should=[es_filter.Terms(id=app_ids)]
        ))[0:len(app_ids)]
    else:
        # With filtering: go through the consolidated app filter.
        sq = WebappIndexer.get_app_filter(request, {
            'device': self._get_device(request)
        }, app_ids=app_ids)

    # Store the apps to attach to feed elements later.
    apps = sq.execute().hits
    return {app.id: app for app in apps}
def get_featured_websites(self):
    """
    Return serialized data for up to 11 featured websites.

    Websites tagged as featured for the request's region and websites
    tagged as globally featured both match. Regional ones get a boost, so
    global ones only make up the difference when fewer than 11 regional
    ones exist. Scoring is randomized with the daily seed.
    """
    regional_tag = 'featured-website-%s' % self.request.REGION.slug
    is_regional = es_filter.Term(tags=regional_tag)
    is_global = es_filter.Term(tags='featured-website')

    either_tag = es_filter.Bool(should=[is_regional, is_global])
    scoring = [
        SF('random_score', seed=self._get_daily_seed()),
        # Push regionally-featured websites ahead of global ones.
        es_function.BoostFactor(value=100.0, filter=is_regional),
    ]
    featured_query = query.Q('function_score', filter=either_tag,
                             functions=scoring)

    search = Search(using=WebsiteIndexer.get_es())[:11]
    hits = search.query(featured_query).execute().hits
    return ESWebsiteSerializer(hits, many=True).data
def filter_queryset(self, request, queryset, view):
    """
    Restrict the search to games and bucket them by tag.

    The query matches any of GAME_CATEGORIES with a random score seeded by
    today's date, and is sliced to four results. A terms aggregation on
    `tags` named 'top_hits' is attached, with a size-1 TopHits
    sub-aggregation named 'first_game'; read the per-tag games from
    `execute().aggregations` rather than `execute().hits`.
    """
    # Same seed all day long, so the random order is stable per day.
    today = datetime.datetime.now()
    daily_seed = int(today.strftime('%Y%m%d'))

    # One term per game category; matching any of them is enough.
    category_terms = [es_filter.Term(tags=cat) for cat in GAME_CATEGORIES]
    game_query = query.Q(
        'function_score',
        filter=es_filter.Bool(should=category_terms),
        functions=[SF('random_score', seed=daily_seed)],
    )

    # Bucket by tag and keep a single game per bucket.
    one_per_tag = aggs.A('terms', field='tags',
                         aggs={'first_game': aggs.TopHits(size=1)})

    games = queryset.query(game_query)[0:4]
    # `bucket` registers the aggregation in place; it cannot be chained.
    games.aggs.bucket('top_hits', one_per_tag)
    return games
def _get_colombia_filter(self):
    """
    Return a filter excluding websites tagged COLOMBIA_WEBSITE.

    Returns None (no filter) when `allow_colombia` is set and the
    request's region is Colombia.
    """
    colombia_allowed = (self.allow_colombia and
                        self.request.REGION == mkt.regions.COL)
    if colombia_allowed:
        return None

    is_colombian = es_filter.Term(tags=COLOMBIA_WEBSITE)
    without_colombian = es_filter.Bool(must_not=[is_colombian])
    return es_filter.F(without_colombian)
def get_app_filter(cls, request, additional_data=None, sq=None,
                   app_ids=None, no_filter=False):
    """
    THE grand, consolidated ES filter for Webapps.

    Unless `no_filter` is set, it:
    - Excludes non-public apps.
    - Excludes disabled apps (whether by reviewer or by developer).
    - Excludes based on region exclusions.
    - TODO: Excludes based on device and platform support.

    request -- current request; supplies the defaults for the device
               flags, the feature profile and the region.
    additional_data -- dict overriding/extending the default filter data.
    sq -- existing search object to filter off of; a fresh `cls.search()`
          is used when omitted.
    app_ids -- list of app IDs to restrict the results to.
    no_filter -- skip the consumer-side excludes (public/region).

    Returns the resulting search object.
    """
    # Imported locally, as in the original.
    from mkt.api.base import get_region_from_request
    from mkt.search.views import name_query

    if not sq:
        sq = cls.search()
    if not additional_data:
        additional_data = {}
    if not app_ids:
        app_ids = []

    # Defaults derived from the request; the caller's data wins.
    data = {
        'app_type': [],
        'author.raw': None,
        'category': None,  # Slug.
        'device': None,  # ID.
        'gaia': getattr(request, 'GAIA', False),
        'is_offline': None,
        'manifest_url': '',
        'mobile': getattr(request, 'MOBILE', False),
        'premium_type': [],
        'profile': get_feature_profile(request),
        'q': '',
        'region': getattr(get_region_from_request(request), 'id', None),
        'status': None,
        'supported_locales': [],
        'tablet': getattr(request, 'TABLET', False),
        'tags': '',
    }
    data.update(additional_data)

    # Which ES filter type each group of fields is matched with.
    fields_by_filter_type = (
        ('term', ('author.raw', 'device', 'manifest_url', 'status',
                  'tags')),
        ('terms', ('category', 'premium_type', 'app_type',
                   'supported_locales')),
    )

    # QUERY.
    search_text = data['q']
    if search_text:
        # Function score for popularity boosting (defaults to multiply).
        text_query = name_query(search_text.lower())
        popularity = query.SF('field_value_factor', field='boost')
        sq = sq.query('function_score', query=text_query,
                      functions=[popularity])

    # MUST.
    if no_filter:
        must = []
    else:
        must = [
            F('term', status=amo.STATUS_PUBLIC),
            F('term', is_disabled=False),
        ]

    # Term/terms filters, only for the fields that carry a value.
    for filter_type, fields in fields_by_filter_type:
        for field in fields:
            value = data[field]
            if value:
                must.append(F(filter_type, **{field: value}))

    if not no_filter:
        profile = data['profile']
        if profile:
            # Feature filters.
            feature_kwargs = profile.to_kwargs(prefix='features.has_')
            for name, flag in feature_kwargs.items():
                must.append(F('term', **{name: flag}))
        if data['mobile'] or data['gaia']:
            # Uses flash.
            must.append(F('term', uses_flash=False))

    # NOTE(review): the nesting here was reconstructed from a flattened
    # source -- confirm this check is meant to apply even with no_filter.
    offline = data['is_offline']
    if offline is not None:
        must.append(F('term', is_offline=offline))

    # SHOULD.
    should = []
    if app_ids:
        # De-duplicate so the result window matches the distinct IDs.
        unique_ids = set(app_ids)
        should = [es_filter.Terms(id=list(unique_ids))]
        sq = sq[0:len(unique_ids)]

    # FILTER.
    if must or should:
        sq = sq.filter(es_filter.Bool(must=must, should=should))

    if not no_filter and data['region']:
        # Region exclusions.
        sq = sq.filter(~F('term', region_exclusions=data['region']))

    return sq