def get_sort_options(expressions=None, match_scorer=None, limit=1000):
    """A function to handle the sort expression API differences in 1.6.4 vs.
    1.6.5+.

    An example of usage (NOTE: Do NOT put limit SortExpression or MatchScorer):
      expr_list = [
          search.SortExpression(expression='author', default_value='',
                                direction=search.SortExpression.DESCENDING)]
      sortopts = get_sort_options(expressions=expr_list, limit=sort_limit)
    The returned value is used in constructing the query options:
      qoptions = search.QueryOptions(limit=doc_limit, sort_options=sortopts)

    Another example illustrating sorting on an expression based on a
    MatchScorer score:
      expr_list = [
          search.SortExpression(expression='_score + 0.001 * rating',
                                default_value='',
                                direction=search.SortExpression.DESCENDING)]
      sortopts = get_sort_options(expressions=expr_list,
                                  match_scorer=search.MatchScorer(),
                                  limit=sort_limit)

    Args:
      expressions: a list of search.SortExpression. Do not set limit parameter
        on SortExpression
      match_scorer: a search.MatchScorer or search.RescoringMatchScorer. Do not
        set limit parameter on either scorer
      limit: the scoring limit
    Returns:
      the sort options value, either list of SortOption (1.6.4) or SortOptions
      (1.6.5), to set the sort_options field in the QueryOptions object.
    """
    try:
        # using 1.6.5 or greater: referencing search.SortOptions raises
        # AttributeError on 1.6.4, which routes us to the fallback below.
        if search.SortOptions:
            logging.debug("search.SortOptions is defined.")
            return search.SortOptions(expressions=expressions,
                                      match_scorer=match_scorer,
                                      limit=limit)
    # SortOptions not available, so using 1.6.4
    except AttributeError:
        logging.debug("search.SortOptions is not defined.")
        expr_list = []
        # copy the sort expressions including the limit info
        if expressions:
            expr_list = [
                search.SortExpression(expression=e.expression,
                                      direction=e.direction,
                                      default_value=e.default_value,
                                      limit=limit) for e in expressions
            ]
        # add the match scorer, if defined, to the expressions list.
        if isinstance(match_scorer, search.MatchScorer):
            expr_list.append(match_scorer.__class__(limit=limit))
        logging.info("sort expressions: %s", expr_list)
        return expr_list
def get(self):
    """Handle a person-search request and write a JSON response.

    Reads q/name/token/page_num/page_size from the request, runs an AND query
    against the person index sorted by first_name descending, resolves the
    matching doc ids to Person entities, and emits {'data', 'token', 'q'}.
    """
    self.response.headers['Content-Type'] = 'text/plain'

    # Sanitize and bound every incoming parameter.
    raw_query = str(
        urllib.unquote(cgi.escape(self.request.get('q')).lower()[:100]))
    name = str(
        urllib.unquote(cgi.escape(self.request.get('name')).lower()[:50]))
    token = str(urllib.unquote(cgi.escape(self.request.get('token'))))
    page_num = parse_int(
        urllib.unquote(cgi.escape(self.request.get('page_num'))), 1)
    page_size = parse_int(
        urllib.unquote(cgi.escape(self.request.get('page_size'))), 20)

    # Keep only letters, digits and spaces in the free-text query.
    raw_query = ''.join(
        ch for ch in raw_query
        if ch in string.letters or ch in string.digits or ch == ' ')

    # Nothing to search for: echo an empty result set and bail out.
    if raw_query + name == "":
        self.response.out.write(
            json.dumps({'data': [], 'token': token, 'q': ""}))
        return

    if raw_query == "":
        raw_query = name

    if page_size > _PAGE_SIZE or page_size < 1:
        page_size = _PAGE_SIZE

    # Normalize the query terms into a deterministic AND expression.
    terms = sorted(map(str, raw_query.split()))
    query_string = ' AND '.join(terms)

    response_dict = {"data": [], "token": token, "q": raw_query}

    # Sort results by first name descending.
    sort_exprs = [
        search.SortExpression(expression='first_name',
                              default_value='',
                              direction=search.SortExpression.DESCENDING)
    ]
    sort_opts = search.SortOptions(expressions=sort_exprs)
    offset_num = (page_num - 1) * page_size
    query_options = search.QueryOptions(limit=page_size,
                                        offset=offset_num,
                                        ids_only=True,
                                        sort_options=sort_opts)
    results = search.Index(name=_INDEX_NAME).search(query=search.Query(
        query_string=query_string, options=query_options))

    # Resolve each matching doc id to its datastore entity.
    matched = []
    for result in results:
        person_entity = Person.get_by_id(result.doc_id)
        if person_entity:
            person_map = Person.buildMap(person_entity)
            person_map['name'] = person_map['name'].title()
            matched.append(person_map)

    response_dict["data"] = matched
    self.response.out.write(json.dumps(response_dict))
class Video(BaseDocumentManager):
    """Provides helper methods to manage Video documents. All Video documents
    built using these methods will include a core set of fields (see the
    _buildCoreVideoFields method). We use the given video id (the Video entity
    key) as the doc_id. This is not required for the entity/document design--
    each explicitly point to each other, allowing their ids to be decoupled--
    but using the video id as the doc id allows a document to be reindexed
    given its video info, without having to fetch the existing document."""

    _INDEX_NAME = config.VIDEO_INDEX_NAME

    # 'core' video document field names
    UNIQUEID = config.FIELD_UNIQUE_ID
    TITLE = config.FIELD_TITLE
    CATEGORY = config.FIELD_EVENT
    SUBTITLE = config.FIELD_SUBTITLE
    DURATION_MIN = config.FIELD_DURATION
    SPEAKERS = config.FIELD_SPEAKERS
    DESCRIPTION = config.FIELD_DESC
    PUBLISHED_DATE = config.FIELD_PUB_DATE
    SLIDES_LINK = config.FIELD_SLIDES_LINK
    VIEWS = config.FIELD_VIEW_COUNT
    TAGS = config.FIELD_TAGS
    IMAGE = config.FIELD_IMAGE
    VID = config.FIELD_VIDEO_ID
    SESSIONID = config.FIELD_SESSION_ID
    AVG_RATING = 'ar'  # average rating
    UPDATED = 'modified'

    # Each entry is [keyword, human-readable label, SortExpression]; consumed
    # by _buildSortMenu (keyword/label pairs) and _buildSortDict
    # (keyword -> SortExpression).
    _SORT_OPTIONS = [
        # [ AVG_RATING, 'average rating', search.SortExpression(
        #     expression=AVG_RATING,
        #     direction=search.SortExpression.DESCENDING,
        #     default_value=0)
        # ],
        [
            VIEWS, Readable(VIEWS),
            search.SortExpression(expression=VIEWS,
                                  direction=search.SortExpression.DESCENDING,
                                  default_value=0)
        ],
        [
            DURATION_MIN, Readable(DURATION_MIN),
            search.SortExpression(expression=DURATION_MIN,
                                  direction=search.SortExpression.ASCENDING,
                                  default_value=9999)
        ],
        [
            PUBLISHED_DATE, Readable(PUBLISHED_DATE),
            search.SortExpression(expression=PUBLISHED_DATE,
                                  direction=search.SortExpression.DESCENDING,
                                  default_value=1)
        ],
        # [ UPDATED, Readable(UPDATED), search.SortExpression(
        #     expression=UPDATED,
        #     direction=search.SortExpression.DESCENDING,
        #     default_value=1)
        # ],
        [
            CATEGORY, Readable(CATEGORY),
            search.SortExpression(expression=CATEGORY,
                                  direction=search.SortExpression.ASCENDING,
                                  default_value='')
        ],
        [
            TITLE, Readable(TITLE),
            search.SortExpression(expression=TITLE,
                                  direction=search.SortExpression.ASCENDING,
                                  # 'zzz' sorts missing titles last ascending
                                  default_value='zzz')
        ]
    ]

    # Lazily built caches; see getSortMenu / getSortDict.
    _SORT_MENU = None
    _SORT_DICT = None

    @classmethod
    def deleteAllInVideoIndex(cls):
        """Remove every document from the video index."""
        cls.deleteAllInIndex()

    @classmethod
    def getSortMenu(cls):
        """Return the (keyword, label) sort menu, building it on first use."""
        if not cls._SORT_MENU:
            cls._buildSortMenu()
        return cls._SORT_MENU

    @classmethod
    def getSortDict(cls):
        """Return the keyword -> SortExpression dict, building it on first use."""
        if not cls._SORT_DICT:
            cls._buildSortDict()
        return cls._SORT_DICT

    @classmethod
    def _buildSortMenu(cls):
        """Build the default set of sort options used for Video search.
        Of these options, all but 'relevance' reference core fields that
        all Videos will have."""
        res = [(elt[0], elt[1]) for elt in cls._SORT_OPTIONS]
        cls._SORT_MENU = [('relevance', 'relevance')] + res

    @classmethod
    def _buildSortDict(cls):
        """Build a dict that maps sort option keywords to their corresponding
        SortExpressions."""
        cls._SORT_DICT = {}
        for elt in cls._SORT_OPTIONS:
            cls._SORT_DICT[elt[0]] = elt[2]

    @classmethod
    def getDocFromUid(cls, uid):
        """Given a uid, get its doc. We're using the uid as the doc id, so we
        can do this via a direct fetch."""
        return cls.getDoc(uid)

    @classmethod
    def removeVideoDocByUid(cls, uid):
        """Given a doc's vid, remove the doc matching it from the video
        index."""
        cls.removeDocById(uid)

    @classmethod
    def updateRatingInDoc(cls, doc_id, avg_rating):
        """Set avg_rating on the doc with the given id and return that doc.

        Raises:
          errors.OperationFailedError: if no doc exists for doc_id.
        """
        # get the associated doc from the doc id in the video entity
        doc = cls.getDoc(doc_id)
        if doc:
            # The cls(doc) wrapper mutates the underlying doc's fields, so the
            # 'doc' returned below carries the new rating.
            pdoc = cls(doc)
            pdoc.setAvgRating(avg_rating)
            # The use of the same id will cause the existing doc to be reindexed.
            return doc
        else:
            raise errors.OperationFailedError(
                'Could not retrieve doc associated with id %s' % (doc_id, ))

    @classmethod
    def updateRatingsInfo(cls, doc_id, avg_rating):
        """Given a models.Video entity, update and reindex the associated
        document with the video entity's current average rating.
        """
        ndoc = cls.updateRatingInDoc(doc_id, avg_rating)
        # reindex the returned updated doc
        return cls.add(ndoc)

    # 'accessor' convenience methods

    def getUniqueID(self):
        """Get the value of the 'uniqueid' field of a Video doc."""
        return self.getFieldVal(self.UNIQUEID)

    def getTitle(self):
        """Get the value of the 'title' field of a Video doc."""
        return self.getFieldVal(self.TITLE)

    def getCategory(self):
        """Get the value of the 'cat' field of a Video doc."""
        return self.getFieldVal(self.CATEGORY)

    def setCategory(self, cat):
        """Set the value of the 'cat' (category) field of a Video doc."""
        # NOTE(review): _buildCoreVideoFields indexes CATEGORY as an AtomField;
        # using NumberField here looks inconsistent -- verify before relying on
        # this setter.
        return self.setFirstField(
            search.NumberField(name=self.CATEGORY, value=cat))

    def getSlidesLink(self):
        """Get the value of the 'slides_link' field of a Video doc."""
        return self.getFieldVal(self.SLIDES_LINK)

    def getSubtitle(self):
        """Get the value of the 'subtitle' field of a Video doc."""
        return self.getFieldVal(self.SUBTITLE)

    def getDurationMin(self):
        """Get the value of the 'duration_min' field of a Video doc."""
        return self.getFieldVal(self.DURATION_MIN)

    def getSpeakers(self):
        """Get the value of the 'speakers' field of a Video doc."""
        return self.getFieldVal(self.SPEAKERS)

    def getDescription(self):
        """Get the value of the 'description' field of a Video doc."""
        return self.getFieldVal(self.DESCRIPTION)

    def getPublishedDate(self):
        """Get the value of the 'published_date' field of a Video doc."""
        return self.getFieldVal(self.PUBLISHED_DATE)

    def getViews(self):
        """Get the value of the 'views' field of a Video doc."""
        return self.getFieldVal(self.VIEWS)

    def getTags(self):
        """Get the value of the 'tags' field of a Video doc."""
        return self.getFieldVal(self.TAGS)

    def getImage(self):
        """Get the value of the 'image' field of a Video doc."""
        return self.getFieldVal(self.IMAGE)

    def getVID(self):
        """Get the value of the 'vid' field of a Video doc."""
        return self.getFieldVal(self.VID)

    def getSessionID(self):
        """Get the value of the 'sessionid' field of a Video doc."""
        return self.getFieldVal(self.SESSIONID)

    def getAvgRating(self):
        """Get the value of the 'ar' (average rating) field of a Video doc."""
        return self.getFieldVal(self.AVG_RATING)

    def setAvgRating(self, ar):
        """Set the value of the 'ar' field of a Video doc."""
        return self.setFirstField(
            search.NumberField(name=self.AVG_RATING, value=ar))

    @classmethod
    def generateRatingsBuckets(cls, query_string):
        """Builds a dict of ratings 'buckets' and their counts, based on the
        value of the 'avg_rating" field for the documents retrieved by the
        given query. See the 'generateRatingsLinks' method. This information
        will be used to generate sidebar links that allow the user to drill
        down in query results based on rating.

        For demonstration purposes only; this will be expensive for large data
        sets.
        """
        # do the query on the *full* search results
        # to generate the facet information, imitating what may in future be
        # provided by the FTS API.
        try:
            sq = search.Query(query_string=query_string.strip())
            search_results = cls.getIndex().search(sq)
        except search.Error:
            logging.exception('An error occurred on search.')
            return None
        ratings_buckets = collections.defaultdict(int)
        # populate the buckets; missing ratings count in bucket 0
        for res in search_results:
            ratings_buckets[int((cls(res)).getAvgRating() or 0)] += 1
        return ratings_buckets

    @classmethod
    def generateRatingsLinks(cls, query, vhash):
        """Given a dict of ratings 'buckets' and their counts, builds a list of
        html snippets, to be displayed in the sidebar when showing results of a
        query. Each is a link that runs the query, additionally filtered by the
        indicated ratings interval."""
        ratings_buckets = cls.generateRatingsBuckets(query)
        if not ratings_buckets:
            return None
        rlist = []
        for k in range(config.RATING_MIN, config.RATING_MAX + 1):
            try:
                v = ratings_buckets[k]
            # NOTE(review): ratings_buckets is a defaultdict, so this KeyError
            # branch looks unreachable -- verify.
            except KeyError:
                return
            # build html
            if k < 5:
                htext = '%s-%s (%s)' % (k, k + 1, v)
            else:
                htext = '%s (%s)' % (k, v)
            vhash['rating'] = k
            hlink = '/vsearch?' + urllib.urlencode(vhash)
            rlist.append((hlink, htext))
        return rlist

    @classmethod
    def _buildCoreVideoFields(cls, unique_id, title, category, subtitle,
                              duration_min, speakers, description,
                              published_date, views, slides_link, tags, image,
                              video_id, session_id):
        """Construct a 'core' document field list for the fields common to all
        Videos. The various categories (as defined in the file
        'categories.py'), may add additional specialized fields; these will be
        appended to this core list. (see _buildVideoFields)."""
        fields = [
            search.TextField(name=cls.UNIQUEID, value=unique_id),
            # The 'updated' field is always set to the current date.
            search.DateField(name=cls.UPDATED,
                             value=datetime.datetime.now().date()),
            search.TextField(name=cls.TITLE, value=title),
            search.AtomField(name=cls.CATEGORY, value=category),
            search.TextField(name=cls.SUBTITLE, value=subtitle),
            search.NumberField(name=cls.DURATION_MIN, value=int(duration_min)),
            search.TextField(name=cls.SPEAKERS, value=ScrubSpeakers(speakers)),
            # strip the markup from the description value, which can
            # potentially come from user input. We do this so that
            # we don't need to sanitize the description in the
            # templates, showing off the Search API's ability to mark up query
            # terms in generated snippets. This is done only for
            # demonstration purposes; in an actual app,
            # it would be preferrable to use a library like Beautiful Soup
            # instead.
            # We'll let the templating library escape all other rendered
            # values for us, so this is the only field we do this for.
            search.TextField(name=cls.DESCRIPTION,
                             value=ScrubDescription(description)),
            search.NumberField(name=cls.VIEWS, value=int(views)),
            search.TextField(name=cls.SLIDES_LINK, value=slides_link),
            search.TextField(name=cls.TAGS, value=tags),
            search.TextField(name=cls.IMAGE, value=image),
            search.TextField(name=cls.VID, value=video_id),
            search.TextField(name=cls.SESSIONID, value=session_id),
            # New docs always start with a zero average rating.
            search.NumberField(name=cls.AVG_RATING, value=0.0),
        ]
        # Some fields can sometimes be empty.
        scrubbed_publish_date = utils.dateFromDateString(published_date)
        if scrubbed_publish_date:
            fields.append(
                search.DateField(name=cls.PUBLISHED_DATE,
                                 value=scrubbed_publish_date))
        return fields

    @classmethod
    def _buildVideoFields(cls, unique_id, category, title, category_name,
                          subtitle, duration_min, speakers, description,
                          published_date, views, slides_link, tags, image,
                          video_id, session_id, **params):
        """Build all the additional non-core fields for a document of the given
        video type (category), using the given params dict, and the
        already-constructed list of 'core' fields. All such additional
        category-specific fields are treated as required.
        """
        fields = cls._buildCoreVideoFields(unique_id, title, category,
                                           subtitle, duration_min, speakers,
                                           description, published_date, views,
                                           slides_link, tags, image, video_id,
                                           session_id)
        # get the specification of additional (non-'core') fields for this category
        vdict = categories.event_dict.get(category_name)
        if vdict:
            # for all fields
            for k, field_type in vdict.iteritems():
                # see if there is a value in the given params for that field.
                # if there is, get the field type, create the field, and append to the
                # document field list.
                if k in params:
                    v = params[k]
                    if field_type == search.NumberField:
                        try:
                            val = float(v)
                            fields.append(search.NumberField(name=k, value=val))
                        except ValueError:
                            # NOTE(review): the %-args put the field name (k)
                            # in the 'value' slot and the value (v) in the
                            # 'field' slot -- looks transposed; verify.
                            error_message = (
                                'bad value %s for field %s of type %s' %
                                (k, v, field_type))
                            logging.error(error_message)
                            raise errors.OperationFailedError(error_message)
                    elif field_type == search.TextField:
                        fields.append(search.TextField(name=k, value=str(v)))
                    else:
                        # you may want to add handling of other field types for generality.
                        # Not needed for our current sample data.
                        logging.warn('not processed: %s, %s, of type %s', k, v,
                                     field_type)
                else:
                    error_message = (
                        'value not given for field "%s" of field type "%s"' %
                        (k, field_type))
                    logging.warn(error_message)
                    raise errors.OperationFailedError(error_message)
        #else:
        #    # else, did not have an entry in the params dict for the given field.
        #    logging.warn(
        #        'video field information not found for category name %s',
        #        params['category_name'])
        return fields

    @classmethod
    def _createDocument(cls, unique_id=None, event=None, title=None,
                        category_name=None, subtitle=None, duration_min=None,
                        speakers=None, description=None, published_date=None,
                        views=None, slides_link=None, tags=None, image=None,
                        video_id=None, session_id=None, **params):
        """Create a Document object from given params."""
        # check for the fields that are always required.
        if unique_id and event and title:
            # First, check that the given unique_id has only visible ascii characters,
            # and does not contain whitespace. The unique_id will be used as the
            # doc_id, which has these requirements.
            if not cls.isValidDocId(unique_id):
                raise errors.OperationFailedError('Illegal unique_id %s' %
                                                  unique_id)
            # construct the document fields from the params
            resfields = cls._buildVideoFields(unique_id=unique_id,
                                              category=event,
                                              title=title,
                                              category_name=category_name,
                                              subtitle=subtitle,
                                              duration_min=duration_min,
                                              speakers=speakers,
                                              description=description,
                                              published_date=published_date,
                                              views=views,
                                              slides_link=slides_link,
                                              tags=tags,
                                              image=image,
                                              video_id=video_id,
                                              session_id=session_id,
                                              **params)
            # build and index the document. Use the video_id as the doc id.
            # (If we did not do this, and left the doc_id unspecified, an id would be
            # auto-generated.)
            d = search.Document(doc_id=unique_id, fields=resfields)
            return d
        else:
            raise errors.OperationFailedError('Missing parameter.')

    @classmethod
    def _normalizeParams(cls, params):
        """Normalize the submitted params for building a video.

        Strips string fields, coerces numeric fields, and copies the category
        into a 'category_name' key used to look up category-specific fields.

        Raises:
          errors.OperationFailedError: on missing keys or bad numeric values.
        """
        params = copy.deepcopy(params)
        try:
            params[cls.UNIQUEID] = params[cls.UNIQUEID].strip()
            params[cls.TITLE] = params[cls.TITLE].strip()
            params['category_name'] = params[cls.CATEGORY]
            # self-assignment kept from the original; it also serves as a
            # presence check on the CATEGORY key
            params[cls.CATEGORY] = params[cls.CATEGORY]
            if params[cls.SUBTITLE]:
                params[cls.SUBTITLE] = params[cls.SUBTITLE].strip()
            if params[cls.SPEAKERS]:
                params[cls.SPEAKERS] = params[cls.SPEAKERS].strip()
            if params[cls.SLIDES_LINK]:
                params[cls.SLIDES_LINK] = params[cls.SLIDES_LINK].strip()
            try:
                params[cls.DURATION_MIN] = int(params[cls.DURATION_MIN])
            except (ValueError, TypeError):
                error_message = 'bad duration_min value: %s' % params[
                    cls.DURATION_MIN]
                logging.error(error_message)
                raise errors.OperationFailedError(error_message)
            try:
                params[cls.VIEWS] = int(params[cls.VIEWS])
            except ValueError:
                error_message = 'bad views value: %s' % params[cls.VIEWS]
                logging.error(error_message)
                raise errors.OperationFailedError(error_message)
            if params[cls.TAGS]:
                params[cls.TAGS] = params[cls.TAGS].strip()
            if params[cls.IMAGE]:
                params[cls.IMAGE] = params[cls.IMAGE].strip()
            if params[cls.VID]:
                params[cls.VID] = params[cls.VID].strip()
            if params[cls.SESSIONID]:
                params[cls.SESSIONID] = params[cls.SESSIONID].strip()
            return params
        except KeyError as e1:
            logging.exception('key error')
            raise errors.OperationFailedError(e1)
        except errors.Error as e2:
            logging.debug('Problem with params: %s: %s', params,
                          e2.error_message)
            raise errors.OperationFailedError(e2.error_message)

    @classmethod
    def buildVideoBatch(cls, rows):
        """Build video documents and their related datastore entities, in
        batch, given a list of params dicts. Should be used for new videos, as
        does not handle updates of existing video entities.

        This method does not require that the doc ids be tied to the video
        ids, and obtains the doc ids from the results of the document add."""
        docs = []
        dbps = []
        for row in rows:
            try:
                params = cls._normalizeParams(row)
                doc = cls._createDocument(**params)
                docs.append(doc)
                # create video entity, sans doc_id
                dbp = models.Video(id=params[cls.UNIQUEID],
                                   duration_min=int(params[cls.DURATION_MIN]),
                                   category=params[cls.CATEGORY])
                dbps.append(dbp)
            except errors.OperationFailedError:
                # best-effort: skip rows that fail normalization/creation
                logging.error('error creating document from data: %s.', row)
        logging.debug('buildVideoBatch: docs=%s.', len(docs))
        logging.debug('buildVideoBatch: dbps=%s.', len(dbps))
        try:
            add_results = cls.add(docs)
        except search.Error:
            logging.exception('Add failed')
            return
        if len(add_results) != len(dbps):
            # this case should not be reached; if there was an issue,
            # search.Error should have been thrown, above.
            raise errors.OperationFailedError(
                'Error: wrong number of results returned from indexing operation'
            )
        # now set the entities with the doc ids, the list of which are returned in
        # the same order as the list of docs given to the indexers
        for i, dbp in enumerate(dbps):
            dbp.doc_id = add_results[i].id
        # persist the entities
        ndb.put_multi(dbps)

    @classmethod
    def buildVideo(cls, params):
        """Create/update a video document and its related datastore entity.
        The video id and the field values are taken from the params dict.
        """
        params = cls._normalizeParams(params)
        # check to see if doc already exists. We do this because we need to
        # retain some information from the existing doc. We could skip the
        # fetch if this were not the case.
        curr_doc = cls.getDocFromUid(params[cls.UNIQUEID])
        d = cls._createDocument(**params)
        if curr_doc:
            # retain ratings info from existing doc
            avg_rating = cls(curr_doc).getAvgRating()
            cls(d).setAvgRating(avg_rating)
        # This will reindex if a doc with that doc id already exists
        doc_ids = cls.add(d)
        try:
            doc_id = doc_ids[0].id
        except IndexError:
            doc_id = None
            raise errors.OperationFailedError('could not index document')
        logging.debug('got new doc id %s for video: %s', doc_id,
                      params[cls.UNIQUEID])

        # now update the entity
        def _tx():
            # Check whether the video entity exists. If so, we want to update
            # from the params, but preserve its ratings-related info.
            v = models.Video.get_by_id(params[cls.UNIQUEID])
            if v:
                # update
                v.update_core(params, doc_id)
            else:
                # create new entity
                v = models.Video.create(params, doc_id)
            v.put()
            return v

        v = ndb.transaction(_tx)
        logging.debug('video: %s', v)
        return v
def _render(self):
    """Render the nearby-search page.

    Runs a geo-distance query against the team or event location index,
    resolves the matching doc ids to datastore entities (filtering teams to
    those that competed in the selected year), and renders 'nearby.html'.
    """
    year, location, range_limit, search_type, page = self._get_params()

    num_results = 0
    results = []
    # NOTE(review): starts as a list but becomes a dict (doc_id -> miles) once
    # a search runs; the template presumably handles both -- verify.
    distances = []
    if location:
        # Geocode the free-text location into (lat, lon); skip search if it
        # cannot be resolved.
        lat_lon, _ = LocationHelper.get_lat_lon(location, geocode=True)
        if lat_lon:
            lat, lon = lat_lon

            # Distance expression reused for both the filter and the sort/
            # returned expression.
            dist_expr = 'distance(location, geopoint({}, {}))'.format(
                lat, lon)
            if search_type == 'teams':
                query_string = '{} < {}'.format(
                    dist_expr, range_limit * self.METERS_PER_MILE)
            else:
                # events are additionally filtered to the selected year
                query_string = '{} < {} AND year={}'.format(
                    dist_expr, range_limit * self.METERS_PER_MILE, year)

            offset = self.PAGE_SIZE * page

            query = search.Query(
                query_string=query_string,
                options=search.QueryOptions(
                    limit=self.PAGE_SIZE,
                    offset=offset,
                    # nearest results first
                    sort_options=search.SortOptions(expressions=[
                        search.SortExpression(
                            expression=dist_expr,
                            direction=search.SortExpression.ASCENDING)
                    ]),
                    # have the API return the computed distance per doc
                    returned_expressions=[
                        search.FieldExpression(name='distance',
                                               expression=dist_expr)
                    ],
                ))
            if search_type == 'teams':
                search_index = search.Index(name="teamLocation")
            else:
                search_index = search.Index(name="eventLocation")

            docs = search_index.search(query)
            num_results = docs.number_found
            distances = {}
            keys = []
            # For teams, kick off async "did this team play in <year>?" counts
            # so they resolve while entities are fetched below.
            event_team_count_futures = {}
            for result in docs.results:
                # convert returned distance (meters) to miles
                distances[result.doc_id] = result.expressions[
                    0].value / self.METERS_PER_MILE
                if search_type == 'teams':
                    event_team_count_futures[
                        result.doc_id] = EventTeam.query(
                            EventTeam.team == ndb.Key(
                                'Team', result.doc_id),
                            EventTeam.year == year).count_async(
                                limit=1, keys_only=True)
                    keys.append(ndb.Key('Team', result.doc_id))
                else:
                    keys.append(ndb.Key('Event', result.doc_id))

            result_futures = ndb.get_multi_async(keys)

            if search_type == 'teams':
                results = []
                # keep only teams with at least one event in the year
                for result_future, team_key in zip(result_futures, keys):
                    if event_team_count_futures[
                            team_key.id()].get_result() != 0:
                        results.append(result_future.get_result())
            else:
                results = [
                    result_future.get_result()
                    for result_future in result_futures
                ]

    self.template_values.update({
        'valid_years': self.VALID_YEARS,
        'valid_ranges': self.VALID_RANGES,
        'page_size': self.PAGE_SIZE,
        'page': page,
        'year': year,
        'location': location,
        'range_limit': range_limit,
        'search_type': search_type,
        'num_results': num_results,
        'results': results,
        'distances': distances,
    })

    return jinja2_engine.render('nearby.html', self.template_values)
def _internal_search(cls, query_string, explicit_query_string_overrides=None,
                     cursor_support=False, existing_cursor=None, limit=20,
                     number_found_accuracy=None, offset=None,
                     sort_options=None, returned_fields=None, ids_only=False,
                     snippeted_fields=None, returned_expressions=None,
                     sort_limit=1000, *args, **kwargs):
    """ Query search records in the search index. Essentially the params are
    the same as for GAE Search API. The exceptions are cursor,
    returned_expressions and sort_options.

    'explicit_query_string_overrides' is an iterable of tuples of the form
    ('property', 'value') which can be used to explicitly overwrite values
    from the supplied query string. This is useful if you have some custom
    filters that must only have certain values. It can also be used to prevent
    searches occurring with restricted values; useful as part of permission
    systems.

    Cursor is replaced by two args - cursor_support and existing_cursor.
    Existing cursor is the websafe version of a cursor returned by a previous
    query. Obviously if cursor_support is False then we don't process the
    cursor.

    Both returned_expressions and sort_options are lists of tuples instead of
    passing in search.FieldExpressions or search.SortOptions (as this would
    leak implementation to the client).

    returned_expression = ('name_of_expression', 'expression')
    sort_option = ('sort_expression', 'direction', 'default_value')

    See https://cloud.google.com/appengine/docs/python/search/options for more
    detailed explanations.

    Sort limit should be overridden if possible matches exceeds 1000. It
    should be set to a value higher, or equal to, the maximum number of
    results that could be found for a given search.

    :param query_string: raw query string to run against the index
    :param explicit_query_string_overrides: iterable of (property, value)
        pairs appended as AND filters
    :param cursor_support: whether to build/pass a search.Cursor
    :param existing_cursor: websafe cursor string from a previous query
    :param limit: max number of results to return
    :param number_found_accuracy: accuracy of the number_found result
    :param offset: result offset (alternative to cursors)
    :param sort_options: list of (expression, direction, default_value) tuples
    :param returned_fields: field names to include in results
    :param ids_only: return only doc ids
    :param snippeted_fields: field names to snippet
    :param returned_expressions: list of (name, expression) tuples
    :param sort_limit: limit applied to search.SortOptions
    :param args: unused; accepted for interface compatibility
    :param kwargs: unused; accepted for interface compatibility
    :raises search.Error:
    :raises TypeError:
    :raises ValueError:
    """
    cursor = None
    compiled_sort_options = None
    compiled_field_expressions = None

    if explicit_query_string_overrides:
        # TODO: use regex to split up the query string and swap out/append the explicit params. At the moment
        # multiple values could be passed for the same category, leading to possible data leaks
        query_fragments = []
        for explicit_param in explicit_query_string_overrides:
            # escape ',' and '+' for the search API query syntax
            query_fragments.append(u'{}="{}"'.format(
                explicit_param[0],
                explicit_param[1].replace(',', '\,').replace('+',
                                                             '\+').strip()))

        explicit_string = u' AND '.join(query_fragments)
        if explicit_string:
            query_string = u'{} {}'.format(query_string, explicit_string)

    if cursor_support:
        if existing_cursor:
            # resume from the client-supplied websafe cursor
            cursor = search.Cursor(web_safe_string=existing_cursor)
        else:
            cursor = search.Cursor()

    if sort_options:
        # compile (expression, direction, default_value) tuples into the
        # API's SortExpression/SortOptions objects
        parsed_options = [
            search.SortExpression(expression=sort_option[0],
                                  direction=sort_option[1],
                                  default_value=sort_option[2])
            for sort_option in sort_options
        ]
        compiled_sort_options = search.SortOptions(
            expressions=parsed_options, limit=sort_limit)

    if returned_expressions:
        # compile (name, expression) tuples into FieldExpressions
        compiled_field_expressions = [
            search.FieldExpression(name=field_exp[0],
                                   expression=field_exp[1])
            for field_exp in returned_expressions
        ]

    options = search.QueryOptions(
        ids_only=ids_only,
        limit=limit,
        snippeted_fields=snippeted_fields,
        number_found_accuracy=number_found_accuracy,
        returned_fields=returned_fields,
        returned_expressions=compiled_field_expressions,
        sort_options=compiled_sort_options,
        offset=offset,
        cursor=cursor,
    )

    query = search.Query(query_string=query_string, options=options)

    try:
        # async search; caller resolves the returned future
        return cls.index.search_async(query=query)
    except (search.Error, TypeError, ValueError):
        logging.exception(u"Query {0} in {1} failed.".format(
            query_string, cls.index.name))
        raise
class Product(BaseDocumentManager):
    """Provides helper methods to manage Product documents.

    All Product documents built using these methods will include a core set
    of fields (see the _buildCoreProductFields method). We use the given
    product id (the Product entity key) as the doc_id. This is not required
    for the entity/document design-- each explicitly point to each other,
    allowing their ids to be decoupled-- but using the product id as the doc
    id allows a document to be reindexed given its product info, without
    having to fetch the existing document."""

    _INDEX_NAME = config.PRODUCT_INDEX_NAME

    # 'core' product document field names
    PID = 'pid'
    DESCRIPTION = 'description'
    CATEGORY = 'category'
    PRODUCT_NAME = 'name'
    PRICE = 'price'
    AVG_RATING = 'ar'  # average rating
    UPDATED = 'modified'

    # Each entry is [sort keyword, display label, SortExpression].
    _SORT_OPTIONS = [
        [
            AVG_RATING, 'average rating',
            search.SortExpression(
                expression=AVG_RATING,
                direction=search.SortExpression.DESCENDING,
                default_value=0)
        ],
        [
            PRICE, 'price',
            search.SortExpression(
                # other examples:
                # expression='max(price, 14.99)'
                # If you access _score in your sort expressions,
                # your SortOptions should include a scorer.
                # e.g. search.SortOptions(match_scorer=search.MatchScorer(),...)
                # Then, you can access the score to build expressions like:
                # expression='price * _score'
                expression=PRICE,
                direction=search.SortExpression.ASCENDING,
                default_value=9999)
        ],
        [
            UPDATED, 'modified',
            search.SortExpression(
                expression=UPDATED,
                direction=search.SortExpression.DESCENDING,
                default_value=1)
        ],
        [
            CATEGORY, 'category',
            search.SortExpression(
                expression=CATEGORY,
                direction=search.SortExpression.ASCENDING,
                default_value='')
        ],
        [
            PRODUCT_NAME, 'product name',
            search.SortExpression(
                expression=PRODUCT_NAME,
                direction=search.SortExpression.ASCENDING,
                default_value='zzz')
        ]
    ]

    # Lazily-built caches; see getSortMenu / getSortDict.
    _SORT_MENU = None
    _SORT_DICT = None

    @classmethod
    def deleteAllInProductIndex(cls):
        """Delete all documents in the product index."""
        cls.deleteAllInIndex()

    @classmethod
    def getSortMenu(cls):
        """Return the (keyword, label) sort menu, building it on first use."""
        if not cls._SORT_MENU:
            cls._buildSortMenu()
        return cls._SORT_MENU

    @classmethod
    def getSortDict(cls):
        """Return the sort keyword -> SortExpression map, built lazily."""
        if not cls._SORT_DICT:
            cls._buildSortDict()
        return cls._SORT_DICT

    @classmethod
    def _buildSortMenu(cls):
        """Build the default set of sort options used for Product search.

        Of these options, all but 'relevance' reference core fields that
        all Products will have."""
        res = [(elt[0], elt[1]) for elt in cls._SORT_OPTIONS]
        cls._SORT_MENU = [('relevance', 'relevance')] + res

    @classmethod
    def _buildSortDict(cls):
        """Build a dict that maps sort option keywords to their corresponding
        SortExpressions."""
        cls._SORT_DICT = {}
        for elt in cls._SORT_OPTIONS:
            cls._SORT_DICT[elt[0]] = elt[2]

    @classmethod
    def getDocFromPid(cls, pid):
        """Given a pid, get its doc. We're using the pid as the doc id, so we
        can do this via a direct fetch."""
        return cls.getDoc(pid)

    @classmethod
    def removeProductDocByPid(cls, pid):
        """Given a doc's pid, remove the doc matching it from the product
        index."""
        cls.removeDocById(pid)

    @classmethod
    def updateRatingInDoc(cls, doc_id, avg_rating):
        """Fetch the doc for doc_id and set its average rating in place.

        Returns the updated doc; callers are expected to reindex it (see
        updateRatingsInfo).

        Raises:
          errors.OperationFailedError: if no doc exists for doc_id.
        """
        # get the associated doc from the doc id in the product entity
        doc = cls.getDoc(doc_id)
        if doc:
            pdoc = cls(doc)
            pdoc.setAvgRating(avg_rating)
            # The use of the same id will cause the existing doc to be
            # reindexed.
            return doc
        else:
            raise errors.OperationFailedError(
                'Could not retrieve doc associated with id %s' % (doc_id, ))

    @classmethod
    def updateRatingsInfo(cls, doc_id, avg_rating):
        """Given a models.Product entity, update and reindex the associated
        document with the product entity's current average rating.
        """
        ndoc = cls.updateRatingInDoc(doc_id, avg_rating)
        # reindex the returned updated doc
        return cls.add(ndoc)

    # 'accessor' convenience methods

    def getPID(self):
        """Get the value of the 'pid' field of a Product doc."""
        return self.getFieldVal(self.PID)

    def getName(self):
        """Get the value of the 'name' field of a Product doc."""
        return self.getFieldVal(self.PRODUCT_NAME)

    def getDescription(self):
        """Get the value of the 'description' field of a Product doc."""
        return self.getFieldVal(self.DESCRIPTION)

    def getCategory(self):
        """Get the value of the 'category' field of a Product doc."""
        return self.getFieldVal(self.CATEGORY)

    def setCategory(self, cat):
        """Set the value of the 'category' field of a Product doc.

        Bug fix: the category is indexed as an AtomField with a string value
        (see _buildCoreProductFields), so it must be written back as an
        AtomField as well; the previous NumberField would reject string
        categories and change the indexed field type."""
        return self.setFirstField(
            search.AtomField(name=self.CATEGORY, value=cat))

    def getAvgRating(self):
        """Get the value of the 'ar' (average rating) field of a Product
        doc."""
        return self.getFieldVal(self.AVG_RATING)

    def setAvgRating(self, ar):
        """Set the value of the 'ar' field of a Product doc."""
        return self.setFirstField(
            search.NumberField(name=self.AVG_RATING, value=ar))

    def getPrice(self):
        """Get the value of the 'price' field of a Product doc."""
        return self.getFieldVal(self.PRICE)

    @classmethod
    def generateRatingsBuckets(cls, query_string):
        """Builds a dict of ratings 'buckets' and their counts, based on the
        value of the 'avg_rating" field for the documents retrieved by the
        given query.  See the 'generateRatingsLinks' method.  This
        information will be used to generate sidebar links that allow the
        user to drill down in query results based on rating.

        For demonstration purposes only; this will be expensive for large
        data sets.
        """
        # do the query on the *full* search results
        # to generate the facet information, imitating what may in future be
        # provided by the FTS API.
        try:
            sq = search.Query(query_string=query_string.strip())
            search_results = cls.getIndex().search(sq)
        except search.Error:
            logging.exception('An error occurred on search.')
            return None

        ratings_buckets = collections.defaultdict(int)
        # populate the buckets
        for res in search_results:
            ratings_buckets[int((cls(res)).getAvgRating() or 0)] += 1
        return ratings_buckets

    @classmethod
    def generateRatingsLinks(cls, query, phash):
        """Given a dict of ratings 'buckets' and their counts, builds a list
        of html snippets, to be displayed in the sidebar when showing results
        of a query. Each is a link that runs the query, additionally filtered
        by the indicated ratings interval."""
        ratings_buckets = cls.generateRatingsBuckets(query)
        if not ratings_buckets:
            return None
        rlist = []
        for k in range(config.RATING_MIN, config.RATING_MAX + 1):
            # NOTE: ratings_buckets is a defaultdict, so this lookup never
            # raises; the except branch is retained for safety.
            try:
                v = ratings_buckets[k]
            except KeyError:
                return
            # build html
            if k < 5:
                htext = '%s-%s (%s)' % (k, k + 1, v)
            else:
                htext = '%s (%s)' % (k, v)
            phash['rating'] = k
            hlink = '/psearch?' + urllib.urlencode(phash)
            rlist.append((hlink, htext))
        return rlist

    @classmethod
    def _buildCoreProductFields(cls, pid, name, description, category,
                                category_name, price):
        """Construct a 'core' document field list for the fields common to
        all Products. The various categories (as defined in the file
        'categories.py'), may add additional specialized fields; these will
        be appended to this core list. (see _buildProductFields)."""
        fields = [
            search.TextField(name=cls.PID, value=pid),
            # The 'updated' field is always set to the current date.
            search.DateField(name=cls.UPDATED,
                             value=datetime.datetime.now().date()),
            search.TextField(name=cls.PRODUCT_NAME, value=name),
            # strip the markup from the description value, which can
            # potentially come from user input.  We do this so that
            # we don't need to sanitize the description in the
            # templates, showing off the Search API's ability to mark up query
            # terms in generated snippets.  This is done only for
            # demonstration purposes; in an actual app,
            # it would be preferrable to use a library like Beautiful Soup
            # instead.
            # We'll let the templating library escape all other rendered
            # values for us, so this is the only field we do this for.
            search.TextField(name=cls.DESCRIPTION,
                             value=re.sub(r'<[^>]*?>', '', description)),
            search.AtomField(name=cls.CATEGORY, value=category),
            search.NumberField(name=cls.AVG_RATING, value=0.0),
            search.NumberField(name=cls.PRICE, value=price)
        ]
        return fields

    @classmethod
    def _buildProductFields(cls, pid=None, category=None, name=None,
                            description=None, category_name=None, price=None,
                            **params):
        """Build all the additional non-core fields for a document of the
        given product type (category), using the given params dict, and the
        already-constructed list of 'core' fields.  All such additional
        category-specific fields are treated as required.
        """
        fields = cls._buildCoreProductFields(pid, name, description, category,
                                             category_name, price)
        # get the specification of additional (non-'core') fields for this
        # category
        pdict = categories.product_dict.get(category_name)
        if pdict:
            # for all fields
            for k, field_type in pdict.iteritems():
                # see if there is a value in the given params for that field.
                # if there is, get the field type, create the field, and
                # append to the document field list.
                if k in params:
                    v = params[k]
                    if field_type == search.NumberField:
                        try:
                            val = float(v)
                            fields.append(search.NumberField(name=k, value=val))
                        except ValueError:
                            error_message = (
                                'bad value %s for field %s of type %s' %
                                (k, v, field_type))
                            logging.error(error_message)
                            raise errors.OperationFailedError(error_message)
                    elif field_type == search.TextField:
                        fields.append(search.TextField(name=k, value=str(v)))
                    else:
                        # you may want to add handling of other field types
                        # for generality. Not needed for our current sample
                        # data.
                        logging.warn('not processed: %s, %s, of type %s', k, v,
                                     field_type)
                else:
                    error_message = (
                        'value not given for field "%s" of field type "%s"' %
                        (k, field_type))
                    logging.warn(error_message)
                    raise errors.OperationFailedError(error_message)
        else:
            # else, did not have an entry in the params dict for the given
            # field.
            logging.warn(
                'product field information not found for category name %s',
                params['category_name'])
        return fields

    @classmethod
    def _createDocument(cls, pid=None, category=None, name=None,
                        description=None, category_name=None, price=None,
                        **params):
        """Create a Document object from given params."""
        # check for the fields that are always required.
        if pid and category and name:
            # First, check that the given pid has only visible ascii
            # characters, and does not contain whitespace. The pid will be
            # used as the doc_id, which has these requirements.
            if not cls.isValidDocId(pid):
                raise errors.OperationFailedError("Illegal pid %s" % pid)
            # construct the document fields from the params
            resfields = cls._buildProductFields(pid=pid,
                                                category=category,
                                                name=name,
                                                description=description,
                                                category_name=category_name,
                                                price=price,
                                                **params)
            # build and index the document.  Use the pid (product id) as the
            # doc id. (If we did not do this, and left the doc_id
            # unspecified, an id would be auto-generated.)
            d = search.Document(doc_id=pid, fields=resfields)
            return d
        else:
            raise errors.OperationFailedError('Missing parameter.')

    @classmethod
    def _normalizeParams(cls, params):
        """Normalize the submitted params for building a product.

        Raises:
          errors.OperationFailedError: on missing keys or a bad price value.
        """
        params = copy.deepcopy(params)
        try:
            params['pid'] = params['pid'].strip()
            params['name'] = params['name'].strip()
            params['category_name'] = params['category']
            params['category'] = params['category']
            try:
                params['price'] = float(params['price'])
            except ValueError:
                error_message = 'bad price value: %s' % params['price']
                logging.error(error_message)
                raise errors.OperationFailedError(error_message)
            return params
        except KeyError as e1:
            logging.exception("key error")
            raise errors.OperationFailedError(e1)
        except errors.Error as e2:
            logging.debug('Problem with params: %s: %s' %
                          (params, e2.error_message))
            raise errors.OperationFailedError(e2.error_message)

    @classmethod
    def buildProductBatch(cls, rows):
        """Build product documents and their related datastore entities, in
        batch, given a list of params dicts.  Should be used for new
        products, as does not handle updates of existing product entities.
        This method does not require that the doc ids be tied to the product
        ids, and obtains the doc ids from the results of the document add."""
        docs = []
        dbps = []
        for row in rows:
            try:
                params = cls._normalizeParams(row)
                doc = cls._createDocument(**params)
                docs.append(doc)
                # create product entity, sans doc_id
                dbp = models.Product(id=params['pid'],
                                     price=params['price'],
                                     category=params['category'])
                dbps.append(dbp)
            except errors.OperationFailedError:
                logging.error('error creating document from data: %s', row)
        try:
            add_results = cls.add(docs)
        except search.Error:
            logging.exception('Add failed')
            return
        if len(add_results) != len(dbps):
            # this case should not be reached; if there was an issue,
            # search.Error should have been thrown, above.
            raise errors.OperationFailedError(
                'Error: wrong number of results returned from indexing '
                'operation'
            )
        # now set the entities with the doc ids, the list of which are
        # returned in the same order as the list of docs given to the
        # indexers
        for i, dbp in enumerate(dbps):
            dbp.doc_id = add_results[i].id
        # persist the entities
        ndb.put_multi(dbps)

    @classmethod
    def buildProduct(cls, params):
        """Create/update a product document and its related datastore entity.
        The product id and the field values are taken from the params dict.
        """
        params = cls._normalizeParams(params)
        # check to see if doc already exists.  We do this because we need to
        # retain some information from the existing doc.  We could skip the
        # fetch if this were not the case.
        curr_doc = cls.getDocFromPid(params['pid'])
        d = cls._createDocument(**params)
        if curr_doc:
            # retain ratings info from existing doc
            avg_rating = cls(curr_doc).getAvgRating()
            cls(d).setAvgRating(avg_rating)
        # This will reindex if a doc with that doc id already exists
        doc_ids = cls.add(d)
        try:
            doc_id = doc_ids[0].id
        except IndexError:
            doc_id = None
            raise errors.OperationFailedError('could not index document')
        logging.debug('got new doc id %s for product: %s', doc_id,
                      params['pid'])

        # now update the entity
        def _tx():
            # Check whether the product entity exists. If so, we want to
            # update from the params, but preserve its ratings-related info.
            prod = models.Product.get_by_id(params['pid'])
            if prod:
                # update
                prod.update_core(params, doc_id)
            else:
                # create new entity
                prod = models.Product.create(params, doc_id)
            prod.put()
            return prod

        prod = ndb.transaction(_tx)
        logging.debug('prod: %s', prod)
        return prod
def post(self):
    """Handle a test-search form submission and render the results page.

    Bug fix: previously an unrecognized or missing "search-order" value left
    sort_exp unbound, so building the SortOptions raised NameError. The
    order keyword is now resolved through a dispatch table, and sorting is
    simply omitted when no known order was requested.
    """
    template_values = get_template_values(self)
    user = users.get_current_user()

    # Resume paging from the cursor passed back by the previous page, if any.
    if self.request.get('next'):
        cursor = search.Cursor(web_safe_string=self.request.get('next'))
    else:
        cursor = search.Cursor()

    # Commas are stripped because they are significant FTS query syntax.
    q = query = self.request.get("search-text").replace(',', "")
    order = self.request.get("search-order")
    completed = self.request.get("search-completed") == "on"
    template_values["query_values"] = {
        'query': query,
        'order': order,
        'completed': completed,
    }

    # Map each supported order keyword to (field expression, direction).
    # NOTE: the date_inc/date_dec directions mirror the original mapping
    # (date_inc -> DESCENDING) to preserve existing behavior.
    order_map = {
        "rating": ('rating', search.SortExpression.DESCENDING),
        "times_taken": ('times_taken', search.SortExpression.DESCENDING),
        "date_inc": ('date', search.SortExpression.DESCENDING),
        "date_dec": ('date', search.SortExpression.ASCENDING),
        "level_dec": ('level', search.SortExpression.DESCENDING),
        "level_inc": ('level', search.SortExpression.ASCENDING),
    }
    sort_options = None
    if order in order_map:
        expression, direction = order_map[order]
        sort_options = search.SortOptions(expressions=[
            search.SortExpression(expression=expression,
                                  direction=direction,
                                  default_value=0),
        ])

    query_options = search.QueryOptions(
        limit=self.page_depth,
        cursor=cursor,
        sort_options=sort_options,
    )
    query_obj = search.Query(query_string=query, options=query_options)
    results = search.Index(name="tests").search(query=query_obj)

    template_values["query_results"] = []
    for document in results:
        test = Test.query(Test.id == document.doc_id).get()
        if completed and user:
            # If the "Hide completed" checkbox is selected by the user
            if Mark.query(Mark.taker_entity.id == user.user_id(),
                          Mark.test.id == test.id).get() is not None:
                # And a Mark has been created
                continue  # Don't add it to the list.
            # If this continue is active, this selects out TAKEN tests
            # Otherwise , this if statement selects out MARKED tests
            if Mark.query(Mark.complete == False).get() is None:
                # And the Test has been marked as completed for this user.
                continue  # Don't add it to the list.
        template_values["query_results"].append(test)

    path = os.path.join(os.path.dirname(__file__),
                        os.path.join(template_dir, 'main.html'))
    self.response.out.write(template.render(path, template_values))
    return
class SearchTest(loanertest.EndpointsTestCase, parameterized.TestCase):
    """Tests for the search_utils helper functions.

    Fixes: the misspelled local 'expcted_returned_fields' is renamed to
    'expected_returned_fields', and a stray bare 'assert' is replaced with
    the unittest-style assertion used elsewhere in this class.
    """

    @parameterized.parameters((
        shelf_messages.Shelf(location='NY', capacity=50),
        'location:NY capacity:50 enabled:True',
    ), (
        shelf_messages.Shelf(location='NY', capacity=50, enabled=False),
        'location:NY capacity:50 enabled:False',
    ))
    def test_to_query(self, message, expected_query):
        """Tests the creation of a valid search query from ndb properties."""
        query = search_utils.to_query(message, shelf_model.Shelf)
        # The query is split because ndb properties are unordered when called
        # by model_class._properties. This test would be flaky otherwise.
        self.assertCountEqual(query.split(' '), expected_query.split(' '))

    @mock.patch.object(search_utils, 'logging', autospec=True)
    def test_document_to_message(self, mock_logging):
        """Tests the creation of a protorpc message from a search document."""
        test_search_document = search.ScoredDocument(
            doc_id='test_doc_id',
            fields=[
                search.NumberField(name='capacity', value=20.0),
                search.TextField(name='location', value='US MTV'),
                search.AtomField(name='location', value='US-MTV'),
                search.AtomField(name='enabled', value='True'),
                search.GeoField(name='lat_long',
                                value=search.GeoPoint(52.37, 4.88)),
                search.TextField(name='not_present', value='MTV')
            ])
        expected_message = shelf_messages.Shelf(enabled=True,
                                                location='US-MTV',
                                                capacity=20,
                                                latitude=52.37,
                                                longitude=4.88)
        response_message = search_utils.document_to_message(
            test_search_document, shelf_messages.Shelf())
        self.assertEqual(response_message, expected_message)
        self.assertTrue(response_message.enabled)
        # 'not_present' has no matching message field, so one error is logged.
        self.assertEqual(mock_logging.error.call_count, 1)

    def test_get_search_cursor(self):
        """Tests the creation of a search cursor with a web_safe_string."""
        expected_cursor_web_safe_string = 'False:ODUxODBhNTgyYTQ2ZmI0MDU'
        returned_cursor = (
            search_utils.get_search_cursor(expected_cursor_web_safe_string))
        self.assertEqual(expected_cursor_web_safe_string,
                         returned_cursor.web_safe_string)

    @mock.patch.object(search, 'Cursor', autospec=True)
    def test_get_search_cursor_error(self, mock_cursor):
        """Tests the creation of a search cursor when an error occurs."""
        mock_cursor.side_effect = ValueError
        with self.assertRaisesWithLiteralMatch(endpoints.BadRequestException,
                                               search_utils._CORRUPT_KEY_MSG):
            search_utils.get_search_cursor(None)

    @parameterized.named_parameters(
        {
            'testcase_name': 'QueryStringOnly',
            'request':
                shared_messages.SearchRequest(query_string='enrolled:True'),
            'expected_values': ('enrolled:True', None, [])
        },
        {
            'testcase_name': 'QueryStringWithReturnedFields',
            'request':
                shared_messages.SearchRequest(query_string='location:US-NYC',
                                              returned_fields=['location']),
            'expected_values': ('location:US-NYC', None, ['location'])
        },
    )
    def test_set_search_query_options(self, request, expected_values):
        """Tests setting the query options without sort options from message."""
        returned_query, returned_sort_options, returned_returned_fields = (
            search_utils.set_search_query_options(request))
        expected_query, expected_sort_options, expected_returned_fields = (
            expected_values)
        self.assertEqual(expected_sort_options, returned_sort_options)
        self.assertEqual(expected_query, returned_query)
        self.assertEqual(expected_returned_fields, returned_returned_fields)

    @parameterized.named_parameters(
        {
            'testcase_name': 'ExpressionWithDirection',
            'request':
                shared_messages.SearchRequest(
                    query_string='enrolled:True',
                    expressions=[
                        shared_messages.SearchExpression(
                            expression='enrolled',
                            direction=shared_messages.SortDirection.ASCENDING)
                    ]),
            'expected_sort_options_expressions': [
                search.SortExpression(
                    expression='enrolled',
                    direction=search.SortExpression.ASCENDING)
            ]
        },
        {
            'testcase_name': 'MultipleExpressionsWithDirection',
            'request':
                shared_messages.SearchRequest(
                    query_string='enrolled:True',
                    expressions=[
                        shared_messages.SearchExpression(
                            expression='enrolled',
                            direction=shared_messages.SortDirection.ASCENDING),
                        shared_messages.SearchExpression(
                            expression='serial_number',
                            direction=shared_messages.SortDirection.DESCENDING)
                    ]),
            'expected_sort_options_expressions': [
                search.SortExpression(
                    expression='enrolled',
                    direction=search.SortExpression.ASCENDING),
                search.SortExpression(
                    expression='serial_number',
                    direction=search.SortExpression.DESCENDING)
            ]
        },
        {
            'testcase_name': 'ExpressionWithoutDirection',
            'request':
                shared_messages.SearchRequest(
                    query_string='enrolled:True',
                    expressions=[
                        shared_messages.SearchExpression(expression='enrolled')
                    ]),
            'expected_sort_options_expressions':
                [search.SortExpression(expression='enrolled')]
        },
        {
            'testcase_name': 'MultipleExpressionsWithoutDirection',
            'request':
                shared_messages.SearchRequest(
                    query_string='enrolled:True',
                    expressions=[
                        shared_messages.SearchExpression(expression='enrolled'),
                        shared_messages.SearchExpression(
                            expression='serial_number')
                    ]),
            'expected_sort_options_expressions': [
                search.SortExpression(
                    expression='enrolled',
                    direction=search.SortExpression.DESCENDING),
                search.SortExpression(
                    expression='serial_number',
                    direction=search.SortExpression.DESCENDING)
            ]
        },
    )
    def test_set_search_query_options_with_sort_options(
            self, request, expected_sort_options_expressions):
        """Tests setting query options with sort options from message."""
        returned_query, returned_sort_options, returned_returned_fields = (
            search_utils.set_search_query_options(request))
        del returned_query  # Unused.
        del returned_returned_fields  # Unused.
        for i in range(len(returned_sort_options.expressions)):
            self.assertEqual(returned_sort_options.expressions[i].expression,
                             expected_sort_options_expressions[i].expression)
            self.assertEqual(returned_sort_options.expressions[i].direction,
                             expected_sort_options_expressions[i].direction)
def get(self): # QUERY STRING q = self.request.get('q', default_value='') mobile_coupon = self.request.get('mobile_coupon', default_value='') latlong = self.request.get('latlong', default_value='') results = [] number_found = 0 try: index = search.Index(name='sample') # 位置情報で並び替え expressions = [] if latlong: expressions.append( search.SortExpression( expression='distance(location, geopoint(%s))' % latlong, direction=search.SortExpression.ASCENDING, default_value=None)) # ソートキーの設定 sort_opts = search.SortOptions(match_scorer=search.MatchScorer(), expressions=expressions) # 結果フィールドの設定 options = search.QueryOptions(limit=100, returned_fields=[ 'name', 'content', 'image', 'address', 'tel', 'location' ], snippeted_fields=['content'], sort_options=sort_opts, number_found_accuracy=10000, cursor=None) # 検索クエリの編集 query_string = u'' if q: query_string = u'(content:(%s) OR name:(%s))' % (q, q) if mobile_coupon: query_string += u' mobile_coupon:(%s)' % (mobile_coupon) # 検索実行 query = search.Query(query_string=query_string, options=options) documents = index.search(query) # 検索結果 number_found = documents.number_found for document in documents: # スニペット編集 expressions = [] for expression in document.expressions: expressions.append(expression.value) results.append({ 'doc_id': document.doc_id, 'name': document.field('name').value, 'content': document.field('content').value, 'image': document.field('image').value, 'snippet': ''.join(expressions), 'address': document.field('address').value, 'tel': document.field('tel').value }) # logging.info("#-- SearchHandler : results:%s" % (results)) except Exception as e: logging.exception("#-- SearchHandler Error: id:%s exception:%s" % (id, e)) template = JINJA_ENVIRONMENT.get_template('index.html') self.response.write( template.render({ 'q': q, 'mobile_coupon': mobile_coupon, 'latlong': latlong, 'results': results, 'number_found': number_found }))
from google.appengine.api import search, memcache from google.appengine.ext import db import logging from flask import request, jsonify from datetime import datetime, timedelta, date # sort results by author descending expr_list = [ search.SortExpression(expression='title', default_value='', direction=search.SortExpression.DESCENDING) ] # construct the sort options sort_opts = search.SortOptions(expressions=expr_list) query_options = search.QueryOptions(limit=10, sort_options=sort_opts) _INDEX_NAME = "posts" def delete_document(document_id): """deletes document from search index""" doc_index = search.Index(name=_INDEX_NAME) doc_index.delete(str(document_id)) def add_document_in_search_index(doc_id, title, body, summary, category, timestamp, tags): document = create_document(doc_id, title, body, summary, category, timestamp, tags)
def get_grouped_documents(self, application_id, keyword, index,
                          document_model):
    """
    Get document groups by application id, search keyword and index
    :param application_id: application id
    :param keyword: search keywords
    :param index: pager index
    :param document_model: document type
    :returns: [document_group], total / [], 0
    """
    if index is None:
        index = 0
    try:
        application_id = long(application_id)
        index = int(index)
    except:
        # Best-effort: non-numeric ids/indexes yield an empty result.
        # NOTE(review): bare except also hides unrelated errors.
        return [], 0
    # check auth
    aps = ApplicationService()
    if not aps.is_my_application(application_id):
        return [], 0
    result = []
    query_string = ''
    if keyword and len(keyword.strip()) > 0:
        # Split the keyword into include terms and '-'-prefixed exclude terms.
        source = [item for item in keyword.split(' ') if len(item) > 0]
        plus = [item for item in source if item.find('-') != 0]
        minus = [
            item[1:] for item in source
            if item.find('-') == 0 and len(item) > 1
        ]
        if len(plus) > 0:
            keyword = ' '.join(plus)
            query_string = '(name:{1}) OR (email:{1}) OR (description:{1}) OR (ip:{1}) OR (title:{1}) OR (status:{1})'.replace(
                '{1}', keyword)
        if len(minus) > 0:
            # NOTE(review): when both include and exclude terms are present,
            # this overwrites the include query built above -- confirm intent.
            keyword = ' '.join(minus)
            query_string = 'NOT ((name:{1}) OR (email:{1}) OR (description:{1}) OR (ip:{1}) OR (title:{1}) OR (status:{1}))'.replace(
                '{1}', keyword)
    cache_key = MemcacheKey.document_search(application_id, document_model)
    cache_value = memcache.get(key=cache_key)
    if cache_value and keyword + str(index) in cache_value:
        # return from cache
        return cache_value[keyword + str(index)]['result'], cache_value[
            keyword + str(index)]['count']
    # Newest documents first, paged by config.page_size.
    create_time_desc = search.SortExpression(
        expression='create_time',
        direction=search.SortExpression.DESCENDING,
        default_value=0)
    options = search.QueryOptions(
        offset=config.page_size * index,
        limit=config.page_size,
        sort_options=search.SortOptions(expressions=[create_time_desc],
                                        limit=1000),
        returned_fields=[
            'title', 'name', 'times', 'description', 'email', 'create_time'
        ])
    query = search.Query(query_string=query_string, options=options)
    try:
        # Each document model lives in its own namespace, keyed by app id.
        if document_model == DocumentModel.exception:
            # search data from ExceptionModel
            documents = search.Index(
                namespace='ExceptionModel',
                name=str(application_id)).search(query)
        elif document_model == DocumentModel.log:
            # search data from LogModel
            documents = search.Index(
                namespace='LogModel', name=str(application_id)).search(query)
        else:
            # search data from CrashModel
            documents = search.Index(
                namespace='CrashModel',
                name=str(application_id)).search(query)
    except:
        # schema missing
        return [], 0
    for document in documents:
        result.append({
            'group_tag':
                document.doc_id,
            'title':
                document.field('title').value,
            'name':
                document.field('name').value,
            'times':
                int(document.field('times').value),
            'description':
                document.field('description').value,
            'email':
                document.field('email').value,
            'create_time':
                document.field('create_time').value.strftime(
                    '%Y-%m-%dT%H:%M:%S.%fZ')
        })
    # if number of documents over maximum then return the maximum
    if documents.number_found > 1000 + config.page_size:
        count = 1000 + config.page_size
    else:
        count = documents.number_found
    # set memory cache for 12 hours
    if cache_value is None:
        cache_value = {
            keyword + str(index): {
                'result': result,
                'count': count
            }
        }
        memcache.set(key=cache_key, value=cache_value, time=43200)
    else:
        cache_value[keyword + str(index)] = {
            'result': result,
            'count': count
        }
        memcache.set(key=cache_key, value=cache_value, time=43200)
    return result, count
def get(self):
    """Handle a databook page request, including optional full-text search.

    NOTE(review): no response is rendered within this visible span -- the
    remainder of the handler presumably follows; confirm against the full
    file.
    """
    # Get the databook name
    databook_name = get_databook_name(self.request.get('db'))
    # Get the databook display title
    databook_title = get_databook_title(self.request.get('db'))
    # Get the name of the full-text search index
    databook_indexname = get_databook_indexname(self.request.get('db'))
    # Initialize the message to display
    message_data = ''
    # Check for administrator login
    admin_login = False
    if users.is_current_user_admin():
        admin_login = True
    # Message shown while an administrator is logged in
    admin_message = ''
    if users.get_current_user():
        if admin_login:
            admin_message = '(管理者としてログインしています)'
        else:
            admin_message = '(管理者ではありません)'
    # Configure the login/logout URL
    if users.get_current_user():
        login_url = users.create_logout_url(self.request.uri)
        login_text = '[ログアウト]'
    else:
        login_url = users.create_login_url(self.request.uri)
        # login_text = '[ログイン]'
        login_text = '[管理]'
    # Determine whether datastore writes are currently disabled
    write_disabled_message = ''
    if not capabilities.CapabilitySet('datastore_v3',
                                      ['write']).is_enabled():
        write_disabled_message = '【現在書き込みは禁止しています】'
    # Get the full-text search word
    search_flag = False
    search_count = 0
    search_word = self.request.get('word').strip()
    # A search word starting with '=' gets special prefix handling
    show_all_flag = False
    show_offset = 0
    if search_word.startswith('='):
        i = 1
        while i < len(search_word):
            ch = search_word[i]
            # '*' shows everything, ignoring the per-article show flag
            if ch == '*':
                i += 1
                show_all_flag = True
                continue
            # A digit run (at most 5 digits) is used as the display offset
            if ch.isdigit():
                i += 1
                j = i - 1
                while i < len(search_word):
                    ch = search_word[i]
                    if ch.isdigit():
                        i += 1
                        continue
                    break
                k = i
                if (k - j) > 5:
                    k = j + 5
                show_offset = int(search_word[j:k])
                continue
            # Anything else ends the prefix scan
            break
        search_word2 = search_word[i:]
    else:
        search_word2 = search_word
    # Check the search word
    if search_word2:
        # Perform the full-text search
        articles = []
        # Sort the results by date, descending
        expr_list = [
            search.SortExpression(
                expression='date',
                default_value=datetime.datetime.min,
                direction=search.SortExpression.DESCENDING)
        ]
        # Set into the sort options
        sort_opts = search.SortOptions(expressions=expr_list)
        # Set into the query options
        # (display limit, offset, sort options; only titles are returned)
        query_opts = search.QueryOptions(limit=mainpage_show_num,
                                         offset=show_offset,
                                         sort_options=sort_opts,
                                         returned_fields=['title'])
        try:
            # Run the full-text search with the word and query options
            query_obj = search.Query(query_string=search_word2,
                                     options=query_opts)
            search_results = search.Index(name=databook_indexname).search(
                query=query_obj)
            # Collect the article titles from the search results
            req_titles = []
            for scored_doc in search_results:
                req_titles.append(scored_doc.field('title').value)
            if len(req_titles) >= 1:
                # Query the articles (by title, up to the display limit)
                if show_all_flag:
                    articles_query = Article.query(
                        Article.title.IN(req_titles),
                        ancestor=databook_key(databook_name)).order(
                            -Article.date)
                else:
                    articles_query = Article.query(
                        Article.title.IN(req_titles),
                        Article.show_flag == 1,
                        ancestor=databook_key(databook_name)).order(
                            -Article.date)
                articles = articles_query.fetch(mainpage_show_num)
        except (search.QueryError, search.InvalidRequest), e:
            # On a query error (can happen when the search string contains
            # symbols)
            message_data = message_data + '(クエリーエラー:検索文字列に記号が含まれると発生することがあります)'
        search_flag = True
        search_count = len(articles)
def query(querystr, cursorstr, limit): expr = search.SortExpression(expression="_score * 1.0", direction=search.SortExpression.DESCENDING, default_value=0.0) # Sort up to 1000 matching results by subject in descending order sort = search.SortOptions(expressions=[expr], limit=1000) cursor = search.Cursor(web_safe_string=cursorstr) options = search.QueryOptions( limit=limit, # the number of results to return cursor=cursor, sort_options=sort, returned_fields=["author", "tags", "title", "published"], snippeted_fields=["title", "content"], ) query = search.Query(query_string=_escape(querystr), options=options) index = search.Index(name="article_index") results = index.search(query) searchlist = [] for doc in results: postid = int(doc.doc_id) tags = doc["tags"][0].value.split(' ') date = doc["published"][0].value author = doc["author"][0].value title = '' content = '' for expr in doc.expressions: if expr.name == "content": content = expr.value elif expr.name == "title": title = expr.value searchlist.append({ 'postid': postid, "tags": tags, 'content': _useem(content), 'title': _useem(title), 'author': author, 'date': (date + timedelta(hours=8)).strftime('%Y-%m-%d %H:%M'), }) next_cursor = results.cursor if next_cursor: next_cursor_urlsafe = next_cursor.web_safe_string else: next_cursor_urlsafe = '' total = results.number_found return { 'query': querystr, 'size': len(searchlist), 'total': total, 'cursor': next_cursor_urlsafe, 'list': searchlist, }
def search(rq):
    """Item search request handler: parses query params, runs a full-text
    search against ITEMS_INDEX, and renders a paginated results page.

    Python 2 semantics matter here: ``/`` is integer floor division on the
    paging math, and ``map``/``filter`` return lists.
    """
    ###
    # Temporary parameter rename redirect.
    # Old short query-string keys (c/p/q/s) are rewritten to the canonical
    # PARAM.* names and the client is redirected to the canonical URL.
    from .util import asciidict, unicodedict
    q = unicodedict(rq.GET)
    _redir = False
    for _from, _to in (('c', PARAM.CATEGORY), ('p', PARAM.PAGE),
                       ('q', PARAM.SEARCH), ('s', PARAM.SORT)):
        val = q.pop(_from, None)
        if val:
            q[_to] = val
            _redir = True
    if _redir:
        logging.info("Redirecting %r -> %r" % (rq.GET, q))
        return redir(rq.path + "?%s" % urllib.urlencode(asciidict(q)))
    # ###

    def page_q(page):
        # Build the query string for a given page; page 1 drops the param.
        return qset(PARAM.PAGE, page if page >= 2 else None)

    page = rq.GET.pop(PARAM.PAGE, None)
    if page:
        try:
            page = int(page)
        except ValueError:
            return not_found("Invalid page '%s'" % (page,))
        if page < 2:
            # Page 1 canonically has no page parameter.
            return redir(page_q(page))
    else:
        page = 1
    page_size = 72  # divisible by 2, 3, and 4
    # Deepest reachable page given the search offset cap (integer division).
    page_limit = g_search.MAXIMUM_SEARCH_OFFSET / page_size + 1
    if page > page_limit:
        return redir(page_q(page_limit))

    # Map the sort parameter to a SortExpression; unknown values redirect
    # back with the sort parameter stripped.
    sort = rq.GET.get(PARAM.SORT)
    if sort == SORT.CHEAP:
        sort = g_search.SortExpression(
            'us_cents', g_search.SortExpression.ASCENDING)
    elif sort == SORT.DISCOUNT_AMT:
        sort = g_search.SortExpression(
            'discount_us_cents', g_search.SortExpression.DESCENDING)
    elif sort == SORT.DISCOUNT_PC:
        sort = g_search.SortExpression(
            'discount_pc', g_search.SortExpression.DESCENDING)
    elif sort == SORT.EXPENSIVE:
        sort = g_search.SortExpression(
            'us_cents', g_search.SortExpression.DESCENDING)
    elif sort is not None:
        return redir(qset(PARAM.SORT))
    # Default sort is rank descending, and the rank is the added timestamp.
    # (note: rank would be referenced as "_rank")
    # sort = g_search.SortExpression('added', g_search.SortExpression.DESCENDING)
    if sort:
        sort = g_search.SortOptions(
            [sort], limit=g_search.MAXIMUM_SORTED_DOCUMENTS)

    index = g_search.Index(ITEMS_INDEX)
    opts = g_search.QueryOptions(
        limit=page_size,
        # Sorted queries can only count up to the sorted-documents cap.
        number_found_accuracy=g_search.MAXIMUM_SORTED_DOCUMENTS
        if sort else g_search.MAXIMUM_SEARCH_OFFSET,
        offset=page_size * (page - 1),
        sort_options=sort)

    # Build the query expression and the human-readable active-filter list.
    expr, filters = [], []
    search_q = rq.GET.get(PARAM.SEARCH)
    if search_q:
        # Normalize the free-text query to a safe character set.
        search_q = re.sub(r"[^a-z0-9&_~#]", " ",
                          search_q.lower().strip()).strip()
        if search_q:
            expr.append(search_q)
            filters.append(('"%s"' % search_q, qset(PARAM.SEARCH)))
    cats = rq.GET.get(PARAM.CATEGORY)
    if cats:
        cats = cats.split(",")
        try:
            cats = map(int, cats)
        except ValueError:
            return not_found("Invalid categories %s" % (cats,))
        cats = nub(cats)
        cat_infos = map(get_categories().get, cats)
        if not all(cat_infos):
            return not_found("Invalid categories %s" % (cats,))
        cats = zip(cats, cat_infos)
        cat_ids = ['"%d"' % c[0] for c in cats]
        expr.append("categories:(%s)" % " OR ".join(cat_ids))
        cat_names = [c[1][1] for c in cats]
        filters.append((" OR ".join(cat_names), qset(PARAM.CATEGORY)))

    with log_latency("Search latency {:,d}ms"):
        rs = index.search(g_search.Query(" ".join(expr), opts), deadline=10)

    # limit to 1000
    num_found = min(rs.number_found, g_search.MAXIMUM_SEARCH_OFFSET)
    max_page = num_found / page_size
    if rs.number_found % page_size:
        max_page += 1
    max_page = max(min(max_page, page_limit), 1)
    if page > max_page:
        return redir(page_q(max_page))

    def paging():
        """Build the pager model: a window of up to 11 page links around the
        current page, with first/last shortcuts and ellipsis markers."""
        start_page = min(max(page - 5, 1), max(max_page - 10, 1))
        end_page = min(start_page + 10, max_page)
        pages = [(p, page_q(p), p == page)
                 for p in range(start_page, end_page + 1)]
        if not pages:
            # zero results, not even a single page
            return
        if len(pages) > 4:
            # Replace window edges with jump-to-first/last links, and mark
            # gaps with an ellipsis entry.
            if pages[0][0] > 1:
                pages[0] = (1, page_q(1), False)
            if pages[1][0] > 2:
                pages[1] = (u"…",) + pages[1][1:]
            if pages[-1][0] < max_page:
                pages[-1] = (max_page, page_q(max_page), False)
            if pages[-2][0] < (max_page - 1):
                pages[-2] = (u"…",) + pages[-2][1:]
        paging = {'range': pages}
        p_prev = filter(lambda p: p[0] == page - 1, pages)
        if p_prev:
            paging['prev'] = p_prev[0]
        p_next = filter(lambda p: p[0] == page + 1, pages)
        if p_next:
            paging['next'] = p_next[0]
        return paging

    with log_latency("get_categories() latency {:,d}ms"):
        cats = get_categories()
    with log_latency("ItemView latency {:,d}ms"):
        items = ItemView.make_views(rs.results, cats)
    ctx = {
        'items': items,
        'paging': paging(),
        'filters': filters,
        'warnings': [],
        'PARAM': PARAM,
        'SORT': SORT,
    }
    # Beyond the offset cap the count is a lower bound, so render "N+".
    if rs.number_found < g_search.MAXIMUM_SEARCH_OFFSET:
        ctx['total_count'] = "{:,d}".format(rs.number_found)
    else:
        ctx['total_count'] = "{:,d}+".format(g_search.MAXIMUM_SEARCH_OFFSET)
    if rs.number_found >= g_search.MAXIMUM_SORTED_DOCUMENTS:
        ctx['warnings'].append(
            "Sorting may be missing items due to large number of hits")
    with log_latency("Render latency {:,d}ms"):
        return render("search.html", ctx)
def search_content(self, params):
    """Search the content index and return annotated result dicts.

    Args:
        params: dict of search parameters; an optional 'page' key (popped
            here) selects the result page, the rest is stringified into the
            query by self._stringify_search_params.

    Returns:
        A list of result dicts, post-processed by
        self._annotate_search_content.
    """
    index = search.Index(config.content_index)
    search_text = ''  # NOTE(review): unused local — candidate for removal.

    # This search_results objects has properties `number_found`, `results`,
    # and `cursor`. See
    # https://cloud.google.com/appengine/docs/python/search/searchresultsclass
    # search.Query docs:
    # https://cloud.google.com/appengine/docs/python/search/queryclass
    # search.QueryOptions docs:
    # https://cloud.google.com/appengine/docs/python/search/queryoptionsclass

    # Pagination using a 'page' argument pulls n arguments offset by n*page
    # pull first set of results with page=0
    page_size = 20
    offset = 0
    if 'page' in params:
        offset = int(params.pop('page')) * page_size

    # Build the SortOptions with 3 sort keys, applied in order:
    # display_order, then votes_for, then created — all descending.
    sort1 = search.SortExpression(
        expression='display_order',
        direction=search.SortExpression.DESCENDING,
        default_value=0)
    sort2 = search.SortExpression(
        expression='votes_for',
        direction=search.SortExpression.DESCENDING,
        default_value=0)
    sort3 = search.SortExpression(
        expression='created',
        direction=search.SortExpression.DESCENDING,
        default_value=0)
    sort_opts = search.SortOptions(expressions=[sort1, sort2, sort3])

    search_results = index.search(
        search.Query(query_string=self._stringify_search_params(params),
                     options=search.QueryOptions(
                         limit=page_size,
                         offset=offset,
                         snippeted_fields=['summary', 'body'],
                         sort_options=sort_opts,
                     )))

    # Enrich chapter documents with fields from their parent Book so the
    # client can render book context without extra lookups.
    for result in search_results.results:
        if 'Chapter_' in result.doc_id:
            book_id = None
            for field in result.fields:
                if field.name == 'books':
                    book_id = field.value
            if book_id:
                book = Book.get_by_id(book_id)
                result.fields.append(search.TextField('bookUID', book.uid))
                result.fields.append(
                    search.TextField('bookTitle', book.title))
                result.fields.append(
                    search.TextField(
                        'bookIcon',
                        util.extract_value_from_json(book.icon, 'link')))
                # Chapter number = 1-based position in the book's chapter list.
                result.fields.append(
                    search.TextField(
                        'chapterNumber',
                        str(book.chapters.index(result.doc_id) + 1)))

    result_dicts = [
        util.search_document_to_dict(doc)
        for doc in search_results.results
    ]
    result_dicts = self._annotate_search_content(result_dicts)
    return result_dicts
def get(self):
    """Directory search endpoint: looks up people by query or name and
    writes a JSON payload, with memcache-based rate limiting and result
    caching.

    Query params: q (search text, max 100 chars), name (max 50 chars),
    token (echoed back), page_num, page_size.
    """
    self.response.headers['Content-Type'] = 'text/plain'
    # Sanitize and bound all user-supplied parameters.
    search_query = str(
        urllib.unquote(cgi.escape(self.request.get('q')).lower()[:100]))
    name = str(
        urllib.unquote(cgi.escape(self.request.get('name')).lower()[:50]))
    token = str(urllib.unquote(cgi.escape(self.request.get('token'))))
    page_num = parse_int(
        urllib.unquote(cgi.escape(self.request.get('page_num'))), 1)
    page_size = parse_int(
        urllib.unquote(cgi.escape(self.request.get('page_size'))), 20)

    # No query at all: return an empty result set immediately.
    if search_query + name == "":
        d = {}
        d['data'] = []
        d['token'] = token
        d['q'] = ""
        s = json.dumps(d)
        self.response.out.write(s)
        return

    if search_query == "":
        search_query = name
    if page_size > _PAGE_SIZE or page_size < 1:
        page_size = _PAGE_SIZE

    # Flood Prevention: per-IP hit counter in memcache; above 1000 hits the
    # IP is banned with an exponentially growing ban time (capped at 7 days).
    ip = str(self.request.remote_addr)
    ipCount = memcache.get(ip)
    if ipCount is not None:
        if ipCount > 1000:
            d = {}
            d['data'] = 'Quota Exceeded'
            d['token'] = token
            d['q'] = search_query
            s = json.dumps(d)
            self.response.out.write(s)
            ban_time = 600 + 60 * 2**((ipCount - 1000))
            if ban_time > 7 * 24 * 60 * 60:
                ban_time = 7 * 24 * 60 * 60
            logging.info('Quota exceeded for ' + ip + ', count at ' +
                         str(ipCount) + ', banned for ' + str(ban_time))
            memcache.replace(ip, ipCount + 1, time=ban_time)
            # Email an admin every 100th hit past the threshold.
            if (ipCount - 1001) % 100 == 0:
                message = mail.EmailMessage(
                    sender=
                    "IP Banning <*****@*****.**>",
                    subject="RPIDirectory IP " + ip + " Banned")
                message.to = "*****@*****.**"
                message.body = "IP: " + ip + "\nban time: " + str(
                    ban_time
                ) + "\nQuery: " + search_query + "\nHit Count: " + str(
                    ipCount)
                message.send()
                logging.info("EMail sent about ip: " + ip)
            return
        memcache.replace(ip, ipCount + 1, time=600)
    else:
        memcache.add(ip, 1, time=600)

    # Normalize the query: sorted terms AND-ed together, so equivalent
    # queries share one cache key.
    queries = map(str, search_query.split())
    queries = sorted(queries)
    query_string = ' AND '.join(queries)

    d = {}
    d["data"] = []
    d["token"] = token
    d["q"] = search_query

    # Serve from cache when possible; otherwise run the search and cache
    # the result for 28 days (2419200 s).
    data = memcache.get(query_string)
    if not data:
        data = []
        #Sort results by first name descending
        expr_list = [
            search.SortExpression(
                expression='first_name',
                default_value='',
                direction=search.SortExpression.DESCENDING)
        ]
        # construct the sort options
        sort_opts = search.SortOptions(expressions=expr_list)
        offset_num = (page_num - 1) * page_size
        query_options = search.QueryOptions(limit=page_size,
                                            offset=offset_num,
                                            ids_only=True,
                                            sort_options=sort_opts)
        results = search.Index(name=_INDEX_NAME).search(query=search.Query(
            query_string=query_string, options=query_options))
        # ids_only search: doc_id is the person's rcsid; hydrate from the
        # datastore.
        for result in results:
            rcsid = result.doc_id
            r = Person.get_by_id(rcsid)
            if r:
                data.append(Person.buildMap(r))
        memcache.add(query_string, data, time=2419200)
    d["data"] = data
    s = json.dumps(d)
    self.response.out.write(s)
def _render(self):
    """Render the advanced team-awards search page.

    With no filters selected (a "new search"), renders an empty form.
    Otherwise builds a query against the team-awards search index from the
    selected year / award types / seed / playoff level / CAD filters,
    fetches matching Team models, and renders the results.
    """
    new_search = not self._year or (not self._award_types and not self._seed and not self._playoff_level and not self._cad_model)
    if new_search:
        result_models = []
        num_results = 0
        result_expressions = None
    else:
        # Construct query string
        sort_options_expressions = []
        returned_expressions = []
        partial_queries = []
        search_index = search.Index(name=SearchHelper.TEAM_AWARDS_INDEX)
        partial_queries.append('year={}'.format(self._year))
        award_filter = ' OR '.join(['award={}'.format(award_type) for award_type in self._award_types])
        if award_filter:
            partial_queries.append(award_filter)
        if self._seed:
            # Filter on the per-seed count field; also return it and
            # optionally sort by it.
            seed_field_name = 'seed_{}'.format(self._seed)
            partial_queries.append('{}>0'.format(seed_field_name))
            returned_expressions.append(search.FieldExpression(
                name='seed_count', expression=seed_field_name))
            if self._sort_field == 'seed':
                sort_options_expressions.append(
                    search.SortExpression(
                        expression=seed_field_name,
                        direction=search.SortExpression.DESCENDING))
        if self._playoff_level:
            comp_level_name = 'comp_level_{}'.format(self.PLAYOFF_MAP[self._playoff_level])
            partial_queries.append('{}>0'.format(comp_level_name))
            returned_expressions.append(search.FieldExpression(
                name='comp_level_count', expression=comp_level_name))
            if self._sort_field == 'playoff_level':
                sort_options_expressions.append(
                    search.SortExpression(
                        expression=comp_level_name,
                        direction=search.SortExpression.DESCENDING))
        if self._cad_model:
            partial_queries.append('has_cad=1')
        query_string = ' AND '.join(partial_queries)

        # Tiebreak sorting by number
        sort_options_expressions.append(
            search.SortExpression(
                expression='number',
                direction=search.SortExpression.ASCENDING))

        # Perform query
        query = search.Query(
            query_string=query_string,
            options=search.QueryOptions(
                limit=self.PAGE_SIZE,
                number_found_accuracy=10000,  # Larger than the number of possible results
                offset=self.PAGE_SIZE * self._page,
                sort_options=search.SortOptions(
                    expressions=sort_options_expressions
                ),
                returned_expressions=returned_expressions
            )
        )

        docs = search_index.search(query)
        num_results = docs.number_found
        model_keys = []
        # Map team_key -> {expression name -> value} for template display;
        # doc_ids are assumed to be '<team_key>_...' (split on '_').
        result_expressions = defaultdict(lambda: defaultdict(float))
        for result in docs.results:
            team_key = result.doc_id.split('_')[0]
            model_keys.append(ndb.Key('Team', team_key))
            for expression in result.expressions:
                result_expressions[team_key][expression.name] = expression.value

        # Fetch all Team models concurrently.
        model_futures = ndb.get_multi_async(model_keys)
        result_models = [model_future.get_result() for model_future in model_futures]

    self.template_values.update({
        'valid_years': self.VALID_YEARS,
        'valid_award_types': self.VALID_AWARD_TYPES,
        'num_special_awards': len(SORT_ORDER),
        'valid_seeds': self.VALID_SEEDS,
        'seed': self._seed,
        'playoff_level': self._playoff_level,
        'page_size': self.PAGE_SIZE,
        'max_results': self.MAX_RESULTS,
        'page': self._page,
        'year': self._year,
        'award_types': self._award_types,
        'cad_model': self._cad_model,
        'new_search': new_search,
        'num_results': num_results,
        'capped_num_results': min(self.MAX_RESULTS, num_results),
        'result_models': result_models,
        'result_expressions': result_expressions,
        'sort_field': self._sort_field,
    })
    return jinja2_engine.render('advanced_search.html', self.template_values)
def simple_search(querystring=None, start=None, end=None, category=None,
                  venue_slug=None, limit=100, sort=None):
    """ TODO: "term", "near", "by type", "now" and any combo """
    logging.debug([start, end])

    # Build the query as a list of clauses joined with ' AND '.  An
    # explicit querystring, if given, becomes the first clause.
    clauses = [querystring] if querystring else []
    # Now = started and hasn't ended yet
    if start:
        clauses.append('start <= %s' % unix_time(start))
    if end:
        clauses.append('end >= %s' % unix_time(end))
    if venue_slug:
        clauses.append(' venue_slug: %s' % venue_slug)
    if category:
        if isinstance(category, list):
            # Any of the listed categories matches.
            ored = ' OR '.join(' category: %s' % c for c in category)
            clauses.append(' (' + ored + ' ) ')
        else:
            clauses.append('category: %s' % category)
    querystring = ' AND '.join(clauses)

    #DISTANCE_LIMIT = int(3 * 111) # 3 KM - 3 * 10,000km per 90 degrees
    #querystring += ' AND distance(venue_geo, geopoint(%s,%s)) < %s' % (44.958815,-93.238138, DISTANCE_LIMIT)

    # Optional single-key sort; a leading '-' selects descending order.
    sort_expressions = []
    if sort:
        if sort[0] == '-':
            direction = search.SortExpression.DESCENDING
            sort = sort[1:]
        else:
            direction = search.SortExpression.ASCENDING
        sort_expressions.append(
            search.SortExpression(expression=sort, direction=direction,
                                  default_value=0))
    sort_options = search.SortOptions(expressions=sort_expressions)

    q_options = search.QueryOptions(limit=limit, sort_options=sort_options)
    logging.debug('Performing a search with querystring: %s' % querystring)
    search_query = search.Query(query_string=querystring, options=q_options)
    search_results = get_search_index().search(search_query)

    # Report both the page size actually returned and the total hit count.
    return {'number_found': search_results.number_found,
            'returned_count': len(search_results.results),
            'index_results': search_results}
class SearchTest(loanertest.EndpointsTestCase, parameterized.TestCase):
    """Tests for the search_utils helpers (query building, document-to-message
    conversion, paging math, and query-option construction)."""

    # Fixed assignment date shared by the Device fixtures below.
    _ASSIGNED_DATE = datetime.datetime(year=2017, month=1, day=1)

    @parameterized.parameters((
        shelf_messages.Shelf(location='NY', capacity=50),
        'location:NY capacity:50 enabled:True',
    ), (
        shelf_messages.Shelf(location='NY', capacity=50, enabled=False),
        'location:NY capacity:50 enabled:False',
    ))
    def test_to_query(self, message, expected_query):
        """Tests the creation of a valid search query from ndb properties."""
        query = search_utils.to_query(message, shelf_model.Shelf)
        # The query is split because ndb properties are unordered when called by
        # model_class._properties. This test would be flaky otherwise.
        self.assertCountEqual(query.split(' '), expected_query.split(' '))

    @parameterized.named_parameters(
        # Each case: (message template, search document, expected message,
        # expected number of logging.error calls).
        ('Shelf Message', shelf_messages.Shelf(),
         search.ScoredDocument(
             doc_id='test_doc_id',
             fields=[
                 search.NumberField(name='capacity', value=20.0),
                 search.TextField(name='location', value='US MTV'),
                 search.AtomField(name='location', value='US-MTV'),
                 search.AtomField(name='enabled', value='True'),
                 search.GeoField(
                     name='lat_long', value=search.GeoPoint(52.37, 4.88)),
                 # Field with no matching message attribute: expected to
                 # trigger one logging.error call (log_call_count=1).
                 search.TextField(name='not_present', value='MTV')
             ]),
         shelf_messages.Shelf(
             enabled=True, location='US-MTV', capacity=20,
             latitude=52.37, longitude=4.88), 1),
        ('Device Message', device_messages.Device(),
         search.ScoredDocument(
             doc_id='test_doc_id',
             fields=[
                 search.DateField(
                     name='assignment_date', value=_ASSIGNED_DATE),
                 search.TextField(name='serial_number', value='1234'),
                 search.AtomField(name='enrolled', value='True'),
                 search.TextField(name='assigned_user', value='user')
             ]),
         device_messages.Device(
             enrolled=True,
             serial_number='1234',
             assigned_user='******',
             max_extend_date=_ASSIGNED_DATE + datetime.timedelta(days=14),
             assignment_date=_ASSIGNED_DATE), 0))
    def test_document_to_message(self, message, test_search_document,
                                 expected_message, log_call_count):
        """Tests the creation of a protorpc message from a search document."""
        with mock.patch.object(
                search_utils, 'logging', autospec=True) as mock_logging:
            response_message = search_utils.document_to_message(
                test_search_document, message)
            self.assertEqual(response_message, expected_message)
            self.assertEqual(mock_logging.error.call_count, log_call_count)

    def test_calculate_page_offset(self):
        """Tests the calculation of page offset."""
        page_size = 10
        page_number = 5
        offset = search_utils.calculate_page_offset(page_size, page_number)
        self.assertEqual(40, offset)

    def test_calculate_total_pages(self):
        """Tests the calculation of total pages."""
        page_size = 6
        total_results = 11
        total_pages = search_utils.calculate_total_pages(
            page_size, total_results)
        self.assertEqual(2, total_pages)

    @parameterized.named_parameters(
        {
            'testcase_name': 'QueryStringOnly',
            'request':
                shared_messages.SearchRequest(query_string='enrolled:True'),
            'expected_values': ('enrolled:True', None, [])
        },
        {
            'testcase_name': 'QueryStringWithReturnedFields',
            'request':
                shared_messages.SearchRequest(
                    query_string='location:US-NYC',
                    returned_fields=['location']),
            'expected_values': ('location:US-NYC', None, ['location'])
        },
    )
    def test_set_search_query_options(self, request, expected_values):
        """Tests setting the query options without sort options from message."""
        returned_query, returned_sort_options, returned_returned_fields = (
            search_utils.set_search_query_options(request))
        expected_query, expected_sort_options, expcted_returned_fields = (
            expected_values)
        self.assertEqual(expected_sort_options, returned_sort_options)
        self.assertEqual(expected_query, returned_query)
        self.assertEqual(expcted_returned_fields, returned_returned_fields)

    @parameterized.named_parameters(
        {
            'testcase_name': 'ExpressionWithDirection',
            'request':
                shared_messages.SearchRequest(
                    query_string='enrolled:True',
                    expressions=[
                        shared_messages.SearchExpression(
                            expression='enrolled',
                            direction=shared_messages.SortDirection.ASCENDING)
                    ]),
            'expected_sort_options_expressions': [
                search.SortExpression(
                    expression='enrolled',
                    direction=search.SortExpression.ASCENDING)
            ]
        },
        {
            'testcase_name': 'MultipleExpressionsWithDirection',
            'request':
                shared_messages.SearchRequest(
                    query_string='enrolled:True',
                    expressions=[
                        shared_messages.SearchExpression(
                            expression='enrolled',
                            direction=shared_messages.SortDirection.ASCENDING),
                        shared_messages.SearchExpression(
                            expression='serial_number',
                            direction=shared_messages.SortDirection.DESCENDING)
                    ]),
            'expected_sort_options_expressions': [
                search.SortExpression(
                    expression='enrolled',
                    direction=search.SortExpression.ASCENDING),
                search.SortExpression(
                    expression='serial_number',
                    direction=search.SortExpression.DESCENDING)
            ]
        },
        {
            'testcase_name': 'ExpressionWithoutDirection',
            'request':
                shared_messages.SearchRequest(
                    query_string='enrolled:True',
                    expressions=[
                        shared_messages.SearchExpression(expression='enrolled')
                    ]),
            'expected_sort_options_expressions':
                [search.SortExpression(expression='enrolled')]
        },
        {
            # NOTE(review): with multiple direction-less expressions the
            # expected direction is DESCENDING, unlike the single
            # direction-less case above — presumably the implementation's
            # default; confirm against search_utils.
            'testcase_name': 'MultipleExpressionsWithoutDirection',
            'request':
                shared_messages.SearchRequest(
                    query_string='enrolled:True',
                    expressions=[
                        shared_messages.SearchExpression(expression='enrolled'),
                        shared_messages.SearchExpression(
                            expression='serial_number')
                    ]),
            'expected_sort_options_expressions': [
                search.SortExpression(
                    expression='enrolled',
                    direction=search.SortExpression.DESCENDING),
                search.SortExpression(
                    expression='serial_number',
                    direction=search.SortExpression.DESCENDING)
            ]
        },
    )
    def test_set_search_query_options_with_sort_options(
            self, request, expected_sort_options_expressions):
        """Tests setting query options with sort options from message."""
        returned_query, returned_sort_options, returned_returned_fields = (
            search_utils.set_search_query_options(request))
        del returned_query  # Unused.
        del returned_returned_fields  # Unused.
        # Compare expression/direction pairwise against the expected list.
        for i in range(len(returned_sort_options.expressions)):
            self.assertEqual(returned_sort_options.expressions[i].expression,
                             expected_sort_options_expressions[i].expression)
            self.assertEqual(returned_sort_options.expressions[i].direction,
                             expected_sort_options_expressions[i].direction)
def get(self, request, *args, **kwargs):
    """
    **Description**: Method fetches top 20 tags from search database on
    loading and handles search queries on users. Users are sorted by amount
    of their uploaded datasets.
    """
    # Python 2 code: note the legacy `<>` inequality operator below.
    active_index = "user_index"
    limit = 50
    page = 1
    results = []
    query_raw = ""
    # Build the search query from the 'search_bar' GET parameter; an empty
    # query string matches all users.
    if request.GET.get('search_bar') is not None:
        query_raw = request.GET.get('search_bar')
        if query_raw <> "":
            final_query = "user_name_tokenized: " + SearchDatasetView.validate_search_term(query_raw)
        else:
            final_query = ""
    else:
        query_raw = final_query = ""
    searchForm = SearchUsersForm(initial={'search_bar': query_raw})
    # Translate the 1-based 'page' parameter into a result offset.
    if request.GET.get('page') is not None and request.GET.get('page') != "":
        if int(request.GET.get('page')) == 0:
            page = 1
        else:
            page = int(request.GET.get('page'))
        offset = (limit * page) - limit
    else:
        offset = 0
    try:
        index = search.Index(active_index)
        # Sort users by how many datasets they have uploaded, descending.
        sort1 = search.SortExpression(
            expression='dataset_counter',
            direction=search.SortExpression.DESCENDING,
            default_value=0)
        sortops = search.SortOptions(expressions=[sort1])
        options = search.QueryOptions(
            offset=offset,
            limit=limit,
            #sort_options = sortops,
            returned_fields=['user_name', 'dataset_counter', 'profile_image'],
            sort_options=sortops
        )
        searchQuery = search.Query(query_string=final_query, options=options)
        search_results = index.search(searchQuery)
        number_of_results = search_results.number_found  # number of all available results
        number_of_pages = int((number_of_results / limit)) + 1
        number_of_returned_results = len(search_results.results)
        for doc in search_results:
            user_name = doc.field('user_name').value
            dataset_count = int(doc.field('dataset_counter').value)
            profile_image = doc.field('profile_image').value
            results.append({'user_name': user_name, 'dataset_count': dataset_count, 'profile_image': profile_image})
    except search.Error:
        return render(request, self.template_name, {'error': 'We are sorry. Search failed. Try again later please.'})
    searchForm = SearchUsersForm(initial={'search_bar': query_raw})
    return render(request, self.template_name, {'form': searchForm, 'number_of_results': number_of_results, 'results': results, 'page': page, 'number_of_pages': number_of_pages, 'search_query': query_raw})