def get(self, terms):
    """Search the 'repo' index for `terms` and write a JSON response.

    Query parameters: 'noscore' disables relevance scoring, 'noresults'
    suppresses result bodies (implying a count), 'count' requests a total,
    'cursor' resumes paging, 'limit' caps the page size (max 20).

    Tasklet-style generator: metadata lookups are yielded as ndb futures.
    """
    self.response.headers['Access-Control-Allow-Origin'] = '*'
    want_scoring = self.request.get('noscore', None) is None
    want_results = self.request.get('noresults', None) is None
    want_count = self.request.get('count', None) is not None
    cursor_token = self.request.get('cursor', None)
    if not want_results:
        # Count-only request: scoring is pointless and a count is implied.
        want_scoring = False
        want_count = True
    try:
        page_size = min(20, int(self.request.get('limit', 20)))
    except ValueError:
        self.response.set_status(400)
        return
    cursor = search.Cursor(web_safe_string=cursor_token)
    try:
        query_options = search.QueryOptions(
            limit=page_size,
            # Accuracy refers to accurate till n results.
            number_found_accuracy=2000 if want_count else None,
            sort_options=(search.SortOptions(match_scorer=search.MatchScorer())
                          if want_scoring else None),
            cursor=cursor)
        found = search.Index('repo').search(
            search.Query(query_string=terms, options=query_options))
        cursor = found.cursor
    except search.QueryError:
        self.response.set_status(400)
        self.response.write('bad query')
        return
    total = found.number_found
    if want_results:
        futures = []
        for hit in found.results:
            owner, repo = hit.doc_id.split('/')
            version = None
            for field in hit.fields:
                if field.name == 'version':
                    version = field.value
                    break
            library_key = ndb.Key(Library, Library.id(owner, repo))
            futures.append(LibraryMetadata.brief_async(
                library_key, version, assume_latest=True))
        briefs = []
        for future in futures:
            brief = yield future
            if brief is None:
                # Fixup count when we skip over incomplete entries.
                total = total - 1
            else:
                briefs.append(brief)
    payload = {
        'cursor': cursor.web_safe_string if cursor and want_results else None,
    }
    if want_count:
        payload['count'] = total
    if want_results:
        payload['results'] = briefs
    self.response.headers['Content-Type'] = 'application/json'
    self.response.write(json.dumps(payload))
def post(self):
    """Run the submitted search against the 'private' and 'public' indexes
    and render the results template.

    The 'include' parameter filters which indexes are searched; the
    'book_filter' parameter restricts matches to one book. The private
    index is additionally scoped to the current session's account.
    """
    try:
        term = self.request.get('q')
        include = self.request.get('include')
        logging.info("Searching for " + term)
        query = "(name:%s OR html:%s)" % (term, term)
        book = self.request.get('book_filter')
        if book is not None and len(book.strip()) > 0:
            query = "book:%s AND %s" % (book, query)
        opts = search.QueryOptions(
            limit=100,
            snippeted_fields=['html'],
            sort_options=search.SortOptions(match_scorer=search.MatchScorer()))
        per_index = []
        for index_name in ["private", "public"]:
            # Skip indexes the caller explicitly excluded via 'include'.
            excluded = (include is not None and len(include.strip()) > 0
                        and include.find(index_name) == -1)
            if excluded:
                per_index.append({'count': -1, 'results': [], 'show': False})
                continue
            if index_name == "private":
                active_q = "owners:%s AND %s" % (
                    get_current_session().get("account"), query)
            else:
                active_q = query
            hits = search.Index(index_name).search(
                search.Query(query_string=active_q, options=opts))
            matches = []
            for doc in hits:
                internal = db.get(doc.doc_id)
                # Only surface documents whose datastore entity still exists.
                if internal is not None:
                    logging.info("Got expressions %s" % doc.expressions)
                    matches.append({
                        "snippets": doc.expressions,
                        "internal": internal
                    })
            per_index.append({
                'count': hits.number_found,
                'results': matches,
                'show': True
            })
        template_values = {
            "current_user": get_current_session().get("account"),
            "private_results": per_index[0]['results'],
            "private_count": per_index[0]['count'],
            "private_show": per_index[0]['show'],
            "public_results": per_index[1]['results'],
            "public_count": per_index[1]['count'],
            "public_show": per_index[1]['show']
        }
        path = os.path.join(os.path.dirname(__file__),
                            'html/search_results.html')
        self.response.out.write(template.render(path, template_values))
    except search.Error:
        respondWithMessage(self, "Search error")
def _buildQuery(self, query, sortq, sort_dict, doc_limit, offsetval):
    """Build and return a search query object."""
    # computed and returned fields examples. Their use is not required
    # for the application to function correctly.
    computed_expr = search.FieldExpression(name='adjusted_price',
                                           expression='price * 1.08')
    returned_fields = [
        docs.Product.PID, docs.Product.DESCRIPTION, docs.Product.CATEGORY,
        docs.Product.AVG_RATING, docs.Product.PRICE,
        docs.Product.PRODUCT_NAME
    ]
    if sortq == 'relevance':
        # Sorting on 'relevance' uses the Match scorer.
        sortopts = search.SortOptions(match_scorer=search.MatchScorer())
    else:
        # Otherwise the selected field is the first sort dimension and the
        # average rating the second — unless rating itself is the primary
        # sort, in which case price breaks ties instead.
        # Sort direction and defaults come from 'sort_dict'.
        if sortq == docs.Product.AVG_RATING:
            secondary = docs.Product.PRICE
        else:
            secondary = docs.Product.AVG_RATING
        sortopts = search.SortOptions(
            expressions=[sort_dict.get(sortq), sort_dict.get(secondary)])
        # logging.info("sortopts: %s", sortopts)
    # Both branches share the same query options apart from sort_options.
    return search.Query(
        query_string=query.strip(),
        options=search.QueryOptions(
            limit=doc_limit,
            offset=offsetval,
            sort_options=sortopts,
            snippeted_fields=[docs.Product.DESCRIPTION],
            returned_expressions=[computed_expr],
            returned_fields=returned_fields))
def search(repo, query_txt, max_results):
    """
    Searches person with index.
    Query_txt must match at least a part of person name.
    (It's not allowed to search only by location.)
    Args:
        repo: The name of repository
        query_txt: Search query
        max_results: The max number of results you want. (Maximum: 1000)
    Returns:
        - Array of <model.Person> in datastore
        - []: If query_txt doesn't contain a part of person name
    Raises:
        search.Error: An error occurred when the index name is unknown
            or the query has syntax error.
    """
    if not query_txt:
        return []
    # Remove double quotes so that we can safely apply
    # enclose_in_double_quotes().
    query_txt = re.sub('"', '', query_txt)
    romanized_query = create_romanized_query_txt(query_txt)
    non_romanized_query = create_non_romanized_query(query_txt)
    person_location_index = appengine_search.Index(
        name=PERSON_LOCATION_FULL_TEXT_INDEX_NAME)
    expressions = create_sort_expressions()
    sort_opt = appengine_search.SortOptions(
        expressions=expressions,
        match_scorer=appengine_search.MatchScorer())
    # Define the fields that need to be returned per romanize method.
    returned_name_fields = [u'names_romanized_by_' + method.__name__
                            for method in ROMANIZE_METHODS]
    returned_fields = returned_name_fields + ['record_id']
    options = appengine_search.QueryOptions(
        limit=max_results,
        sort_options=sort_opt,
        returned_fields=returned_fields)
    # enclose_in_double_quotes is used for avoiding query_txt
    # which specifies index field name, contains special symbol, ...
    # (e.g., "repo: repository_name", "test: test", "test AND test").
    and_query = romanized_query + ' AND (repo: ' + repo + ')'
    person_location_index_results = person_location_index.search(
        appengine_search.Query(
            query_string=and_query,
            options=options))
    # To rank exact matches higher than non-exact matches with the same
    # romanization, also run the non-romanized form of the query.
    non_romanized_and_query = non_romanized_query + ' AND (repo: ' + repo + ')'
    non_romanized_person_location_index_results = person_location_index.search(
        appengine_search.Query(
            query_string=non_romanized_and_query,
            options=options))
    results_list = [non_romanized_person_location_index_results,
                    person_location_index_results]
    index_results = get_person_ids_from_results(
        query_txt, results_list, returned_name_fields)
    results = []
    # 'record_id' rather than 'id': the loop variable previously shadowed
    # the builtin id().
    for record_id in index_results:
        person = model.Person.get(repo, record_id, filter_expired=True)
        if person:
            results.append(person)
    return results
def get(self):
    """Run a sample full-text search and render the result page.

    Query-string parameters: 'q' (free text matched against content/name),
    'mobile_coupon' (coupon filter), 'latlong' (sort results by distance
    from this "lat,long" point).
    """
    # QUERY STRING
    q = self.request.get('q', default_value='')
    mobile_coupon = self.request.get('mobile_coupon', default_value='')
    latlong = self.request.get('latlong', default_value='')
    results = []
    number_found = 0
    try:
        index = search.Index(name='sample')
        # Sort by distance from the supplied location, when given.
        expressions = []
        if latlong:
            expressions.append(
                search.SortExpression(
                    expression='distance(location, geopoint(%s))' % latlong,
                    direction=search.SortExpression.ASCENDING,
                    default_value=None))
        # Sort-key configuration.
        sort_opts = search.SortOptions(match_scorer=search.MatchScorer(),
                                       expressions=expressions)
        # Result-field configuration.
        options = search.QueryOptions(limit=100,
                                      returned_fields=[
                                          'name', 'content', 'image',
                                          'address', 'tel', 'location'
                                      ],
                                      snippeted_fields=['content'],
                                      sort_options=sort_opts,
                                      number_found_accuracy=10000,
                                      cursor=None)
        # Build the search query string.
        query_string = u''
        if q:
            query_string = u'(content:(%s) OR name:(%s))' % (q, q)
        if mobile_coupon:
            query_string += u' mobile_coupon:(%s)' % (mobile_coupon)
        # Execute the search.
        query = search.Query(query_string=query_string, options=options)
        documents = index.search(query)
        # Collect the search results.
        number_found = documents.number_found
        for document in documents:
            # Assemble the snippet text from the snippet expressions.
            expressions = []
            for expression in document.expressions:
                expressions.append(expression.value)
            results.append({
                'doc_id': document.doc_id,
                'name': document.field('name').value,
                'content': document.field('content').value,
                'image': document.field('image').value,
                'snippet': ''.join(expressions),
                'address': document.field('address').value,
                'tel': document.field('tel').value
            })
        # logging.info("#-- SearchHandler : results:%s" % (results))
    except Exception as e:
        # BUG FIX: the message previously interpolated the *builtin* id
        # function (no local 'id' exists here), which logged
        # "<built-in function id>". Only the exception is meaningful.
        logging.exception("#-- SearchHandler Error: exception:%s" % e)
    template = JINJA_ENVIRONMENT.get_template('index.html')
    self.response.write(
        template.render({
            'q': q,
            'mobile_coupon': mobile_coupon,
            'latlong': latlong,
            'results': results,
            'number_found': number_found
        }))
def search_line(line, index):
    """Find the best-matching documents in `index` for one line of text.

    Generates candidate queries for the line, scores each by the average
    relevance of its top matches, re-runs the winning query asking for
    snippets, and returns
    [avg_score, [(doc_id, snippet), ...], start, end] — or None when no
    query can be generated, a search fails, or the best score is negative.
    """
    line = line + " I"
    queries_with_text = generate_queries_for_line(line)
    if not queries_with_text:
        return None
    # Sort options: score by match relevance.
    sort_opts = search.SortOptions(match_scorer=search.MatchScorer())
    relevance_field_expr = search.FieldExpression(name="relevance_score",
                                                 expression="_score")
    # Query options (including returned fields).
    query_opts = search.QueryOptions(
        sort_options=sort_opts,
        returned_fields=["doc_id_text"],
        returned_expressions=[relevance_field_expr])
    query_results = []
    # For each candidate query, run it and record the results.
    for query_text, start, end in queries_with_text:
        query = search.Query(query_string=query_text.strip(),
                             options=query_opts)
        try:
            search_results = index.search(query)
        except search.Error:
            return None
        num_found = search_results.number_found
        num_returned = len(search_results.results)
        # NOTE(review): assert is stripped under -O; kept for parity with
        # the original behavior.
        assert num_found == num_returned, "Too many documents"
        i = 0
        associated_docs = []
        doc_scores = []
        # Take the 4 highest-matching docs for this query.
        for doc in reversed(search_results.results):
            if i > 3:
                break
            associated_docs.append(doc)
            for expr in doc.expressions:
                if expr.name == "relevance_score":
                    try:
                        doc_scores.append(int(expr.value))
                        break
                    except (TypeError, ValueError):
                        # Non-numeric score: skip it rather than crash.
                        pass
            i += 1
        logging.info(doc_scores)
        avg_score = (sum(doc_scores) / float(len(doc_scores))
                     if len(doc_scores) else -9999999)
        good_doc_ids = [doc.doc_id for doc in associated_docs]
        query_results.append((avg_score, good_doc_ids, query_text, start, end))
    # Choose the best query by average score.
    print(query_results)
    best_query = max(query_results, key=lambda x: x[0])
    # Re-run the winning query, this time asking for a snippet from each
    # document's text field.
    final_query = search.Query(query_string=best_query[2].strip(),
                               options=search.QueryOptions(
                                   sort_options=sort_opts,
                                   returned_fields=["doc_id_text"],
                                   snippeted_fields=["doc_text"]))
    try:
        best_results = index.search(final_query)
    except search.Error:
        return None
    i = 0
    associated_docs = []
    for doc in reversed(best_results.results):
        # Gets the 4 highest-matching docs for this query.
        if i > 3:
            break
        associated_docs.append(doc)
        i += 1
    doc_scores = [doc.sort_scores[0] for doc in associated_docs]
    avg_score = (sum(doc_scores) / float(len(doc_scores))
                 if len(doc_scores) else -9999999)
    ids_and_blurbs = []
    for doc in associated_docs:
        for expr in doc.expressions:
            if expr.name == "doc_text":
                description_snippet = expr.value
                ids_and_blurbs.append((doc.doc_id, description_snippet))
                break
    final_ret_val = [avg_score, ids_and_blurbs, best_query[3], best_query[4]]
    # still need to add KhanAcademy here
    if final_ret_val[0] < 0:
        return None
    return final_ret_val
def _buildQuery(self, query, sortq, sort_dict, doc_limit, offsetval,
                create_playlist, show_recommendations):
    """Build and return a search query object."""
    # computed and returned fields examples. Their use is not required
    # for the application to function correctly.
    # computed_expr = search.FieldExpression(
    #     name='adjusted_duration_min', expression='duration_min * 1.08')
    if create_playlist:
        returned_fields = [docs.Video.VID, docs.Video.CATEGORY]
    else:
        returned_fields = [
            docs.Video.UNIQUEID,
            docs.Video.TITLE,
            docs.Video.CATEGORY,
            docs.Video.SUBTITLE,
            docs.Video.DURATION_MIN,
            docs.Video.SPEAKERS,
            docs.Video.DESCRIPTION,
            docs.Video.PUBLISHED_DATE,
            docs.Video.VIEWS,
            docs.Video.SLIDES_LINK,
            docs.Video.TAGS,
            docs.Video.IMAGE,
            docs.Video.VID,
            docs.Video.SESSIONID,
            docs.Video.AVG_RATING,
        ]
    if sortq == 'relevance':
        # Sorting on 'relevance' uses the Match scorer.
        sortopts = search.SortOptions(match_scorer=search.MatchScorer())
    elif sortq == docs.Video.AVG_RATING:
        # Rating is the primary sort dimension; duration breaks ties.
        # Sort direction and defaults come from 'sort_dict'.
        sortopts = search.SortOptions(expressions=[
            sort_dict.get(sortq),
            sort_dict.get(docs.Video.DURATION_MIN)
        ])
    else:
        # expr_list = [sort_dict.get(sortq),
        #              sort_dict.get(docs.Video.AVG_RATING)]
        sortopts = search.SortOptions(expressions=[sort_dict.get(sortq)])
        # logging.info("sortopts: %s", sortopts)
    # All branches share the same query options apart from sort_options.
    snippeted = [] if show_recommendations else [docs.Video.DESCRIPTION]
    return search.Query(
        query_string=query.strip(),
        options=search.QueryOptions(
            limit=doc_limit,
            offset=offsetval,
            sort_options=sortopts,
            snippeted_fields=snippeted,
            # returned_expressions=[computed_expr],
            returned_fields=returned_fields))
def get(self, request, *args, **kwargs):
    """
    **Description**: This method handles GET requests. It looks for a
    search query string in the request header and if it is found the
    search process is run. Returned results are then rendered in the
    loaded template and listed on the page.

    :param self:
    :param request: Includes a search query string submitted from a search form.
    :param *args:
    :param **kwargs:
    :return:
    """
    number_of_results = 0
    results = []
    limit = 300
    if request.GET.get('search_bar') is not None:
        query = ""
        term_raw = unquote(request.GET.get('search_bar'))
        term = SearchDatasetView.validate_search_term(term_raw)
        # term = term_array.decode('utf-8','replace')
        filters = {
            'search_bar': term_raw,
            'authors': False,
            'citations': False,
            'titles': False,
            'descriptions': False,
            'tags': False,
            'time': 4
        }
        # BUG FIX: replaced the Python-2-only '<>' operator with '!='
        # (identical semantics, also valid on Python 3).
        if term != "":
            query = "("
            if request.GET.get('authors') is not None:
                query += "(author_tokenized:" + term + ") OR "
                filters['authors'] = True
            if request.GET.get('citations') is not None:
                query += "(citation:" + term + ") OR "
                filters['citations'] = True
            if request.GET.get("titles") is not None:
                query += "(title_tokenized:" + term + ") OR "
                filters['titles'] = True
            if request.GET.get("descriptions") is not None:
                query += "(description:" + term + ") OR "
                filters['descriptions'] = True
            if request.GET.get("tags") is not None:
                query += "(tag:" + term + ") OR "
                filters['tags'] = True
            if query == "(":
                # No field checkbox was selected: search every field.
                query = "(author_tokenized: " + term + ") OR (citation:" + term + ") OR (title_tokenized:" + term + ") OR (description:" + term + ") OR (tag:" + term + ") AND "
                filters = {
                    'search_bar': term_raw,
                    'authors': True,
                    'citations': True,
                    'titles': True,
                    'descriptions': True,
                    'tags': True
                }
            else:
                # Drop the trailing " OR " and close the group.
                query = query[:-4] + ") AND "
        else:
            filters = {
                'search_bar': term_raw,
                'authors': True,
                'citations': True,
                'titles': True,
                'descriptions': True,
                'tags': True
            }
        if request.GET.get(
                "time") is not None and request.GET.get("time") != '4':
            time_index = request.GET.get("time")
            if time_index == '0':
                oldest_date = datetime.datetime.strftime(
                    datetime.datetime.now() - datetime.timedelta(days=1),
                    '%Y-%m-%d')
                filters['time'] = 0
            elif time_index == '1':
                oldest_date = datetime.datetime.strftime(
                    datetime.datetime.now() - datetime.timedelta(weeks=1),
                    '%Y-%m-%d')
                filters['time'] = 1
            elif time_index == '2':
                oldest_date = datetime.datetime.strftime(
                    datetime.datetime.now() - datetime.timedelta(days=30),
                    '%Y-%m-%d')
                filters['time'] = 2
            elif time_index == '3':
                today = datetime.date.today()
                oldest_date = datetime.datetime.strftime(
                    datetime.datetime.today().replace(year=today.year - 1),
                    '%Y-%m-%d')
                filters['time'] = 3
            # NOTE(review): a 'time' value outside '0'-'3' would leave
            # oldest_date unbound here — presumably the form only submits
            # those values; confirm against the template.
            query += "uploaded >= \"" + oldest_date + "\""
        else:
            query = query[:-4]
            filters['time'] = 4
        searchForm = SearchForm(initial=filters)
        try:
            index = search.Index(active_index)
            sortops = search.SortOptions(match_scorer=search.MatchScorer())
            options = search.QueryOptions(limit=limit,
                                          returned_fields=[
                                              'title', 'author', 'uploaded',
                                              'description', 'id_datastore',
                                              'url_alias', 'tag'
                                          ],
                                          sort_options=sortops)
            searchQuery = search.Query(query_string=query, options=options)
            # print "\n\n final_query: "+query
            search_results = index.search(searchQuery)
            # Number of all available results (not just the returned page).
            number_of_results = search_results.number_found
            # number_of_pages = int((number_of_results / limit)) + 1
            for doc in search_results:
                title = doc.field('title').value
                author = doc.field('author').value
                uploaded = doc.field('uploaded').value
                # description = doc.field('description').value
                description = mark_safe(
                    self.create_snippets(term_raw,
                                         doc.field('description').value))
                id_datastore = doc.field('id_datastore').value
                url_alias = doc.field('url_alias').value
                tags = [field.value for field in doc['tag']]
                results.append([
                    title, author, uploaded, description, id_datastore,
                    url_alias, tags
                ])
        except search.Error:
            return render(
                request, self.template_name, {
                    'error':
                    'We are sorry. Search failed. Try again later please.'
                })
    else:
        # No search submitted: show the empty form with all filters on.
        searchForm = SearchForm(
            initial={
                'authors': True,
                'citations': True,
                'titles': True,
                'descriptions': True,
                'tags': True,
                'time': 4
            })
        return render(request, self.template_name, {'form': searchForm})
    return render(
        request, self.template_name, {
            'form': searchForm,
            'number_of_results': number_of_results,
            'results': results,
            'search_query': query
        })