Python MatchScorer примеры, google.appengine.api.search.MatchScorer Python примеры использования

Пример #1

0

Показать файл

Файл: api.py Проект: webcomponents/webcomponents.org

  def get(self, terms):
    """Search the 'repo' index for `terms` and write the matches as JSON.

    Query parameters read from the request:
      noscore: when present, results are not ordered by match score.
      noresults: when present, only the count is returned (this forces
        scoring off and the count on).
      count: when present, the number of matches is added to the response.
      cursor: web-safe cursor string used to continue an earlier search.
      limit: maximum number of results; values above 20 are capped at 20.

    Responds with status 400 when `limit` is not a positive integer, when
    the cursor string is rejected, or when the query cannot be parsed.

    This method is a generator: it yields each future returned by
    LibraryMetadata.brief_async and expects the resolved value to be sent
    back in (presumably it is driven as an ndb tasklet -- confirm against the
    decorator, which is outside this view).
    """
    self.response.headers['Access-Control-Allow-Origin'] = '*'
    scoring = self.request.get('noscore', None) is None
    include_results = self.request.get('noresults', None) is None
    include_count = self.request.get('count', None) is not None
    request_cursor = self.request.get('cursor', None)

    if not include_results:
      scoring = False
      include_count = True

    try:
      limit = min(20, int(self.request.get('limit', 20)))
      # Per the Search API docs QueryOptions rejects a limit below 1 with a
      # ValueError, which the QueryError handler further down would not
      # catch; treat it as a client error here instead.
      if limit < 1:
        raise ValueError('limit must be a positive integer')
      # A malformed web-safe cursor string is likewise reported by the Search
      # API as a ValueError, so it is also answered with a 400.
      cursor = search.Cursor(web_safe_string=request_cursor)
    except ValueError:
      self.response.set_status(400)
      return

    index = search.Index('repo')
    try:
      # Accuracy refers to accurate till n results.
      accuracy = 2000 if include_count else None
      sort_options = search.SortOptions(match_scorer=search.MatchScorer()) if scoring else None
      query_options = search.QueryOptions(limit=limit, number_found_accuracy=accuracy, sort_options=sort_options, cursor=cursor)
      search_results = index.search(search.Query(query_string=terms, options=query_options))
      cursor = search_results.cursor
    except search.QueryError:
      self.response.set_status(400)
      self.response.write('bad query')
      return

    count = search_results.number_found
    if include_results:
      # Start every metadata lookup before waiting on any of them.
      result_futures = []
      for document in search_results.results:
        (owner, repo) = document.doc_id.split('/')
        version = None
        for field in document.fields:
          if field.name == 'version':
            version = field.value
            break
        library_key = ndb.Key(Library, Library.id(owner, repo))
        result_futures.append(LibraryMetadata.brief_async(library_key, version, assume_latest=True))

      results = []
      for future in result_futures:
        brief = yield future
        if brief is None:
          # Fixup count when we skip over incomplete entries.
          count -= 1
        else:
          results.append(brief)

    payload = {
        'cursor': cursor.web_safe_string if cursor and include_results else None,
    }
    if include_count:
      payload['count'] = count
    if include_results:
      payload['results'] = results

    self.response.headers['Content-Type'] = 'application/json'
    self.response.write(json.dumps(payload))

Пример #2

0

Показать файл

Файл: main.py Проект: JL-Moriarty/epubhub

    def post(self):
        """Search the "private" and "public" indexes and render the results.

        Reads `q` (search text), `include` (optional text naming which
        indexes to search) and `book_filter` (optional book restriction) from
        the request.  An index whose name does not occur in a non-blank
        `include` value is skipped and reported with a count of -1 and
        `show` set to False.  Any search.Error is reported through
        respondWithMessage.
        """
        try:
            q = self.request.get('q')
            include = self.request.get('include')
            logging.info("Searching for " + q)

            query = "(name:%s OR html:%s)" % (q, q)
            book = self.request.get('book_filter')
            if book is not None and len(book.strip()) > 0:
                query = "book:%s AND %s" % (book, query)

            opts = search.QueryOptions(
                limit=100,
                snippeted_fields=['html'],
                sort_options=search.SortOptions(
                    match_scorer=search.MatchScorer()))

            # One summary dict per index, in the order private, public.
            summaries = []
            for index_name in ("private", "public"):
                include_given = (include is not None
                                 and len(include.strip()) > 0)
                if include_given and include.find(index_name) == -1:
                    summaries.append(
                        {'count': -1, 'results': [], 'show': False})
                    continue

                index = search.Index(index_name)
                if index_name == "private":
                    # Private documents are restricted to the current account.
                    account = get_current_session().get("account")
                    active_q = "owners:%s AND %s" % (account, query)
                else:
                    active_q = query

                search_results = index.search(
                    search.Query(query_string=active_q, options=opts))

                hits = []
                for doc in search_results:
                    internal = db.get(doc.doc_id)
                    if internal is None:
                        # No datastore entity for this document; skip it.
                        continue
                    logging.info("Got expressions %s" % doc.expressions)
                    hits.append({
                        "snippets": doc.expressions,
                        "internal": internal
                    })

                summaries.append({
                    'count': search_results.number_found,
                    'results': hits,
                    'show': True
                })

            private, public = summaries
            template_values = {
                "current_user": get_current_session().get("account"),
                "private_results": private['results'],
                "private_count": private['count'],
                "private_show": private['show'],
                "public_results": public['results'],
                "public_count": public['count'],
                "public_show": public['show']
            }
            path = os.path.join(os.path.dirname(__file__),
                                'html/search_results.html')
            self.response.out.write(template.render(path, template_values))
        except search.Error:
            respondWithMessage(self, "Search error")

Пример #3

0

Показать файл

Файл: handlers.py Проект: abzaloid/webapp2-search

    def _buildQuery(self, query, sortq, sort_dict, doc_limit, offsetval):
        """Build and return a search.Query for product documents.

        Args:
            query: raw query string; surrounding whitespace is stripped.
            sortq: 'relevance' to rank with the match scorer, otherwise the
                key in `sort_dict` of the primary sort expression.
            sort_dict: maps sort keys to sort expressions.
            doc_limit: maximum number of documents to return.
            offsetval: offset of the first document to return.

        Returns:
            The configured search.Query.
        """
        # Computed and returned fields are examples only; the application
        # does not need them to function correctly.
        computed_expr = search.FieldExpression(name='adjusted_price',
                                               expression='price * 1.08')
        returned_fields = [
            docs.Product.PID, docs.Product.DESCRIPTION, docs.Product.CATEGORY,
            docs.Product.AVG_RATING, docs.Product.PRICE,
            docs.Product.PRODUCT_NAME
        ]

        if sortq == 'relevance':
            sortopts = search.SortOptions(match_scorer=search.MatchScorer())
        else:
            # The selected field is the first sort dimension.  The average
            # rating is the second one, unless the rating itself was
            # selected, in which case price takes that place.
            if sortq == docs.Product.AVG_RATING:
                secondary = docs.Product.PRICE
            else:
                secondary = docs.Product.AVG_RATING
            sortopts = search.SortOptions(
                expressions=[sort_dict.get(sortq), sort_dict.get(secondary)])

        # Everything except the sort options is the same for both modes.
        return search.Query(
            query_string=query.strip(),
            options=search.QueryOptions(
                limit=doc_limit,
                offset=offsetval,
                sort_options=sortopts,
                snippeted_fields=[docs.Product.DESCRIPTION],
                returned_expressions=[computed_expr],
                returned_fields=returned_fields))

Пример #4

0

Показать файл

def search(repo, query_txt, max_results):
    """Look up persons in the full-text index and load them from datastore.

    The query text has to match at least part of a person name; searching
    by location alone is not supported.

    Args:
        repo: Name of the repository to search in.
        query_txt: The search query.
        max_results: Maximum number of results wanted (at most 1000).

    Returns:
        A list of <model.Person> entities, or [] when query_txt is empty or
        contains no part of a person name.

    Raises:
        search.Error: The index name is unknown or the query has a syntax
                      error.
    """
    if not query_txt:
        return []

    # Double quotes are stripped so the query builders can safely wrap the
    # text in quotes themselves; quoting keeps input such as
    # "repo: repository_name", "test: test" or "test AND test" from being
    # read as field names or operators.
    query_txt = re.sub('"', '', query_txt)
    romanized_query = create_romanized_query_txt(query_txt)
    non_romanized_query = create_non_romanized_query(query_txt)

    index = appengine_search.Index(name=PERSON_LOCATION_FULL_TEXT_INDEX_NAME)

    sort_opt = appengine_search.SortOptions(
        expressions=create_sort_expressions(),
        match_scorer=appengine_search.MatchScorer())

    # One returned name field per romanization method, plus the record id.
    returned_name_fields = [u'names_romanized_by_' + method.__name__
                            for method in ROMANIZE_METHODS]
    options = appengine_search.QueryOptions(
        limit=max_results,
        sort_options=sort_opt,
        returned_fields=returned_name_fields + ['record_id'])

    repo_clause = ' AND (repo: ' + repo + ')'
    romanized_results = index.search(
        appengine_search.Query(
            query_string=romanized_query + repo_clause, options=options))
    # The non-romanized search exists so that exact matches rank above
    # non-exact matches that share the same romanization.
    non_romanized_results = index.search(
        appengine_search.Query(
            query_string=non_romanized_query + repo_clause, options=options))

    person_ids = get_person_ids_from_results(
        query_txt, [non_romanized_results, romanized_results],
        returned_name_fields)

    # Drop ids for which no entity is returned.
    persons = (model.Person.get(repo, person_id, filter_expired=True)
               for person_id in person_ids)
    return [person for person in persons if person]

Пример #5

0

Показать файл

    def get(self):
        """Search the 'sample' index and render index.html with the results.

        Query parameters (all default to ''):
            q: text searched in the `content` and `name` fields.
            mobile_coupon: value required in the `mobile_coupon` field.
            latlong: geopoint arguments; when given, results are sorted by
                ascending distance from that point.

        Any exception raised while searching is logged and the page is
        rendered with whatever results were collected up to that point.
        """
        # Query string parameters.
        q = self.request.get('q', default_value='')
        mobile_coupon = self.request.get('mobile_coupon', default_value='')
        latlong = self.request.get('latlong', default_value='')

        results = []
        number_found = 0
        try:
            index = search.Index(name='sample')
            # Sort by distance from the given location.
            sort_expressions = []
            if latlong:
                sort_expressions.append(
                    search.SortExpression(
                        expression='distance(location, geopoint(%s))' %
                        latlong,
                        direction=search.SortExpression.ASCENDING,
                        default_value=None))
            # Sort key configuration.
            sort_opts = search.SortOptions(match_scorer=search.MatchScorer(),
                                           expressions=sort_expressions)

            # Returned and snippeted fields.
            options = search.QueryOptions(limit=100,
                                          returned_fields=[
                                              'name', 'content', 'image',
                                              'address', 'tel', 'location'
                                          ],
                                          snippeted_fields=['content'],
                                          sort_options=sort_opts,
                                          number_found_accuracy=10000,
                                          cursor=None)

            # Assemble the search query.
            query_string = u''
            if q:
                query_string = u'(content:(%s) OR name:(%s))' % (q, q)
            if mobile_coupon:
                query_string += u' mobile_coupon:(%s)' % (mobile_coupon)

            # Run the search.
            query = search.Query(query_string=query_string, options=options)
            documents = index.search(query)

            # Collect the results.
            number_found = documents.number_found
            for document in documents:
                # The snippet is the concatenation of all expression values.
                snippet = ''.join(
                    expression.value for expression in document.expressions)
                results.append({
                    'doc_id': document.doc_id,
                    'name': document.field('name').value,
                    'content': document.field('content').value,
                    'image': document.field('image').value,
                    'snippet': snippet,
                    'address': document.field('address').value,
                    'tel': document.field('tel').value
                })
            # logging.info("#-- SearchHandler : results:%s" % (results))

        except Exception as e:
            # Best effort: the page is still rendered below.  The original
            # message formatted the builtin function `id`, which carried no
            # information; log the query text instead.
            logging.exception("#-- SearchHandler Error: q:%s exception:%s",
                              q, e)

        template = JINJA_ENVIRONMENT.get_template('index.html')
        self.response.write(
            template.render({
                'q': q,
                'mobile_coupon': mobile_coupon,
                'latlong': latlong,
                'results': results,
                'number_found': number_found
            }))

Пример #6

0

Показать файл

def search_line(line, index):
    """Find the best-scoring query derived from `line` and return its hits.

    Candidate queries come from generate_queries_for_line.  Each candidate
    is run against `index`, and the candidate whose (up to four) examined
    documents have the highest average `_score` is run again with snippets
    of the `doc_text` field.

    Args:
        line: text to search for; " I" is appended before queries are built.
        index: search index the queries are run against.

    Returns:
        [avg_score, [(doc_id, snippet), ...], start, end] for the best
        query, or None when no queries could be generated, a search fails,
        or the final average score is negative.
    """
    line = line + " I"
    queries_with_text = generate_queries_for_line(line)
    if not queries_with_text:
        return None

    sort_opts = search.SortOptions(match_scorer=search.MatchScorer())
    # Expose the match score as a returned expression so it can be read back.
    relevance_field_expr = search.FieldExpression(name="relevance_score",
                                                  expression="_score")
    query_opts = search.QueryOptions(
        sort_options=sort_opts,
        returned_fields=["doc_id_text"],
        returned_expressions=[relevance_field_expr])

    # One (avg_score, doc_ids, query_text, start, end) tuple per candidate.
    query_results = []
    for query_text, start, end in queries_with_text:
        query = search.Query(query_string=query_text.strip(),
                             options=query_opts)
        try:
            search_results = index.search(query)
        except search.Error:
            return None
        num_found = search_results.number_found
        num_returned = len(search_results.results)
        assert num_found == num_returned, "Too many documents"

        # Examine at most four documents, starting from the end of the
        # result list.
        associated_docs = list(reversed(search_results.results))[:4]
        doc_scores = []
        for doc in associated_docs:
            for expr in doc.expressions:
                if expr.name != "relevance_score":
                    continue
                try:
                    score = int(expr.value)
                except (TypeError, ValueError, OverflowError):
                    # Not convertible to int; try the next expression.
                    continue
                doc_scores.append(score)
                break
        logging.info(doc_scores)
        avg_score = sum(doc_scores) / float(len(doc_scores)) if len(
            doc_scores) else -9999999
        good_doc_ids = [doc.doc_id for doc in associated_docs]
        query_results.append(
            (avg_score, good_doc_ids, query_text, start, end))

    logging.info(query_results)
    # On ties max() keeps the earliest candidate.
    best_query = max(query_results, key=lambda x: x[0])

    # Run the winning query again, this time asking for snippets.
    final_query = search.Query(query_string=best_query[2].strip(),
                               options=search.QueryOptions(
                                   sort_options=sort_opts,
                                   returned_fields=["doc_id_text"],
                                   snippeted_fields=["doc_text"]))
    try:
        best_results = index.search(final_query)
    except search.Error:
        return None

    associated_docs = list(reversed(best_results.results))[:4]
    doc_scores = [doc.sort_scores[0] for doc in associated_docs]
    avg_score = sum(doc_scores) / float(len(doc_scores)) if len(
        doc_scores) else -9999999

    ids_and_blurbs = []
    for doc in associated_docs:
        for expr in doc.expressions:
            if expr.name == "doc_text":
                ids_and_blurbs.append((doc.doc_id, expr.value))
                break

    if avg_score < 0:
        return None
    return [avg_score, ids_and_blurbs, best_query[3], best_query[4]]

Пример #7

0

Показать файл

    def _buildQuery(self, query, sortq, sort_dict, doc_limit, offsetval,
                    create_playlist, show_recommendations):
        """Build and return a search.Query for video documents.

        Args:
            query: raw query string; surrounding whitespace is stripped.
            sortq: 'relevance' to rank with the match scorer, otherwise the
                key in `sort_dict` of the primary sort expression.
            sort_dict: maps sort keys to sort expressions.
            doc_limit: maximum number of documents to return.
            offsetval: offset of the first document to return.
            create_playlist: when truthy only the video id and category
                fields are returned.
            show_recommendations: when truthy no snippets are requested.

        Returns:
            The configured search.Query.
        """
        if create_playlist:
            returned_fields = [docs.Video.VID, docs.Video.CATEGORY]
        else:
            returned_fields = [
                docs.Video.UNIQUEID,
                docs.Video.TITLE,
                docs.Video.CATEGORY,
                docs.Video.SUBTITLE,
                docs.Video.DURATION_MIN,
                docs.Video.SPEAKERS,
                docs.Video.DESCRIPTION,
                docs.Video.PUBLISHED_DATE,
                docs.Video.VIEWS,
                docs.Video.SLIDES_LINK,
                docs.Video.TAGS,
                docs.Video.IMAGE,
                docs.Video.VID,
                docs.Video.SESSIONID,
                docs.Video.AVG_RATING,
            ]

        if sortq == 'relevance':
            # Relevance ordering is delegated to the match scorer.
            sortopts = search.SortOptions(match_scorer=search.MatchScorer())
        else:
            # Sort on the selected field; only a sort on the average rating
            # gets a second dimension, which is duration_min.
            expr_list = [sort_dict.get(sortq)]
            if sortq == docs.Video.AVG_RATING:
                expr_list.append(sort_dict.get(docs.Video.DURATION_MIN))
            sortopts = search.SortOptions(expressions=expr_list)

        if show_recommendations:
            snippeted_fields = []
        else:
            snippeted_fields = [docs.Video.DESCRIPTION]

        # Everything except the sort options is the same for both modes.
        return search.Query(
            query_string=query.strip(),
            options=search.QueryOptions(
                limit=doc_limit,
                offset=offsetval,
                sort_options=sortopts,
                snippeted_fields=snippeted_fields,
                returned_fields=returned_fields))

Пример #8

0

Показать файл

Файл: views.py Проект: emz0/old-projects

    def get(self, request, *args, **kwargs):
        """Handle GET requests for the dataset search page.

        When the request carries a `search_bar` parameter a search query is
        built from it, the selected field checkboxes (`authors`,
        `citations`, `titles`, `descriptions`, `tags`) and the `time`
        selector, and is run against the active index.  The matches are
        rendered in the template.  Without `search_bar` only the empty
        search form is rendered.

        :param request: Request holding the submitted search form values.
        :param args: Unused.
        :param kwargs: Unused.
        :return: The rendered response.
        """
        number_of_results = 0
        results = []
        limit = 300

        # Request parameter / filter key paired with the index field it
        # searches.
        field_filters = (
            ('authors', 'author_tokenized'),
            ('citations', 'citation'),
            ('titles', 'title_tokenized'),
            ('descriptions', 'description'),
            ('tags', 'tag'),
        )

        if request.GET.get('search_bar') is None:
            searchForm = SearchForm(
                initial={
                    'authors': True,
                    'citations': True,
                    'titles': True,
                    'descriptions': True,
                    'tags': True,
                    'time': 4
                })
            return render(request, self.template_name, {'form': searchForm})

        query = ""
        term_raw = unquote(request.GET.get('search_bar'))
        term = SearchDatasetView.validate_search_term(term_raw)
        filters = {
            'search_bar': term_raw,
            'authors': False,
            'citations': False,
            'titles': False,
            'descriptions': False,
            'tags': False,
            'time': 4
        }
        if term != "":
            query = "("
            for param, field in field_filters:
                if request.GET.get(param) is not None:
                    query += "(" + field + ":" + term + ") OR "
                    filters[param] = True

            if query == "(":
                # No field was selected: search all of them.
                query = "(author_tokenized: " + term + ") OR (citation:" + term + ") OR (title_tokenized:" + term + ") OR (description:" + term + ") OR (tag:" + term + ") AND "
                for param, _ in field_filters:
                    filters[param] = True
            else:
                # Replace the trailing " OR " with the closing parenthesis.
                query = query[:-4] + ") AND "
        else:
            for param, _ in field_filters:
                filters[param] = True

        # Translate the time selector into the oldest accepted upload date.
        # '4', a missing value and any unrecognised value all mean "no time
        # restriction"; the original left `oldest_date` unbound for
        # unrecognised values and crashed.
        time_index = request.GET.get("time")
        now = datetime.datetime.now()
        oldest = None
        if time_index == '0':
            oldest = now - datetime.timedelta(days=1)
        elif time_index == '1':
            oldest = now - datetime.timedelta(weeks=1)
        elif time_index == '2':
            oldest = now - datetime.timedelta(days=30)
        elif time_index == '3':
            try:
                oldest = now.replace(year=now.year - 1)
            except ValueError:
                # Today is Feb 29 and last year has no such day.
                oldest = now.replace(year=now.year - 1, day=28)

        if oldest is not None:
            query += "uploaded >= \"" + oldest.strftime('%Y-%m-%d') + "\""
            filters['time'] = int(time_index)
        else:
            # Drop the trailing "AND " (a no-op on an empty query).
            query = query[:-4]
            filters['time'] = 4

        searchForm = SearchForm(initial=filters)

        try:
            index = search.Index(active_index)

            sortops = search.SortOptions(match_scorer=search.MatchScorer())

            options = search.QueryOptions(limit=limit,
                                          returned_fields=[
                                              'title', 'author',
                                              'uploaded', 'description',
                                              'id_datastore', 'url_alias',
                                              'tag'
                                          ],
                                          sort_options=sortops)
            searchQuery = search.Query(query_string=query, options=options)

            search_results = index.search(searchQuery)

            # Number of all available results, not only the returned ones.
            number_of_results = search_results.number_found

            for doc in search_results:
                title = doc.field('title').value
                author = doc.field('author').value
                uploaded = doc.field('uploaded').value
                description = mark_safe(
                    self.create_snippets(term_raw,
                                         doc.field('description').value))
                id_datastore = doc.field('id_datastore').value
                url_alias = doc.field('url_alias').value

                tags = [field.value for field in doc['tag']]

                results.append([
                    title, author, uploaded, description, id_datastore,
                    url_alias, tags
                ])

        except search.Error:
            return render(
                request, self.template_name, {
                    'error':
                    'We are sorry. Search failed. Try again later please.'
                })

        return render(
            request, self.template_name, {
                'form': searchForm,
                'number_of_results': number_of_results,
                'results': results,
                'search_query': query
            })

Python MatchScorer примеры использования