예제 #1
0
    def search(self, text, scope=None, limit=20):
        """Search the current index for matching documents.

        Args:
                text (str): String to search for.
                scope (str, optional): Scope to limit the search. Defaults to None.
                limit (int, optional): Maximum number of results. Defaults to 20.

        Returns:
                [List(_dict)]: Search results.
        """
        index = self.get_index()
        matches = []

        with index.searcher() as searcher:
            # Parse across title and content; field and wildcard syntax are
            # not supported in user queries, so drop those plugins.
            parser = MultifieldParser(["title", "content"], index.schema)
            parser.remove_plugin_class(FieldsPlugin)
            parser.remove_plugin_class(WildcardPlugin)
            parsed_query = parser.parse(text)

            scope_filter = Prefix(self.id, scope) if scope else None
            hits = searcher.search(parsed_query, limit=limit, filter=scope_filter)
            matches = [self.parse_result(hit) for hit in hits]

        return matches
예제 #2
0
def generic(idx, qs=None, q=None, limit=5, parser=None, page=1):
    """Run a paged search against *idx* from a query string or Query object.

    One of ``qs`` (raw query string) or ``q`` (pre-built Query) must be
    given. ``parser`` defaults to an OR-grouped multi-field parser over the
    common document fields.
    """
    if qs is None and q is None:
        raise ValueError('cannot have a null querystring and query')

    if parser is None:
        parser = MultifieldParser(
                ['title', 'keywords', 'summary', 'content', 'author'], idx.schema, group=OrGroup)

    # Better date parsing support; wildcard syntax is unused.
    parser.add_plugin(DateParserPlugin())
    parser.remove_plugin_class(WildcardPlugin)

    with idx.searcher() as search:
        # Prefer the raw query string when given, else the prepared Query.
        query = parser.parse(qs) if qs else q

        # Sort by score, then newest-modified, then title.
        facet = MultiFacet()
        facet.add_score()
        facet.add_field('modified', reverse=True)
        facet.add_field('title')

        page_results = search.search_page(query, pagenum=page, sortedby=facet, pagelen=limit)
        res = clean_results(idx, page_results, query)

        # Copy the pagination attributes exposed by `search_page`.
        res.page_number = page_results.pagenum   # current page number
        res.page_total = page_results.pagecount  # total pages in results
        res.offset = page_results.offset         # first result of current page
        res.pagelen = page_results.pagelen       # max results per page

    return res
예제 #3
0
def search(index_name, text, scope=None, limit=20):
    """Search *index_name* for *text*, optionally limited to paths under *scope*.

    Returns a list of frappe._dict entries with title/path and highlights.
    """
    ix = open_dir(get_index_path(index_name))

    hits_out = []
    with ix.searcher() as searcher:
        # Field and wildcard query syntax are not needed here.
        parser = MultifieldParser(["title", "content"], ix.schema)
        parser.remove_plugin_class(FieldsPlugin)
        parser.remove_plugin_class(WildcardPlugin)
        query = parser.parse(text)

        path_filter = Prefix("path", scope) if scope else None
        for hit in searcher.search(query, limit=limit, filter=path_filter):
            hits_out.append(
                frappe._dict(
                    title=hit["title"],
                    path=hit["path"],
                    title_highlights=hit.highlights("title"),
                    content_highlights=hit.highlights("content"),
                ))

    return hits_out
예제 #4
0
def search(querystring, language_code):
    """Query the per-language Whoosh index and resolve hits to Articles.

    Returns a dict of the form ``{'results': [{'score': ..., 'object': ...}]}``.
    """
    ix = LanguageIndex(settings.WHOOSH_INDEX_PATH, language_code,
                       _get_schema()).load()
    parser = MultifieldParser(['title', 'keywords', 'content'], ix.schema)
    # Wildcard queries are unused; removing the plugin speeds up parsing.
    parser.remove_plugin_class(WildcardPlugin)
    query = parser.parse(querystring)

    result = {'results': []}

    with ix.searcher() as searcher:
        for hit in searcher.search(query):
            result['results'].append({
                'score': hit.score,
                'object': Article.objects.get(code=hit.fields()['code']),
            })

    return result
예제 #5
0
def search(querystring, language_code):
    """Search the language-specific index and return scored Article matches."""
    ix = LanguageIndex(settings.WHOOSH_INDEX_PATH, language_code, _get_schema()).load()

    # Multi-field parse over title/keywords/content; wildcard support is not
    # needed, so remove its plugin for better parse performance.
    parser = MultifieldParser(['title', 'keywords', 'content'], ix.schema)
    parser.remove_plugin_class(WildcardPlugin)
    query = parser.parse(querystring)

    matches = []
    with ix.searcher() as searcher:
        results = searcher.search(query)
        for hit in results:
            entry = {}
            entry['score'] = hit.score
            entry['object'] = Article.objects.get(code=hit.fields()['code'])
            matches.append(entry)

    return {'results': matches}
예제 #6
0
    def getSpecialCasesResults(self, value, translation):
        """Takes in a query and compares it to hard-coded special cases.
        The special cases are for the "Miracle Letters"

        :param translation: The requested translation type
        :type translation: str
        :return: A list of ayah matches if there is a match, otherwise returns None
        :rtype: list, None
        """
        matchingAyahList = []
        for case in SPECIAL_CASES:
            if value == case[0]:
                # Swap the query for the canonical value and note its ayahs.
                value, matchingAyahList = case[1], case[2]

        if not matchingAyahList:
            return None

        # Build one OR query of surah/ayah clauses for all matching ayahs.
        allowedResults = [
            "surah_num:" + str(ayah[0]) + " AND ayah_num:" + str(ayah[1])
            for ayah in matchingAyahList
        ]
        parser = MultifieldParser(["surah_num", "ayah_num"], self._ix.schema)
        parser.remove_plugin_class(PhrasePlugin)
        parser.add_plugin(SequencePlugin())
        query = parser.parse(" OR ".join(allowedResults))
        with self._ix.searcher() as searcher:
            results = searcher.search(query, limit=7)
            return self._getResponseObjectFromParams(
                    value,
                    self._getMatchesFromResults(results, translation),
                    [],
                    []
            )
예제 #7
0
    def search(self, text, scope=None, limit=20):
        """Search the current index with fuzzy matching and field boosts.

        Args:
                text (str): String to search for.
                scope (str, optional): Scope to limit the search. Defaults to None.
                limit (int, optional): Maximum number of results. Defaults to 20.

        Returns:
                [List(_dict)]: Search results.
        """
        index = self.get_index()
        matches = []

        fields = self.get_fields_to_search()
        # Earlier fields matter more: boost falls off as 1, 1/2, 1/3, ...
        boosts = {field: 1.0 / rank for rank, field in enumerate(fields, start=1)}

        with index.searcher() as searcher:
            parser = MultifieldParser(fields,
                                      index.schema,
                                      termclass=FuzzyTermExtended,
                                      fieldboosts=boosts)
            parser.remove_plugin_class(FieldsPlugin)
            parser.remove_plugin_class(WildcardPlugin)
            parsed_query = parser.parse(text)

            scope_filter = Prefix(self.id, scope) if scope else None
            for hit in searcher.search(parsed_query, limit=limit, filter=scope_filter):
                matches.append(self.parse_result(hit))

        return matches
예제 #8
0
 def search(self, queries, fuzzy = True, default_fields = None, max_results = None):
     """Yield the ``oid`` of every document matching *queries*.

     Args:
         queries: A query string or a list of query strings.
         fuzzy: When True and the input is a single bare word (no field or
             wildcard syntax), wrap it in ``*...*`` so it matches substrings.
         default_fields: Field name or list of field names to search in.
             Defaults to searching no specific fields.
         max_results: Optional cap on yielded results per query.
     """
     # Fix: the old signature used a mutable default ([]); use None and
     # normalize here so the default is never shared between calls.
     if default_fields is None:
         default_fields = []
     # Fix: isinstance() instead of type() != comparisons.
     if not isinstance(queries, list):
         queries = [queries]
     if not isinstance(default_fields, list):
         default_fields = [default_fields]
     if fuzzy and len(queries) == 1 and len(queries[0].split()) == 1 and ':' not in queries[0] and '*' not in queries[0]:
         queries = ['*%s*' % (queries[0])]
     for query in queries:
         # Fix: the old `type(query) != unicode` check raises NameError on
         # Python 3; accept raw bytes and decode them instead (equivalent
         # on Python 2, correct on Python 3).
         if isinstance(query, bytes):
             query = query.decode('utf-8')
         log.msg('search query: %s' % (query))
         with self.ix.searcher() as searcher:
             parser = MultifieldParser(default_fields, self.ix.schema)
             parser.remove_plugin_class(plugins.WildcardPlugin)
             parser.add_plugin(WildcardPlugin)
             query = parser.parse(query)
             log.msg('search query parsed: %s' % (query))
             results = searcher.search(query, limit = None)
             count = 0
             for result in results:
                 yield result['oid']
                 count += 1
                 if max_results and count >= max_results:
                     break
예제 #9
0
# NOTE(review): `writer` is never used or committed within this chunk —
# if nothing after this snippet uses it, acquiring it only takes the
# index write lock needlessly. Verify against the rest of the file.
writer = ix.writer()

# CLI arguments: the search string and the mode ("normal" or proximity).
_string = sys.argv[1]
_mode = sys.argv[2]
normal = (_mode == "normal")

# In proximity mode a third argument gives the allowed word distance.
_distance = 0
if (normal is False):
    _distance = int(sys.argv[3])

with ix.searcher() as searcher:
    # og = qparser.OrGroup.factory(0.9)
    # Search across all of the main document fields at once.
    parser = MultifieldParser(["title", "sub_title", "author", "content"],
                              schema=ix.schema)
    # parser = qparser.QueryParser("content", ix.schema)
    # Swap the phrase plugin for the sequence plugin so quoted queries
    # support the `"..."~N` slop syntax built below.
    parser.remove_plugin_class(qparser.PhrasePlugin)
    parser.add_plugin(qparser.SequencePlugin())

    if (normal):
        string = _string
        query = parser.parse(string)
    else:
        # proximity: quote the string and append a slop factor of
        # (1 + distance) * 3 positions.
        distance = _distance
        proximty_query = "\"" + _string + "\"" + '~' + str((1 + distance) * 3)
        query = parser.parse(proximty_query)

    # sys.stdout.buffer.write(query)
    # Write via the binary buffer so the marker survives any console encoding.
    sys.stdout.buffer.write(">>>>>>OUTPUT start<<<<<<".encode('utf-8'))
    results = searcher.search(query, limit=20)
    results.fragmenter.maxchars = 100
예제 #10
0
    def get_result(self,
                   value: str,
                   translation: str,
                   limit: Optional[int] = None) -> Dict:
        """ Checks to see if there are any special case results then searches for an Ayah with
        Whoosh if no special cases exist.

        :param value: The ayah text.
        :param translation: The type of translation.
        :param limit: The max limit on the number of queries to return.
        :return: A dictionary with query text, matches, matched terms, and suggestions.
        """
        # Start with special cases
        value = self.adjust_for_special_cases(value)

        # Otherwise execute full search
        with self._ix.searcher() as searcher:
            # Check if its just one word: a single word searches all three
            # fields at once; longer queries start against simple_ayah only.
            is_single_word_query = False
            if len(value.split()) == 1:
                parser = MultifieldParser(
                    ["simple_ayah", "roots", "decomposed_ayah"],
                    self._ix.schema)
                is_single_word_query = True
            else:
                parser = QueryParser("simple_ayah", self._ix.schema)
            # Execute search on a single word.  Field/wildcard query syntax
            # is not needed, so drop those plugins before parsing.
            parser.remove_plugin_class(FieldsPlugin)
            parser.remove_plugin_class(WildcardPlugin)
            query = parser.parse(value)
            results = searcher.search(query, limit=limit)
            if results:
                final_matches = self._get_matches_from_results(
                    results, translation)
                return self._get_response_object_from_params(
                    value, final_matches, value.split(' '), [])

            # Multi-word query with no direct hit: fall back through
            # progressively looser single-field searches
            # (simple_ayah -> roots -> decomposed_ayah) via
            # _parse_and_search; NOTE(review): the trailing True argument is
            # presumably an OR-group/terms flag — confirm in that helper.
            if not is_single_word_query:
                results = self._parse_and_search(searcher, 'simple_ayah',
                                                 value, limit, True)
                if not results:
                    results = self._parse_and_search(searcher, 'roots', value,
                                                     limit, True)
                    if not results:
                        results = self._parse_and_search(
                            searcher, 'decomposed_ayah', value, limit, True)
                if results:
                    matched_terms = results.matched_terms()
                    first_results = None
                    # Multiple strong matches: remember the full result set
                    # for suggestions, then narrow to the top hit's exact
                    # simple_ayah text.
                    if len(matched_terms) > 1 and results.scored_length() > 1:
                        if results[1].score > 10:
                            first_results = results
                        results = self._parse_and_search(
                            searcher, 'simple_ayah', results[0]['simple_ayah'],
                            limit)
                    final_matches = self._get_matches_from_results(
                        results, translation)

                    # Every other hit scoring above 10 becomes a suggestion.
                    suggestions = []
                    if first_results:
                        for result in [
                                fR for fR in first_results if fR.score > 10
                        ]:
                            suggestions.append(result['simple_ayah'])

                    return self._get_response_object_from_params(
                        value,
                        final_matches,
                        # `term` is a tuple where the second index contains the matching term.
                        [term[1].decode('utf-8') for term in matched_terms],
                        suggestions)

        return self._get_empty_response(value)
예제 #11
0
import whoosh.index
from whoosh.qparser import MultifieldParser, OrGroup, WildcardPlugin

whoosh_idx = whoosh.index.open_dir("whoosh_idx", indexname="nasdaq")

# OR-group parser over both fields; wildcard syntax is unused, so removing
# its plugin speeds up parsing.
query_parser = MultifieldParser(["title", "article"],
                                schema=whoosh_idx.schema,
                                group=OrGroup)
query_parser.remove_plugin_class(WildcardPlugin)

parsed_query = query_parser.parse("What market does FitBit compete in?")

with whoosh_idx.searcher() as searcher:
    search_results = searcher.search(parsed_query, limit=1)
    # Fix: use a plain loop instead of a list comprehension evaluated only
    # for its print side effects (the old form built a throwaway list).
    for sr in search_results:
        print(sr["title"])
예제 #12
0
    # Either rebuild the index from scratch or open the existing one on disk.
    if ALWAYS_REBUILD:
        index = rebuild_index()
    else:
        index = open_dir(search_path)

    #TODO: Doesnt work. Only triggers once.
    #timer = threading.Timer(86400, index.optimize)
    #timer.daemon = True
    #timer.start()

    # Non-blocking writer; 0.5 is presumably the retry delay between lock
    # attempts — confirm against the whoosh AsyncWriter docs.
    writer = AsyncWriter(index, 0.5)

    # Boost title matches over content matches.
    parser = MultifieldParser(["content", "title"], schema, {
        "content": 1.0,
        "title": 2.0
    })
    #parser.add_plugin(SingleQuotePlugin())
    # Field query syntax (`field:term`) is not exposed to users.
    parser.remove_plugin_class(
        FieldsPlugin
    )  #https://whoosh.readthedocs.io/en/latest/api/qparser.html#plug-ins
    def search(q):
        """Return the stored fields of every document matching query *q*.

        Queries shorter than `min_search_length` return an empty list.
        """
        if len(q) < min_search_length:
            return []
        hits = []
        with index.searcher() as searcher:
            matched = searcher.search(parser.parse(q))
            hits = [doc.fields() for doc in matched]
        return hits
예제 #13
0
    def getResult(self, value, translation):
        """Search for *value* and build a response object of ayah matches.

        Falls back through progressively looser searches: hard-coded special
        cases, an exact query (multi-field for single words, simple_ayah for
        phrases), then OR-grouped queries over simple_ayah, roots, and
        decomposed_ayah in turn.

        :param value: The query text.
        :param translation: The requested translation type.
        :return: A response object, or an empty response when nothing matches.
        """
        specialCasesResults = self.getSpecialCasesResults(value, translation)
        if specialCasesResults:
            return specialCasesResults

        def _search(parser, text, **kwargs):
            # Shared boilerplate (was repeated five times): strip the unused
            # fields/wildcard plugins, parse, and run an unlimited search.
            parser.remove_plugin_class(FieldsPlugin)
            parser.remove_plugin_class(WildcardPlugin)
            return searcher.search(parser.parse(text), limit=None, **kwargs)

        with self._ix.searcher() as searcher:
            isSingleWordQuery = len(value.split()) == 1
            if isSingleWordQuery:
                parser = MultifieldParser(
                        ["simple_ayah", "roots", "decomposed_ayah"], self._ix.schema
                )
            else:
                parser = QueryParser("simple_ayah", self._ix.schema)
            results = _search(parser, value)
            if results:
                finalMatches = self._getMatchesFromResults(results, translation)
                return self._getResponseObjectFromParams(
                    value,
                    finalMatches,
                    value.split(' '),
                    []
                )

            if not isSingleWordQuery:
                # Retry with OR semantics, loosening the field each time.
                results = _search(
                    QueryParser("simple_ayah", self._ix.schema, group=OrGroup),
                    value, terms=True)
                if not results:
                    results = _search(
                        QueryParser("roots", self._ix.schema, group=OrGroup),
                        value, terms=True)
                    if not results:
                        results = _search(
                            QueryParser("decomposed_ayah", self._ix.schema, group=OrGroup),
                            value, terms=True)
                if results:
                    matchedTerms = results.matched_terms()

                    firstResults = None
                    if len(matchedTerms) > 1 and results.scored_length() > 1:
                        if results[1].score > 10:
                            firstResults = results

                        # Narrow to the top hit's exact text so the final
                        # matches come from a single ayah.
                        results = _search(
                            QueryParser("simple_ayah", self._ix.schema),
                            results[0]["simple_ayah"])

                    finalMatches = self._getMatchesFromResults(results, translation)

                    # Any other hit scoring above 10 becomes a suggestion.
                    suggestions = []
                    if firstResults:
                        for result in [fR for fR in firstResults if fR.score > 10]:
                            suggestions.append(result['simple_ayah'])

                    return self._getResponseObjectFromParams(
                        value,
                        finalMatches,
                        # term is a tuple where the second index contains the matching
                        # term
                        [term[1] for term in matchedTerms],
                        suggestions
                    )

        return self._getEmptyResponse(value)
예제 #14
0
from whoosh.qparser import QueryParser, MultifieldParser, FieldsPlugin

analyzer = NgramAnalyzer(3)

# Schema: boost category matches over title matches; titles are indexed as
# ngram words (min 2, max 20 chars, boost 2.0).
schema = Schema(
    id=STORED,
    category=TEXT(field_boost=3.0),
    title=NGRAMWORDS(2, 20, False, 2.0))

index = create_in("search", schema)

# Seed the index with a handful of sample documents.
writer = index.writer()
writer.add_document(id=0, title="Test Words")
writer.add_document(id=1, title="Apple Banana Cucumber")
writer.add_document(id=2, title="Deck Elevator Floor", category="test")
writer.add_document(id=3, title="Pen Pineapple Apple Pen")
writer.commit()

# Mirror the schema boosts at the parser level; user queries have no
# field syntax, so drop the fields plugin.
parser = MultifieldParser(["category", "title"], schema, {
    "category": 3.0,
    "title": 2.0
})
parser.remove_plugin_class(FieldsPlugin)

with index.searcher() as searcher:
    hits = searcher.search(parser.parse("Test"))
    print(str(len(hits)) + " results")
    for hit in hits:
        print(hit["id"])