from whoosh import qparser, scoring
from whoosh.index import open_dir
from whoosh.query import Or


def get_video_ids(query):
    levenshtein_distance = 1
    index = open_dir(corpus_index_dir)  # corpus_index_dir is defined at module level
    query_terms = query.split(" ")
    # Turn every term into a fuzzy term, e.g. "python" -> "python~1"
    fuzzy_query_terms = [
        "{0}~{1}".format(qt, levenshtein_distance) for qt in query_terms
    ]
    fuzzy_query_terms = " ".join(fuzzy_query_terms)
    # Parse the same terms twice: once OR-ed, once AND-ed.
    fuzzy_or_query_parser = qparser.QueryParser("content", index.schema,
                                                group=qparser.OrGroup)
    fuzzy_or_query_parser.add_plugin(qparser.FuzzyTermPlugin())
    fuzzy_parsed_or_query = fuzzy_or_query_parser.parse(fuzzy_query_terms)
    fuzzy_and_query_parser = qparser.QueryParser("content", index.schema,
                                                 group=qparser.AndGroup)
    fuzzy_and_query_parser.add_plugin(qparser.FuzzyTermPlugin())
    fuzzy_parsed_and_query = fuzzy_and_query_parser.parse(fuzzy_query_terms)
    # Combine both parses so documents matching all terms score higher
    # while partial matches still qualify.
    fuzzy_query = Or([fuzzy_parsed_or_query, fuzzy_parsed_and_query])
    with index.searcher(weighting=scoring.TF_IDF()) as searcher:
        results = searcher.search(fuzzy_query, limit=None)
        video_ids = [result.fields()["title"] for result in results]
    return video_ids
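# The snippet above assumes a module-level corpus_index_dir pointing at an
# index with a searched "content" field and a stored "title" field holding
# the video id. A minimal sketch of building such an index (the directory
# name and document values are assumptions, not from the original):
import os
from whoosh import index
from whoosh.fields import Schema, TEXT, ID

corpus_index_dir = "corpus_index"  # hypothetical location

schema = Schema(title=ID(stored=True),  # stored: returned as the video id
                content=TEXT)           # searched by get_video_ids()

if not os.path.exists(corpus_index_dir):
    os.mkdir(corpus_index_dir)
ix = index.create_in(corpus_index_dir, schema)
writer = ix.writer()
writer.add_document(title=u"video-001", content=u"whoosh fuzzy search demo")
writer.commit()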
def n_gram_query(self, query_string):
    og = qparser.OrGroup.factory(0.8)
    parser = qparser.QueryParser(_N_GRAM_FIELD, self._schema, group=og)
    parser.remove_plugin_class(qparser.FieldsPlugin)
    parser.remove_plugin_class(qparser.WildcardPlugin)
    parser.add_plugin(qparser.FuzzyTermPlugin())
    return parser.parse(query_string)
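# _N_GRAM_FIELD above presumably names an n-gram field in self._schema. A
# sketch of what such a field could look like using Whoosh's built-in
# NGRAMWORDS type (the field name and gram sizes are assumptions):
from whoosh.fields import Schema, ID, NGRAMWORDS

_N_GRAM_FIELD = "ngrams"  # hypothetical constant matching the snippet

# NGRAMWORDS indexes 2- to 4-character grams of each word, so short
# misspellings still overlap with the indexed terms.
schema = Schema(
    doc_id=ID(stored=True, unique=True),
    ngrams=NGRAMWORDS(minsize=2, maxsize=4),
)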
def task_frequency_in_index(self) -> list:
    """
    Determines the most frequent words for paragraphs of the class
    "Ihre Aufgaben" (job tasks).
    """
    task_search_string = \
        "alltag arbeitsgebiet~ are bringen^(-0.5) (aufgabe~ aufgabenbereich~ aufgabenbeschreibung aufgabenfeld " \
        "aufgabengebiet~ aufgabenschwerpunkt~ aufgabenspektrum)^2 bietest challenge chance dein~ dich " \
        "einsatz engagement^0.75 erwartet field (hauptaufgaben~2/12 haupttätigkeit~2/14)^2 " \
        "herausforderung ihr~ machst meine (responsibilitie~2/15)^1.5 schwerpunkt~ sie " \
        "(task~/4)^3 themengebiet~2/12 tun umfasst unterstützen (verantwortlichkeit~2/18 verantwortung~2/13)^2 " \
        "wirkungsfeld work workspace you~2/3"
    or_group = qparser.OrGroup.factory(1)
    parser = qparser.QueryParser("paragraph_heading", schema=self.schema,
                                 group=or_group)
    parser.add_plugin(qparser.FuzzyTermPlugin())
    heading_searcher = self.ix.searcher()
    try:
        result_docs = heading_searcher.search(
            parser.parse(task_search_string), limit=None)
        return self.text_term_frequency(result_docs)
    except ZeroDivisionError:
        # "Too few documents were available for this query"
        return ["Für diese Anfrage waren zu wenig Dokumente vorhanden", 0]
    finally:
        # Close the searcher that was actually used for the query.
        heading_searcher.close()
def requirements_frequency_in_index(self) -> list:
    """
    Determines the most frequent words for paragraphs of the class
    "Ihre Qualifikationen" (requirements).
    """
    requirements_search_string = \
        "(anforderung~2/5 anforderungsprofil)^2 anwenderkenntnisse~ are ausmacht auszeichnet background " \
        "bedingungen berufserfahrung~2 bietest bist (bringen~2/5)^2 dein du (einstellungsvoraussetzungen~2/25)^3 " \
        "(erfahrungen~2/9)^1.5 erforderlich~ erwarten (erwartungen~/5)^1.25 essential~ experiences~/7 fachgebiet " \
        "fachliche~/8 fachrichtung fähigkeiten^2 hast have ich ihr~ kannst kenntnisse~/8 (kompetenz~2/9)^2 " \
        "kompetenzprofil meine mitbringen mitbringst optimal~3/7 (pluspunkt~)^1.25 profil~2/4 punkten " \
        "(qualifications~2/13 qualifikation~2/13)^2 reference required requirements^2 sich skills^3 sollten " \
        "solltest steckbrief (stellenanforderung~2/18)^2 talent ticken (voraussetzung~2/13 vorkenntniss~2/12)^2 " \
        "wonach worauf you~2/3 zusätzliche"
    or_group = qparser.OrGroup.factory(1)
    parser = qparser.QueryParser("paragraph_heading", schema=self.schema,
                                 group=or_group)
    parser.add_plugin(qparser.FuzzyTermPlugin())
    heading_searcher = self.ix.searcher()
    try:
        result_docs = heading_searcher.search(
            parser.parse(requirements_search_string), limit=None)
        return self.text_term_frequency(result_docs)
    except ZeroDivisionError:
        # "Too few documents were available for this query"
        return ["Für diese Anfrage waren zu wenig Dokumente vorhanden", 0]
    finally:
        heading_searcher.close()
def benefits_frequency_in_index(self) -> list:
    """
    Determines the most frequent words for paragraphs of the class
    "Ihre Vorteile" (benefits).
    """
    benefits_search_string = \
        "angebot are attraktiv (bekommen bekommst benefit~ bieten)^2 dein~ dich dir^1.5 freuen " \
        "geboten^1.5 ihnen ihr~ erwarten kannst konditionen leistungen mehrwert mein mitarbeitervorteile^3 " \
        "offer our^1.5 perks^2 salary sie ticken uns unser~/4 unternehmensprofil (vorteil~/7)^2 wir worauf freuen " \
        "you~2/3 zusatzleistungen^2 zusätzliche perspective~2/6"
    or_group = qparser.OrGroup.factory(1)
    parser = qparser.QueryParser("paragraph_heading", schema=self.schema,
                                 group=or_group)
    parser.add_plugin(qparser.FuzzyTermPlugin())
    heading_searcher = self.ix.searcher()
    try:
        result_docs = heading_searcher.search(
            parser.parse(benefits_search_string), limit=None)
        return self.text_term_frequency(result_docs)
    except ZeroDivisionError:
        # "Too few documents were available for this query"
        return ["Für diese Anfrage waren zu wenig Dokumente vorhanden", 0]
    finally:
        heading_searcher.close()
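# The three *_frequency_in_index() methods above differ only in their search
# string. A possible consolidation into one helper (the helper name is
# hypothetical; everything else is taken from the snippets):
def _heading_frequency_in_index(self, search_string: str) -> list:
    """Shared body for the task/requirements/benefits frequency queries."""
    parser = qparser.QueryParser("paragraph_heading", schema=self.schema,
                                 group=qparser.OrGroup.factory(1))
    parser.add_plugin(qparser.FuzzyTermPlugin())
    heading_searcher = self.ix.searcher()
    try:
        result_docs = heading_searcher.search(
            parser.parse(search_string), limit=None)
        return self.text_term_frequency(result_docs)
    except ZeroDivisionError:
        # "Too few documents were available for this query"
        return ["Für diese Anfrage waren zu wenig Dokumente vorhanden", 0]
    finally:
        heading_searcher.close()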
def indexquery(name, www):
    if name is None:
        return []
    # print("Name: %s" % name)
    ix = index.open_dir("/var/www/restnames/index")
    # FuzzyTerm as the termclass makes every parsed term fuzzy by default.
    qp = MultifieldParser(
        ["commonname", "database", "tags", "name", "name_part", "country",
         "project", "url"],
        schema=ix.schema, termclass=FuzzyTerm)
    qp.add_plugin(qparser.FuzzyTermPlugin())
    q = qp.parse(name)
    # q = Every()
    tempvar = []
    with ix.searcher() as searcher:
        results = searcher.search(q, limit=None)
        for hit in results:
            tempvar.append({
                'name': hit["name"],
                'commonname': hit["commonname"],
                'url': hit["url"]
            })
    if not www:
        return tempvar
    else:
        response = Response(
            render_template("searchresults.html", resultlist=tempvar))
        response.headers['content-type'] = 'text/html'
        return response
async def search(query_str, ctx):
    ix = open_dir("indexdir")
    parser = QueryParser("content", ix.schema)
    parser.add_plugin(qparser.FuzzyTermPlugin())
    parser.add_plugin(GtLtPlugin())
    parser.add_plugin(DateParserPlugin())
    query = parser.parse(query_str)
    print(query)
    with ix.searcher(weighting=scoring.PL2) as searcher:
        results = searcher.search(query, limit=5)
        results.fragmenter = highlight.SentenceFragmenter()
        results.fragmenter.surround = 50
        results.fragmenter.maxchars = 10000
        results.formatter = DiscordBoldFormatter()
        embed = discord.Embed(
            title="Results",
            color=discord.Color(0x3cd63d),
            description="From search: **{}**".format(query_str))
        for hit in results:
            # embed.add_field(name="[{}]({})".format(hit["title"], hit["url"]),
            #                 value="{}".format(hit.highlights("content")))
            embed.add_field(
                name="\u200b",
                value=f"[{hit['title']}]({hit['url']})\n"
                      f"{hit.highlights('content', minscore=0)}",
                inline=False)
    await ctx.send(embed=embed)
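# The DiscordBoldFormatter used above is not shown in the snippet. A minimal
# sketch of what it might look like, built on Whoosh's custom-formatter API
# (the exact markup is an assumption):
from whoosh.highlight import Formatter, get_text

class DiscordBoldFormatter(Formatter):
    """Wraps matched terms in ** so Discord renders them as bold."""

    def format_token(self, text, token, replace=False):
        # get_text() pulls the token's source text out of the fragment.
        return "**%s**" % get_text(text, token, replace)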
def _search_query(search_string, index_dir, search_field):
    search_index = index.open_dir(index_dir)
    query_parser = qparser.QueryParser(search_field,
                                       schema=search_index.schema)
    query_parser.add_plugin(qparser.PrefixPlugin())
    query_parser.add_plugin(qparser.FuzzyTermPlugin())
    # Close the searcher automatically once the results are materialised.
    with search_index.searcher() as searcher:
        results = searcher.search(query_parser.parse(search_string), limit=20)
        return [dict(result) for result in results]
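# With both plugins registered, _search_query() accepts prefix terms ("eng*")
# and fuzzy terms ("serch~") in one query string. A hedged usage example
# (the index directory and field name are assumptions):
hits = _search_query("serch~ eng*", "search_index", "content")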
def _mk_parser(self):
    from whoosh import qparser as qparse
    # use whoosh default query parser for now
    parser = qparse.QueryParser("meta", schema=self.idx_obj.schema)
    parser.add_plugin(qparse.FuzzyTermPlugin())
    parser.remove_plugin_class(qparse.PhrasePlugin)
    parser.add_plugin(qparse.SequencePlugin())
    self.parser = parser
def search(self, string=None, fields=["title", "content"]):
    query_parser = qparser.MultifieldParser(fields, self.ix.schema,
                                            group=qparser.OrGroup)
    query_parser.remove_plugin_class(qparser.PhrasePlugin)
    query_parser.add_plugin(qparser.FuzzyTermPlugin())
    query_parser.add_plugin(qparser.SequencePlugin())
    with self.ix.searcher(weighting=scoring.BM25F) as searcher:
        pattern = query_parser.parse(u'"{}"'.format(string))
        for result in searcher.search(pattern, limit=None):
            yield result
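# The u'"{}"' quoting above relies on the plugin swap: with PhrasePlugin
# removed and SequencePlugin added, a quoted group parses as an ordered
# sequence whose members may themselves be fuzzy terms. A quick standalone
# illustration (the schema and field name are assumptions):
from whoosh import qparser
from whoosh.fields import Schema, TEXT

parser = qparser.QueryParser("content", Schema(content=TEXT))
parser.remove_plugin_class(qparser.PhrasePlugin)
parser.add_plugin(qparser.FuzzyTermPlugin())
parser.add_plugin(qparser.SequencePlugin())

# Fuzzy terms are legal inside the quoted sequence:
print(parser.parse(u'"whosh~ serch~"'))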
def _mk_parser(self):
    from whoosh import qparser as qparse
    parser = qparse.MultifieldParser(self.idx_obj.schema.names(),
                                     self.idx_obj.schema)
    # XXX: plugin is broken in Debian's whoosh 2.7.0-2, but already fixed
    # upstream
    parser.add_plugin(qparse.FuzzyTermPlugin())
    parser.add_plugin(qparse.GtLtPlugin())
    parser.add_plugin(qparse.SingleQuotePlugin())
    # replace field definition to allow for colons to be part of a field's name:
    parser.replace_plugin(
        qparse.FieldsPlugin(expr=r"(?P<text>[()<>.\w]+|[*]):"))
    self.parser = parser
def basic_search(query, query_parse, group=default_group,
                 facet=default_facet, index=default_index):
    searcher = index.searcher()
    parser = QueryParser(query_parse, index.schema, group=group)
    # Plugins must be registered before parse() for them to take effect.
    parser.remove_plugin_class(qparser.PhrasePlugin)
    parser.add_plugin(qparser.SequencePlugin())
    parser.add_plugin(qparser.FuzzyTermPlugin())
    myquery = parser.parse(query)
    # limit caps the number of results (default 10); see the official
    # documentation linked at the top of this post.
    results = searcher.search(myquery, limit=None, sortedby=facet)
    print(results)
    return results
def index_search(self, search_query):
    if '/' in search_query:
        return []
    # Analyse the query, then append "~" to every token so each one is
    # parsed as a fuzzy term, e.g. "red car" -> "red~ car~".
    search_query = [token.text for token in my_analyzer(search_query)]
    search_query = '~ '.join(search_query)
    search_query += '~'
    ix = index.open_dir("index")
    with ix.searcher(weighting=scoring.Frequency) as s:
        og = qparser.OrGroup.factory(0.8)
        qp = qparser.QueryParser("name", schema=ix.schema,
                                 termclass=MyFuzzyTerm, group=og)
        qp.add_plugin(qparser.FuzzyTermPlugin())
        qp.add_plugin(qparser.SequencePlugin())
        q = qp.parse(search_query)
        results = s.search(q, terms=True, limit=None)
        ids = []
        for res in results:
            # ids.append(res['name'])
            ids.append(res['id'])
        return ids
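# MyFuzzyTerm is referenced above but not defined in the snippet. The usual
# pattern is to subclass FuzzyTerm to raise the default maximum edit
# distance, so plain terms also parse as fuzzy ones; the parameter values
# here are assumptions:
from whoosh.query import FuzzyTerm

class MyFuzzyTerm(FuzzyTerm):
    """FuzzyTerm with a more permissive default maximum edit distance."""

    def __init__(self, fieldname, text, boost=1.0, maxdist=2,
                 prefixlength=1, constantscore=True):
        super().__init__(fieldname, text, boost=boost, maxdist=maxdist,
                         prefixlength=prefixlength,
                         constantscore=constantscore)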
def question_tokens_to_query(keywords):
    """
    From a list of keywords and their synonyms, build a Whoosh query.
    """
    # Build the query string: each keyword group becomes "(kw OR syn OR ...)"
    query_str = ""
    for keyword in keywords:
        keywords_str = "("
        for i in range(len(keyword)):
            keywords_str += keyword[i] + " OR "
        keywords_str = keywords_str[:-4]  # remove the trailing " OR "
        keywords_str += ")"
        query_str += keywords_str + " "
    # From the query string, build the Whoosh query object
    ix = index.open_dir(index_dir)
    parser = qparser.MultifieldParser(["title", "content"], ix.schema)
    parser.remove_plugin_class(qparser.PhrasePlugin)
    parser.add_plugin(qparser.SequencePlugin())  # for complex phrase queries
    parser.add_plugin(qparser.FuzzyTermPlugin())  # terms don't have to match exactly
    query = parser.parse(query_str)
    return query
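# For reference, the string-building loop in question_tokens_to_query()
# turns grouped synonyms into parenthesised OR groups. A tiny standalone
# demonstration (the keyword data is hypothetical); " OR ".join() is an
# equivalent, tidier way to build each group than slicing off the tail:
keywords = [["car", "automobile"], ["engine"]]

query_str = ""
for keyword in keywords:
    query_str += "(" + " OR ".join(keyword) + ") "

print(query_str)  # -> "(car OR automobile) (engine) "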
def __init__(self, index_path, language):
    from whoosh import index as whoosh_index
    from whoosh.fields import Schema, TEXT, ID
    from whoosh import qparser
    from whoosh.highlight import UppercaseFormatter
    from whoosh.analysis import SimpleAnalyzer, LanguageAnalyzer
    from whoosh.lang import has_stemmer, has_stopwords
    import os
    import sys  # used by the sys.exit() calls below

    if not has_stemmer(language) or not has_stopwords(language):
        # TODO Display a warning?
        analyzer = SimpleAnalyzer()
    else:
        analyzer = LanguageAnalyzer(language)
    self.schema = Schema(path=ID(unique=True, stored=True),
                         body=TEXT(analyzer=analyzer))
    self.formatter = UppercaseFormatter()
    self.index_path = index_path
    if not os.path.exists(index_path):
        try:
            os.mkdir(index_path)
        except OSError as e:
            sys.exit("Error creating Whoosh index: %s" % e)
    if whoosh_index.exists_in(index_path):
        try:
            self.search_index = whoosh_index.open_dir(index_path)
        except whoosh_index.IndexError as e:
            sys.exit("Error opening whoosh index: {0}".format(e))
    else:
        self.search_index = whoosh_index.create_in(index_path, self.schema)
    self.query_parser = qparser.MultifieldParser(["body", "path"],
                                                 schema=self.schema)
    self.query_parser.add_plugin(qparser.FuzzyTermPlugin())
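# A possible companion lookup method for the class above (the method name
# and return shape are assumptions; query_parser, search_index, and the
# FuzzyTermPlugin registration come from __init__):
def search(self, query_string):
    # With FuzzyTermPlugin active, "recipee~2" matches "recipe" within
    # two edits on either the body or path field.
    with self.search_index.searcher() as searcher:
        results = searcher.search(self.query_parser.parse(query_string))
        return [hit["path"] for hit in results]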
def search(self, query_str, afterYear, beforeYear, withDir, fuzzycheck):
    # Open the index to search and create a QueryParser over the title field.
    if fuzzycheck == "checked":
        print("fuzzycheck is checked")
    movies = list()
    # Use "with" so the searcher is automatically closed afterwards.
    with self.indexer.searcher() as s:
        parser = qparser.QueryParser("title", schema=self.indexer.schema)
        parser.add_plugin(qparser.FuzzyTermPlugin())
        q = parser.parse(query_str)
        results = s.search(q, limit=None)
        # If the query matched nothing, fall back to every document.
        if len(results) == 0:
            results = s.documents()
        for r in results:
            # Keep only results that satisfy the advanced-search flags.
            if (int(r["year"]) > int(afterYear)
                    and int(r["year"]) < int(beforeYear)
                    and withDir in r["director"]):
                pster = "https://lascrucesfilmfest.com/wp-content/uploads/2018/01/no-poster-available-737x1024.jpg"
                if r["poster"] != "N/A":
                    pster = r["poster"]
                movies.append(Movie(pster, r["url"], r["title"], r["year"],
                                    r["director"], r["plot"]))
    return movies
def search_for_results(userquery, corrected_flag=True):
    searcher = None  # defined up front so the finally block is safe
    try:
        if os.path.exists(settings.SEARCH_INDEX_DIR):
            # Open the index directory and create a searcher object.
            index_reference = open_dir(settings.SEARCH_INDEX_DIR)
            searcher = index_reference.searcher()
            # Applying stemming on the userquery
            stem(userquery)
            # OrGroup.factory gives better relevance than the naive term
            # frequency of the words in the query.
            og = qparser.OrGroup.factory(0.9)
            # Initialize a MultifieldParser to search multiple fields.
            queryparser = qparser.MultifieldParser(
                ["tags", "foss", "title", "outline"],
                schema=index_reference.schema, group=og)
            # Remove the user's ability to specify fields to search.
            queryparser.remove_plugin_class(qparser.FieldsPlugin)
            # Remove wildcard searches, which can hurt query performance.
            queryparser.remove_plugin_class(qparser.WildcardPlugin)
            # A fuzzy term is written with a ~ plus an optional maximum
            # edit distance (e.g. jav~1).
            queryparser.add_plugin(qparser.FuzzyTermPlugin())
            # Parse the given query.
            q = queryparser.parse(userquery)
            # Correct spelling with a maximum edit distance of 3; more
            # than 3 may hurt performance.
            corrected = searcher.correct_query(q, userquery, maxdist=3)
            # If the corrected query differs from the parsed query, offer a
            # "Did you mean" suggestion; if the user accepts it, the
            # suggestion is searched, otherwise the original query is.
            corrected_string = None
            if corrected_flag:
                if corrected.query != q:
                    corrected_string = corrected.string
            results = searcher.search(q, terms=True, limit=None)
            # Print the number of videos found and their titles.
            print("%d Videos Found for %s " %
                  (results.scored_length(), userquery))
            if results.has_matched_terms() and results.scored_length() > 0:
                collection = []
                for hit in results:
                    row = TutorialResource.objects.filter(
                        tutorial_detail_id=hit['VideoId'],
                        language__name='English').first()
                    collection.append(row)
                return collection, corrected_string
            else:
                return None, corrected_string
    finally:
        # Finally, close the searcher object if one was opened.
        if searcher is not None:
            searcher.close()
    return None, None
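# A sketch of how a caller might consume the (collection, corrected_string)
# pair to implement the "Did you mean" flow described in the comments (the
# example query and the re-run policy are assumptions):
results, suggestion = search_for_results("pythn", corrected_flag=True)
if results is None and suggestion:
    # e.g. render "Did you mean '<suggestion>'?" and re-run if accepted
    results, _ = search_for_results(suggestion, corrected_flag=False)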
                               i_d.schema, group=og)
        q_e = MultifieldParser(["title", "content", "extension", "url"],
                               i_e.schema, group=og)
        q_f = MultifieldParser(["title", "content", "extension", "url"],
                               i_f.schema, group=og)
    elif operator == 4:
        # print("in oper 4")
        og = qparser.OrGroup.factory(0.9)
        q_a = MultifieldParser(["title", "content", "tags", "extension", "url"],
                               i_a.schema, group=og)
        q_a.add_plugin(qparser.SequencePlugin("!(~(?P<slop>[1-9][0-9]*))?"))
        q_a.add_plugin(qparser.FuzzyTermPlugin())
        q_b = MultifieldParser(["title", "content", "extension", "url"],
                               i_b.schema, group=og)
        q_b.add_plugin(qparser.SequencePlugin("!(~(?P<slop>[1-9][0-9]*))?"))
        q_b.add_plugin(qparser.FuzzyTermPlugin())
        q_c = MultifieldParser(["title", "content", "extension", "url"],
                               i_c.schema, group=og)
        q_c.add_plugin(qparser.SequencePlugin("!(~(?P<slop>[1-9][0-9]*))?"))
        q_c.add_plugin(qparser.FuzzyTermPlugin())
        q_d = MultifieldParser(["title", "content", "extension", "url"],
                               i_d.schema, group=og)
        q_d.add_plugin(qparser.SequencePlugin("!(~(?P<slop>[1-9][0-9]*))?"))
        q_d.add_plugin(qparser.FuzzyTermPlugin())
def __call__(query=None, dataset=None, force_reindex=False, max_nresults=20,
             show_keys=False, show_query=False):
    from whoosh import qparser as qparse

    try:
        ds = require_dataset(dataset, check_installed=True,
                             purpose='dataset search')
        if ds.id is None:
            raise NoDatasetArgumentFound(
                "This does not seem to be a dataset (no DataLad dataset ID "
                "found). 'datalad create --force %s' can initialize "
                "this repository as a DataLad dataset" % ds.path)
    except NoDatasetArgumentFound:
        for r in _search_from_virgin_install(dataset, query):
            yield r
        return

    # where does the bunny have the eggs?
    index_dir = opj(ds.path, get_git_dir(ds.path), 'datalad', 'search_index')
    idx_obj = _get_search_index(index_dir, ds, force_reindex)

    if show_keys:
        definitions_fname = opj(index_dir, 'datalad_term_definitions.json.gz')
        try:
            defs = jsonload(gzopen(definitions_fname))
        except Exception as e:
            lgr.warning(
                'No term definitions found alongside search index: %s',
                exc_str(e))
            defs = {}
        for k in idx_obj.schema.names():
            print('{}{}'.format(
                k,
                ' {}'.format(
                    defs[k] if isinstance(defs[k], dict)
                    else '({})'.format(defs[k]))
                if k in defs else ''))
        return

    if not query:
        return

    with idx_obj.searcher() as searcher:
        # parse the query string, default whoosh parser ATM, could be
        # tailored with plugins
        parser = qparse.MultifieldParser(idx_obj.schema.names(),
                                         idx_obj.schema)
        # XXX: plugin is broken in Debian's whoosh 2.7.0-2, but already fixed
        # upstream
        parser.add_plugin(qparse.FuzzyTermPlugin())
        parser.add_plugin(qparse.GtLtPlugin())
        # replace field definition to allow for colons to be part of a field's name:
        parser.replace_plugin(
            qparse.FieldsPlugin(expr=r"(?P<text>[()<>:\w]+|[*]):"))
        # for convenience we accept any number of args-words from the
        # shell and put them together to a single string here
        querystr = ' '.join(assure_list(query))
        # this gives a formal whoosh query
        wquery = parser.parse(querystr)

        if show_query:
            print(wquery)
            return
        # perform the actual search
        hits = searcher.search(
            wquery,
            terms=True,
            limit=max_nresults if max_nresults > 0 else None)
        # cheap way to get an approximate number of hits, without an expensive
        # scoring of all items
        # disabled: unreliable estimate, often confusing
        # nhits = hits.estimated_min_length()
        # report query stats
        topstr = '{} top {}'.format(
            max_nresults,
            single_or_plural('match', 'matches', max_nresults))
        lgr.info('Query completed in {} sec.{}'.format(
            hits.runtime,
            ' Reporting {}.'.format(
                ('up to ' + topstr) if max_nresults > 0 else 'all matches')
            if not hits.is_empty() else ' No matches.'))

        if not hits:
            return

        nhits = 0
        for hit in hits:
            res = dict(
                action='search',
                status='ok',
                logger=lgr,
                refds=ds.path,
                # normpath to avoid trailing dot
                path=normpath(opj(ds.path, hit['path'])),
                query_matched={
                    assure_unicode(k):
                    assure_unicode(v) if isinstance(v, unicode_srctypes) else v
                    for k, v in hit.matched_terms()},
                metadata={
                    k: v for k, v in hit.fields().items()
                    if k not in ('path', 'parentds')})
            if 'parentds' in hit:
                res['parentds'] = normpath(opj(ds.path, hit['parentds']))
            yield res
            nhits += 1

        if max_nresults and nhits == max_nresults:
            lgr.info("Reached the limit of {}, there could be more which "
                     "were not reported.".format(topstr))
def query(my_query):
    schema = Schema(href=ID(stored=True),
                    title=TEXT(field_boost=2.0, stored=True),
                    page_content=TEXT(analyzer=StemmingAnalyzer(),
                                      stored=True))
    ix = index.open_dir("index_dir")
    # qp = QueryParser("page_content", schema=ix.schema)
    mparser = qparser.MultifieldParser(["title", "page_content"],
                                       schema=schema, group=qparser.OrGroup)
    # The fuzzy plugin must be added before parse() to take effect.
    mparser.add_plugin(qparser.FuzzyTermPlugin())
    my_query_new = ""
    ff_q = []
    final_my_query_new = ""
    analyzer = StandardAnalyzer()
    for t in analyzer(my_query):
        # print(t.text)
        my_query_new += " " + str(t.text)
        ff_q.append(str(t.text))
    my_stop_words = [
        "when", "http", "all", "but", "how", "so", "which", "has", "is",
        "it", "do", "than", "some", "what", "was", "class", "my", "there",
        "both", "would", "even"
    ]
    for words in ff_q:
        if words not in my_stop_words:
            final_my_query_new += " " + str(words)
    q = mparser.parse(final_my_query_new)
    final_link = []
    final_highlights = []
    with ix.searcher() as s:
        results = s.search(q, limit=10)
        results.fragmenter.surround = 50
        # print(results)
        for r in results:
            final_link.append(r['href'])
            final_highlights.append(str(r.highlights("page_content")))
    return final_link, final_highlights


# def search(request):
#     cur_dir = os.path.normpath('test_xlsx_file_here')
#
#     for sub_dir, dirs, files in os.walk(cur_dir):
#         for file in files:
#             if file.endswith(".xlsx") and not file.startswith("~"):
#                 name = os.path.join(sub_dir, file)
#                 wb = load_workbook(str(name))
#                 ws = wb.get_sheet_by_name(wb.get_sheet_names()[0])
#                 type = []
#                 text_code = []
#                 for i in range(2, ws.max_row + 1):
#                     temp = ""
#                     if ws.cell(row=i, column=2).value != None:
#                         temp = temp + str(ws.cell(row=i, column=2).value)
#                     if ws.cell(row=i, column=3).value != None:
#                         temp = temp + str(ws.cell(row=i, column=3).value)
#                     type.append(str(ws.cell(row=i, column=1).value))
#                     text_code.append(temp)
#     # create_index()
#     final_l1, final_h1 = query(text_code[0])
#     final_l2, final_h2 = query(text_code[1])
#     final_l3, final_h3 = query(text_code[2])
#     final_l4, final_h4 = query(text_code[3])
#     final_l5, final_h5 = query(text_code[4])
#     final_l6, final_h6 = query(text_code[5])
#     final_l7, final_h7 = query(text_code[6])
#     final_l8, final_h8 = query(text_code[7])
#     final_l9, final_h9 = query(text_code[8])
#     final_l10, final_h10 = query(text_code[9])
#
#     context = {
#         'type': type,
#         'text_code': text_code,
#         'final_link1': final_l1,
#         'final_link2': final_l2,
#         'final_link3': final_l3,
#         'final_link4': final_l4,
#         'final_link5': final_l5,
#         'final_link6': final_l6,
#         'final_link7': final_l7,
#         'final_link8': final_l8,
#         'final_link9': final_l9,
#         'final_link10': final_l10,
#         'final_h1': final_h1,
#         'final_h2': final_h2,
#         'final_h3': final_h3,
#         'final_h4': final_h4,
#         'final_h5': final_h5,
#         'final_h6': final_h6,
#         'final_h7': final_h7,
#         'final_h8': final_h8,
#         'final_h9': final_h9,
#         'final_h10': final_h10,
#     }
#     return render(request, 'index.html', context)