def _search_query(search_string, index_dir, search_field):
    """Run a whoosh query against the index stored at *index_dir*.

    Args:
        search_string: raw query text, parsed with prefix and fuzzy-term
            plugins enabled.
        index_dir: filesystem path of the whoosh index directory.
        search_field: default field name the parser targets.

    Returns:
        A list of up to 20 result dicts (one per matching document).
    """
    search_index = index.open_dir(index_dir)
    # Use the searcher as a context manager so its file handles are released
    # even if parsing or searching raises; the original leaked the searcher.
    with search_index.searcher() as searcher:
        query_parser = qparser.QueryParser(search_field, schema=search_index.schema)
        query_parser.add_plugin(qparser.PrefixPlugin())
        query_parser.add_plugin(qparser.FuzzyTermPlugin())
        results = searcher.search(query_parser.parse(search_string), limit=20)
        # Materialize the hits while the searcher is still open.
        return [dict(result) for result in results]
def search_index(words):
    """Look up each word in the 'duanluo' field of the global index.

    Args:
        words: iterable of query terms.

    Returns:
        A list of (id, duanluo) tuples, up to 10 hits per word.
    """
    matches = []
    with ix.searcher() as searcher:
        parser = QueryParser('duanluo', schema=ix.schema, group=qparser.OrGroup)
        # Trade the wildcard plugin for prefix matching.
        parser.remove_plugin_class(qparser.WildcardPlugin)
        parser.add_plugin(qparser.PrefixPlugin())
        for term in words:
            parsed = parser.parse(u'{}'.format(term))
            for hit in searcher.search(parsed, limit=10):
                matches.append((hit['id'], hit['duanluo']))
    return matches
def search_index(words):
    """Collect 'section' values matching each query word.

    Args:
        words: iterable of query terms.

    Returns:
        A flat list of matching section strings, up to 10 per word.
    """
    sections = []
    with ix.searcher() as searcher:
        # Prefix matching is enabled in place of wildcards.
        parser = QueryParser('section', schema=ix.schema, group=qparser.OrGroup)
        parser.remove_plugin_class(qparser.WildcardPlugin)
        parser.add_plugin(qparser.PrefixPlugin())
        for term in words:
            parsed = parser.parse(u'{}'.format(term))
            for hit in searcher.search(parsed, limit=10):
                sections.append(hit['section'])
    return sections
def _search(self, query_string, search_model, user, global_and_search=False):
    """Search the whoosh index of *search_model* and return a queryset.

    Args:
        query_string: mapping of field name -> query text; the special key
            'q' searches every field of the model at once.
        search_model: model whose index is queried and whose queryset is
            filtered by the matching ids.
        user: unused here; kept for interface compatibility.
        global_and_search: join per-field clauses with AND when True,
            otherwise with OR.

    Returns:
        A distinct, limit-capped queryset of matching model instances.
    """
    index = self.get_index(search_model=search_model)
    id_list = []
    with index.searcher() as searcher:
        if 'q' in query_string:
            # Emulate a full field set search: apply the single query text
            # to every searchable field of the model.
            clauses = [
                '{}:({})'.format(field.get_full_name(), query_string['q'])
                for field in self.get_search_model_fields(search_model=search_model)
            ]
        else:
            clauses = [
                '{}:({})'.format(key, value)
                for key, value in query_string.items()
                if value
            ]
        joiner = ' AND ' if global_and_search else ' OR '
        search_string = joiner.join(clauses)
        logger.debug('search_string: %s', search_string)
        parser = qparser.QueryParser(fieldname='_', schema=index.schema)
        # Prefix matching instead of wildcards.
        parser.remove_plugin_class(cls=qparser.WildcardPlugin)
        parser.add_plugin(pin=qparser.PrefixPlugin())
        query = parser.parse(text=search_string)
        results = searcher.search(q=query, limit=setting_results_limit.value)
        logger.debug('results: %s', results)
        for result in results:
            id_list.append(result['id'])
    queryset = search_model.get_queryset().filter(id__in=id_list).distinct()
    return SearchBackend.limit_queryset(queryset=queryset)
def search_index(words):
    """Return 'section' values related to each query word.

    Args:
        words: iterable of query terms.

    Returns:
        A flat list of matching section strings, up to 10 per word.
    """
    related = []
    with ix.searcher() as searcher:
        # group=qparser.OrGroup: a document matching ANY query term is a
        # hit, rather than requiring all terms to match.
        parser = QueryParser('section', schema=ix.schema, group=qparser.OrGroup)
        # The next two lines enable prefix-style wildcard searches,
        # e.g. "窗前*月光".
        parser.remove_plugin_class(qparser.WildcardPlugin)
        parser.add_plugin(qparser.PrefixPlugin())
        for term in words:
            parsed = parser.parse(u'%s' % term)
            # limit: maximum number of hits returned per word.
            for hit in searcher.search(parsed, limit=10):
                related.append(hit['section'])
    return related
def search_index(words):
    """Collect (pid, part) pairs related to each word.

    The first hit for every word is skipped, since it is typically the
    queried paragraph itself.

    Args:
        words: iterable of query terms.

    Returns:
        A list of (pid, part) tuples.
    """
    related = []
    with ix.searcher() as searcher:
        # group=qparser.OrGroup: matching any query term is enough.
        parser = QueryParser('part', schema=ix.schema, group=qparser.OrGroup)
        # The next two lines enable prefix-style wildcard searches,
        # e.g. "窗前*月光".
        parser.remove_plugin_class(qparser.WildcardPlugin)
        parser.add_plugin(qparser.PrefixPlugin())
        # Randomize how many hits are kept per word.
        limit = random.randint(3, 7)
        for term in words:
            parsed = parser.parse(u'%s' % term)
            for position, hit in enumerate(searcher.search(parsed, limit=limit)):
                # Skip the first result to avoid returning the item itself.
                if position > 0:
                    related.append((hit['pid'], hit['part']))
    return related
def base_query():
    """Handle a search request POSTed to /index.

    Parses the query text (honoring an optional trailing field marker),
    runs it against the whoosh index, highlights matches, stashes the
    result list in the session and returns the URL of the display page.

    Returns:
        ("没有查询到相关内容!", 404) when nothing matched, otherwise a JSON
        payload carrying the redirect URL.
    """
    assert request.path == '/index'
    query_sentence = str(dict(request.form)["query"][0])
    logging.info("Query sentence: %s" % query_sentence)
    res = []
    with ix.searcher() as searcher:
        # Whether the 学院 (college) field is highlighted in the results.
        highlight_xy = True
        # Default: multi-field query over every indexed attribute.
        query = qparser.MultifieldParser(
            ["content", "title", "mtext", "xueyuan"], ix.schema)
        # A trailing marker narrows the search to one field.  The marker is
        # removed by exact suffix slicing: the original used str.strip(),
        # which also eats any of those characters from the FRONT of the
        # query (e.g. a name starting with '姓' would be mangled).
        for marker, field in (("$姓名$", "title"),      # by name
                              ("$学院$", "xueyuan"),    # by college
                              ("$网页$", "content")):   # by page content
            if query_sentence.endswith(marker):
                query = qparser.SimpleParser(field, ix.schema)
                query_sentence = query_sentence[:-len(marker)]
                break
        # Extra parser plugins: wildcard, prefix, operators, regex, phrase.
        query.add_plugin(qparser.WildcardPlugin)
        query.add_plugin(qparser.PrefixPlugin())
        query.add_plugin(qparser.OperatorsPlugin)
        query.add_plugin(qparser.RegexPlugin)
        query.add_plugin(qparser.PhrasePlugin)
        q = query.parse(query_sentence)
        logging.info("Query parse result: %s" % str(q))
        print(q)
        result = searcher.search(q, limit=20)
        # Highlighting: larger fragments with some surrounding context.
        my_cf = highlight.ContextFragmenter(maxchars=200, surround=30)
        hf = highlight.HtmlFormatter(tagname='em', classname='match',
                                     termclass='term')
        hi = highlight.Highlighter(fragmenter=my_cf, formatter=hf)
        for hit in result:
            print(hit["picpath"])
            print(hit["title"])
            print(escape(hi.highlight_hit(hit, "content")))
            # The original's four near-identical append branches reduce to
            # two independent choices: picture path and college highlighting.
            if hit['picpath'] == '#':
                picpath = '#'
            else:
                picpath = "images/%s/%s" % (hit['picpath'].split('/')[-3],
                                            hit['picpath'].split('/')[-1])
            if highlight_xy:
                xueyuan = Markup(hi.highlight_hit(hit, "xueyuan"))
            else:
                xueyuan = hit["xueyuan"]
            res.append({
                "title": hit['title'],
                "xueyuan": xueyuan,
                "url": hit["url"],
                'shotpath': hit['shotpath'],
                "content": Markup(hi.highlight_hit(hit, "content")),
                "parenturl": hit["parenturl"],
                "picpath": picpath,
                "pagerank": scores[url_dict[hit["url"]]]
            })
        print(len(result))
        print(res)
        count = len(result)
        if count == 0:
            logging.warning("%d,没有查询到相关内容!" % 404)
            return "没有查询到相关内容!", 404
        # Record every returned (name, url) pair in the query log.
        log = "Response: "
        for item in res:
            log = log + " (name:%s,url:%s) " % (item["title"], item["url"])
        logging.info(log)
        # Pass the result list to the display view via the session.
        mysession["data"] = res
        return jsonify({"url": "/display/%d&%s" % (count, query_sentence)})