def testEquality(self):
    """Two BooleanQueries built from identical clause trees must compare equal."""
    def build():
        # Nested sub-query with two optional terms.
        nested = BooleanQuery()
        nested.add(TermQuery(Term("field", "nestedvalue1")), BooleanClause.Occur.SHOULD)
        nested.add(TermQuery(Term("field", "nestedvalue2")), BooleanClause.Occur.SHOULD)
        # Outer query: two SHOULD terms followed by the nested SHOULD clause.
        outer = BooleanQuery()
        outer.add(TermQuery(Term("field", "value1")), BooleanClause.Occur.SHOULD)
        outer.add(TermQuery(Term("field", "value2")), BooleanClause.Occur.SHOULD)
        outer.add(nested, BooleanClause.Occur.SHOULD)
        return outer
    self.assert_(build().equals(build()))
def testBraces(self):
    """Parenthesised expressions convert to the equivalent Lucene queries."""
    # A single parenthesised term is just a term query.
    self.assertConversion(TermQuery(Term('unqualified', 'cats')), '(cats)')
    # '(cats AND dogs) OR mice': inner conjunction nested inside a disjunction.
    inner = BooleanQuery()
    for word in ('cats', 'dogs'):
        inner.add(TermQuery(Term('unqualified', word)), BooleanClause.Occur.MUST)
    outer = BooleanQuery()
    outer.add(inner, BooleanClause.Occur.SHOULD)
    outer.add(TermQuery(Term('unqualified', 'mice')), BooleanClause.Occur.SHOULD)
    self.assertConversion(outer, '(cats AND dogs) OR mice')
def testParenthesisMust2(self):
    """(t1 OR t2) as SHOULD plus (c1 OR c2) as MUST matches exactly one doc."""
    optional = BooleanQuery()
    optional.add(BooleanClause(self.t1, BooleanClause.Occur.SHOULD))
    optional.add(BooleanClause(self.t2, BooleanClause.Occur.SHOULD))
    required = BooleanQuery()
    required.add(BooleanClause(self.c1, BooleanClause.Occur.SHOULD))
    required.add(BooleanClause(self.c2, BooleanClause.Occur.SHOULD))
    combined = BooleanQuery()
    combined.add(optional, BooleanClause.Occur.SHOULD)
    combined.add(required, BooleanClause.Occur.MUST)
    self.assertEqual(1, self.search(combined))
def run(searcher, analyzer):
    """Interactive search loop: read a query, AND all parsed fields, and
    print title/url/src for the top 50 hits.

    Exits when the user submits an empty line. (Python 2: print statements,
    raw_input, explicit UTF-8 decode of the console input.)
    """
    while True:
        print
        print "Hit enter with no input to quit."
        command = raw_input("Query:")
        command = unicode(command, 'UTF-8')
        if command == '':
            return
        print
        print "Searching for:", command
        # One sub-query per parsed field, combined with MUST (AND semantics).
        querys = BooleanQuery()
        command_dict = parseCommand(command)
        for k, v in command_dict.iteritems():
            query = QueryParser(Version.LUCENE_CURRENT, k, analyzer).parse(v)
            querys.add(query, BooleanClause.Occur.MUST)
        scoreDocs = searcher.search(querys, 50).scoreDocs  # top 50 hits
        print "%s total matching documents." % len(scoreDocs)
        for scoreDoc in scoreDocs:
            doc = searcher.doc(scoreDoc.doc)
            print '------------------------------------------------'
            print 'title:', doc.get('title')
            print 'url:', doc.get('url')
            print 'src:', doc.get('src')
def run(searcher, analyzer):
    """Interactive search loop with jieba segmentation for the 'contents' field.

    Exits (printing a quit banner) on empty input. Shows up to
    MAX_ITEMS_PER_PAGE hits with title/url/path/name.
    """
    while True:
        print
        print "Hit enter with no input to quit."
        command = raw_input("Query:")
        if command == '':
            print "=== [ QUIT ] ==="
            return
        print
        print "Searching for:", command
        command_dict = parseCommand(command)
        querys = BooleanQuery()
        for k, v in command_dict.iteritems():
            if 'contents' == k:
                # Chinese text must be pre-segmented before QueryParser sees it.
                v = " ".join(jieba.cut(v))
            if DEBUG_MODE:
                print k, v
            query = QueryParser(Version.LUCENE_CURRENT, k, analyzer).parse(v)
            querys.add(query, BooleanClause.Occur.MUST)
        scoreDocs = searcher.search(querys, MAX_ITEMS_PER_PAGE).scoreDocs
        print "%s total matching documents." % len(scoreDocs)
        for idx, scoreDoc in enumerate(scoreDocs):
            doc = searcher.doc(scoreDoc.doc)
            # # explanation = searcher.explain(query, scoreDoc.doc)
            print "-- #", str(idx + 1), "--"
            print '\ttitle:\t', doc.get("title")
            print '\turl:\t', doc.get("url")
            print '\tpath:\t', doc.get("path")
            print '\tname:\t', doc.get("name")
            print
def run_pic(valueFromOut, searcher, analyzer): command = valueFromOut seg_list = jieba.cut(command) command = " ".join(seg_list) if command == '': return result = [] command_dict = parseCommand(command) querys = BooleanQuery() for k, v in command_dict.iteritems(): query = QueryParser(Version.LUCENE_CURRENT, k, analyzer).parse(v) querys.add(query, BooleanClause.Occur.MUST) scoreDocs = searcher.search(querys, 10).scoreDocs print "%s total matching documents." % len(scoreDocs) for scoreDoc in scoreDocs: doc = searcher.doc(scoreDoc.doc) partResult = {} partResult['title'] = doc.get('title') partResult['url'] = doc.get('url') partResult['imgurl'] = doc.get('imgurl') result.append(partResult) return result
def run(searcher, analyzer):
    """Interactive search loop; a 'site' field becomes a wildcard URL match.

    Exits on empty input; prints title and url for the top 50 hits.
    """
    while True:
        print
        print "Hit enter with no input to quit."
        command = raw_input("Query:")
        command = unicode(command, 'UTF-8')
        if command == '':
            return
        print
        print "Searching for:", command
        # Example Chinese query: "Julie & Julia" (a movie title)
        # final = jieba.cut(command)
        # query = QueryParser(Version.LUCENE_CURRENT, "contents",
        #                     analyzer).parse(' '.join(final))
        querys = BooleanQuery()
        command_dict = parseCommand(command)
        for k, v in command_dict.iteritems():
            if (k == 'site'):
                # Match the site string anywhere inside the stored URL.
                t = Term('url', '*' + v.strip() + '*')
                query = WildcardQuery(t)
            else:
                query = QueryParser(Version.LUCENE_CURRENT, k, analyzer).parse(v)
            querys.add(query, BooleanClause.Occur.MUST)
        scoreDocs = searcher.search(querys, 50).scoreDocs
        print "%s total matching documents." % len(scoreDocs)
        for scoreDoc in scoreDocs:
            doc = searcher.doc(scoreDoc.doc)
            print '------------------------------------------'
            # print 'path:', doc.get("path"), 'name:', doc.get("name"), 'site:', doc.get('site')
            print 'title:', doc.get('title'),
            print 'url:', doc.get('url')
def search(**kwargs):
    """OR-search every given field for its jieba-segmented keywords.

    Keywords destined for the 'keywords' field are additionally matched
    against 'ent_name'. Returns the top 50 docs passed through retrieve().
    """
    vm_env.attachCurrentThread()
    query = BooleanQuery()
    print("Searched keywords:")
    for field_name, keywords in kwargs.items():
        # assert field_name in SearchConfig.searchable_fields
        # keywords = list(filter(None, jieba.cut(keywords, cut_all=True)))
        keywords = [w.strip() for w in jieba.cut_for_search(keywords) if w.strip()]
        for kw in keywords:
            print(kw)
        # Each keyword contributes an optional (SHOULD) clause.
        for kw in keywords:
            sub = QueryParser(Version.LUCENE_CURRENT, field_name, analyzer).parse(kw)
            query.add(sub, BooleanClause.Occur.SHOULD)
        if field_name == 'keywords':
            # Keywords should also hit the entity-name field.
            for kw in keywords:
                sub = QueryParser(Version.LUCENE_CURRENT, 'ent_name', analyzer).parse(kw)
                query.add(sub, BooleanClause.Occur.SHOULD)
    # search
    hits = searcher.search(query, 50).scoreDocs
    return [retrieve(searcher.doc(hit.doc)) for hit in hits]
def run(searcher, analyzer): while True: print print "Hit enter with no input to quit." command = raw_input("Query:") command = unicode(command) if command == '': return command_dict = parseCommand(command) seg_list = jieba.cut(command_dict['contents']) command_dict['contents'] = (" ".join(seg_list)) querys = BooleanQuery() for k, v in command_dict.iteritems(): query = QueryParser(Version.LUCENE_CURRENT, k, analyzer).parse(v) querys.add(query, BooleanClause.Occur.MUST) print print "Searching for:", command scoreDocs = searcher.search(querys, 50).scoreDocs print "%s total matching documents." % len(scoreDocs) for i, scoreDoc in enumerate(scoreDocs): doc = searcher.doc(scoreDoc.doc) print 'path:', doc.get("path"), \ '\nname:', doc.get("name"), \ '\ntitle:', doc.get("title"), \ "url:",doc.get("url"), \ "\nsite:",doc.get("site"), "\n"
def run(searcher, analyzer):
    """Interactive search loop that de-duplicates printed results by title.

    Exits on empty input. Prints title/url/score/contents for each unique
    title among the top 50 hits, then the count of unique documents.
    """
    while True:
        print
        print "Hit enter with no input to quit."
        command = raw_input("Query:")
        if command == '':
            return
        print
        print "Searching for:", command
        command_dict = parseCommand(command)
        querys = BooleanQuery()
        for k, v in command_dict.iteritems():
            query = QueryParser(Version.LUCENE_CURRENT, k, analyzer).parse(v)
            querys.add(query, BooleanClause.Occur.MUST)
        scoreDocs = searcher.search(querys, 50).scoreDocs
        finalDocTitles = []
        for i, scoreDoc in enumerate(scoreDocs):
            doc = searcher.doc(scoreDoc.doc)
            # Skip documents whose title was already printed.
            if (doc.get("title") not in finalDocTitles):
                print 'title:', doc.get("title"), 'url:', doc.get(
                    "url"), 'score:', scoreDoc.score, 'contents:', doc.get(
                    'contents')
                finalDocTitles.append(doc.get("title"))
            # print 'explain:', searcher.explain(query, scoreDoc.doc)
        print "%s total matching documents." % len(finalDocTitles)
def run_img(command):
    """Search the image index ('index2') for `command` in urlcontent OR title.

    Opens its own searcher over the on-disk index. Returns a list of dicts
    with title/url/imgurl for up to 50 hits (empty list when none).
    """
    vm_env = lucene.getVMEnv()
    vm_env.attachCurrentThread()
    STORE_DIR = "index2"
    directory = SimpleFSDirectory(File(STORE_DIR))
    searcher = IndexSearcher(DirectoryReader.open(directory))
    analyzer = WhitespaceAnalyzer(Version.LUCENE_CURRENT)
    querys = BooleanQuery()
    query_content = QueryParser(Version.LUCENE_CURRENT, "urlcontent",
                                analyzer).parse(command)
    query_title = QueryParser(Version.LUCENE_CURRENT, "title",
                              analyzer).parse(command)
    # Either field matching suffices (OR semantics).
    querys.add(query_content, BooleanClause.Occur.SHOULD)
    querys.add(query_title, BooleanClause.Occur.SHOULD)
    scoreDocs = searcher.search(querys, 50).scoreDocs
    if len(scoreDocs) == 0:
        print "WARNING: No result"
    result = []
    for scoreDoc in scoreDocs:
        doc = searcher.doc(scoreDoc.doc)
        print doc.get("title")
        data = {}
        data['title'] = doc.get('title')
        data['url'] = doc.get('url')
        data['imgurl'] = doc.get('imgurl')
        result.append(data)
    return result
def text_search(command):
    """AND-search parsed fields over the text index and return
    [title, url, highlighted-snippet] triples for the top 30 hits.

    Snippets are built by highlighting the 'contents' text with the
    user's query, then stripped of all whitespace.
    """
    envir.vm_env.attachCurrentThread()
    command_dict = parseCommand(command, "contents")
    querys = BooleanQuery()
    for k, v in command_dict.iteritems():
        query = QueryParser(Version.LUCENE_CURRENT, k, envir.analyzer).parse(v)
        querys.add(query, BooleanClause.Occur.MUST)
    scoreDocs = envir.text_searcher.search(querys, 30).scoreDocs
    res = []
    # BUG FIX: parse the highlight query against the "contents" field
    # explicitly. The original reused the loop variable `k` after the
    # loop ended, so the highlight query could be parsed against whichever
    # field happened to iterate last, not the contents text it highlights.
    query_highlight = QueryParser(Version.LUCENE_CURRENT, "contents",
                                  envir.analyzer).parse(command_dict["contents"])
    myhighlighter = Highlighter(SimpleHTMLFormatter(),
                                QueryScorer(query_highlight))
    myhighlighter.setTextFragmenter(SimpleFragmenter(50))
    for scoreDoc in scoreDocs:
        # find texts which are around the keyword
        doc = envir.text_searcher.doc(scoreDoc.doc)
        text = doc.get("contents")
        key_text = "".join(myhighlighter.getBestFragments(
            envir.analyzer, "contents", text, 3))
        key_text = re.sub('\s', '', key_text)  # drop all whitespace
        res.append([doc.get("title"), doc.get('url'), key_text])
    return res
def createDrilldownQuery(self, luceneQuery, drilldownQueries):
    """Conjoin the base query (if any) with one term per drilldown
    (field, path) pair; the True flag disables coord scoring."""
    combined = BooleanQuery(True)
    if luceneQuery:
        combined.add(luceneQuery, BooleanClause.Occur.MUST)
    for field, path in drilldownQueries:
        term = self._fieldRegistry.makeDrilldownTerm(field, path)
        combined.add(TermQuery(term), BooleanClause.Occur.MUST)
    return combined
def lucene_sample_query_parse(sampleq, ftypes):
    """Build a BooleanQuery over sample metadata from 'field OP value' strings.

    Numeric (int/float) fields become range queries, multi-word values
    become exact phrase queries (slop 0), everything else a term query.
    Writes diagnostics to stderr and exits the process on an
    unrecognised operator.

    CLEANUP: removed the unused `fields`, `queries` and `booleans` lists,
    and replaced a list comprehension used purely for side effects with a
    plain loop.
    """
    bq = BooleanQuery()
    for query_tuple in sampleq:
        (field, op_, value) = re.split(snapconf.RANGE_QUERY_OPS, query_tuple)
        m = snapconf.RANGE_QUERY_FIELD_PATTERN.search(query_tuple)
        if m is None or field is None:
            continue
        op = m.group(1)
        if op not in snapconf.operators:
            sys.stderr.write("bad operator %s in range query,exiting\n" % (str(op)))
            sys.exit(-1)
        field_w_type = snapconf.SAMPLE_HEADER_FIELDS_TYPE_MAP[field]
        (fieldtypechar, ftype_method) = ftypes[field_w_type]
        if fieldtypechar == 'i' or fieldtypechar == 'f':
            # range query over a numeric field
            bq.add(lucene_range_query_parse(field_w_type, op, value,
                                            fieldtypechar, ftype_method),
                   BOOLEAN_OCCUR)
        elif ' ' in value or '\t' in value:
            # phrase query; slop 0 forces exact phrase matching only
            pquery = PhraseQuery()
            for v in re.split(r'\s+', value):
                pquery.add(Term(field_w_type, v.lower()))
            pquery.setSlop(0)
            bq.add(pquery, BOOLEAN_OCCUR)
        else:
            # term query
            bq.add(TermQuery(Term(field_w_type, value.lower())), BOOLEAN_OCCUR)
            sys.stderr.write("value + fields: %s %s\n" % (value.lower(), field_w_type))
    return bq
def search_kw(kw, mode):
    """Search movies whose 'introduction' contains every jieba token of kw.

    mode truthy -> sort by 'score' (string field, descending);
    otherwise   -> sort by 'comments' (float field, descending).
    Returns up to 20 rows of
    [url, picture, title, score, genre, stars, comments].
    """
    vm_env.attachCurrentThread()
    lists = []
    l = jieba.cut(kw)
    query = BooleanQuery()
    for i in l:
        # every segmented token must match (AND semantics)
        ii = QueryParser(Version.LUCENE_CURRENT, "introduction",
                         analyzer).parse(i)
        query.add(ii, BooleanClause.Occur.MUST)
    if mode:
        sf = SortField("score", SortField.Type.STRING, True)
        s = Sort(sf)
    else:
        sf = SortField("comments", SortField.Type.FLOAT, True)
        s = Sort(sf)
    scoreDocs = searcher1.search(query, 20, s).scoreDocs
    for scoreDoc in scoreDocs:
        movie = []
        doc = searcher1.doc(scoreDoc.doc)
        ####
        movie.append(doc.get("url"))
        movie.append(doc.get("picture"))
        movie.append(doc.get("title"))
        movie.append(doc.get("score"))
        movie.append(doc.get("genre"))
        movie.append(doc.get("stars"))
        movie.append(doc.get("comments"))
        #####
        lists.append(movie)
    return lists
def func1(genre, year):
    """Filter movies by genre and/or year, sorted by score descending.

    The literal string "111" acts as the caller's wildcard for either
    argument. Returns up to 20 rows of
    [url, picture, title, score, genre, stars, comments].
    """
    vm_env.attachCurrentThread()
    combined = BooleanQuery()
    for field, text in (("genre", genre), ("year", year)):
        if text != "111":  # "111" means "any value" for this field
            clause = QueryParser(Version.LUCENE_CURRENT, field,
                                 analyzer).parse(text)
            combined.add(clause, BooleanClause.Occur.MUST)
    ordering = Sort(SortField("score", SortField.Type.STRING, True))
    hits = searcher1.search(combined, 20, ordering).scoreDocs
    rows = []
    for hit in hits:
        doc = searcher1.doc(hit.doc)
        rows.append([doc.get(key) for key in
                     ("url", "picture", "title", "score",
                      "genre", "stars", "comments")])
    return rows
def get_or_query(self, queries):
    """Combine multiple Lucene queries into one OR (SHOULD) query.

    The False constructor flag disables Similarity.coord() on the
    resulting boolean query.
    """
    combined = BooleanQuery(False)
    for query in queries:
        combined.add(query, BooleanClause.Occur.SHOULD)
    return combined
def run(searcher, analyzer):
    """Interactive search loop: AND all parsed fields and print the top 10
    hits (path/name/title/url). Exits on empty input."""
    while True:
        print
        print "Hit enter with no input to quit."
        command = raw_input("Query:")
        command = unicode(command, 'utf-8')
        if command == '':
            return
        print "Searching for:", command
        command_dict = parseCommand(command)
        querys = BooleanQuery()
        for k, v in command_dict.iteritems():
            print k, v
            query = QueryParser(Version.LUCENE_CURRENT, k, analyzer).parse(v)
            querys.add(query, BooleanClause.Occur.MUST)
        scoreDocs = searcher.search(querys, 10).scoreDocs
        print "%s total matching documents." % len(scoreDocs)
        for scoreDoc in scoreDocs:
            doc = searcher.doc(scoreDoc.doc)
            ## explanation = searcher.explain(query, scoreDoc.doc)
            print "------------------------"
            print 'path:', doc.get("path")
            print 'name:', doc.get("name")
            print 'title:', doc.get('title')
            print 'url:', doc.get("url")
def main():
    """Dump (user_index, initial_rank, followed_users) rows for PageRank.

    Pages through every document of type 'user' 100 at a time via
    searchAfter, writing one tab-separated line per user that has a
    followed_users list to pagerank_data.txt; users without one are only
    logged ('I user').
    """
    _vm = lucene.initVM(vmargs=['-Djava.awt.headless=true'])
    query = BooleanQuery()
    query.add(MatchAllDocsQuery(), BooleanClause.Occur.MUST)
    query.add(TermQuery(Term('type', 'user')), BooleanClause.Occur.MUST)
    i = 0
    with zh_iatd.create_searcher() as searcher:
        with open('pagerank_data.txt', 'w') as fout:
            reslst = searcher.searcher.search(query, 100)
            # every user starts with an equal share of the rank mass
            initval = 1.0 / reslst.totalHits
            while len(reslst.scoreDocs) > 0:
                for x in reslst.scoreDocs:
                    realdoc = searcher.searcher.doc(x.doc)
                    obj = document_to_obj(realdoc)
                    if not obj.data.followed_users is None:
                        print '{0:8}'.format(i), ' user', obj.index, len(
                            obj.data.followed_users)
                        fout.write('{0}\t{1}\t{2}\n'.format(
                            obj.index, initval, ' '.join(
                                (x.encode('utf8')
                                 for x in obj.data.followed_users))))
                    else:
                        print '{0:8}'.format(i), 'I user', obj.index
                    i += 1
                # continue paging after the last doc of this batch
                reslst = searcher.searcher.searchAfter(
                    reslst.scoreDocs[-1], query, 100)
def ch_seach(self, command_dict, target_range=None,
             targets=('title', 'author', 'text', 'likes', 'imgurl', 'label')):
    """Chinese search over the ch index.

    (Note: the name typo 'seach' is kept for caller compatibility.)

    command_dict maps field -> (text, required_flag); only the fields
    author/title/label/content are honoured. Text is jieba-segmented;
    required fields are AND-ed (MUST), others OR-ed (SHOULD).
    target_range optionally slices the hit list as (start, end).
    Returns (total_match, rows) where each row maps each name in
    `targets` to the stored field value.
    """
    res = []
    querys = BooleanQuery()
    for key, value in command_dict.items():
        if key not in ['author', 'title', 'label', 'content']:
            continue
        query = QueryParser(Version.LUCENE_CURRENT, key,
                            self.Analyzer).parse(utils.jieba_seg(value[0]))
        if value[1]:
            querys.add(query, BooleanClause.Occur.MUST)
        else:
            querys.add(query, BooleanClause.Occur.SHOULD)
    totalDocs = self.chSearcher.search(querys, utils.MAX_RESULTS).scoreDocs
    total_match = len(totalDocs)
    if target_range is None:
        scoreDocs = totalDocs[:]
    else:
        # clamp the requested window to [0, total_match]
        scoreDocs = totalDocs[max(0, int(target_range[0])):
                              min(total_match, int(target_range[1]))]
    del totalDocs
    for i, scoreDoc in enumerate(scoreDocs):
        doc = self.chSearcher.doc(scoreDoc.doc)
        res.append({key: doc.get(key) for key in targets})
    return total_match, res
def testWildcards(self):
    """A trailing '*' becomes a PrefixQuery (case-normalised); leading or
    inner wildcards are unsupported and fall back to a plain TermQuery."""
    query = PrefixQuery(Term('unqualified', 'prefix'))
    self.assertConversion(query, 'prefix*')
    self.assertConversion(query, 'PREfix*')  # input is lowercased
    query = PrefixQuery(Term('field', 'prefix'))
    self.assertConversion(query, 'field="PREfix*"')
    self.assertConversion(query, 'field=prefix*')
    query = PrefixQuery(Term('field', 'oc-0123'))
    self.assertConversion(query, 'field="oc-0123*"')
    query = TermQuery(Term('field', 'p'))
    self.assertConversion(query, 'field="P*"')
    #only prefix queries for now
    query = TermQuery(Term('field', 'post'))
    self.assertConversion(query, 'field="*post"')
    query = TermQuery(Term('field', 'prefix'))
    self.assertConversion(query, 'field=prefix**')
    # an unqualified prefix expands to boosted SHOULD clauses, one per
    # configured unqualified term field
    result = LuceneQueryComposer(
        unqualifiedTermFields=[("field0", 0.2), ("field1", 2.0)],
        luceneSettings=LuceneSettings()).compose(parseCql("prefix*"))
    query = BooleanQuery()
    left = PrefixQuery(Term("field0", "prefix"))
    left.setBoost(0.2)
    query.add(left, BooleanClause.Occur.SHOULD)
    right = PrefixQuery(Term("field1", "prefix"))
    right.setBoost(2.0)
    query.add(right, BooleanClause.Occur.SHOULD)
    self.assertEquals(type(query), type(result))
    self.assertEquals(repr(query), repr(result))
def testFlat(self):
    """Four sibling SHOULD clauses in one flat query match exactly one doc."""
    flat = BooleanQuery()
    for subquery in (self.t1, self.t2, self.c1, self.c2):
        flat.add(BooleanClause(subquery, BooleanClause.Occur.SHOULD))
    self.assertEqual(1, self.search(flat))
def visitSCOPED_CLAUSE(self, node):
    """Translate a CQL scoped clause into a Lucene BooleanQuery.

    A single-element clause passes through unchanged; otherwise the
    boolean operator selects each side's occurrence via the
    LHS_OCCUR / RHS_OCCUR lookup tables.
    """
    clause = CqlVisitor.visitSCOPED_CLAUSE(self, node)
    if len(clause) == 1:
        return clause[0]
    lhs, operator, rhs = clause
    combined = BooleanQuery()
    combined.add(lhs, LHS_OCCUR[operator])
    combined.add(rhs, RHS_OCCUR[operator])
    return combined
def search_dianping(province, kind, query):
    """Find the top-ranked dianping match for `query` in `province`.

    kind must be 'food' or 'foodshop'; returns None for an invalid kind
    or empty query. Searches the on-disk index and returns a dict for
    the highest-'rank' shop among the top 50 hits (name, rank, food,
    location, tel and the score fields, all utf-8 encoded). An empty
    dict is returned when there are no hits.
    """
    STORE_DIR = "index"
    vm_env.attachCurrentThread()
    #base_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    directory = SimpleFSDirectory(File(STORE_DIR))
    searcher = IndexSearcher(DirectoryReader.open(directory))
    analyzer = WhitespaceAnalyzer(Version.LUCENE_CURRENT)
    allowed_opt = ['food', 'foodshop']
    if kind not in allowed_opt:
        return None
    if query == '':
        return None
    command = '%s:%s province:%s' % (kind, query, province)
    command = unicode(command, 'utf8', 'ignore')
    command_dict = parseCommand(command)
    querys = BooleanQuery()
    for k, v in command_dict.iteritems():
        query = QueryParser(Version.LUCENE_CURRENT, k, analyzer).parse(v)
        querys.add(query, BooleanClause.Occur.MUST)
    scoreDocs = searcher.search(querys, 50).scoreDocs
    # compare ratings: first pass finds the highest 'rank' value
    max_rank = 0
    best_shop = ''
    for scoreDoc in scoreDocs:
        doc = searcher.doc(scoreDoc.doc)
        cur_shop = doc.get("foodshop").split()[-1]
        cur_rank = float(doc.get('rank'))
        if cur_rank > max_rank:
            max_rank = cur_rank
            best_shop = cur_shop
    # second pass fills `result` from the doc(s) carrying the best rank
    result = {}
    for scoreDoc in scoreDocs:
        doc = searcher.doc(scoreDoc.doc)
        cur_shop = doc.get("foodshop").split()[-1]
        cur_rank = float(doc.get('rank'))
        if cur_rank == max_rank:
            result['name'] = cur_shop.encode('utf8', 'ignore')
            result['rank'] = doc.get('rank').encode('utf8', 'ignore')
            result['food'] = doc.get('food').encode('utf8', 'ignore')
            result['location'] = doc.get('location').encode('utf8', 'ignore')
            result['tel'] = doc.get('tel').encode('utf8', 'ignore')
            result['environment_score'] = doc.get('environment_score').encode(
                'utf8', 'ignore')
            result['flavour_score'] = doc.get('flavour_score').encode(
                'utf8', 'ignore')
            result['service_score'] = doc.get('service_score').encode(
                'utf8', 'ignore')
            result['price_level'] = doc.get('price_level').encode(
                'utf8', 'ignore')
    del searcher
    return result
def firstsearch(searcher, analyzer, command):
    """Exact lookup on the unsegmented name field.

    Multi-word input is rejected immediately (returns []); otherwise
    returns up to 1000 scoreDocs matching 'name_not_cut'.
    """
    if len(command.split()) > 1:
        return []
    boolean_query = BooleanQuery()
    parsed = QueryParser(Version.LUCENE_CURRENT, "name_not_cut",
                         analyzer).parse(command)
    boolean_query.add(parsed, BooleanClause.Occur.MUST)
    return searcher.search(boolean_query, 1000).scoreDocs
def run(searcher, analyzer, command):
    """Search products using the longest whitespace token of `command`.

    First AND-queries the jieba tokens of that word against 'name'; if
    that yields nothing, retries per-character against the unsegmented
    'not_seg' field. Returns [org, path, price, imgsrc] of the first hit,
    or ['unknown'] * 4 when nothing matches.
    """
    commandsplit = command.split()
    maxlen = len(commandsplit[0])
    maxindex = 0
    # pick the longest whitespace-separated token as the search word
    for i in range(len(commandsplit)):
        if maxlen < len(commandsplit[i]):
            maxlen = len(commandsplit[i])
            maxindex = i
    commands = " ".join(jieba.cut(command.split()[maxindex])).split()
    querys = BooleanQuery()
    for i in commands:
        try:
            query = QueryParser(Version.LUCENE_CURRENT, "name",
                                analyzer).parse(i)
            querys.add(query, BooleanClause.Occur.MUST)
        except:
            # unparseable token (e.g. lone punctuation) — best effort, skip
            continue
    scoreDocs = searcher.search(querys, 50).scoreDocs
    if len(scoreDocs) == 0:
        # fallback: match every single character on the unsegmented field
        querys = BooleanQuery()
        for i in commands:
            for j in i:
                try:
                    query = QueryParser(Version.LUCENE_CURRENT, "not_seg",
                                        analyzer).parse(j)
                    querys.add(query, BooleanClause.Occur.MUST)
                except:
                    continue
        scoreDocs = searcher.search(querys, 50).scoreDocs
    temp = []
    if len(scoreDocs) > 0:
        doc = searcher.doc(scoreDocs[0].doc)
        temp = [
            doc.get("org"),
            doc.get("path"),
            doc.get("price"),
            doc.get("imgsrc")
        ]
    else:
        temp = ['unknown'] * 4
    return temp
def run(searcher, analyzer):
    """Interactive loop: search food shops, then print the details of the
    shop(s) whose 'rank' value is highest among the top 50 hits.

    Exits on empty input.
    """
    while True:
        print
        print "Hit enter with no input to quit."
        command = raw_input("Query:")
        #command = unicode(command, 'GBK')
        command = unicode(command, 'utf8')
        if command == '':
            return
        print
        print 'searching for : ' + command
        command_dict = parseCommand(command)
        querys = BooleanQuery()
        for k, v in command_dict.iteritems():
            query = QueryParser(Version.LUCENE_CURRENT, k, analyzer).parse(v)
            querys.add(query, BooleanClause.Occur.MUST)
        scoreDocs = searcher.search(querys, 50).scoreDocs
        print "%s total matching documents." % len(scoreDocs)
        # compare ratings: first pass finds the highest 'rank' value
        max_rank = 0
        best_shop = ''
        for scoreDoc in scoreDocs:
            doc = searcher.doc(scoreDoc.doc)
            cur_shop = doc.get("foodshop").split()[-1]
            cur_rank = float(doc.get('rank'))
            if cur_rank > max_rank:
                max_rank = cur_rank
                best_shop = cur_shop
        # second pass collects the best-ranked shop's details
        result = {}
        for scoreDoc in scoreDocs:
            doc = searcher.doc(scoreDoc.doc)
            cur_shop = doc.get("foodshop").split()[-1]
            cur_rank = float(doc.get('rank'))
            if cur_rank == max_rank:
                result['name'] = cur_shop.encode('utf8', 'ignore')
                result['rank'] = doc.get('rank').encode('utf8', 'ignore')
                result['food'] = doc.get('food').encode('utf8', 'ignore')
                result['location'] = doc.get('location').encode(
                    'utf8', 'ignore')
                result['tel'] = doc.get('tel').encode('utf8', 'ignore')
                result['environment_score'] = doc.get(
                    'environment_score').encode('utf8', 'ignore')
                result['flavour_score'] = doc.get('flavour_score').encode(
                    'utf8', 'ignore')
                result['service_score'] = doc.get('service_score').encode(
                    'utf8', 'ignore')
                result['price_level'] = doc.get('price_level').encode(
                    'utf8', 'ignore')
        print result
def search(self, value, stopwords=None, min_length=0):
    """Look up a wiki entity name, trying sloppy phrase matches first.

    Tokenizes `value` with nltk, dropping stopwords and tokens whose
    length is <= min_length. First attempt: phrase queries (slop 2)
    against three analyzed name fields, OR-ed together. If that returns
    nothing, falls back to a looser query requiring every word to appear
    in at least one of the three fields.

    :param value: surface form to search for
    :param stopwords: iterable of words to drop (default: none)
    :param min_length: minimum token length to keep
    :return: Lucene scoreDocs (possibly empty)
    """
    # FIX: default was the shared mutable `stopwords=[]`; the None
    # sentinel is behavior-identical (the list was never mutated) and
    # avoids the mutable-default-argument pitfall.
    if stopwords is None:
        stopwords = []
    words = [x for x in nltk.word_tokenize(value)
             if x not in stopwords and len(x) > min_length]
    query = BooleanQuery()
    query1 = PhraseQuery()
    query1.setSlop(2)
    query2 = PhraseQuery()
    query2.setSlop(2)
    query3 = PhraseQuery()
    query3.setSlop(2)
    for word in words:
        query1.add(Term("wiki_name_analyzed", word))
        query2.add(Term("wiki_name_analyzed_nopunct", word))
        query3.add(Term("wiki_name_analyzed_nopunct_nostop", word))
    query.add(query1, BooleanClause.Occur.SHOULD)
    query.add(query2, BooleanClause.Occur.SHOULD)
    query.add(query3, BooleanClause.Occur.SHOULD)
    scoreDocs = self.searcher.search(query, self.num_docs_to_return).scoreDocs
    if len(scoreDocs) > 0:
        #self.printDocs(scoreDocs)
        return scoreDocs
    # Fallback: each word MUST appear in at least one of the name fields.
    query = BooleanQuery()
    for word in words:
        query_word = BooleanQuery()
        query_word.add(TermQuery(Term("wiki_name_analyzed", word)),
                       BooleanClause.Occur.SHOULD)
        query_word.add(TermQuery(Term("wiki_name_analyzed_nopunct", word)),
                       BooleanClause.Occur.SHOULD)
        query_word.add(
            TermQuery(Term("wiki_name_analyzed_nopunct_nostop", word)),
            BooleanClause.Occur.SHOULD)
        query.add(query_word, BooleanClause.Occur.MUST)
    scoreDocs = self.searcher.search(query, self.num_docs_to_return).scoreDocs
    return scoreDocs
def testUnqualifiedTermFields(self):
    """An unqualified term expands to boosted SHOULD clauses, one per
    configured unqualified term field."""
    composer = LuceneQueryComposer(
        unqualifiedTermFields=[("field0", 0.2), ("field1", 2.0)],
        luceneSettings=LuceneSettings())
    result = composer.compose(parseCql("value"))
    expected = BooleanQuery()
    for fieldname, boost in (("field0", 0.2), ("field1", 2.0)):
        clause = TermQuery(Term(fieldname, "value"))
        clause.setBoost(boost)
        expected.add(clause, BooleanClause.Occur.SHOULD)
    self.assertEquals(type(expected), type(result))
    self.assertEquals(repr(expected), repr(result))
def func2(name):
    """Look up items by 'name'; return up to 20 [picture, url, name] rows."""
    vm_env.attachCurrentThread()
    lists = []
    query = BooleanQuery()
    item = QueryParser(Version.LUCENE_CURRENT, "name", analyzer).parse(name)
    query.add(item, BooleanClause.Occur.MUST)
    scoreDocs = searcher2.search(query, 20).scoreDocs
    for scoreDoc in scoreDocs:
        # FIX: local renamed from `list`, which shadowed the builtin.
        row = []
        doc = searcher2.doc(scoreDoc.doc)
        row.append(doc.get("picture"))
        row.append(doc.get("url"))
        row.append(doc.get("name"))
        lists.append(row)
    return lists