def visitSEARCH_CLAUSE(self, node):
    """Build the query object for one SEARCH_CLAUSE node.

    The children of the clause take one of three shapes:
      * CQL_QUERY                     - a nested query, handed back unchanged
      * SEARCH_TERM                   - a bare term without an index
      * INDEX, RELATION, SEARCH_TERM  - a term qualified by field and relation

    Raises UnsupportedCQL when the relation of a qualified term is not one
    of '=', '==', 'exact', '<', '<=', '>=' or '>'.
    """
    leadingChild = node.children[0].name
    visited = CqlVisitor.visitSEARCH_CLAUSE(self, node)

    if leadingChild == 'SEARCH_TERM':
        (term,) = visited
        if term == '*':
            return MatchAllDocsQuery()

        # One candidate per configured unqualified field; a phrase query is
        # dropped when the field registry says the field cannot serve it.
        candidates = []
        for fieldname, boost in self._unqualifiedTermFields:
            candidate = self._termOrPhraseQuery(fieldname, term)
            isPhrase = isinstance(candidate, PhraseQuery)
            if isPhrase and not self._fieldRegistry.phraseQueryPossible(fieldname):
                continue
            candidate.setBoost(boost)
            candidates.append(candidate)

        # A single candidate is returned directly; otherwise (including the
        # case of no candidates) they are OR-ed together in a BooleanQuery.
        if len(candidates) == 1:
            return candidates[0]
        combined = BooleanQuery()
        for candidate in candidates:
            combined.add(candidate, BooleanClause.Occur.SHOULD)
        return combined

    if leadingChild != 'INDEX':
        # Nested CQL_QUERY: the visited result wraps exactly one query.
        ((nested,),) = visited
        return nested

    field, (relation, boost), value = visited
    if relation in ('==', 'exact'):
        fieldQuery = TermQuery(self._createTerm(field, value))
    elif relation == '=':
        # '=' is an exact term match on untokenized fields, and a term or
        # phrase query everywhere else.
        if self._fieldRegistry.isUntokenized(field):
            fieldQuery = TermQuery(self._createTerm(field, value))
        else:
            fieldQuery = self._termOrPhraseQuery(field, value)
    elif relation in ('<', '<=', '>=', '>'):
        fieldQuery = self._termRangeQuery(field, relation, value)
    else:
        raise UnsupportedCQL("'%s' not supported for the field '%s'" % (relation, field))
    fieldQuery.setBoost(boost)
    return fieldQuery
def visitSEARCH_CLAUSE(self, node):
    """Build the query object for one SEARCH_CLAUSE node.

    The children of the clause take one of three shapes:
      * CQL_QUERY                     - a nested query, handed back unchanged
      * SEARCH_TERM                   - a bare term without an index
      * INDEX, RELATION, SEARCH_TERM  - a term qualified by field and relation

    Raises UnsupportedCQL when the relation of a qualified term is not one
    of '=', '==', 'exact', '<', '<=', '>=' or '>'.
    """
    leadingChild = node.children[0].name
    visited = CqlVisitor.visitSEARCH_CLAUSE(self, node)

    if leadingChild == 'SEARCH_TERM':
        (term,) = visited
        if term == '*':
            return MatchAllDocsQuery()

        # One candidate per configured unqualified field; a phrase query is
        # dropped when the field registry says the field cannot serve it.
        candidates = []
        for fieldname, boost in self._unqualifiedTermFields:
            candidate = self._termOrPhraseQuery(fieldname, term)
            isPhrase = isinstance(candidate, PhraseQuery)
            if isPhrase and not self._fieldRegistry.phraseQueryPossible(fieldname):
                continue
            candidate.setBoost(boost)
            candidates.append(candidate)

        # A single candidate is returned directly; otherwise (including the
        # case of no candidates) they are OR-ed together in a BooleanQuery.
        if len(candidates) == 1:
            return candidates[0]
        combined = BooleanQuery()
        for candidate in candidates:
            combined.add(candidate, BooleanClause.Occur.SHOULD)
        return combined

    if leadingChild != 'INDEX':
        # Nested CQL_QUERY: the visited result wraps exactly one query.
        ((nested,),) = visited
        return nested

    field, (relation, boost), value = visited
    if relation in ('==', 'exact'):
        fieldQuery = TermQuery(self._createTerm(field, value))
    elif relation == '=':
        # '=' is an exact term match on untokenized fields, and a term or
        # phrase query everywhere else.
        if self._fieldRegistry.isUntokenized(field):
            fieldQuery = TermQuery(self._createTerm(field, value))
        else:
            fieldQuery = self._termOrPhraseQuery(field, value)
    elif relation in ('<', '<=', '>=', '>'):
        fieldQuery = self._termRangeQuery(field, relation, value)
    else:
        raise UnsupportedCQL("'%s' not supported for the field '%s'" % (relation, field))
    fieldQuery.setBoost(boost)
    return fieldQuery
def run(searcher, analyzer, command, judge):
    """Search the index for ``command`` and return the collected result rows.

    NOTE(review): this block was reviewed with its line breaks and
    indentation stripped. The nesting below is a reconstruction of the most
    plausible layout - confirm it against the original file, in particular
    which statements belong inside the ``if len(...) > 1:`` guards and the
    final ``if judge == True:`` block.

    The query text is segmented with ``jieba.cut`` and turned into a combined
    boolean query over the "name" field (or "not_seg", character by
    character, when "name" yields no hits), the "comment" field and the
    "type" field. At most 20 hits of the combined query are fetched.

    Args:
        searcher: object providing ``search(query, n)`` and ``doc(doc_id)``.
        analyzer: analyzer handed to every ``QueryParser``.
        command: the query text; an empty string returns ``None`` immediately.
        judge: when equal to ``True``, every row is extended with entries
            taken from ``main(row[0], False)`` and collected in the result.

    Returns:
        ``None`` for an empty ``command``; otherwise the list ``res1``, which
        is only filled when ``judge == True`` (it stays empty otherwise).
    """
    print "\nSearching for: " + command
    # command = unicode(command, 'UTF-8')
    if command == '':
        return
    # Segmented words (jieba) versus plain whitespace-separated words.
    commands = " ".join(jieba.cut(command)).split()
    commands_notseg = command.split()
    querys = BooleanQuery()   # every segmented word is required (MUST)
    querys1 = BooleanQuery()  # the same sub-queries, each optional (SHOULD)
    querys2 = BooleanQuery()  # the combined query that is finally executed
    for i in commands:
        query = QueryParser(Version.LUCENE_CURRENT, "name", analyzer).parse(i)
        # NOTE(review): the boost is set on the enclosing BooleanQuery, so
        # only the last word's length takes effect - possibly meant to be
        # query.setBoost(...); confirm.
        querys.setBoost(math.sqrt(len(i)))
        querys.add(query, BooleanClause.Occur.MUST)  # match on segmented words
        querys1.add(query, BooleanClause.Occur.SHOULD)
    scoreDocs = searcher.search(querys, 50).scoreDocs
    if len(scoreDocs) == 0:
        # No hit on whole words: rebuild the required query per character
        # against the "not_seg" field.
        querys = BooleanQuery()
        for i in commands:
            for j in i:
                query = QueryParser(Version.LUCENE_CURRENT, "not_seg", analyzer).parse(j)
                querys.add(query, BooleanClause.Occur.MUST)  # match character by character
                querys1.add(query, BooleanClause.Occur.SHOULD)
        # This result is overwritten by the final search below.
        scoreDocs = searcher.search(querys, 50).scoreDocs
    for i in commands:
        query = QueryParser(Version.LUCENE_CURRENT, "comment", analyzer).parse(i)
        query.setBoost(0.5)
        querys2.add(query, BooleanClause.Occur.SHOULD)  # match on comments
    if len(commands) > 1:
        querys2.add(querys1, BooleanClause.Occur.MUST)
    querys2.add(querys, BooleanClause.Occur.SHOULD)
    # querys is reused from here on for the "type" field query.
    querys = BooleanQuery()
    for i in commands_notseg:
        query = QueryParser(Version.LUCENE_CURRENT, "type", analyzer).parse(i)
        query.setBoost(2)
        querys.add(query, BooleanClause.Occur.SHOULD)  # match on tags ("type" field)
    if len(commands_notseg) > 1:
        querys.add(querys1, BooleanClause.Occur.MUST)
    querys2.add(querys, BooleanClause.Occur.SHOULD)
    scoreDocs = searcher.search(querys2, 20).scoreDocs
    print "%s total matching documents." % len(scoreDocs)
    res = []
    temp = []
    for scoreDoc in scoreDocs:
        doc = searcher.doc(scoreDoc.doc)
        # Row layout: the seven stored fields below, followed by whatever
        # SearchFiles.main yields for the document's "org" value.
        temp = [ doc.get("org"), doc.get("type"), doc.get("price"), doc.get("imgsrc"), doc.get('comment_notseg'), doc.get('ISBN'), doc.get('ID') ]
        print temp
        for i in SearchFiles.main(doc.get("org")):
            temp.append(i)
        res.append(temp)
    res1 = []
    if judge == True:
        for i in range(len(res)):
            temp = res[i]
            tempres = main(temp[0], False)  # nested lookup for similar books
            # Each branch appends three entries: items 1..3 of tempres where
            # they exist, with the last available item repeated as padding.
            if len(tempres) >= 4:
                for j in tempres[1:4]:
                    temp.append(j)
            elif len(tempres) == 1:
                for j in range(3):
                    temp.append(tempres[0])
            else:
                # NOTE(review): an empty tempres also lands here and then
                # indexes tempres[-1] - confirm main() never returns an
                # empty sequence.
                for j in range(1, len(tempres)):
                    temp.append(tempres[j])
                for j in range(len(tempres), 4):
                    temp.append(tempres[-1])
            res1.append(temp)
    return res1