Пример #1
0
    def visitSEARCH_CLAUSE(self, node):
        """Translate a SEARCH_CLAUSE node into a query object.

        The shape of the clause is decided by the name of its first child:

        * ``SEARCH_TERM`` - an unqualified term, searched in every field
          listed in ``self._unqualifiedTermFields`` (``'*'`` matches all
          documents);
        * ``INDEX`` - an ``INDEX, RELATION, SEARCH_TERM`` triple;
        * anything else - a nested ``CQL_QUERY`` whose already-built query
          is passed through unchanged.

        Raises UnsupportedCQL when the relation of an INDEX clause is not
        one of the handled relations.
        """
        clauseKind = node.children[0].name
        visited = CqlVisitor.visitSEARCH_CLAUSE(self, node)

        if clauseKind == 'SEARCH_TERM':
            (term, ) = visited
            if term == '*':
                return MatchAllDocsQuery()

            candidates = []
            for field, weight in self._unqualifiedTermFields:
                candidate = self._termOrPhraseQuery(field, term)
                # A phrase query is only kept for fields that can serve it.
                usable = (not isinstance(candidate, PhraseQuery)
                          or self._fieldRegistry.phraseQueryPossible(field))
                if usable:
                    candidate.setBoost(weight)
                    candidates.append(candidate)

            if len(candidates) == 1:
                return candidates[0]
            # Zero or several candidates: OR them together.
            combined = BooleanQuery()
            for candidate in candidates:
                combined.add(candidate, BooleanClause.Occur.SHOULD)
            return combined

        if clauseKind == 'INDEX':
            (field, (relation, weight), value) = visited

            # '=' on an untokenized field behaves like an exact match.
            if relation in ('==', 'exact'):
                wantsExactTerm = True
            elif relation == '=':
                wantsExactTerm = self._fieldRegistry.isUntokenized(field)
            else:
                wantsExactTerm = False

            if wantsExactTerm:
                built = TermQuery(self._createTerm(field, value))
            elif relation == '=':
                built = self._termOrPhraseQuery(field, value)
            elif relation in ('<', '<=', '>=', '>'):
                built = self._termRangeQuery(field, relation, value)
            else:
                message = "'%s' not supported for the field '%s'" % (relation,
                                                                     field)
                raise UnsupportedCQL(message)

            built.setBoost(weight)
            return built

        # Nested CQL_QUERY: the visitor result wraps a single query.
        ((nested, ), ) = visited
        return nested
Пример #2
0
    def visitSEARCH_CLAUSE(self, node):
        """Build the query for a single SEARCH_CLAUSE parse node.

        Possible child layouts of the node:
          - CQL_QUERY                      (nested query, returned as is)
          - SEARCH_TERM                    (unqualified term)
          - INDEX, RELATION, SEARCH_TERM   (qualified term)

        An unqualified ``'*'`` yields a MatchAllDocsQuery. An unsupported
        relation in the qualified form raises UnsupportedCQL.
        """
        head = node.children[0].name
        parts = CqlVisitor.visitSEARCH_CLAUSE(self, node)

        # Anything that is neither a bare term nor an index clause is a
        # nested query that has already been built by the base visitor.
        if head not in ('SEARCH_TERM', 'INDEX'):
            ((inner,),) = parts
            return inner

        if head == 'INDEX':
            (index, (relation, boost), term) = parts
            isExact = relation in ['==', 'exact']
            if not isExact and relation == '=':
                # '=' degrades to an exact term match on untokenized fields.
                isExact = self._fieldRegistry.isUntokenized(index)
            if isExact:
                result = TermQuery(self._createTerm(index, term))
            elif relation == '=':
                result = self._termOrPhraseQuery(index, term)
            elif relation in ['<', '<=', '>=', '>']:
                result = self._termRangeQuery(index, relation, term)
            else:
                raise UnsupportedCQL("'%s' not supported for the field '%s'" % (relation, index))
            result.setBoost(boost)
            return result

        # head == 'SEARCH_TERM'
        (term,) = parts
        if term == '*':
            return MatchAllDocsQuery()

        accepted = []
        for fieldname, boost in self._unqualifiedTermFields:
            perField = self._termOrPhraseQuery(fieldname, term)
            if isinstance(perField, PhraseQuery):
                if not self._fieldRegistry.phraseQueryPossible(fieldname):
                    # This field cannot answer phrase queries; leave it out.
                    continue
            perField.setBoost(boost)
            accepted.append(perField)

        if len(accepted) != 1:
            # None or several per-field queries: combine them as alternatives.
            union = BooleanQuery()
            for perField in accepted:
                union.add(perField, BooleanClause.Occur.SHOULD)
            return union
        return accepted[0]
Пример #3
0
def run(searcher, analyzer, command, judge):
    """Search the index for ``command`` and build one result row per hit.

    The final query is assembled from these parts, in this order:

    1. Each jieba token of ``command`` optionally matches the "comment"
       field (boost 0.5).
    2. When there is more than one token, at least one of the "name"
       (and, after the fallback, "not_seg") clauses must match.
    3. The name query: every token must match the "name" field. If that
       query alone finds nothing, it is rebuilt so that every single
       character of every token must match the "not_seg" field.
    4. Each whitespace-separated word of ``command`` optionally matches
       the "type" field (boost 2).

    At most 20 hits of the final query are turned into rows. A row holds
    the stored fields org, type, price, imgsrc, comment_notseg, ISBN and
    ID, followed by whatever ``SearchFiles.main(org)`` yields.

    Args:
        searcher: object offering ``search(query, n).scoreDocs`` and
            ``doc(doc_id)``.
        analyzer: analyzer handed to every ``QueryParser``.
        command: the raw query string.
        judge: when truthy, each row is additionally extended with three
            entries derived from ``main(row[0], False)``.

    Returns:
        ``None`` when ``command`` is the empty string, an empty list when
        ``judge`` is falsy, otherwise the list of rows.
    """
    print("\nSearching for: " + command)
    if command == '':
        return

    tokens = " ".join(jieba.cut(command)).split()
    raw_words = command.split()

    name_query = BooleanQuery()  # every token must match
    any_name_query = BooleanQuery()  # same clauses, but optional
    for token in tokens:
        clause = QueryParser(Version.LUCENE_CURRENT, "name",
                             analyzer).parse(token)
        # NOTE(review): the boost is set on the enclosing BooleanQuery, so
        # only the last token's value survives; possibly meant for
        # `clause` - confirm before changing, it affects ranking.
        name_query.setBoost(math.sqrt(len(token)))
        name_query.add(clause, BooleanClause.Occur.MUST)  # segmented-word match
        any_name_query.add(clause, BooleanClause.Occur.SHOULD)

    # Probe search: only whether it finds anything matters, not its hits.
    if len(searcher.search(name_query, 50).scoreDocs) == 0:
        name_query = BooleanQuery()
        for token in tokens:
            for char in token:
                clause = QueryParser(Version.LUCENE_CURRENT, "not_seg",
                                     analyzer).parse(char)
                name_query.add(clause, BooleanClause.Occur.MUST)  # per-character match
                any_name_query.add(clause, BooleanClause.Occur.SHOULD)
        # No search is run here: the rebuilt query is only used as a clause
        # of the final query below.

    final_query = BooleanQuery()
    for token in tokens:
        clause = QueryParser(Version.LUCENE_CURRENT, "comment",
                             analyzer).parse(token)
        clause.setBoost(0.5)
        final_query.add(clause, BooleanClause.Occur.SHOULD)  # comment match
    if len(tokens) > 1:
        final_query.add(any_name_query, BooleanClause.Occur.MUST)
    final_query.add(name_query, BooleanClause.Occur.SHOULD)

    type_query = BooleanQuery()
    for word in raw_words:
        clause = QueryParser(Version.LUCENE_CURRENT, "type",
                             analyzer).parse(word)
        clause.setBoost(2)
        type_query.add(clause, BooleanClause.Occur.SHOULD)  # tag match
    if len(raw_words) > 1:
        type_query.add(any_name_query, BooleanClause.Occur.MUST)
    final_query.add(type_query, BooleanClause.Occur.SHOULD)

    scoreDocs = searcher.search(final_query, 20).scoreDocs
    print("%s total matching documents." % len(scoreDocs))

    rows = []
    for scoreDoc in scoreDocs:
        doc = searcher.doc(scoreDoc.doc)
        row = [
            doc.get("org"),
            doc.get("type"),
            doc.get("price"),
            doc.get("imgsrc"),
            doc.get('comment_notseg'),
            doc.get('ISBN'),
            doc.get('ID')
        ]
        print(row)
        row.extend(SearchFiles.main(doc.get("org")))
        rows.append(row)

    if not judge:
        # NOTE(review): the rows built above are discarded in this case;
        # kept as in the original - confirm this is intended.
        return []

    for row in rows:
        # Look up similar books for this hit.
        row.extend(_pick_similar(main(row[0], False)))
    return rows


def _pick_similar(candidates, wanted=3):
    """Return the entries after the first one, padded to ``wanted`` items.

    Takes ``candidates[1:1 + wanted]`` and, if that is too short, repeats
    the last candidate until ``wanted`` items are available. An empty
    ``candidates`` yields an empty list instead of raising IndexError.
    """
    if len(candidates) == 0:
        return []
    picked = list(candidates[1:1 + wanted])
    picked.extend([candidates[-1]] * (wanted - len(picked)))
    return picked