示例#1
0
    def _create_query_for_field(self, field, value, analyzer=None):
        """build a query that looks for a value in one specific field

        :param field: name of the field the query parser is bound to
        :type field: str
        :param value: the value to search for in that field
        :type value: str
        :param analyzer: analyzer selection; ``self.analyzer`` is used when
                         this is ``None``. The code compares it with
                         :attr:`ANALYZER_EXACT` and bit-tests it against
                         :attr:`ANALYZER_PARTIAL`:
                         - equal to :attr:`ANALYZER_EXACT`: the value is
                           parsed unmodified with a ``KeywordAnalyzer``
                         - otherwise: the value is passed through
                           ``self._escape_term_value`` and parsed with a
                           ``StandardAnalyzer``
                         - :attr:`ANALYZER_PARTIAL` bit set: a trailing
                           ``*`` wildcard is appended to the value
        :type analyzer: int (used as a bit mask)
        :return: the parsed query object
        :rtype: PyLucene.Query
        """
        mode = analyzer
        if mode is None:
            mode = self.analyzer

        if mode == self.ANALYZER_EXACT:
            # exact matching: the value goes to the parser untouched
            lucene_analyzer = PyLucene.KeywordAnalyzer()
        else:
            value = self._escape_term_value(value)
            lucene_analyzer = PyLucene.StandardAnalyzer()

        parser = PyLucene.QueryParser(field, lucene_analyzer)
        if (mode & self.ANALYZER_PARTIAL) > 0:
            # partial matching is expressed through an explicit wildcard
            value = value + "*"
        return parser.parse(value)
示例#2
0
    def search(self,
               q,
               models=None,
               order_by=RELEVANCE,
               limit=None,
               offset=None):
        """Run ``q`` against the Lucene index and return the search results.

        ``q`` is first converted with ``query.convert`` using
        ``LuceneQueryConverter``. When ``models`` is given, an additional
        ``AND (...)`` clause restricts the ``MODEL_FIELD`` to the listed
        models. ``order_by`` is either the ``RELEVANCE`` sentinel or a field
        name; a leading ``-`` on the field name reverses the sort. The
        unconverted query, the hits, ``limit`` and ``offset`` are handed to
        ``self._get_search_results``, whose return value is returned.
        """
        original_query = q
        lucene_query = query.convert(original_query, LuceneQueryConverter)

        if models:
            model_clauses = []
            for model in models:
                # objects exposing ``_meta`` are identified by it,
                # everything else is used as given
                label = model
                if hasattr(model, "_meta"):
                    label = model._meta
                model_clauses.append('%s:"%s"' % (MODEL_FIELD, label))
            lucene_query = lucene_query + ' AND (%s)' % ' '.join(model_clauses)

        searcher = PyLucene.IndexSearcher(settings.SEARCH_INDEX_PATH)
        parser = PyLucene.QueryParser(CONTENTS_FIELD, PorterStemmerAnalyzer())
        compiled_query = parser.parse(lucene_query)

        if order_by is RELEVANCE:
            sort = PyLucene.Sort.RELEVANCE
        else:
            # "-field" means: sort on "field" in reverse order
            descending = order_by.startswith('-')
            field_name = order_by.lstrip('-')
            sort = PyLucene.Sort(PyLucene.SortField(field_name, descending))

        return self._get_search_results(
            original_query, searcher.search(compiled_query, sort),
            limit, offset)
示例#3
0
    def search(self, phrase, keywords=None, sortAscending=True):
        """Query the index for ``phrase``, optionally combined with keywords.

        :param phrase: free-text query; parsed against the ``'text'`` field
            with AND as the default operator.
        :param keywords: optional mapping; every ``(k, v)`` pair is turned
            into a ``k:v`` term and appended to ``phrase``.
            (assumes keys and values are unicode strings -- TODO confirm)
        :param sortAscending: results are sorted on the ``'sortKey'`` field;
            the sort is reversed when this is false.
        :return: ``[]`` when neither ``phrase`` nor ``keywords`` is given.
            NOTE(review): the visible part of this method assigns ``hits``
            but never returns it -- the excerpt looks truncated; confirm
            against the full source.
        :raises PyLucene.JavaError: re-raised unless the message contains
            ``'no terms in field sortKey'``.
        """
        # nothing to search for
        if not phrase and not keywords:
            return []

        # XXX Colons in phrase will screw stuff up.  Can they be quoted or
        # escaped somehow?  Probably by using a different QueryParser.
        if keywords:
            # build "key:value key:value ..." and append it to the phrase
            fieldPhrase = u' '.join(u':'.join((k, v))
                                    for (k, v) in keywords.iteritems())
            if phrase:
                phrase = phrase + u' ' + fieldPhrase
            else:
                phrase = fieldPhrase
        # Replace '@', '-' and '.' with spaces. This runs after the keyword
        # terms were appended, so it applies to them as well.
        # (dict-based translate: presumably phrase is unicode -- verify)
        phrase = phrase.translate({
            ord(u'@'): u' ',
            ord(u'-'): u' ',
            ord(u'.'): u' '
        })
        qp = PyLucene.QueryParser('text', self.analyzer)
        # every term must match unless the query says otherwise
        qp.setDefaultOperator(qp.Operator.AND)
        query = qp.parseQuery(phrase)

        # second SortField argument is the "reverse" flag
        sort = PyLucene.Sort(PyLucene.SortField('sortKey', not sortAscending))

        try:
            hits = self.searcher.search(query, sort)
        except PyLucene.JavaError, err:
            # this particular error is treated as "no results"
            if 'no terms in field sortKey' in str(err):
                hits = []
            else:
                raise
示例#4
0
    def search_node_by_name2(self, name):
        """Search the index for nodes whose name matches ``name``.

        ``name`` is parsed as a Lucene query with ``COLUMN_NAME`` as the
        default field and a ``StandardAnalyzer``. The searcher is created
        on first use from the ``"index"`` directory and kept on the
        instance. Returns the hits converted by ``self.hits_to_list``.
        """
        # open the index lazily and reuse the searcher afterwards
        if self.searcher is None:
            self.searcher = PyLucene.IndexSearcher("index")

        parser = PyLucene.QueryParser(COLUMN_NAME, PyLucene.StandardAnalyzer())
        name_query = parser.parse(name)
        return self.hits_to_list(self.searcher.search(name_query))
示例#5
0
    def search_node_by_attribute2(self, att_type, att_value):
        """Search the index for nodes by attribute type and/or value.

        The query depends on which arguments are non-empty strings:

        - only ``att_type``: ``att_type`` is parsed with
          ``COLUMN_ATTRIBUTE_TYPE_NID`` as the default field
        - only ``att_value``: ``att_value`` is parsed with
          ``COLUMN_ATTRIBUTE_VALUE`` as the default field
        - both: ``"<COLUMN_ATTRIBUTE_TYPE_NID>:<att_type> AND <att_value>"``
          is parsed with ``COLUMN_ATTRIBUTE_VALUE`` as the default field
        - neither: an empty list is returned without touching the index
          (this case used to fail with ``UnboundLocalError`` because no
          query was ever built)

        Returns the hits converted by ``self.hits_to_list``, or ``[]`` when
        both arguments are empty.
        """
        if att_type == "" and att_value == "":
            # No criteria means no query can be built.
            # NOTE(review): assumes hits_to_list yields a list, so [] is the
            # matching "no results" value -- confirm.
            return []

        # open the index lazily and reuse the searcher afterwards
        if self.searcher is None:
            self.searcher = PyLucene.IndexSearcher("index")

        analyzer = PyLucene.StandardAnalyzer()

        if att_value == "":
            default_field = COLUMN_ATTRIBUTE_TYPE_NID
            query_text = att_type
        elif att_type == "":
            default_field = COLUMN_ATTRIBUTE_VALUE
            query_text = att_value
        else:
            # explicit field for the type, default field for the value
            default_field = COLUMN_ATTRIBUTE_VALUE
            query_text = (COLUMN_ATTRIBUTE_TYPE_NID + ":" + att_type +
                          " AND " + att_value)

        query = PyLucene.QueryParser(default_field, analyzer).parse(query_text)
        hits = self.searcher.search(query)
        return self.hits_to_list(hits)
示例#6
0
    def _create_query_for_string(self, text, require_all=True,
                analyzer=None):
        """parse a plain query string into a query object

        :param text: the query string
        :type text: str
        :param require_all: default boolean operator of the parser
                            (True -> AND (default) / False -> OR)
        :type require_all: bool
        :param analyzer: analyzer selection; ``self.analyzer`` is used when
                         this is ``None``. The code compares it with
                         :attr:`ANALYZER_EXACT` and bit-tests it against
                         :attr:`ANALYZER_PARTIAL`:
                         - equal to :attr:`ANALYZER_EXACT`: the text is
                           parsed unmodified with ``self.ExactAnalyzer``
                         - otherwise: the text is passed through
                           ``_escape_term_value`` and parsed with a
                           ``StandardAnalyzer``
                         - :attr:`ANALYZER_PARTIAL` bit set: a trailing
                           ``*`` wildcard is appended to the text
        :type analyzer: int (used as a bit mask)
        :return: the parsed query object
        :rtype: PyLucene.Query
        """
        mode = analyzer
        if mode is None:
            mode = self.analyzer

        if mode == self.ANALYZER_EXACT:
            # exact matching: the text goes to the parser untouched
            lucene_analyzer = self.ExactAnalyzer()
        else:
            text = _escape_term_value(text)
            lucene_analyzer = PyLucene.StandardAnalyzer()

        parser = PyLucene.QueryParser(analyzer=lucene_analyzer)
        if require_all:
            default_operator = parser.Operator.AND
        else:
            default_operator = parser.Operator.OR
        parser.setDefaultOperator(default_operator)

        if (mode & self.ANALYZER_PARTIAL) > 0:
            # partial matching is expressed through an explicit wildcard
            text = text + "*"
        return parser.parse(text)
示例#7
0
    def search(self,
               query_string='',
               require_visible=True,
               allow_curated=True):
        """Search the Lucene index for ``query_string``.

        The user query is run through ``disassemble_user_query`` and
        ``reassemble_user_query`` and wrapped in a required ``+(...)``
        clause. Further clauses are appended depending on the flags:

        - ``allow_curated`` false: ``-record-status:<STATUS_CURATED>``
        - ``require_visible`` true: ``+article-type:[<traditional> TO
          <curated>]`` and ``+record-status:<STATUS_CURATED>``

        NOTE(review): with ``allow_curated=False`` and
        ``require_visible=True`` the query both excludes and requires
        ``STATUS_CURATED`` -- confirm this combination is intended.

        The query is parsed against the ``'all'`` field with AND as the
        default operator.

        :return: ``(hits, searcher)`` on success. The caller receives the
            open searcher together with the hits. On any failure the error
            is logged, a searcher that was already opened is closed, and
            ``([], None)`` is returned.
        """
        query_string = str(query_string)
        self.logger.info('Performing search: %s', query_string)
        disassembled_query = disassemble_user_query(query_string)
        self.logger.debug('Disassembled query: %s', str(disassembled_query))
        reassembled_query = '+(%s)' % reassemble_user_query(disassembled_query)
        self.logger.debug('Reassembled query: %s', reassembled_query)

        if not allow_curated:
            reassembled_query += \
                ' -record-status:%s' % canary.loader.QueuedRecord.STATUS_CURATED

        if require_visible:
            reassembled_query += ' +article-type:[%s TO %s]' % \
                (Study.ARTICLE_TYPES['traditional'],
                 Study.ARTICLE_TYPES['curated'])
            reassembled_query += ' +record-status:%s' % \
                canary.loader.QueuedRecord.STATUS_CURATED

        # Bound before the try block so the handler can tell whether the
        # index was opened before the failure happened.
        searcher = None
        try:
            searcher = PyLucene.IndexSearcher(
                PyLucene.FSDirectory.getDirectory(
                    self.context.config.search_index_dir, False))
            analyzer = PyLucene.StandardAnalyzer()
            query_parser = PyLucene.QueryParser('all', analyzer)
            query_parser.setOperator(PyLucene.QueryParser.DEFAULT_OPERATOR_AND)
            query = query_parser.parseQuery(reassembled_query)
            self.logger.info('Search query: %s', query)
            hits = searcher.search(query)
            return hits, searcher
        except Exception as e:
            self.logger.error('Search failed: %s', e)
            # The searcher is never handed to the caller on this path, so
            # release it here instead of leaking the open index.
            if searcher is not None:
                try:
                    searcher.close()
                except Exception as close_error:
                    self.logger.error('Closing searcher failed: %s',
                                      close_error)
            return [], None
示例#8
0
    def search(self,
               query,
               fields=FEED_ENTRY_FIELDS,
               analyzer=None,
               store=None):
        """Search ``store`` for ``query`` across the given ``fields``.

        Returns ``None`` when ``query`` is empty or only whitespace, or when
        ``fields`` is empty. Otherwise the query is parsed with a
        ``MultiFieldQueryParser`` (several fields) or a plain
        ``QueryParser`` (single field), executed sorted by relevance, and
        the result is returned as ``HitHolder(hits, searcher)``.

        ``analyzer`` falls back to ``self.analyzer`` when falsy; ``store``
        falls back to ``self.entry_modifier.store`` when ``None``.
        """
        # nothing to search for, or nowhere to look
        if not query:
            return None
        if not query.strip() or len(fields) == 0:
            return None

        if not analyzer:
            analyzer = self.analyzer
        if store is None:
            store = self.entry_modifier.store

        # fields is non-empty here, so "not exactly one" means "several"
        if len(fields) == 1:
            parser = lucene.QueryParser(fields[0], analyzer)
        else:
            parser = lucene.MultiFieldQueryParser(fields, analyzer)
        parsed_query = parser.parse(query)

        index_searcher = lucene.IndexSearcher(store)
        found = index_searcher.search(parsed_query, lucene.Sort.RELEVANCE)
        return HitHolder(found, index_searcher)
示例#9
0
    def _create_query_for_string(self, text, require_all=True,
            analyzer=None):
        """parse a plain query string into a query object

        @param text: the query string
        @type text: str
        @param require_all: default boolean operator of the parser
            (True -> AND (default) / False -> OR)
        @type require_all: bool
        @param analyzer: analyzer selection; C{self.analyzer} is used when
            this is C{None}. The code compares it with
            L{CommonDatabase.ANALYZER_EXACT} and bit-tests it against
            L{CommonDatabase.ANALYZER_PARTIAL}:
             -  equal to ANALYZER_EXACT:
                    the text is parsed unmodified with a KeywordAnalyzer
             -  otherwise:
                    the text is passed through C{_escape_term_value} and
                    parsed with a StandardAnalyzer
             -  ANALYZER_PARTIAL bit set:
                    a trailing C{*} wildcard is appended to the text
        @type analyzer: int (used as a bit mask)
        @return: the parsed query object
        @rtype: PyLucene.Query
        """
        mode = analyzer
        if mode is None:
            mode = self.analyzer

        if mode == self.ANALYZER_EXACT:
            # exact matching: the text goes to the parser untouched
            lucene_analyzer = PyLucene.KeywordAnalyzer()
        else:
            text = _escape_term_value(text)
            lucene_analyzer = PyLucene.StandardAnalyzer()

        parser = PyLucene.QueryParser(UNNAMED_FIELD_NAME, lucene_analyzer)
        if (mode & self.ANALYZER_PARTIAL) > 0:
            # partial matching is expressed through an explicit wildcard
            text = text + "*"

        if require_all:
            default_operator = parser.Operator.AND
        else:
            default_operator = parser.Operator.OR
        parser.setDefaultOperator(default_operator)
        return parser.parse(text)