def _create_query_for_field(self, field, value, analyzer=None):
    """Build a query that matches ``value`` inside a single field.

    :param field: Name of the field the query is parsed against
    :type field: str
    :param value: The wanted value of the field
    :type value: str
    :param analyzer: Analyzer flags (combined/tested with ``&``); when
        ``None`` the instance default ``self.analyzer`` is used.
        Possible analyzers are:

        - :attr:`CommonDatabase.ANALYZER_TOKENIZE`
          the field value is split to be matched word-wise
        - :attr:`CommonDatabase.ANALYZER_PARTIAL`
          the field value must start with the query string
        - :attr:`CommonDatabase.ANALYZER_EXACT`
          keep special characters and the like
    :type analyzer: int
    :return: The query produced by the PyLucene query parser
    :rtype: PyLucene.Query
    """
    flags = self.analyzer if analyzer is None else analyzer
    exact_match = flags == self.ANALYZER_EXACT
    if not exact_match:
        # Only non-exact lookups get their term escaped before parsing.
        value = self._escape_term_value(value)
    if exact_match:
        lucene_analyzer = PyLucene.KeywordAnalyzer()
    else:
        lucene_analyzer = PyLucene.StandardAnalyzer()
    parser = PyLucene.QueryParser(field, lucene_analyzer)
    wants_prefix_match = (flags & self.ANALYZER_PARTIAL) > 0
    if wants_prefix_match:
        # PyLucene needs an explicit trailing wildcard for partial matching;
        # it is appended after escaping so it stays a live wildcard.
        value = value + "*"
    return parser.parse(value)
def search(self, q, models=None, order_by=RELEVANCE, limit=None, offset=None):
    """Perform a search.

    :param q: The query; it is converted to a Lucene query string with
        ``query.convert(q, LuceneQueryConverter)``.
    :param models: Optional iterable restricting the search. For each entry
        a ``MODEL_FIELD:"..."`` clause is added, formatted from the entry's
        ``_meta`` attribute when it has one, otherwise from the entry itself.
    :param order_by: ``RELEVANCE`` for relevance ordering, otherwise a field
        name; a leading ``-`` reverses the sort on that field.
    :param limit: Passed through to ``self._get_search_results``.
    :param offset: Passed through to ``self._get_search_results``.
    :return: Whatever ``self._get_search_results`` returns for the hits.
    """
    original_query = q
    q = query.convert(original_query, LuceneQueryConverter)
    if models:
        model_clauses = [
            '%s:"%s"' % (MODEL_FIELD, getattr(m, "_meta", m)) for m in models
        ]
        # Group the user's query before AND-ing the model filter. Without
        # the parentheses Lucene's AND binds only to the last clause of the
        # user query (``a b AND (m)`` parses as ``a +b +(m)``), silently
        # changing what the user asked for.
        q = '(%s) AND (%s)' % (q, ' '.join(model_clauses))

    # NOTE(review): the searcher is not closed here; presumably the hits
    # handed to _get_search_results still need it open - confirm.
    searcher = PyLucene.IndexSearcher(settings.SEARCH_INDEX_PATH)
    analyzer = PorterStemmerAnalyzer()
    compiled_query = PyLucene.QueryParser(CONTENTS_FIELD, analyzer).parse(q)

    if order_by is RELEVANCE:
        sort = PyLucene.Sort.RELEVANCE
    else:
        # Named ``descending`` rather than ``reversed`` so the builtin is
        # not shadowed.
        descending = order_by.startswith('-')
        sort_field = PyLucene.SortField(order_by.lstrip('-'), descending)
        sort = PyLucene.Sort(sort_field)

    hits = searcher.search(compiled_query, sort)
    return self._get_search_results(original_query, hits, limit, offset)
def search(self, phrase, keywords=None, sortAscending=True):
    """Search the index for a free-text phrase and/or per-field keywords.

    :param phrase: Free-text query; may be empty when ``keywords`` is given.
        Assumed to be a unicode string (it is translated with a dict
        mapping) - TODO confirm against callers.
    :param keywords: Optional mapping of field name to text; each pair is
        added to the query as ``field:text``.
    :param sortAscending: Results are sorted on the ``sortKey`` field; the
        reverse flag given to ``SortField`` is ``not sortAscending``.
    :return: ``[]`` when there is nothing to search for or when the index
        has no terms in ``sortKey``; otherwise the hits object returned by
        ``self.searcher.search``.
    :raises PyLucene.JavaError: for any search failure other than the
        missing-``sortKey`` case.
    """
    if not phrase and not keywords:
        return []

    # XXX Colons in phrase will confuse the parser. Can they be quoted or
    # escaped somehow? Probably by using a different QueryParser.
    if keywords:
        fieldPhrase = u' '.join(
            u':'.join((k, v)) for (k, v) in keywords.items())
        if phrase:
            phrase = phrase + u' ' + fieldPhrase
        else:
            phrase = fieldPhrase

    # '@', '-' and '.' are replaced by spaces before parsing.
    phrase = phrase.translate({
        ord(u'@'): u' ',
        ord(u'-'): u' ',
        ord(u'.'): u' ',
    })

    qp = PyLucene.QueryParser('text', self.analyzer)
    qp.setDefaultOperator(qp.Operator.AND)
    query = qp.parseQuery(phrase)
    sort = PyLucene.Sort(PyLucene.SortField('sortKey', not sortAscending))

    try:
        hits = self.searcher.search(query, sort)
    except PyLucene.JavaError as err:
        # Sorting on a field with no terms is reported as an error; treat
        # it as "no results" and let every other failure propagate.
        if 'no terms in field sortKey' in str(err):
            hits = []
        else:
            raise

    # The result used to be computed and then dropped, so every real search
    # returned None. NOTE(review): callers may expect the hits wrapped in
    # another type - confirm.
    return hits
def search_node_by_name2(self, name):
    """Search the ``COLUMN_NAME`` field for ``name``.

    The index searcher is created on first use (opened on ``"index"``) and
    kept on ``self.searcher`` for later calls.

    :param name: Query text, parsed with a ``StandardAnalyzer``.
    :return: The hits converted by ``self.hits_to_list``.
    """
    if self.searcher is None:
        self.searcher = PyLucene.IndexSearcher("index")

    name_parser = PyLucene.QueryParser(COLUMN_NAME, PyLucene.StandardAnalyzer())
    parsed_query = name_parser.parse(name)
    return self.hits_to_list(self.searcher.search(parsed_query))
def search_node_by_attribute2(self, att_type, att_value):
    """Search nodes by attribute type, attribute value, or both.

    An empty (or ``None``) argument means "not specified":

    - only ``att_type``: search ``COLUMN_ATTRIBUTE_TYPE_NID`` for it;
    - only ``att_value``: search ``COLUMN_ATTRIBUTE_VALUE`` for it;
    - both: search ``COLUMN_ATTRIBUTE_VALUE`` for the value AND require the
      type in ``COLUMN_ATTRIBUTE_TYPE_NID``;
    - neither: return ``[]`` without touching the index.

    :param att_type: Attribute type text, or ``""``.
    :param att_value: Attribute value text, or ``""``.
    :return: The hits converted by ``self.hits_to_list``, or ``[]`` when no
        criterion was given.
    """
    # With no criterion no query could be built; this case used to fall
    # through every branch and crash with UnboundLocalError on ``query``.
    if not att_type and not att_value:
        return []

    if self.searcher is None:
        self.searcher = PyLucene.IndexSearcher("index")
    analyzer = PyLucene.StandardAnalyzer()

    if not att_value:
        parser = PyLucene.QueryParser(COLUMN_ATTRIBUTE_TYPE_NID, analyzer)
        query_text = att_type
    else:
        parser = PyLucene.QueryParser(COLUMN_ATTRIBUTE_VALUE, analyzer)
        if att_type:
            # The value goes to the default field; the type is constrained
            # through an explicit field prefix.
            query_text = (COLUMN_ATTRIBUTE_TYPE_NID + ":" + att_type
                          + " AND " + att_value)
        else:
            query_text = att_value

    hits = self.searcher.search(parser.parse(query_text))
    return self.hits_to_list(hits)
def _create_query_for_string(self, text, require_all=True, analyzer=None):
    """Parse a plain query string into a query object.

    :param text: The query string
    :type text: str
    :param require_all: Default boolean operator between terms
        (True -> AND (default) / False -> OR)
    :type require_all: bool
    :param analyzer: Analyzer flags (tested with ``&``); when ``None`` the
        instance default ``self.analyzer`` is used.
        Possible analyzers are:

        - :attr:`CommonDatabase.ANALYZER_TOKENIZE`
          the field value is split to be matched word-wise
        - :attr:`CommonDatabase.ANALYZER_PARTIAL`
          the field value must start with the query string
        - :attr:`CommonDatabase.ANALYZER_EXACT`
          keep special characters and the like
    :type analyzer: int
    :return: The query produced by the PyLucene query parser
    :rtype: PyLucene.Query
    """
    flags = analyzer
    if flags is None:
        flags = self.analyzer

    if flags == self.ANALYZER_EXACT:
        # Exact matching: no substitution, the text is parsed unescaped.
        lucene_analyzer = self.ExactAnalyzer()
    else:
        # Special characters do not matter here, so escape them.
        text = _escape_term_value(text)
        lucene_analyzer = PyLucene.StandardAnalyzer()

    parser = PyLucene.QueryParser(analyzer=lucene_analyzer)
    if require_all:
        default_operator = parser.Operator.AND
    else:
        default_operator = parser.Operator.OR
    parser.setDefaultOperator(default_operator)

    if (flags & self.ANALYZER_PARTIAL) > 0:
        # PyLucene needs an explicit trailing wildcard for partial matching.
        text = text + "*"
    return parser.parse(text)
def search(self, query_string='', require_visible=True, allow_curated=True):
    """Run a user query against the index in ``search_index_dir``.

    The user query is disassembled and reassembled into a required
    ``+(...)`` clause, then optionally narrowed:

    - ``allow_curated`` false: adds ``-record-status:<STATUS_CURATED>``;
    - ``require_visible`` true: adds a required ``article-type`` range from
      ``traditional`` to ``curated`` and ``+record-status:<STATUS_CURATED>``.

    NOTE(review): with ``require_visible=True`` and ``allow_curated=False``
    the curated status is both required and excluded - confirm intent.

    :param query_string: The user's query; coerced with ``str()``.
    :param require_visible: See above.
    :param allow_curated: See above.
    :return: ``(hits, searcher)`` on success - the open searcher is handed
        to the caller along with the hits. ``([], None)`` when anything
        fails; the failure is logged, not raised.
    """
    query_string = str(query_string)
    self.logger.info('Performing search: %s', query_string)

    disassembled_query = disassemble_user_query(query_string)
    self.logger.debug('Disassembled query: %s', disassembled_query)
    reassembled_query = '+(%s)' % reassemble_user_query(disassembled_query)
    self.logger.debug('Reassembled query: %s', reassembled_query)

    if not allow_curated:
        reassembled_query += \
            ' -record-status:%s' % canary.loader.QueuedRecord.STATUS_CURATED

    if require_visible:
        reassembled_query += ' +article-type:[%s TO %s]' % \
            (Study.ARTICLE_TYPES['traditional'], Study.ARTICLE_TYPES['curated'])
        reassembled_query += ' +record-status:%s' % \
            canary.loader.QueuedRecord.STATUS_CURATED

    # Bound before the try so the handler can tell whether it was opened.
    searcher = None
    try:
        searcher = PyLucene.IndexSearcher(
            PyLucene.FSDirectory.getDirectory(
                self.context.config.search_index_dir, False))
        analyzer = PyLucene.StandardAnalyzer()
        query_parser = PyLucene.QueryParser('all', analyzer)
        query_parser.setOperator(PyLucene.QueryParser.DEFAULT_OPERATOR_AND)
        query = query_parser.parseQuery(reassembled_query)
        self.logger.info('Search query: %s', query)
        hits = searcher.search(query)
        return hits, searcher
    except Exception as e:
        self.logger.error('Search failed: %s', e)
        # Nothing can fail after the hits are obtained, so there are never
        # partial results to return here. The caller will not receive the
        # searcher either, so release it instead of leaking it.
        if searcher is not None:
            try:
                searcher.close()
            except Exception as close_error:
                self.logger.error('Closing searcher failed: %s', close_error)
        return [], None
def search(self, query, fields=FEED_ENTRY_FIELDS, analyzer=None, store=None):
    """Search one or more fields and return the hits with their searcher.

    :param query: Query text; an empty or whitespace-only query yields
        ``None``.
    :param fields: Field names to search. More than one field uses a
        ``MultiFieldQueryParser``; a single field uses a plain
        ``QueryParser``; no fields yields ``None``.
    :param analyzer: Analyzer for parsing; a falsy value falls back to
        ``self.analyzer``.
    :param store: Index store to open; ``None`` falls back to
        ``self.entry_modifier.store``.
    :return: ``HitHolder(hits, searcher)`` with relevance-sorted hits, or
        ``None`` when there is nothing to search.
    """
    nothing_to_search = (
        not query or not query.strip() or len(fields) == 0
    )
    if nothing_to_search:
        return None

    if not analyzer:
        analyzer = self.analyzer
    index_store = self.entry_modifier.store if store is None else store

    if len(fields) > 1:
        parser = lucene.MultiFieldQueryParser(fields, analyzer)
    else:
        parser = lucene.QueryParser(fields[0], analyzer)
    parsed_query = parser.parse(query)

    index_searcher = lucene.IndexSearcher(index_store)
    found = index_searcher.search(parsed_query, lucene.Sort.RELEVANCE)
    return HitHolder(found, index_searcher)
def _create_query_for_string(self, text, require_all=True, analyzer=None):
    """Parse a plain query string into a query object.

    The string is parsed against ``UNNAMED_FIELD_NAME``.

    @param text: the query string
    @type text: str
    @param require_all: default boolean operator between terms
        (True -> AND (default) / False -> OR)
    @type require_all: bool
    @param analyzer: analyzer flags (tested with C{&}); when C{None} the
        instance default C{self.analyzer} is used.
        Possible analyzers are:
          - L{CommonDatabase.ANALYZER_TOKENIZE}
            the field value is split to be matched word-wise
          - L{CommonDatabase.ANALYZER_PARTIAL}
            the field value must start with the query string
          - L{CommonDatabase.ANALYZER_EXACT}
            keep special characters and the like
    @type analyzer: int
    @return: the query produced by the PyLucene query parser
    @rtype: PyLucene.Query
    """
    flags = self.analyzer if analyzer is None else analyzer

    exact_match = flags == self.ANALYZER_EXACT
    if exact_match:
        lucene_analyzer = PyLucene.KeywordAnalyzer()
    else:
        # Only non-exact queries get their text escaped before parsing.
        text = _escape_term_value(text)
        lucene_analyzer = PyLucene.StandardAnalyzer()

    parser = PyLucene.QueryParser(UNNAMED_FIELD_NAME, lucene_analyzer)
    if (flags & self.ANALYZER_PARTIAL) > 0:
        # PyLucene needs an explicit trailing wildcard for partial matching.
        text = text + "*"

    if require_all:
        parser.setDefaultOperator(parser.Operator.AND)
    else:
        parser.setDefaultOperator(parser.Operator.OR)
    return parser.parse(text)