def search(self, q, models=None, order_by=RELEVANCE, limit=None, offset=None):
    """Perform a search.

    :param q: raw user query string
    :param models: optional iterable of model classes (anything with a
        ``_meta`` attribute) or model-name strings; restricts results to
        documents indexed for those models
    :param order_by: ``RELEVANCE``, or a field name optionally prefixed
        with ``-`` for a descending sort
    :param limit: maximum number of results (passed through)
    :param offset: number of leading results to skip (passed through)
    """
    original_query = q
    q = query.convert(original_query, LuceneQueryConverter)
    if models:
        # Restrict the converted query to the requested models' documents.
        models_queries = []
        for m in models:
            if hasattr(m, "_meta"):
                models_queries.append('%s:"%s"' % (MODEL_FIELD, m._meta))
            else:
                models_queries.append('%s:"%s"' % (MODEL_FIELD, m))
        q += ' AND (%s)' % (' '.join(models_queries))
    searcher = PyLucene.IndexSearcher(settings.SEARCH_INDEX_PATH)
    analyzer = PorterStemmerAnalyzer()
    compiled_query = PyLucene.QueryParser(CONTENTS_FIELD, analyzer).parse(q)
    if order_by is RELEVANCE:
        sort = PyLucene.Sort.RELEVANCE
    else:
        # A leading '-' requests a descending sort.
        # (Renamed from `reversed`, which shadowed the builtin.)
        descending = order_by.startswith('-')
        sort_field = PyLucene.SortField(order_by.lstrip('-'), descending)
        sort = PyLucene.Sort(sort_field)
    hits = searcher.search(compiled_query, sort)
    return self._get_search_results(original_query, hits, limit, offset)
def _index_refresh(self):
    """Re-read the indexer database.

    Opens the reader/searcher pair if missing, or reopens it when the
    on-disk index version has moved past the one we last saw.  Failures
    are deliberately swallowed (best-effort refresh): we keep whatever
    handles we already had.
    """
    try:
        if self.reader is None or self.searcher is None:
            self._open_reader_and_searcher()
        elif self.index_version != self.reader.getCurrentVersion(
                self.location):
            # The index changed on disk: drop the stale pair and reopen.
            self.searcher.close()
            self.reader.close()
            self._open_reader_and_searcher()
        self.index_version = self.reader.getCurrentVersion(self.location)
    except PyLucene.JavaError:
        # TODO: add some debugging output?
        # self.errorhandler.logerror("Error attempting to read index - try reindexing: "+str(e))
        pass

def _open_reader_and_searcher(self):
    """(Re)open ``self.reader`` and a searcher on top of it."""
    self.reader = PyLucene.IndexReader.open(self.location)
    self.searcher = PyLucene.IndexSearcher(self.reader)
def search_node_by_name2(self, name):
    """Search the index for nodes matching *name* on the name column."""
    if self.searcher is None:
        # Lazily open a searcher over the on-disk index directory.
        self.searcher = PyLucene.IndexSearcher("index")
    parser = PyLucene.QueryParser(COLUMN_NAME, PyLucene.StandardAnalyzer())
    parsed_query = parser.parse(name)
    matches = self.searcher.search(parsed_query)
    return self.hits_to_list(matches)
def openReadIndex(self):
    """Open and return an ``IndexSearcher`` over this store's index.

    If no index directory exists yet, an empty index is created first so
    that opening for read succeeds.

    :raise IndexCorrupt: if Lucene cannot open the index directory.
    """
    luceneDir = self.store.newDirectory(self.indexDirectory)
    if not luceneDir.exists():
        # Create-and-close an empty write index so a reader can open it.
        self.openWriteIndex().close()
    fsdir = PyLucene.FSDirectory.getDirectory(luceneDir.path, False)
    try:
        searcher = PyLucene.IndexSearcher(fsdir)
    except PyLucene.JavaError:
        raise IndexCorrupt()
    # BUG FIX: the searcher was previously created and then discarded,
    # so this method always returned None.
    return searcher
def search(self, query_string='', require_visible=True, allow_curated=True):
    """Run a user search and return ``(hits, searcher)``.

    :param query_string: raw user query (coerced to str)
    :param require_visible: restrict to visible, curated records
    :param allow_curated: when False, exclude curated records
    :return: ``(hits, searcher)`` on success, ``([], None)`` on failure
    """
    hits = []
    # Defined up front so the except-path check below can never NameError
    # (previously `searcher` was unbound if IndexSearcher creation failed).
    searcher = None
    query_string = str(query_string)
    self.logger.info('Performing search: %s' % query_string)
    disassembled_query = disassemble_user_query(query_string)
    self.logger.debug('Disassembled query: %s' % str(disassembled_query))
    reassembled_query = '+(%s)' % reassemble_user_query(disassembled_query)
    self.logger.debug('Reassembled query: %s', reassembled_query)
    if not allow_curated:
        reassembled_query += \
            ' -record-status:%s' % canary.loader.QueuedRecord.STATUS_CURATED
    if require_visible:
        reassembled_query += ' +article-type:[%s TO %s]' % \
            (Study.ARTICLE_TYPES['traditional'],
            Study.ARTICLE_TYPES['curated'])
        reassembled_query += ' +record-status:%s' % \
            canary.loader.QueuedRecord.STATUS_CURATED
    try:
        searcher = PyLucene.IndexSearcher(
            PyLucene.FSDirectory.getDirectory(
                self.context.config.search_index_dir, False))
        analyzer = PyLucene.StandardAnalyzer()
        query_parser = PyLucene.QueryParser('all', analyzer)
        query_parser.setOperator(PyLucene.QueryParser.DEFAULT_OPERATOR_AND)
        query = query_parser.parseQuery(reassembled_query)
        self.logger.info('Search query: %s', query)
        hits = searcher.search(query)
        return hits, searcher
    except Exception as e:
        self.logger.error('Search failed: %s', e)
        #self.logger.error(traceback.format_stack())
        # Return whatever partial result we got before the failure.
        if hits and searcher:
            return hits, searcher
        else:
            return [], None
def search(self, query, fields=FEED_ENTRY_FIELDS, analyzer=None, store=None):
    """Search *fields* for *query* and return a HitHolder, or None.

    :param query: query string; None/empty/whitespace-only yields None
    :param fields: field names to search (multi-field when more than one);
        an empty (or None) fields collection also yields None
    :param analyzer: optional analyzer; defaults to ``self.analyzer``
    :param store: optional index store; defaults to the entry modifier's
    """
    # Idiomatic truthiness checks (also tolerates fields=None, which the
    # old `len(fields) == 0` would have crashed on).
    if not query or not query.strip() or not fields:
        return None
    analyzer = analyzer or self.analyzer
    if store is None:
        store = self.entry_modifier.store
    if len(fields) > 1:
        qp = lucene.MultiFieldQueryParser(fields, analyzer)
    else:
        qp = lucene.QueryParser(fields[0], analyzer)
    q = qp.parse(query)
    searcher = lucene.IndexSearcher(store)
    hits = searcher.search(q, lucene.Sort.RELEVANCE)
    # HitHolder keeps the searcher alive for the lifetime of the hits.
    return HitHolder(hits, searcher)
def search_node_by_attribute2(self, att_type, att_value):
    """Search nodes by attribute type and/or attribute value.

    Exactly one of three query shapes is built depending on which of
    *att_type* / *att_value* is non-empty.  When both are empty there is
    nothing to search for and ``[]`` is returned (previously this path
    left ``query`` unbound and raised NameError).
    """
    if self.searcher is None:
        # Lazily open a searcher over the on-disk index.
        self.searcher = PyLucene.IndexSearcher("index")
    analyzer = PyLucene.StandardAnalyzer()
    if att_type != "" and att_value == "":
        parser = PyLucene.QueryParser(COLUMN_ATTRIBUTE_TYPE_NID, analyzer)
        query = parser.parse(att_type)
    elif att_type == "" and att_value != "":
        parser = PyLucene.QueryParser(COLUMN_ATTRIBUTE_VALUE, analyzer)
        query = parser.parse(att_value)
    elif att_type != "" and att_value != "":
        # Combined query: explicit type field AND value on default field.
        parser = PyLucene.QueryParser(COLUMN_ATTRIBUTE_VALUE, analyzer)
        query = parser.parse(COLUMN_ATTRIBUTE_TYPE_NID + ":" + att_type
                             + " AND " + att_value)
    else:
        # BUG FIX: both arguments empty — no query can be built.
        return []
    hits = self.searcher.search(query)
    result = self.hits_to_list(hits)
    return result
def __init__(self, basedir, analyzer=None, create_allowed=True):
    """Initialize or open an indexing database.

    Any derived class must override __init__.

    :raise ValueError: The given location exists, but the database type
            is incompatible (e.g. created by a different indexing engine)
    :raise OSError: the database failed to initialize

    :param basedir: The parent directory of the database
    :type basedir: str
    :param analyzer: Bitwise combination of possible analyzer flags
            to be used as the default analyzer for this database. Leave it empty
            to use the system default analyzer (self.ANALYZER_DEFAULT).
            See self.ANALYZER_TOKENIZE, self.ANALYZER_PARTIAL, ...
    :type analyzer: int
    :param create_allowed: create the database, if necessary; default: True
    :type create_allowed: bool
    """
    jvm = PyLucene.getVMEnv()
    jvm.attachCurrentThread()
    super(PyLuceneDatabase, self).__init__(basedir, analyzer=analyzer,
            create_allowed=create_allowed)
    self.pyl_analyzer = PyLucene.StandardAnalyzer()
    self.writer = None
    self.reader = None
    self.index_version = None
    try:
        # try to open an existing database
        tempreader = PyLucene.IndexReader.open(self.location)
        tempreader.close()
    except PyLucene.JavaError:
        # Write an error out, in case this is a real problem instead of
        # an absence of an index
        # TODO: turn the following two lines into debug output
        #errorstr = str(e).strip() + "\n" + self.errorhandler.traceback_str()
        #DEBUG_FOO("could not open index, so going to create: " + errorstr)
        # Create the index, so we can open cached readers on it
        if not create_allowed:
            raise OSError("Indexer: skipping database creation")
        try:
            # create the parent directory if it does not exist
            parent_path = os.path.dirname(self.location)
            if not os.path.isdir(parent_path):
                # recursively create all directories up to parent_path
                os.makedirs(parent_path)
        except IOError as err_msg:
            raise OSError("Indexer: failed to create the parent "
                    "directory (%s) of the indexing database: %s"
                    % (parent_path, err_msg))
        try:
            tempwriter = PyLucene.IndexWriter(self.location,
                    self.pyl_analyzer, True)
            tempwriter.close()
        except PyLucene.JavaError as err_msg:
            raise OSError("Indexer: failed to open or create a Lucene"
                    " database (%s): %s" % (self.location, err_msg))
    # the indexer is initialized - now we prepare the searcher
    # windows file locking seems inconsistent, so we try 10 times
    numtries = 0
    #self.dir_lock.acquire(blocking=True)
    # read "self.reader", "self.index_version" and "self.searcher"
    try:
        while numtries < 10:
            try:
                self.reader = PyLucene.IndexReader.open(self.location)
                # BUG FIX: this used to assign self.indexVersion
                # (camelCase), but _index_refresh compares against the
                # snake_case self.index_version initialized above, so the
                # version was never actually recorded.
                self.index_version = self.reader.getCurrentVersion(
                        self.location)
                self.searcher = PyLucene.IndexSearcher(self.reader)
                break
            except PyLucene.JavaError as e:
                # store error message for possible later re-raise (below)
                lock_error_msg = e
                time.sleep(0.01)
                numtries += 1
        else:
            # locking failed for 10 times
            raise OSError("Indexer: failed to lock index database"
                    " (%s)" % lock_error_msg)
    finally:
        pass
        # self.dir_lock.release()
    # initialize the searcher and the reader
    self._index_refresh()
# NOTE(review): this span is a fragment — it opens mid-statement inside a
# try: whose beginning (the PyLucene.IndexWriter creation bound to
# `tempwriter`) lies outside the visible source.  It appears to be an
# alternate, Python-2-only (`except Exc, name`) copy of the tail of the
# __init__ above; verify which copy the build actually uses.
            tempwriter.close()
        except PyLucene.JavaError, err_msg:
            raise OSError("Indexer: failed to open or create a Lucene" \
                + " database (%s): %s" % (self.location, err_msg))
    # the indexer is initialized - now we prepare the searcher
    # windows file locking seems inconsistent, so we try 10 times
    numtries = 0
    #self.dir_lock.acquire(blocking=True)
    # read "self.reader", "self.indexVersion" and "self.searcher"
    try:
        while numtries < 10:
            try:
                self.reader = PyLucene.IndexReader.open(self.location)
                # NOTE(review): assigns camelCase self.indexVersion while
                # _index_refresh reads self.index_version — confirm this
                # mismatch is intended before relying on index-version
                # comparisons.
                self.indexVersion = self.reader.getCurrentVersion(
                    self.location)
                self.searcher = PyLucene.IndexSearcher(self.reader)
                break
            except PyLucene.JavaError, e:
                # store error message for possible later re-raise (below)
                lock_error_msg = e
                time.sleep(0.01)
                numtries += 1
        else:
            # locking failed for 10 times
            raise OSError("Indexer: failed to lock index database" \
                + " (%s)" % lock_error_msg)
    finally:
        pass
        # self.dir_lock.release()
    # initialize the searcher and the reader
    self._index_refresh()
def get_searcher(self):
    """Return a fresh IndexSearcher over the configured index directory."""
    directory = PyLucene.FSDirectory.getDirectory(
        self.config.search_index_dir, False)
    return PyLucene.IndexSearcher(directory)