示例#1
0
    def search(self,
               q,
               models=None,
               order_by=RELEVANCE,
               limit=None,
               offset=None):
        """Run a Lucene search for ``q`` and return the wrapped results.

        The user query is first converted with ``LuceneQueryConverter``.
        When ``models`` is given, one ``MODEL_FIELD`` clause per entry is
        appended to the query; an entry contributes its ``_meta`` attribute
        if it has one, otherwise the entry itself.

        ``order_by`` is either the ``RELEVANCE`` sentinel or a field name,
        where a leading ``-`` sets the reverse flag of the sort field.
        ``limit`` and ``offset`` are handed unchanged to
        ``self._get_search_results`` together with the original query.
        """
        original_query = q
        q = query.convert(original_query, LuceneQueryConverter)
        if models:
            model_clauses = [
                '%s:"%s"' % (MODEL_FIELD,
                             m._meta if hasattr(m, "_meta") else m)
                for m in models
            ]
            q += ' AND (%s)' % (' '.join(model_clauses))

        searcher = PyLucene.IndexSearcher(settings.SEARCH_INDEX_PATH)
        analyzer = PorterStemmerAnalyzer()
        parser = PyLucene.QueryParser(CONTENTS_FIELD, analyzer)
        compiled_query = parser.parse(q)

        if order_by is not RELEVANCE:
            # A leading '-' requests the reversed ordering of the field.
            descending = order_by.startswith('-')
            field_name = order_by.lstrip('-')
            sort = PyLucene.Sort(PyLucene.SortField(field_name, descending))
        else:
            sort = PyLucene.Sort.RELEVANCE

        hits = searcher.search(compiled_query, sort)
        return self._get_search_results(original_query, hits, limit, offset)
示例#2
0
 def _index_refresh(self):
     """Re-read the indexer database.

     Opens the reader and searcher when either of them is missing, or
     closes and re-opens both when the version reported for
     ``self.location`` differs from the cached ``self.index_version``.
     The cached version is updated every time the reader is (re)opened,
     so an unchanged index is not reopened on the following call.

     ``PyLucene.JavaError`` is deliberately ignored: the refresh is a
     best-effort operation and leaves the current state untouched on
     failure.
     """
     try:
         if self.reader is None or self.searcher is None:
             must_open = True
         elif self.index_version != self.reader.getCurrentVersion(
                 self.location):
             # The index changed on disk: drop the stale handles first.
             self.searcher.close()
             self.reader.close()
             must_open = True
         else:
             must_open = False

         if must_open:
             self.reader = PyLucene.IndexReader.open(self.location)
             self.searcher = PyLucene.IndexSearcher(self.reader)
             # Record the version for both the first open and a reopen;
             # otherwise the next refresh would always see a mismatch.
             self.index_version = self.reader.getCurrentVersion(self.location)
     except PyLucene.JavaError:
         # TODO: add some debugging output?
         # Best effort: a failed refresh must not break the caller.
         pass
示例#3
0
    def search_node_by_name2(self, name):
        """Search the ``COLUMN_NAME`` field for ``name``.

        The searcher is created on first use from the "index" path and
        cached on the instance.  ``name`` is parsed as a Lucene query with
        a ``StandardAnalyzer``; the hits are converted with
        ``self.hits_to_list`` and returned.
        """
        # Lazily open the searcher and keep it for later calls.
        if self.searcher is None:
            self.searcher = PyLucene.IndexSearcher("index")

        name_parser = PyLucene.QueryParser(COLUMN_NAME,
                                           PyLucene.StandardAnalyzer())
        name_query = name_parser.parse(name)
        found = self.searcher.search(name_query)
        return self.hits_to_list(found)
示例#4
0
        def openReadIndex(self):
            """Open an ``IndexSearcher`` on the index directory.

            The directory is obtained from
            ``self.store.newDirectory(self.indexDirectory)``.  If it does not
            exist yet, a write index is opened and immediately closed first
            (presumably to create an empty index -- confirm against
            ``openWriteIndex``).

            Raises ``IndexCorrupt`` when constructing the searcher fails with
            ``PyLucene.JavaError``.
            """
            luceneDir = self.store.newDirectory(self.indexDirectory)

            if not luceneDir.exists():
                self.openWriteIndex().close()

            # Second argument False: passed through to getDirectory unchanged
            # (presumably "do not create" -- confirm against the PyLucene API).
            fsdir = PyLucene.FSDirectory.getDirectory(luceneDir.path, False)
            try:
                searcher = PyLucene.IndexSearcher(fsdir)
            except PyLucene.JavaError, e:
                # The original Java error is discarded; callers only see
                # IndexCorrupt.
                raise IndexCorrupt()
            # NOTE(review): ``searcher`` is not used or returned in the lines
            # visible here -- this excerpt looks truncated; confirm against
            # the full method.
示例#5
0
    def search(self,
               query_string='',
               require_visible=True,
               allow_curated=True):
        """Search the Lucene index and return ``(hits, searcher)``.

        ``query_string`` is disassembled and reassembled into a required
        (``+(...)``) clause.  When ``allow_curated`` is false, records with
        the curated status are excluded.  When ``require_visible`` is true,
        the article type is restricted to the traditional..curated range
        and the curated record status is required.

        On success the hits are returned together with the still-open
        searcher (presumably because the hits are read lazily -- confirm
        with callers, who then own the searcher).  On any failure the error
        is logged, a searcher that was already opened is closed so it does
        not leak, and ``([], None)`` is returned.
        """
        query_string = str(query_string)
        self.logger.info('Performing search: %s', query_string)
        disassembled_query = disassemble_user_query(query_string)
        self.logger.debug('Disassembled query: %s', str(disassembled_query))
        reassembled_query = '+(%s)' % reassemble_user_query(disassembled_query)
        self.logger.debug('Reassembled query: %s', reassembled_query)

        if not allow_curated:
            reassembled_query += \
                ' -record-status:%s' % canary.loader.QueuedRecord.STATUS_CURATED

        if require_visible:
            reassembled_query += ' +article-type:[%s TO %s]' % \
                (Study.ARTICLE_TYPES['traditional'],
                 Study.ARTICLE_TYPES['curated'])
            reassembled_query += ' +record-status:%s' % \
                canary.loader.QueuedRecord.STATUS_CURATED

        # Bound before the try block so the handler can always inspect it.
        searcher = None
        try:
            searcher = PyLucene.IndexSearcher(
                PyLucene.FSDirectory.getDirectory(
                    self.context.config.search_index_dir, False))
            analyzer = PyLucene.StandardAnalyzer()
            query_parser = PyLucene.QueryParser('all', analyzer)
            query_parser.setOperator(PyLucene.QueryParser.DEFAULT_OPERATOR_AND)
            query = query_parser.parseQuery(reassembled_query)
            self.logger.info('Search query: %s', query)
            hits = searcher.search(query)
            return hits, searcher
        except Exception as e:
            self.logger.error('Search failed: %s', e)
            # No hits can exist at this point (the only statement after the
            # search is the return), so release the searcher instead of
            # leaking it and report an empty result.
            if searcher is not None:
                try:
                    searcher.close()
                except Exception as close_error:
                    self.logger.error('Closing searcher failed: %s',
                                      close_error)
            return [], None
示例#6
0
    def search(self,
               query,
               fields=FEED_ENTRY_FIELDS,
               analyzer=None,
               store=None):
        """Search ``fields`` for ``query`` and return a ``HitHolder``.

        Returns ``None`` when the query is empty or whitespace-only, or
        when ``fields`` is empty.  ``analyzer`` falls back to
        ``self.analyzer`` and ``store`` to ``self.entry_modifier.store``.
        A multi-field parser is used when more than one field is given.
        Results are sorted by relevance; the holder receives both the hits
        and the searcher that produced them.
        """
        # Nothing to search for, or nothing to search in.
        if not query or not query.strip() or len(fields) == 0:
            return None

        if not analyzer:
            analyzer = self.analyzer
        if store is None:
            store = self.entry_modifier.store

        if len(fields) == 1:
            parser = lucene.QueryParser(fields[0], analyzer)
        else:
            parser = lucene.MultiFieldQueryParser(fields, analyzer)
        parsed_query = parser.parse(query)

        index_searcher = lucene.IndexSearcher(store)
        found = index_searcher.search(parsed_query, lucene.Sort.RELEVANCE)
        return HitHolder(found, index_searcher)
示例#7
0
    def search_node_by_attribute2(self, att_type, att_value):
        """Search nodes by attribute type and/or attribute value.

        * only ``att_type`` given: it is parsed against
          ``COLUMN_ATTRIBUTE_TYPE_NID``;
        * only ``att_value`` given: it is parsed against
          ``COLUMN_ATTRIBUTE_VALUE``;
        * both given: the value is searched in ``COLUMN_ATTRIBUTE_VALUE``
          and combined (``AND``) with a ``COLUMN_ATTRIBUTE_TYPE_NID`` term.

        An empty string means "not given".  When both are empty there is
        nothing to search for and an empty list is returned without
        touching the index (previously this case failed because no query
        was ever built).  Otherwise the hits are converted with
        ``self.hits_to_list`` and returned.
        """
        if att_type == "" and att_value == "":
            # NOTE(review): assumes callers treat the result as a list, as
            # the name hits_to_list suggests -- confirm.
            return []

        # Lazily open the searcher and keep it for later calls.
        if self.searcher is None:
            self.searcher = PyLucene.IndexSearcher("index")

        analyzer = PyLucene.StandardAnalyzer()

        if att_value == "":
            default_field = COLUMN_ATTRIBUTE_TYPE_NID
            expression = att_type
        elif att_type == "":
            default_field = COLUMN_ATTRIBUTE_VALUE
            expression = att_value
        else:
            default_field = COLUMN_ATTRIBUTE_VALUE
            expression = (COLUMN_ATTRIBUTE_TYPE_NID + ":" + att_type +
                          " AND " + att_value)

        parser = PyLucene.QueryParser(default_field, analyzer)
        query = parser.parse(expression)

        hits = self.searcher.search(query)
        result = self.hits_to_list(hits)

        return result
示例#8
0
    def __init__(self, basedir, analyzer=None, create_allowed=True):
        """Initialize or open an indexing database.

        Any derived class must override __init__.

        The current thread is attached to the PyLucene JVM, the base class
        is initialized, and the index at ``self.location`` is opened.  If
        it cannot be opened it is created (when ``create_allowed``),
        including missing parent directories.  Afterwards the reader, the
        index version and the searcher are set up, retrying up to ten
        times.

        :raise ValueError: The given location exists, but the database type
                           is incompatible (e.g. created by a different indexing engine)
        :raise OSError: the database failed to initialize

        :param basedir: The parent directory of the database
        :type basedir: str
        :param analyzer: Bitwise combination of possible analyzer flags
                         to be used as the default analyzer for this database.
                         Leave it empty to use the system default analyzer
                         (self.ANALYZER_DEFAULT). See self.ANALYZER_TOKENIZE,
                         self.ANALYZER_PARTIAL, ...
        :type analyzer: int
        :param create_allowed: create the database, if necessary; default: True
        :type create_allowed: bool
        """
        jvm = PyLucene.getVMEnv()
        jvm.attachCurrentThread()
        super(PyLuceneDatabase, self).__init__(basedir,
                                               analyzer=analyzer,
                                               create_allowed=create_allowed)
        self.pyl_analyzer = PyLucene.StandardAnalyzer()
        self.writer = None
        self.reader = None
        self.index_version = None
        try:
            # try to open an existing database
            tempreader = PyLucene.IndexReader.open(self.location)
            tempreader.close()
        except PyLucene.JavaError:
            # The index could not be opened (it may simply not exist yet).
            # TODO: emit debug output here in case this is a real problem.
            # Create the index, so we can open cached readers on it
            if not create_allowed:
                raise OSError("Indexer: skipping database creation")
            parent_path = os.path.dirname(self.location)
            try:
                # create the parent directory if it does not exist
                if not os.path.isdir(parent_path):
                    # recursively create all directories up to parent_path
                    os.makedirs(parent_path)
            except (IOError, OSError) as err_msg:
                # os.makedirs reports failures as OSError; catching only
                # IOError would skip this descriptive message on Python 2.
                raise OSError("Indexer: failed to create the parent "
                              "directory (%s) of the indexing database: %s" %
                              (parent_path, err_msg))
            try:
                tempwriter = PyLucene.IndexWriter(self.location,
                                                  self.pyl_analyzer, True)
                tempwriter.close()
            except PyLucene.JavaError as err_msg:
                raise OSError("Indexer: failed to open or create a Lucene"
                              " database (%s): %s" % (self.location, err_msg))
        # the indexer is initialized - now we prepare the searcher
        # windows file locking seems inconsistent, so we try 10 times
        # NOTE: the directory lock (self.dir_lock) is currently not taken here.
        numtries = 0
        lock_error_msg = None
        # read "self.reader", "self.index_version" and "self.searcher"
        while numtries < 10:
            try:
                self.reader = PyLucene.IndexReader.open(self.location)
                # Store the version under the attribute initialized above
                # (index_version); the old camelCase name is kept as an
                # alias for code that may still read it.
                self.index_version = self.reader.getCurrentVersion(
                    self.location)
                self.indexVersion = self.index_version
                self.searcher = PyLucene.IndexSearcher(self.reader)
                break
            except PyLucene.JavaError as e:
                # store error message for possible later re-raise (below)
                lock_error_msg = e
                time.sleep(0.01)
                numtries += 1
        else:
            # locking failed for 10 times
            raise OSError("Indexer: failed to lock index database"
                          " (%s)" % lock_error_msg)
        # initialize the searcher and the reader
        self._index_refresh()
示例#9
0
         tempwriter.close()
     except PyLucene.JavaError, err_msg:
         raise OSError("Indexer: failed to open or create a Lucene" \
                 + " database (%s): %s" % (self.location, err_msg))
 # the indexer is initialized - now we prepare the searcher
 # windows file locking seems inconsistent, so we try 10 times
 numtries = 0
 #self.dir_lock.acquire(blocking=True)
 # read "self.reader", "self.indexVersion" and "self.searcher"
 try:
     while numtries < 10:
         try:
             self.reader = PyLucene.IndexReader.open(self.location)
             self.indexVersion = self.reader.getCurrentVersion(
                 self.location)
             self.searcher = PyLucene.IndexSearcher(self.reader)
             break
         except PyLucene.JavaError, e:
             # store error message for possible later re-raise (below)
             lock_error_msg = e
             time.sleep(0.01)
             numtries += 1
     else:
         # locking failed for 10 times
         raise OSError("Indexer: failed to lock index database" \
                       + " (%s)" % lock_error_msg)
 finally:
     pass
 #    self.dir_lock.release()
 # initialize the searcher and the reader
 self._index_refresh()
示例#10
0
 def get_searcher(self):
     """Return a new ``IndexSearcher`` over the configured index directory.

     The directory is taken from ``self.config.search_index_dir``; a new
     searcher object is constructed on every call.
     """
     index_directory = PyLucene.FSDirectory.getDirectory(
         self.config.search_index_dir, False)
     return PyLucene.IndexSearcher(index_directory)