def main(argv):
    """Parse the command line and run one query against a Lucene index.

    Options (each takes a value):
      -i, --lucene_index  index directory, opened with
                          FSDirectory.getDirectory(path, False); required
      -f, --fields        comma-separated list, split into a list; required
      -q, --query         query string (default "")
      -s, --start         converted with int() (default 0)
      -r, --nb_results    converted with int() (default 10)

    Prints the usage text and exits with status 2 on an unrecognised option
    or when a required option is missing; prints the usage text and exits
    with the default status when no options are given at all.
    """
    # defaults
    query = ""
    start = 0
    nb_results = 10
    # Required options: left as None so that their absence can be detected
    # instead of surfacing as an UnboundLocalError at the doQuery() call.
    searcher = None
    flds = None

    try:
        opts, args = getopt.getopt(
            argv,
            "i:f:q:s:r:",
            ["lucene_index=", "fields=", "query=", "start=", "nb_results="],
        )
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    if not opts:
        usage()
        sys.exit()

    for opt, arg in opts:
        if opt in ("-i", "--lucene_index"):
            fsDir = FSDirectory.getDirectory(arg, False)
            searcher = IndexSearcher(fsDir)
        elif opt in ("-f", "--fields"):
            flds = arg.split(",")
        elif opt in ("-q", "--query"):
            query = arg
        elif opt in ("-s", "--start"):
            start = int(arg)
        elif opt in ("-r", "--nb_results"):
            nb_results = int(arg)

    if searcher is None or flds is None:
        usage()
        sys.exit(2)

    doQuery(searcher, flds, query, start, nb_results)
def main(argv):
    """Parse the command line and run one query against a Lucene index.

    Options (each takes a value):
      -i, --lucene_index  index directory, opened with
                          FSDirectory.getDirectory(path, False); required
      -q, --query         query string (default '')
      -s, --start         converted with int() (default 0)
      -r, --nb_results    converted with int() (default 10)

    Prints the usage text and exits with status 2 on an unrecognised option
    or when no index was given; prints the usage text and exits with the
    default status when no options are given at all.
    """
    # defaults
    query = ''
    start = 0
    nb_results = 10
    # Required option: left as None so that its absence can be detected
    # instead of surfacing as an UnboundLocalError at the doQuery() call.
    searcher = None

    try:
        opts, args = getopt.getopt(
            argv,
            "i:q:s:r:",
            ["lucene_index=", "query=", "start=", "nb_results="],
        )
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    if not opts:
        usage()
        sys.exit()

    for opt, arg in opts:
        if opt in ("-i", "--lucene_index"):
            fsDir = FSDirectory.getDirectory(arg, False)
            searcher = IndexSearcher(fsDir)
        elif opt in ("-q", "--query"):
            query = arg
        elif opt in ("-s", "--start"):
            start = int(arg)
        elif opt in ("-r", "--nb_results"):
            nb_results = int(arg)

    if searcher is None:
        usage()
        sys.exit(2)

    doQuery(searcher, query, start, nb_results)
def getIndexDirectory(self):
    """Return the FSDirectory for self.indexPath, opening it on first use.

    Opened directories are kept in IndexSupport.indexDirectories, keyed by
    path, so each path is opened at most once through this method. The
    directory on disk is created first when it does not exist yet.
    """
    # assumes indexDirectories is a dict (or supports `in`) -- TODO confirm
    directories = IndexSupport.indexDirectories
    path = self.indexPath
    if path not in directories:
        if not os.path.exists(path):
            os.makedirs(path)
        directories[path] = FSDirectory.getDirectory(path, False)
    return directories[path]
# NOTE(review): the next line is a whitespace-collapsed excerpt; its original line breaks and
# indentation are lost. `_getdoc` and `do_GET` take `self` and are presumably methods of the
# `SearchRequestHandler` class handed to `SearchServer` below -- the class header is not visible, confirm.
#   _getdoc(filename, args): starts from the default reply "0 |"; when the class-level pattern `p`
#     matches `args` and `filename` is '/search', the reply becomes
#     doQuery(unquoted group 1, int(group 2), int(group 3)). The inline `# doc = doQuery(...split('+')...)`
#     is a disabled earlier variant of that call.
#   do_GET(): splits self.path with urlparse.urlsplit, passes components [2] and [3] to _getdoc, then
#     calls _writeheaders(doc) and writes the reply to self.wfile.
#   SearchServer: HTTPServer subclass that sets allow_reuse_address = 1.
#   Entry point: requires exactly one argument (the index directory), otherwise prints the usage line;
#     it binds the module-level `searcher` and serves forever on ('', SERVER_PORT).
def _getdoc(self, filename, args): doc = "0 |" m = self.__class__.p.search(args) if m and filename == '/search': # doc = doQuery(m.group(1).split('+'), int(m.group(2)), int(m.group(3))) doc = doQuery(urllib.unquote(m.group(1)), int(m.group(2)), int(m.group(3))) return doc def do_GET(self): url = urlparse.urlsplit(self.path) doc = self._getdoc(url[2], url[3]) self._writeheaders(doc) self.wfile.write(doc) class SearchServer(HTTPServer): allow_reuse_address = 1 if __name__ == '__main__': if len(sys.argv) != 2: print "Usage: python lucene_server.py index_dir" else: indexDir = sys.argv[1] fsDir = FSDirectory.getDirectory(indexDir, False) searcher = IndexSearcher(fsDir) serveraddr = ('', SERVER_PORT) srvr = SearchServer(serveraddr, SearchRequestHandler) print "Ready to serve search queries" srvr.serve_forever()
#!/usr/bin/python
"""Print the stored fields of the indexed message whose 'id' term equals argv[1].

Looks the id up in the 'chipy-index' directory and prints the id, from,
subject, date and body fields of the first hit. Exits with an error message
when no id is given or no document matches.
"""
from sys import argv
from PyLucene import FSDirectory, IndexSearcher, TermQuery, Term

if len(argv) < 2:
    # Previously this surfaced as an IndexError traceback.
    raise SystemExit("usage: %s MESSAGE_ID" % argv[0])

# Named message_id rather than `id` so the builtin id() is not shadowed.
message_id = argv[1].strip()

directory = FSDirectory.getDirectory('chipy-index', False)
searcher = IndexSearcher(directory)
query = TermQuery(Term('id', message_id))
hits = searcher.search(query)

if hits.length() == 0:
    # hits.doc(0) would fail on an empty result; report it instead.
    raise SystemExit("no message with id %r found in the index" % message_id)

doc = hits.doc(0)
print("ID: %s" % doc.getField('id').stringValue())
print("From: %s" % doc.getField('from').stringValue())
print("Subject: %s" % doc.getField('subject').stringValue())
print("Date: %s" % doc.getField('date').stringValue())
print(doc.getField('body').stringValue())
print("")
'path' and 'name' fields for each of the hits it finds in the index. Note that search.close() is currently commented out because it causes a stack overflow in some cases. """ def run(searcher, analyzer): while True: print print "Hit enter with no input to quit." command = raw_input("Query:") if command == '': return print print "Searching for:", command query = QueryParser("contents", analyzer).parse(command) hits = searcher.search(query) print "%s total matching documents" % hits.length() for i, doc in hits: print 'path:', doc.get("path"), 'name:', doc.get("name"), 100*hits.score(i) if __name__ == '__main__': STORE_DIR = "index" print 'PyLucene', VERSION, 'Lucene', LUCENE_VERSION directory = FSDirectory.getDirectory(STORE_DIR, False) searcher = IndexSearcher(directory) analyzer = StandardAnalyzer() run(searcher, analyzer) searcher.close()
# NOTE(review): the next line is a whitespace-collapsed excerpt that opens in the middle of a function
# body; its `def` line is not visible (presumably the `run(searcher, analyzer, querystr)` called by the
# entry point -- confirm).
#   Visible tail of that function: collects [name, owner, title] for every hit (owner and title are
#     encoded to 'gbk'), sorts the rows by name with a cmp-style comparator, and prints one row per line.
#   test_fixture(): sets the global BOARDSPATH to './'; its call in the entry point is commented out.
#   Entry point: argv[1] is the board, argv[2] the query (decoded from 'gbk' and stripped). Exits with -1
#     when BOARDSPATH+board+'/'+RECENT_INDEX does not exist or the query is empty; otherwise opens the
#     index, runs the query and closes the searcher.
results = [] for i, doc in hits: results.append([doc.get("name"), doc.get("owner").encode('gbk'), doc.get("title").encode('gbk')]) # sort result results.sort(lambda x,y: cmp(x[0],y[0])) for name,owner,title in results: print name, owner, title def test_fixture(): global BOARDSPATH BOARDSPATH = './' if __name__ == '__main__': #test_fixture() board = sys.argv[1] querystr = sys.argv[2].decode('gbk').strip() path = BOARDSPATH+board+'/'+RECENT_INDEX if not os.path.exists(path) or len(querystr) == 0: sys.exit(-1) directory = FSDirectory.getDirectory(path, False) searcher = IndexSearcher(directory) analyzer = StandardAnalyzer() run(searcher, analyzer, querystr) searcher.close()
#!/usr/bin/env python2.4 #import PyLucene from mailbox import UnixMailbox from PyLucene import StandardAnalyzer, FSDirectory, IndexWriter from email_loader import EmailDoc import os,sys,datetime,email,config # determine when (if) the last update was lastUp = datetime.datetime(2000, 1, 1) createNewIndex = True quiet = False if len(sys.argv) > 1 and sys.argv[1] in ('-q', '--quiet'): quiet = True store = FSDirectory.getDirectory( config.DB_PATH, True ) writer = IndexWriter( store, StandardAnalyzer(), True ) """ mailbox = UnixMailbox( open('chipy.mbox') ) while True: msg = mailbox.next() if msg == None: break writer.addDocument( EmailDoc(msg) ) """ source=config.MAILDIR_ROOT_DIR for root, dirs, files in os.walk(source): if not quiet: sys.stdout.write('\nindexing files in %s' % root)
# NOTE(review): the line above is a whitespace-collapsed excerpt that stops inside the os.walk loop, so
# the remainder of the loop body (and any writer.close()) is not visible here.
#   Visible behaviour: '-q' or '--quiet' as the first argument sets `quiet`; an FSDirectory at
#     config.DB_PATH and an IndexWriter with a StandardAnalyzer are created (both are passed True --
#     presumably "create a new index", confirm against the PyLucene API); the triple-quoted string holds
#     a disabled UnixMailbox-based indexing loop; the walk over config.MAILDIR_ROOT_DIR writes one
#     progress line per directory unless `quiet` is set.
#   `lastUp` and `createNewIndex` are assigned but not used in the visible part.
#!/usr/bin/env python2.4
"""Index every message of chipy.mbox into the 'chipy-index' Lucene directory.

Both FSDirectory.getDirectory and IndexWriter are passed True, which
presumably requests a freshly created index -- confirm against the PyLucene
API before relying on it.
"""
from mailbox import UnixMailbox
from PyLucene import StandardAnalyzer, FSDirectory, IndexWriter
# NOTE(review): `email` must resolve to the project module that provides
# EmailDoc, not the standard-library email package -- confirm the import path.
from email import EmailDoc

store = FSDirectory.getDirectory('chipy-index', True)
writer = IndexWriter(store, StandardAnalyzer(), True)
# try/finally (not `with`) because the shebang targets Python 2.4.
try:
    mbox_file = open('chipy.mbox')
    try:
        mailbox = UnixMailbox(mbox_file)
        # mailbox.next() returns None once the mailbox is exhausted.
        for msg in iter(mailbox.next, None):
            writer.addDocument(EmailDoc(msg))
    finally:
        mbox_file.close()
finally:
    # Close the writer even when indexing fails part-way.
    writer.close()
#!/usr/bin/env python2.4
"""Index every message of chipy.mbox into the 'chipy-index' Lucene directory.

Both FSDirectory.getDirectory and IndexWriter are passed True, which
presumably requests a freshly created index -- confirm against the PyLucene
API before relying on it.
"""
from mailbox import UnixMailbox
from PyLucene import StandardAnalyzer, FSDirectory, IndexWriter
# NOTE(review): `email` must resolve to the project module that provides
# EmailDoc, not the standard-library email package -- confirm the import path.
from email import EmailDoc

store = FSDirectory.getDirectory( 'chipy-index', True )
writer = IndexWriter( store, StandardAnalyzer(), True )
# try/finally (not `with`) because the shebang targets Python 2.4.
try:
    mbox_file = open('chipy.mbox')
    try:
        mailbox = UnixMailbox( mbox_file )
        # mailbox.next() returns None once the mailbox is exhausted.
        for msg in iter(mailbox.next, None):
            writer.addDocument( EmailDoc(msg) )
    finally:
        mbox_file.close()
finally:
    # Close the writer even when indexing fails part-way.
    writer.close()
def getIndexDirectory(self):
    """Return the FSDirectory for self.indexPath, opening it on first use.

    Opened directories are kept in IndexSupport.indexDirectories, keyed by
    path, so each path is opened at most once through this method. The
    directory on disk is created first when it does not exist yet.
    """
    # assumes indexDirectories is a dict (or supports `in`) -- TODO confirm
    directories = IndexSupport.indexDirectories
    path = self.indexPath
    if path not in directories:
        if not os.path.exists(path):
            os.makedirs(path)
        directories[path] = FSDirectory.getDirectory(path, False)
    return directories[path]
#!/usr/bin/env python2.4
"""Search the 'chipy-index' directory and list the matching messages.

argv[1] is parsed as a query against the 'all' field with a
StandardAnalyzer; the id, from, subject and date fields of every hit are
printed, followed by a blank line.
"""
from sys import argv
from PyLucene import FSDirectory, IndexSearcher, QueryParser, StandardAnalyzer

if len(argv) < 2:
    # Previously this surfaced as an IndexError traceback.
    raise SystemExit("usage: %s QUERY" % argv[0])

query_text = argv[1].strip()

directory = FSDirectory.getDirectory('chipy-index', False)
searcher = IndexSearcher(directory)
query = QueryParser.parse(query_text, 'all', StandardAnalyzer())
hits = searcher.search(query)

for i in range(hits.length()):
    doc = hits.doc(i)
    print("ID: %s" % doc.getField('id').stringValue())
    print("From: %s" % doc.getField('from').stringValue())
    print("Subject: %s" % doc.getField('subject').stringValue())
    print("Date: %s" % doc.getField('date').stringValue())
    print("")
some cases. """ def run(searcher, analyzer): while True: print print "Hit enter with no input to quit." command = raw_input("Query:") if command == '': return print print "Searching for:", command query = QueryParser("contents", analyzer).parse(command) hits = searcher.search(query) print "%s total matching documents." % hits.length() for i, doc in hits: print 'path:', doc.get("path"), 'name:', doc.get("name") if __name__ == '__main__': STORE_DIR = "index" print 'PyLucene', VERSION, 'Lucene', LUCENE_VERSION directory = FSDirectory.getDirectory(STORE_DIR, False) searcher = IndexSearcher(directory) analyzer = StandardAnalyzer() run(searcher, analyzer) searcher.close()
# NOTE(review): the next line is a whitespace-collapsed excerpt that opens in the middle of a method
# body; its `def` line is not visible (presumably `_getdoc(self, filename, args)`, which do_GET calls
# -- confirm), and neither is the header of the class that owns these methods.
#   Visible tail of that method: starts from the default reply "0 |"; when the class-level pattern `p`
#     matches `args` and `filename` is "/search", the reply becomes
#     doQuery(unquoted group 1, int(group 2), int(group 3)). The inline `# doc = doQuery(...split('+')...)`
#     is a disabled earlier variant of that call.
#   do_GET(): splits self.path with urlparse.urlsplit, passes components [2] and [3] to _getdoc, then
#     calls _writeheaders(doc) and writes the reply to self.wfile.
#   SearchServer: HTTPServer subclass that sets allow_reuse_address = 1.
#   Entry point: requires exactly one argument (the index directory), otherwise prints the usage line;
#     it binds the module-level `searcher` and serves forever on ("", SERVER_PORT).
doc = "0 |" m = self.__class__.p.search(args) if m and filename == "/search": # doc = doQuery(m.group(1).split('+'), int(m.group(2)), int(m.group(3))) doc = doQuery(urllib.unquote(m.group(1)), int(m.group(2)), int(m.group(3))) return doc def do_GET(self): url = urlparse.urlsplit(self.path) doc = self._getdoc(url[2], url[3]) self._writeheaders(doc) self.wfile.write(doc) class SearchServer(HTTPServer): allow_reuse_address = 1 if __name__ == "__main__": if len(sys.argv) != 2: print "Usage: python lucene_server.py index_dir" else: indexDir = sys.argv[1] fsDir = FSDirectory.getDirectory(indexDir, False) searcher = IndexSearcher(fsDir) serveraddr = ("", SERVER_PORT) srvr = SearchServer(serveraddr, SearchRequestHandler) print "Ready to serve search queries" srvr.serve_forever()