示例#1
0
文件: r.py 项目: alexksikes/TREC-2006
def main(argv):
    """Parse command-line options and run a query against a Lucene index.

    Recognized options (each takes a value):
      -i / --lucene_index  index directory, opened with FSDirectory/IndexSearcher
      -f / --fields        comma-separated list, split and passed to doQuery
      -q / --query         query string (default "")
      -s / --start         integer passed to doQuery as ``start`` (default 0)
      -r / --nb_results    integer passed to doQuery as ``nb_results`` (default 10)

    Prints the usage text and exits when the options cannot be parsed, when
    no option at all is given, or when the index or the field list is missing.
    """
    # defaults
    searcher = None
    flds = None
    query = ""
    start = 0
    nb_results = 10
    try:
        opts, args = getopt.getopt(
            argv,
            "i:f:q:s:r:",
            ["lucene_index=", "fields=", "query=", "start=", "nb_results="],
        )
    except getopt.GetoptError:
        usage()
        sys.exit(2)
    if not opts:
        usage()
        sys.exit()
    for opt, arg in opts:
        if opt in ("-i", "--lucene_index"):
            fsDir = FSDirectory.getDirectory(arg, False)
            searcher = IndexSearcher(fsDir)
        elif opt in ("-f", "--fields"):
            flds = arg.split(",")
        elif opt in ("-q", "--query"):
            query = arg
        elif opt in ("-s", "--start"):
            start = int(arg)
        elif opt in ("-r", "--nb_results"):
            nb_results = int(arg)
    # Both values are only bound by their option; without this check a
    # missing -i or -f surfaced as an UnboundLocalError in the call below.
    if searcher is None or flds is None:
        usage()
        sys.exit(2)
    doQuery(searcher, flds, query, start, nb_results)
示例#2
0
def main(argv):
    """Parse command-line options and run a query against a Lucene index.

    Recognized options (each takes a value):
      -i / --lucene_index  index directory, opened with FSDirectory/IndexSearcher
      -q / --query         query string (default '')
      -s / --start         integer passed to doQuery as ``start`` (default 0)
      -r / --nb_results    integer passed to doQuery as ``nb_results`` (default 10)

    Prints the usage text and exits when the options cannot be parsed, when
    no option at all is given, or when no index directory is given.
    """
    # defaults
    searcher = None
    query = ''
    start = 0
    nb_results = 10
    try:
        opts, args = getopt.getopt(
            argv,
            "i:q:s:r:",
            ["lucene_index=", "query=", "start=", "nb_results="],
        )
    except getopt.GetoptError:
        usage()
        sys.exit(2)
    if not opts:
        usage()
        sys.exit()
    for opt, arg in opts:
        if opt in ("-i", "--lucene_index"):
            fsDir = FSDirectory.getDirectory(arg, False)
            searcher = IndexSearcher(fsDir)
        elif opt in ("-q", "--query"):
            query = arg
        elif opt in ("-s", "--start"):
            start = int(arg)
        elif opt in ("-r", "--nb_results"):
            nb_results = int(arg)
    # The searcher is only bound by -i; without this check a missing index
    # option surfaced as an UnboundLocalError in the call below.
    if searcher is None:
        usage()
        sys.exit(2)
    doQuery(searcher, query, start, nb_results)
示例#3
0
 def getIndexDirectory(self):
     """Return the FSDirectory for ``self.indexPath``, opening it on first use.

     Opened directories are kept in ``IndexSupport.indexDirectories`` keyed
     by path, so later calls with the same path reuse the stored object.
     The path is created on disk first if it does not exist.
     """
     path = self.indexPath
     # Membership test with ``in`` instead of the deprecated has_key().
     if path not in IndexSupport.indexDirectories:
         if not os.path.exists(path):
             os.makedirs(path)
         IndexSupport.indexDirectories[path] = FSDirectory.getDirectory(
             path, False)
     return IndexSupport.indexDirectories[path]
示例#4
0
    
    def _getdoc(self, filename, args):
        """Build the response body for a request.

        Returns the result of doQuery when ``filename`` is '/search' and the
        class-level pattern ``p`` matches ``args``; otherwise returns "0 |".
        """
        match = self.__class__.p.search(args)
        if not match or filename != '/search':
            return "0 |"
        # Group 1 is URL-unquoted; groups 2 and 3 are converted to int.
        unquoted = urllib.unquote(match.group(1))
        return doQuery(unquoted, int(match.group(2)), int(match.group(3)))
        
    def do_GET(self):
        """Answer a GET request with the document built from the request URL."""
        parts = urlparse.urlsplit(self.path)
        # Index 2 is the path component, index 3 the query string.
        path, query_string = parts[2], parts[3]
        body = self._getdoc(path, query_string)
        self._writeheaders(body)
        self.wfile.write(body)
                
class SearchServer(HTTPServer):
    """HTTPServer subclass with ``allow_reuse_address`` switched on."""

    allow_reuse_address = 1

if __name__ == '__main__':
    if len(sys.argv) != 2:
        print "Usage: python lucene_server.py index_dir"
    else:
        indexDir = sys.argv[1]
        fsDir = FSDirectory.getDirectory(indexDir, False)
        searcher = IndexSearcher(fsDir)
        
        serveraddr = ('', SERVER_PORT)
        srvr = SearchServer(serveraddr, SearchRequestHandler)
        print "Ready to serve search queries"
        srvr.serve_forever()
示例#5
0
#!/usr/bin/python

from sys import argv
from PyLucene import FSDirectory, IndexSearcher, TermQuery, Term

id = argv[1].strip()
directory = FSDirectory.getDirectory( 'chipy-index', False )
searcher = IndexSearcher( directory )
query = TermQuery( Term( 'id', id ) )
hits = searcher.search( query )

doc = hits.doc(0)
print "ID: %s" % doc.getField('id').stringValue()
print "From: %s" % doc.getField('from').stringValue()
print "Subject: %s" % doc.getField('subject').stringValue()
print "Date: %s" % doc.getField('date').stringValue()
print doc.getField('body').stringValue()
print 

'path' and 'name' fields for each of the hits it finds in the index.  Note that
search.close() is currently commented out because it causes a stack overflow in
some cases.
"""
def run(searcher, analyzer):

    while True:
        print
        print "Hit enter with no input to quit."
        command = raw_input("Query:")
        if command == '':
            return

        print
        print "Searching for:", command
        query = QueryParser("contents", analyzer).parse(command)
        hits = searcher.search(query)
        print "%s total matching documents" % hits.length()
        
        for i, doc in hits:
            print 'path:', doc.get("path"), 'name:', doc.get("name"), 100*hits.score(i)

if __name__ == '__main__':
    STORE_DIR = "index"
    print 'PyLucene', VERSION, 'Lucene', LUCENE_VERSION
    directory = FSDirectory.getDirectory(STORE_DIR, False)
    searcher = IndexSearcher(directory)
    analyzer = StandardAnalyzer()
    run(searcher, analyzer)
    searcher.close()
示例#7
0
  results = []

  for i, doc in hits:
    results.append([doc.get("name"), doc.get("owner").encode('gbk'), doc.get("title").encode('gbk')])
  
  # sort result
  results.sort(lambda x,y: cmp(x[0],y[0]))    
  for name,owner,title in results:
    print name, owner, title 

def test_fixture():
  """Set the module-level BOARDSPATH to './'."""
  global BOARDSPATH
  BOARDSPATH = './'

if __name__ == '__main__':
  #test_fixture()

  # Two arguments are required: the board name and the GBK-encoded query.
  # A missing argument exits with the same status as any other bad input
  # instead of raising IndexError.
  if len(sys.argv) < 3:
    sys.exit(-1)
  board = sys.argv[1]
  querystr = sys.argv[2].decode('gbk').strip()

  path = BOARDSPATH+board+'/'+RECENT_INDEX
  if not os.path.exists(path) or len(querystr) == 0:
    sys.exit(-1)
  directory = FSDirectory.getDirectory(path, False)
  searcher = IndexSearcher(directory)
  # Close the searcher even if the search itself raises.
  try:
    analyzer = StandardAnalyzer()
    run(searcher, analyzer, querystr)
  finally:
    searcher.close()
    
#!/usr/bin/env python2.4
#import PyLucene
from mailbox import UnixMailbox
from PyLucene import StandardAnalyzer, FSDirectory, IndexWriter
from email_loader import EmailDoc
import os,sys,datetime,email,config

# determine when (if) the last update was
lastUp = datetime.datetime(2000, 1, 1)
createNewIndex = True

# Quiet mode is on when the first argument is -q or --quiet.
quiet = len(sys.argv) > 1 and sys.argv[1] in ('-q', '--quiet')

store = FSDirectory.getDirectory( config.DB_PATH, True )
writer = IndexWriter( store, StandardAnalyzer(), True )

# Disabled mbox-based indexing loop, kept as an inert string.
"""
mailbox = UnixMailbox( open('chipy.mbox') )
while True:
    msg = mailbox.next()
    if msg == None: break
    writer.addDocument( EmailDoc(msg) )
"""

source=config.MAILDIR_ROOT_DIR

# Walk the directory tree under the configured root.
for root, dirs, files in os.walk(source):
    if not quiet:
        sys.stdout.write('\nindexing files in %s' % root)
示例#9
0
#!/usr/bin/env python2.4

from mailbox import UnixMailbox
from PyLucene import StandardAnalyzer, FSDirectory, IndexWriter
from email import EmailDoc

store = FSDirectory.getDirectory('chipy-index', True)
writer = IndexWriter(store, StandardAnalyzer(), True)

# Add every message of chipy.mbox to the index. The nested try/finally
# blocks close the mbox file handle and the index writer even when
# indexing fails part-way through.
try:
    mbox_file = open('chipy.mbox')
    try:
        mailbox = UnixMailbox(mbox_file)
        while True:
            msg = mailbox.next()
            # Stop at the first None; identity test rather than ``== None``.
            if msg is None: break
            writer.addDocument(EmailDoc(msg))
    finally:
        mbox_file.close()
finally:
    writer.close()
示例#10
0
#!/usr/bin/env python2.4

from mailbox import UnixMailbox
from PyLucene import StandardAnalyzer, FSDirectory, IndexWriter
from email import EmailDoc

store = FSDirectory.getDirectory( 'chipy-index', True )
writer = IndexWriter( store, StandardAnalyzer(), True )

# Add every message of chipy.mbox to the index. The nested try/finally
# blocks close the mbox file handle and the index writer even when
# indexing fails part-way through.
try:
    mbox_file = open( 'chipy.mbox' )
    try:
        mailbox = UnixMailbox( mbox_file )
        while True:
            msg = mailbox.next()
            # Stop at the first None; identity test rather than ``== None``.
            if msg is None: break
            writer.addDocument( EmailDoc(msg) )
    finally:
        mbox_file.close()
finally:
    writer.close()
示例#11
0
 def getIndexDirectory(self):
     """Return the FSDirectory for ``self.indexPath``, opening it on first use.

     Opened directories are kept in ``IndexSupport.indexDirectories`` keyed
     by path, so later calls with the same path reuse the stored object.
     The path is created on disk first if it does not exist.
     """
     path = self.indexPath
     # Membership test with ``in`` instead of the deprecated has_key().
     if path not in IndexSupport.indexDirectories:
         if not os.path.exists(path):
             os.makedirs(path)
         IndexSupport.indexDirectories[path] = FSDirectory.getDirectory(path, False)
     return IndexSupport.indexDirectories[path]
示例#12
0
#!/usr/bin/env python2.4

from sys import argv
from PyLucene import FSDirectory, IndexSearcher, QueryParser, StandardAnalyzer

string = argv[1].strip()
directory = FSDirectory.getDirectory( 'chipy-index', False )
searcher = IndexSearcher( directory )
query = QueryParser.parse( string, 'all', StandardAnalyzer() )
hits = searcher.search( query )

for i in range(0,hits.length()):
    doc = hits.doc(i)
    print "ID: %s" % doc.getField('id').stringValue()
    print "From: %s" % doc.getField('from').stringValue()
    print "Subject: %s" % doc.getField('subject').stringValue()
    print "Date: %s" % doc.getField('date').stringValue()
    print

示例#13
0
some cases.
"""


def run(searcher, analyzer):
    while True:
        print
        print "Hit enter with no input to quit."
        command = raw_input("Query:")
        if command == '':
            return

        print
        print "Searching for:", command
        query = QueryParser("contents", analyzer).parse(command)
        hits = searcher.search(query)
        print "%s total matching documents." % hits.length()

        for i, doc in hits:
            print 'path:', doc.get("path"), 'name:', doc.get("name")


if __name__ == '__main__':
    STORE_DIR = "index"
    print 'PyLucene', VERSION, 'Lucene', LUCENE_VERSION
    directory = FSDirectory.getDirectory(STORE_DIR, False)
    searcher = IndexSearcher(directory)
    analyzer = StandardAnalyzer()
    run(searcher, analyzer)
    searcher.close()
示例#14
0
        doc = "0 |"
        m = self.__class__.p.search(args)
        if m and filename == "/search":
            #    doc = doQuery(m.group(1).split('+'), int(m.group(2)), int(m.group(3)))
            doc = doQuery(urllib.unquote(m.group(1)), int(m.group(2)), int(m.group(3)))
        return doc

    def do_GET(self):
        """Handle a GET request: build the document for the URL and send it."""
        pieces = urlparse.urlsplit(self.path)
        # Index 2 is the path component, index 3 the query string.
        response = self._getdoc(pieces[2], pieces[3])
        self._writeheaders(response)
        self.wfile.write(response)


class SearchServer(HTTPServer):
    """HTTPServer subclass with ``allow_reuse_address`` switched on."""

    allow_reuse_address = 1


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print "Usage: python lucene_server.py index_dir"
    else:
        indexDir = sys.argv[1]
        fsDir = FSDirectory.getDirectory(indexDir, False)
        searcher = IndexSearcher(fsDir)

        serveraddr = ("", SERVER_PORT)
        srvr = SearchServer(serveraddr, SearchRequestHandler)
        print "Ready to serve search queries"
        srvr.serve_forever()