Пример #1
0
def doQuery(query, start, nb_results):
    """Run ``query`` against the "text" field and return the formatted hits.

    The query string is parsed with a ``StandardAnalyzer`` and AND as the
    default operator between terms.  The search itself is executed on a
    ``searcher`` object that is defined outside this function.

    Returns whatever ``formatResultsPlain`` produces for the parsed query,
    the hits, ``nb_results`` and ``start``.
    """
    text_parser = QueryParser("text", StandardAnalyzer())
    text_parser.setDefaultOperator(QueryParser.Operator.AND)

    parsed = text_parser.parse(query)
    matches = searcher.search(parsed)

    return formatResultsPlain(parsed, matches, nb_results, start)
Пример #2
0
    def getIndexWriter(self):
        """Return an ``IndexWriter`` over this container's ``DbDirectory``.

        The directory is built from the store's current transaction, this
        container's db handle, the store's blocks db handle and this
        container's flags.  The writer is constructed with a
        ``StandardAnalyzer`` and ``False`` as its third argument (presumably
        Lucene's "create" flag, i.e. open an existing index -- confirm), and
        compound-file use is switched off before it is handed back.
        """
        directory = DbDirectory(self.store.txn, self._db,
                                self.store._blocks._db, self._flags)
        indexWriter = IndexWriter(directory, StandardAnalyzer(), False)
        indexWriter.setUseCompoundFile(False)

        return indexWriter
Пример #3
0
    def open(self, name, txn, **kwds):
        """Open the container and, if requested, initialise an empty index.

        The call is first delegated to the parent class.  When the keyword
        argument ``create`` is truthy, an ``IndexWriter`` is opened on the
        container's ``DbDirectory`` with ``True`` as its third argument and
        closed again straight away.
        """
        super(IndexContainer, self).open(name, txn, **kwds)

        # Nothing more to do unless the caller asked for creation.
        if not kwds.get('create', False):
            return

        directory = DbDirectory(txn, self._db, self.store._blocks._db,
                                self._flags)
        IndexWriter(directory, StandardAnalyzer(), True).close()
Пример #4
0
    def searchDocuments(self, version, query, attribute=None):
        """Search the "contents" field and return the newest match per owner.

        Parameters:
            version   -- upper bound (inclusive); hits whose ``version``
                         field is greater than this are ignored.
            query     -- query string, parsed with a ``StandardAnalyzer``.
            attribute -- when not ``None``, only hits whose ``attribute``
                         field equals this value are kept.

        Returns a dict mapping ``UUID(doc['owner'])`` to a
        ``(version, attribute)`` tuple holding the highest qualifying
        version seen for that owner.

        The searcher is always closed, even when parsing the query or
        decoding a hit raises.
        """
        directory = DbDirectory(self.store.txn, self._db,
                                self.store._blocks._db, self._flags)
        searcher = IndexSearcher(directory)

        docs = {}
        try:
            query = QueryParser.parse(query, "contents", StandardAnalyzer())

            for i, doc in searcher.search(query):
                ver = long(doc['version'])
                if ver > version:
                    continue

                uuid = UUID(doc['owner'])
                dv = docs.get(uuid, None)
                # Strict '<' keeps the first hit seen when versions tie.
                if dv is None or dv[0] < ver:
                    docAttr = doc['attribute']
                    if attribute is None or attribute == docAttr:
                        docs[uuid] = (ver, docAttr)
        finally:
            # Release the index reader regardless of how the loop ended.
            searcher.close()

        return docs
Пример #5
0
def doQuery(searcher, flds, query, start, nb_results):
    """Search ``searcher`` for ``query`` and pass the hits to the table formatter.

    The query string is parsed against the "text" field with a
    ``StandardAnalyzer`` and AND as the default operator.  The parsed query,
    ``flds``, the hits, ``start`` and ``nb_results`` are handed to
    ``formatResultsTable``; nothing is returned.
    """
    text_parser = QueryParser("text", StandardAnalyzer())
    text_parser.setDefaultOperator(QueryParser.Operator.AND)

    parsed = text_parser.parse(query)
    matches = searcher.search(parsed)

    formatResultsTable(parsed, flds, matches, start, nb_results)
Пример #6
0
  results = []

  for i, doc in hits:
    results.append([doc.get("name"), doc.get("owner").encode('gbk'), doc.get("title").encode('gbk')])
  
  # sort result
  results.sort(lambda x,y: cmp(x[0],y[0]))    
  for name,owner,title in results:
    print name, owner, title 

def test_fixture():
  """Set the module-level ``BOARDSPATH`` to the current directory ('./')."""
  globals()['BOARDSPATH'] = './'

if __name__ == '__main__':
  #test_fixture()

  # Usage: <script> <board> <query>, where the query arrives GBK-encoded.
  # Missing arguments are treated like any other unusable input (exit -1)
  # instead of surfacing as an IndexError traceback.
  if len(sys.argv) < 3:
    sys.exit(-1)

  board = sys.argv[1]
  querystr = sys.argv[2].decode('gbk').strip()

  # BOARDSPATH is concatenated as-is, so it is expected to end with a
  # path separator.
  path = BOARDSPATH+board+'/'+RECENT_INDEX
  if not os.path.exists(path) or not querystr:
    sys.exit(-1)

  directory = FSDirectory.getDirectory(path, False)
  searcher = IndexSearcher(directory)
  try:
    analyzer = StandardAnalyzer()
    run(searcher, analyzer, querystr)
  finally:
    # Close the searcher even if the query fails part-way through.
    searcher.close()
    
Пример #7
0
#!/usr/bin/env python2.4

from mailbox import UnixMailbox
from PyLucene import StandardAnalyzer, FSDirectory, IndexWriter
from email import EmailDoc

# Build a fresh 'chipy-index' from every message in 'chipy.mbox'.
store = FSDirectory.getDirectory('chipy-index', True)
writer = IndexWriter(store, StandardAnalyzer(), True)
try:
    mbox_file = open('chipy.mbox')
    try:
        mailbox = UnixMailbox(mbox_file)
        while True:
            msg = mailbox.next()
            # next() returns None once the mailbox is exhausted.
            if msg is None:
                break
            writer.addDocument(EmailDoc(msg))
    finally:
        # The mbox handle was previously left open for the whole process.
        mbox_file.close()
finally:
    # Always close the writer, even if a message cannot be indexed.
    writer.close()
Пример #8
0
 def __init__(self, indexPath, batchMode=False, analyzer=None):
     """Store the index path, batch-mode flag and analyzer on the instance.

     A new ``StandardAnalyzer`` is created when ``analyzer`` is falsy
     (including the default ``None``).
     """
     self.batchMode = batchMode
     self.indexPath = indexPath
     if analyzer:
         self.analyzer = analyzer
     else:
         self.analyzer = StandardAnalyzer()
Пример #9
0
#!/usr/bin/env python2.4

from sys import argv
from PyLucene import FSDirectory, IndexSearcher, QueryParser, StandardAnalyzer

string = argv[1].strip()
directory = FSDirectory.getDirectory( 'chipy-index', False )
searcher = IndexSearcher( directory )
query = QueryParser.parse( string, 'all', StandardAnalyzer() )
hits = searcher.search( query )

for i in range(0,hits.length()):
    doc = hits.doc(i)
    print "ID: %s" % doc.getField('id').stringValue()
    print "From: %s" % doc.getField('from').stringValue()
    print "Subject: %s" % doc.getField('subject').stringValue()
    print "Date: %s" % doc.getField('date').stringValue()
    print

Пример #10
0
                      Field.Index.UN_TOKENIZED))
            doc.add(
                Field("pmid", pmid, Field.Store.YES, Field.Index.UN_TOKENIZED))
            doc.add(
                Field("text", span_text, Field.Store.YES,
                      Field.Index.TOKENIZED))
            addAnnotations(doc, span_id)
            writer.addDocument(doc)
        except Exception, e:
            sys.stderr.write("error: %s pmid: %s span_id: %s\n" %
                             (e, pmid, span_id))
        i += 2


if __name__ == '__main__':
    if len(sys.argv) == 1:
        print "Usage: python index_spans.py data_norm index_dir annotation_files"
    else:
        (data_norm, index_dir, annotation_files) = \
            (sys.argv[1], sys.argv[2], sys.argv[3:])
        print "Loading annotations ..."
        load(annotation_files)
        print "Making the index ..."
        writer = IndexWriter(index_dir, StandardAnalyzer(), True)
        writer.setMaxFieldLength(7 * 1000 * 1000 * 10)
        indexData(data_norm)
        print "Optimizing index ..."
        writer.optimize()
        print "Indexing complete"
        writer.close()