def doQuery(query, start, nb_results):
    # Parse the query with AND as the default operator, so every term must match.
    parser = QueryParser("text", StandardAnalyzer())
    parser.setDefaultOperator(QueryParser.Operator.AND)
    query = parser.parse(query)
    hits = searcher.search(query)
    return formatResultsPlain(query, hits, nb_results, start)
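# formatResultsPlain is not shown in this snippet; a minimal sketch of what
# it might look like, assuming the old PyLucene Hits API and a stored "text"
# field (the field name and output format are assumptions, not from the source):
def formatResultsPlain(query, hits, nb_results, start=0):
    lines = []
    end = min(start + nb_results, hits.length())
    for i in range(start, end):
        doc = hits.doc(i)
        lines.append("%d. %s" % (i + 1, doc.get("text")))
    return "\n".join(lines)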
def getIndexWriter(self):
    # Open a writer over the Berkeley DB backed directory; create=False
    # appends to the existing index instead of building a new one.
    writer = IndexWriter(DbDirectory(self.store.txn, self._db,
                                     self.store._blocks._db, self._flags),
                         StandardAnalyzer(), False)
    writer.setUseCompoundFile(False)
    return writer
def open(self, name, txn, **kwds):
    super(IndexContainer, self).open(name, txn, **kwds)
    if kwds.get('create', False):
        # First-time creation: open an IndexWriter with create=True once
        # so the empty index structure gets written out, then close it.
        directory = DbDirectory(txn, self._db,
                                self.store._blocks._db, self._flags)
        indexWriter = IndexWriter(directory, StandardAnalyzer(), True)
        indexWriter.close()
def searchDocuments(self, version, query, attribute=None):
    directory = DbDirectory(self.store.txn, self._db,
                            self.store._blocks._db, self._flags)
    searcher = IndexSearcher(directory)
    query = QueryParser.parse(query, "contents", StandardAnalyzer())

    # Keep, for each owner UUID, the highest-versioned hit at or below
    # the requested version, optionally filtered by attribute.
    docs = {}
    for i, doc in searcher.search(query):
        ver = long(doc['version'])
        if ver <= version:
            uuid = UUID(doc['owner'])
            dv = docs.get(uuid, None)
            if dv is None or dv[0] < ver:
                docAttr = doc['attribute']
                if attribute is None or attribute == docAttr:
                    docs[uuid] = (ver, docAttr)

    searcher.close()
    return docs
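# A hedged usage sketch for searchDocuments: collect, per owner UUID, the
# newest matching document at or below a given version (the `container`
# instance, version number, and query string are assumed for illustration):
matches = container.searchDocuments(42L, "apache lucene", attribute="body")
for uuid, (ver, attr) in matches.iteritems():
    print uuid, ver, attr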
def doQuery(searcher, flds, query, start, nb_results):
    parser = QueryParser("text", StandardAnalyzer())
    parser.setDefaultOperator(QueryParser.Operator.AND)
    query = parser.parse(query)
    hits = searcher.search(query)
    formatResultsTable(query, flds, hits, start, nb_results)
results = []
for i, doc in hits:
    # Re-encode owner and title to GBK for console output.
    results.append([doc.get("name"),
                    doc.get("owner").encode('gbk'),
                    doc.get("title").encode('gbk')])

# Sort results by name.
results.sort(lambda x, y: cmp(x[0], y[0]))
for name, owner, title in results:
    print name, owner, title

def test_fixture():
    global BOARDSPATH
    BOARDSPATH = './'

if __name__ == '__main__':
    #test_fixture()
    board = sys.argv[1]
    querystr = sys.argv[2].decode('gbk').strip()

    path = BOARDSPATH + board + '/' + RECENT_INDEX
    if not os.path.exists(path) or len(querystr) == 0:
        sys.exit(-1)

    directory = FSDirectory.getDirectory(path, False)
    searcher = IndexSearcher(directory)
    analyzer = StandardAnalyzer()
    run(searcher, analyzer, querystr)
    searcher.close()
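# run() is not defined in this snippet; a minimal sketch of what it might do,
# assuming it parses the GBK-decoded query against a "title" field and feeds
# the hits to the result loop shown at the top of this snippet (the field
# name and the printResults helper are assumptions):
def run(searcher, analyzer, querystr):
    query = QueryParser.parse(querystr, "title", analyzer)
    hits = searcher.search(query)
    printResults(hits)  # hypothetical wrapper around the results loop above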
#!/usr/bin/env python2.4

from mailbox import UnixMailbox
from PyLucene import StandardAnalyzer, FSDirectory, IndexWriter
from email import EmailDoc

# Build a fresh index (the True flags wipe any existing one).
store = FSDirectory.getDirectory('chipy-index', True)
writer = IndexWriter(store, StandardAnalyzer(), True)

mailbox = UnixMailbox(open('chipy.mbox'))
while True:
    msg = mailbox.next()
    if msg is None:
        break
    writer.addDocument(EmailDoc(msg))

writer.close()
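# EmailDoc comes from a local `email` module that is not shown here; a
# minimal sketch of what it might look like, assuming it fills the fields
# that the companion search script below reads back (id, from, subject,
# date, all) from an rfc822-style message (field choices are assumptions):
from PyLucene import Document, Field

def EmailDoc(msg):
    doc = Document()
    doc.add(Field("id", msg.get("Message-Id", ""),
                  Field.Store.YES, Field.Index.UN_TOKENIZED))
    doc.add(Field("from", msg.get("From", ""),
                  Field.Store.YES, Field.Index.TOKENIZED))
    doc.add(Field("subject", msg.get("Subject", ""),
                  Field.Store.YES, Field.Index.TOKENIZED))
    doc.add(Field("date", msg.get("Date", ""),
                  Field.Store.YES, Field.Index.UN_TOKENIZED))
    # Index the message body as the catch-all "all" field.
    doc.add(Field("all", msg.fp.read(),
                  Field.Store.YES, Field.Index.TOKENIZED))
    return doc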
def __init__(self, indexPath, batchMode=False, analyzer=None):
    self.batchMode = batchMode
    self.indexPath = indexPath
    # Fall back to a StandardAnalyzer when no analyzer is supplied.
    self.analyzer = analyzer or StandardAnalyzer()
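# A hedged usage sketch; the class name `SpanIndexer` and the index path
# are assumptions, since the snippet above only shows the constructor:
indexer = SpanIndexer('/tmp/my-index', batchMode=True)  # default StandardAnalyzer
indexer = SpanIndexer('/tmp/my-index', analyzer=StandardAnalyzer())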
#!/usr/bin/env python2.4

from sys import argv
from PyLucene import FSDirectory, IndexSearcher, QueryParser, StandardAnalyzer

string = argv[1].strip()

directory = FSDirectory.getDirectory('chipy-index', False)
searcher = IndexSearcher(directory)
query = QueryParser.parse(string, 'all', StandardAnalyzer())
hits = searcher.search(query)

for i in range(0, hits.length()):
    doc = hits.doc(i)
    print "ID: %s" % doc.getField('id').stringValue()
    print "From: %s" % doc.getField('from').stringValue()
    print "Subject: %s" % doc.getField('subject').stringValue()
    print "Date: %s" % doc.getField('date').stringValue()
    print
                      Field.Index.UN_TOKENIZED))  # (snippet begins mid-statement; the opening of this doc.add call is truncated)
        doc.add(Field("pmid", pmid, Field.Store.YES,
                      Field.Index.UN_TOKENIZED))
        doc.add(Field("text", span_text, Field.Store.YES,
                      Field.Index.TOKENIZED))
        addAnnotations(doc, span_id)
        writer.addDocument(doc)
    except Exception, e:
        sys.stderr.write("error: %s pmid: %s span_id: %s\n"
                         % (e, pmid, span_id))
    i += 2

if __name__ == '__main__':
    if len(sys.argv) == 1:
        print "Usage: python index_spans.py data_norm index_dir annotation_files"
    else:
        (data_norm, index_dir, annotation_files) = \
            (sys.argv[1], sys.argv[2], sys.argv[3:])
        print "Loading annotations ..."
        load(annotation_files)
        print "Making the index ..."
        writer = IndexWriter(index_dir, StandardAnalyzer(), True)
        writer.setMaxFieldLength(7 * 1000 * 1000 * 10)
        indexData(data_norm)
        print "Optimizing index ..."
        writer.optimize()
        print "Indexing complete"
        writer.close()
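# addAnnotations() and load() are defined elsewhere in this script; a
# minimal sketch of addAnnotations, assuming load() populated a
# module-level `annotations` dict keyed by span id (the dict name and
# field name are assumptions):
def addAnnotations(doc, span_id):
    for annotation in annotations.get(span_id, []):
        doc.add(Field("annotation", annotation,
                      Field.Store.YES, Field.Index.TOKENIZED))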