def open(self, name, txn, **kwds):
    """Open the container and, when it is being created, set up its index.

    The call is first delegated to the parent class. If the ``create``
    keyword is truthy, an ``IndexWriter`` is then opened on a ``DbDirectory``
    built from this container's database handles and closed again at once.

    Args:
        name: forwarded unchanged to the parent ``open``.
        txn: transaction handle, forwarded to the parent ``open`` and to
            ``DbDirectory``.
        **kwds: forwarded to the parent ``open``; only ``create`` is read
            here.
    """
    super(IndexContainer, self).open(name, txn, **kwds)

    # Nothing more to do unless the caller asked for creation.
    if not kwds.get('create', False):
        return

    lucene_dir = DbDirectory(txn, self._db, self.store._blocks._db,
                             self._flags)
    # Open-then-close with the third argument set to True; presumably this
    # writes out an empty index structure -- confirm against IndexWriter docs.
    writer = IndexWriter(lucene_dir, StandardAnalyzer(), True)
    writer.close()
class IndexAppService(Indexer):
    """Index FOAF data through an ``IndexWriter`` and record URI/SHA pairs.

    Documents are added to the index held in ``directory``; URI/SHA
    associations are pushed to the ``shaManager`` object. The index is
    optimized once every 1000 insertions (see ``countInsertion``).
    """

    def __init__(self, directory, shaManager, app='futil'):
        """Open the index writer and set up the logger and URI loader.

        Args:
            directory: index location handed to ``IndexReader.indexExists``
                and ``IndexWriter``.
            shaManager: object exposing ``insertUriSha(uri, sha)`` and
                ``close()``.
            app: name passed to ``FutilLogger``.
        """
        self._directory = directory
        # Only ask the writer to create an index when none exists yet.
        create = not IndexReader.indexExists(self._directory)
        self._writer = IndexWriter(self._directory, StandardAnalyzer(), create)
        self.shaBBDD = shaManager
        self.logger = FutilLogger(app)
        self.uriLoader = UriLoader(logger=self.logger)
        self.resetCounter()

    def resetCounter(self):
        """Restart the countdown to the next index optimization."""
        self.counter = 1000

    def countInsertion(self):
        """Count one insertion; optimize the index when the countdown hits 0."""
        self.counter -= 1
        if self.counter == 0:
            self.resetCounter()
            self._writer.optimize()

    def indexFOAF(self, foaf):
        """Add one FOAF record to the index and store its URI/SHA pairs.

        Args:
            foaf: mapping describing the FOAF record. ``foaf['uri'][0]`` is
                paired with every entry of ``foaf['sha']`` (when present);
                ``foaf['friends']`` (when present) is an iterable of
                ``(sha, uri)`` pairs.

        Returns:
            The URIs of all friends (including those with an empty SHA), or
            an empty list when the record has no ``'friends'`` key.
        """
        document = FoafDocumentFactory.getDocumentFromFOAF(foaf)
        self._writer.addDocument(document)
        self.countInsertion()

        if 'sha' in foaf:
            for sha in foaf['sha']:
                self.shaBBDD.insertUriSha(foaf['uri'][0], sha)

        if 'friends' in foaf:
            for friendSha, friendUri in foaf['friends']:
                # Friends without a SHA cannot be recorded in the SHA store.
                if friendSha != '':
                    self.shaBBDD.insertUriSha(friendUri, friendSha)
            return [uri for (_sha, uri) in foaf['friends']]
        return []

    def indexFOAFUri(self, foafUri):
        """Load the FOAF record at ``foafUri`` and index it (best effort).

        Returns:
            The friend URIs reported by ``indexFOAF``, or an empty list when
            loading or indexing failed; the failure is logged, not raised.
        """
        try:
            f = self.uriLoader.getFoafFrom(foafUri)
            return self.indexFOAF(f)
        except Exception:
            # Deliberately broad: one bad URI must not abort the caller.
            # ``Exception`` (rather than a bare ``except``) lets interpreter
            # exit and Ctrl-C propagate on Python versions where those do not
            # derive from ``Exception``.
            self.logger.info("Unknow error indexing " + foafUri)
        return []

    def close(self):
        """Close the index writer (if still open) and the SHA manager."""
        try:
            if self._writer:
                self._writer.close()
                self._writer = None
        finally:
            # Release the SHA store even when closing the writer fails.
            self.shaBBDD.close()
try: doc = Document() doc.add(Field("span_id", span_id, Field.Store.YES, Field.Index.UN_TOKENIZED)) doc.add(Field("pmid", pmid, Field.Store.YES, Field.Index.UN_TOKENIZED)) doc.add(Field("text", span_text, Field.Store.YES, Field.Index.TOKENIZED)) addAnnotations(doc, span_id) writer.addDocument(doc) except Exception, e: sys.stderr.write("error: %s pmid: %s span_id: %s\n" % (e, pmid, span_id)) i += 2 if __name__ == '__main__': if len(sys.argv) == 1: print "Usage: python index_spans.py data_norm index_dir annotation_files" else: (data_norm, index_dir, annotation_files) = \ (sys.argv[1], sys.argv[2], sys.argv[3:]) print "Loading annotations ..." load(annotation_files) print "Making the index ..." writer = IndexWriter(index_dir, StandardAnalyzer(), True) writer.setMaxFieldLength(7 * 1000 * 1000 * 10) indexData(data_norm) print "Optimizing index ..." writer.optimize() print "Indexing complete" writer.close()
#!/usr/bin/env python2.4
# Index every message of the mbox file 'chipy.mbox' into the index stored in
# the 'chipy-index' directory.
from mailbox import UnixMailbox
from PyLucene import StandardAnalyzer, FSDirectory, IndexWriter
# NOTE(review): EmailDoc is not part of the standard ``email`` package, so
# this presumably resolves to a project-local module named ``email`` -- confirm.
from email import EmailDoc

store = FSDirectory.getDirectory('chipy-index', True)
writer = IndexWriter(store, StandardAnalyzer(), True)
# Nested try/finally instead of ``with``: the shebang targets Python 2.4,
# which has no ``with`` statement.
try:
    mbox_file = open('chipy.mbox')
    try:
        mailbox = UnixMailbox(mbox_file)
        while True:
            msg = mailbox.next()
            # next() yields None once the mailbox is exhausted.
            if msg is None:
                break
            writer.addDocument(EmailDoc(msg))
    finally:
        # Close the mbox file handle, which was previously leaked.
        mbox_file.close()
finally:
    # Close the writer even when reading or indexing a message fails.
    writer.close()