Пример #1
0
    def open(self, name, txn, **kwds):
        """Open the container and, on creation, lay down a new index.

        The call is first delegated to the parent class with the same
        arguments.  When the ``create`` keyword is truthy, an IndexWriter
        is opened with its third argument set to True on a DbDirectory
        built from this container's db, the store's blocks db and this
        container's flags, and is closed again straight away.
        """

        super(IndexContainer, self).open(name, txn, **kwds)

        # Nothing more to do unless the caller asked for creation.
        if not kwds.get('create', False):
            return

        indexDir = DbDirectory(txn, self._db, self.store._blocks._db,
                               self._flags)
        # No documents are added here; the writer is only opened and closed
        # (presumably to initialise an empty index -- confirm).
        IndexWriter(indexDir, StandardAnalyzer(), True).close()
Пример #2
0
class IndexAppService(Indexer):
    """Index FOAF documents and record their URI/SHA associations.

    Documents are added through an ``IndexWriter`` opened on the given
    directory; URI/SHA pairs are forwarded to the ``shaManager`` object.
    """

    def __init__(self, directory, shaManager, app='futil'):
        """Open the index writer and set up the collaborators.

        directory  -- index location passed to IndexReader / IndexWriter.
        shaManager -- object providing ``insertUriSha(uri, sha)`` and
                      ``close()``.
        app        -- name handed to ``FutilLogger``.
        """
        self._directory = directory
        # Ask the writer to create the index only when none exists yet.
        create = not IndexReader.indexExists(self._directory)
        self._writer = IndexWriter(self._directory, StandardAnalyzer(), create)
        self.shaBBDD = shaManager
        self.logger = FutilLogger(app)
        self.uriLoader = UriLoader(logger=self.logger)
        self.resetCounter()

    def resetCounter(self):
        """Restart the countdown to the next index optimization."""
        self.counter = 1000

    def countInsertion(self):
        """Count one insertion; optimize the index when the countdown ends."""
        self.counter -= 1
        if self.counter == 0:
            self.resetCounter()
            self._writer.optimize()

    def indexFOAF(self, foaf):
        """Index one FOAF mapping and return the URIs of its friends.

        ``foaf`` is read as a mapping: the optional ``'sha'`` entry is
        iterated and each value is stored against ``foaf['uri'][0]``; the
        optional ``'friends'`` entry is iterated as ``(sha, uri)`` pairs.

        Returns the list of every friend URI (including friends whose SHA
        is empty), or an empty list when there is no ``'friends'`` entry.
        """
        document = FoafDocumentFactory.getDocumentFromFOAF(foaf)
        self._writer.addDocument(document)
        self.countInsertion()

        if 'sha' in foaf:
            for sha in foaf['sha']:
                self.shaBBDD.insertUriSha(foaf['uri'][0], sha)

        if 'friends' not in foaf:
            return []

        for friendSha, friendUri in foaf['friends']:
            # Friends without a SHA are not recorded, but they are still
            # part of the returned URI list below.
            if friendSha != '':
                self.shaBBDD.insertUriSha(friendUri, friendSha)
        return [friendUri for (_friendSha, friendUri) in foaf['friends']]

    def indexFOAFUri(self, foafUri):
        """Load the FOAF found at ``foafUri`` and index it.

        Returns what ``indexFOAF`` returns, or an empty list when loading
        or indexing fails (the failure is logged, not raised).
        """
        try:
            f = self.uriLoader.getFoafFrom(foafUri)
            return self.indexFOAF(f)
        except Exception:
            # Best-effort indexing: log and carry on.  Catching Exception
            # rather than everything lets KeyboardInterrupt and SystemExit
            # propagate so the process can still be stopped.
            self.logger.info("Unknow error indexing " + foafUri)
            return []

    def close(self):
        """Close the index writer (if still open) and the SHA manager."""
        try:
            if self._writer:
                self._writer.close()
        finally:
            # Release the SHA manager even when closing the writer fails.
            self._writer = None
            self.shaBBDD.close()
Пример #3
0
        try:
            doc = Document()
            doc.add(Field("span_id", span_id,
                          Field.Store.YES, Field.Index.UN_TOKENIZED))
            doc.add(Field("pmid", pmid,
                          Field.Store.YES, Field.Index.UN_TOKENIZED))
            doc.add(Field("text", span_text,
                          Field.Store.YES, Field.Index.TOKENIZED))
            addAnnotations(doc, span_id)
            writer.addDocument(doc)
        except Exception, e:
            sys.stderr.write("error: %s pmid: %s span_id: %s\n" % (e, pmid, span_id))
        i += 2

if __name__ == '__main__':
    # Command-line entry point: load the annotation files, then build and
    # optimize a new index from the normalized data file.
    #
    # sys.argv[1] and sys.argv[2] are both read below, so fewer than three
    # argv entries cannot proceed; show the usage line instead of failing
    # with an IndexError.
    if len(sys.argv) < 3:
        print("Usage: python index_spans.py data_norm index_dir annotation_files")
    else:
        data_norm = sys.argv[1]
        index_dir = sys.argv[2]
        annotation_files = sys.argv[3:]
        print("Loading annotations ...")
        load(annotation_files)
        print("Making the index ...")
        # `writer` must stay a module-level name: the document-building code
        # in this file calls writer.addDocument() on it.
        writer = IndexWriter(index_dir, StandardAnalyzer(), True)
        try:
            # 7 * 1000 * 1000 * 10 == 70,000,000
            writer.setMaxFieldLength(7 * 1000 * 1000 * 10)
            indexData(data_norm)
            print("Optimizing index ...")
            writer.optimize()
            print("Indexing complete")
        finally:
            # Close the writer even if indexing or optimizing fails.
            writer.close()
Пример #4
0
#!/usr/bin/env python2.4

from mailbox import UnixMailbox
from PyLucene import StandardAnalyzer, FSDirectory, IndexWriter
from email import EmailDoc

# Build a new index in 'chipy-index' with one document per message read
# from the 'chipy.mbox' mailbox file.
store = FSDirectory.getDirectory('chipy-index', True)
writer = IndexWriter(store, StandardAnalyzer(), True)
try:
    mbox_file = open('chipy.mbox')
    try:
        mailbox = UnixMailbox(mbox_file)
        while True:
            msg = mailbox.next()
            # The loop ends on the first None returned by mailbox.next().
            if msg is None:
                break
            writer.addDocument(EmailDoc(msg))
    finally:
        # Release the mailbox file handle even if indexing fails.
        mbox_file.close()
finally:
    # Close the writer whether or not every message was added.
    writer.close()