Пример #1
0
    def test_numDocs_AND_maxDoc(self):
        """Check numDocs() and maxDoc() on the extracted test index.

        Also verifies that both calls raise IOError once the reader has been
        closed.
        """
        indLoc = self.extractTestIndex()
        r = lucene.IndexReader.open(indLoc)
        try:
            totalDocsInFileSystem = len(
                test_base.listFilesDestinedForTestIndex()
              )
            self.assertEqual(r.numDocs(), totalDocsInFileSystem)

            # maxDoc is one greater than the greatest document identifier in
            # the index.  Document identifiers start at 0, so for an index
            # with no gaps in its doc ids (this one is expected to be
            # perfectly optimized) maxDoc equals numDocs.
            self.assertEqual(r.maxDoc(), r.numDocs())
        finally:
            # Close the reader even when an assertion above fails, so a
            # failing test does not leave it open.
            r.close()

        # A closed reader must refuse further queries.
        self.assertRaises(IOError, r.numDocs)
        self.assertRaises(IOError, r.maxDoc)
Пример #2
0
def _tarIndexComponents(fsDir, tarFilename):
    # Write every component yielded by iterating fsDir into a new,
    # uncompressed tar file at tarFilename.  The tar file is closed even if
    # adding a component fails.
    tarFile = tarfile.open(tarFilename, 'w')
    try:
        for indexComponent in fsDir:
            fullIndexComponent = os.path.join(fsDir.name, indexComponent)
            # The second argument is the member name inside the archive, so
            # members are stored without the temporary directory prefix.
            tarFile.add(fullIndexComponent, indexComponent)
    finally:
        tarFile.close()


def createTestIndexArchive():
    """Build tar archives of an index of pyclene's own source code.

    The files named by test_base.listFilesDestinedForTestIndex() are indexed
    into a temporary directory.  That index is then archived twice into
    temporary tar files: once as first written ('UNOPTIMIZED') and once after
    IndexWriter.optimize() has been run on it ('OPTIMIZED').

    Returns a list holding the two tar filenames, in that order.  Test code
    that needs a sample index is expected to get at them via one of the
    test_base.CommonBaseTest.extractTestIndex* methods (not visible here).

    The temporary index directory is always removed before returning.  If
    anything fails, tar files created so far are deleted and the original
    exception is re-raised with its traceback intact.
    """
    pv('--- CREATING TEST INDEX OF OWN SOURCE CODE ---')

    allFilenames = test_base.listFilesDestinedForTestIndex()

    # Only used to shorten the filenames shown in progress output.
    commonPrefixLen = len(os.path.commonprefix(allFilenames))

    tempIndexDir = test_base.generateTempFilename(suffix='_pyclene_test_index')
    assert not os.path.exists(tempIndexDir)

    indexFNs = []
    try:
        try:
            fsDir = lucene.FSDirectory(tempIndexDir, True)
            assert os.path.isdir(tempIndexDir)

            w = lucene.IndexWriter(fsDir, lucene.StandardAnalyzer(), True)
            try:
                w.maxFieldLength = sys.maxint
                assert w.docCount() == 0
                for filename in allFilenames:
                    doc = test_index.FileDocument(filename)
                    pv('Indexing [%s]' % filename[commonPrefixLen:])
                    w.addDocument(doc)

                assert w.docCount() == len(allFilenames)
            finally:
                # Do not leave the writer open if indexing fails part-way.
                w.close()

            pv('\n')
            for optFlag in ('UNOPTIMIZED', 'OPTIMIZED'):
                tarFilename = test_base.generateTempFilename(
                    '.pyclene_test_index-%s.tar' % optFlag
                  )
                # Record the name before writing so that a partially written
                # archive is removed by the cleanup handler below.
                indexFNs.append(tarFilename)
                _tarIndexComponents(fsDir, tarFilename)

                # Optimize AFTER archiving: the first archive captures the
                # index as written, the second captures the optimized one.
                # (The pass after the last archive feeds no further archive.)
                w = lucene.IndexWriter(fsDir, lucene.StandardAnalyzer(), False)
                try:
                    w.optimize()
                finally:
                    w.close()

                pv('%s: %s KB' % (
                    ('Test index size (%s)' % optFlag).ljust(29),
                    str(os.path.getsize(tarFilename) / 1024).rjust(10)
                  ))
        except:
            # Deliberately broad: remove any archives created so far, then
            # re-raise whatever went wrong.  A bare raise (rather than
            # raising the exception instance) keeps the original traceback.
            for tempFN in indexFNs:
                if os.path.isfile(tempFN):
                    os.remove(tempFN)

            raise
    finally:
        # The directory may not exist if FSDirectory creation itself failed.
        if os.path.isdir(tempIndexDir):
            shutil.rmtree(tempIndexDir)

    return indexFNs