Exemplo n.º 1
0
class PyLuceneTestCase(TestCase):
    """Base test case that gives every test its own in-memory index directory.

    ``setUp`` stores a new ``RAMDirectory`` in ``self.directory`` and
    ``tearDown`` closes it.  The ``get*`` helpers build writers, readers and
    searchers that fall back to that directory when none is supplied.
    """

    def __init__(self, *args):
        super(PyLuceneTestCase, self).__init__(*args)
        # Version constant handed to IndexWriterConfig by getWriter().
        self.TEST_VERSION = Version.LUCENE_CURRENT

    def setUp(self):
        """Create the directory the helpers default to."""
        self.directory = RAMDirectory()

    def tearDown(self):
        """Close the directory created in setUp."""
        self.directory.close()

    def getWriter(self, directory=None, analyzer=None, open_mode=None):
        """Return an IndexWriter for ``directory`` (default: self.directory).

        Any argument left as None (or otherwise falsy) is replaced by its
        default: a ``LimitTokenCountAnalyzer`` wrapping a
        ``WhitespaceAnalyzer`` with a limit of 10000 for ``analyzer``, and
        ``IndexWriterConfig.OpenMode.CREATE`` for ``open_mode``.
        """
        config = IndexWriterConfig(
            self.TEST_VERSION,
            analyzer or LimitTokenCountAnalyzer(
                WhitespaceAnalyzer(Version.LUCENE_CURRENT), 10000))
        config.setOpenMode(open_mode or IndexWriterConfig.OpenMode.CREATE)
        return IndexWriter(directory or self.directory, config)

    def getSearcher(self, directory=None, reader=None):
        """Return an IndexSearcher over ``reader`` or over ``directory``.

        When ``reader`` is given it is wrapped directly and ``directory`` is
        ignored; otherwise a reader is opened through getReader().
        """
        if reader is not None:
            return IndexSearcher(reader)
        # Wrap the freshly opened reader: callers expect a searcher here,
        # not the bare DirectoryReader that getReader() returns.
        return IndexSearcher(self.getReader(directory=directory))

    def getReader(self, directory=None):
        """Open a DirectoryReader on ``directory`` (default: self.directory)."""
        return DirectoryReader.open(directory or self.directory)
Exemplo n.º 2
0
    def testSlurp(self):
        """Check that a RAMDirectory built from self.dir matches the original.

        The reader on self.dir must report len(self.keywords) for both
        maxDoc() and numDocs(), and a reader on the RAMDirectory constructed
        from self.dir must report the same two counts.
        """
        fsDirReader = IndexReader.open(self.dir, True)
        try:
            self.assertEqual(len(self.keywords), fsDirReader.maxDoc())
            self.assertEqual(len(self.keywords), fsDirReader.numDocs())

            ramDir = RAMDirectory(self.dir)
            try:
                ramDirReader = IndexReader.open(ramDir, True)
                try:
                    self.assertEqual(fsDirReader.maxDoc(),
                                     ramDirReader.maxDoc())
                    self.assertEqual(fsDirReader.numDocs(),
                                     ramDirReader.numDocs())
                finally:
                    # Release the reader before the directory it reads from.
                    ramDirReader.close()
            finally:
                ramDir.close()
        finally:
            # Runs even when an assertion above fails, so nothing leaks.
            fsDirReader.close()
Exemplo n.º 3
0
    def testSlurp(self):
        """Verify that loading self.dir into a RAMDirectory keeps all documents.

        Both maxDoc() and numDocs() of the reader on self.dir must equal
        len(self.keywords), and the reader on the RAMDirectory created from
        self.dir must agree with those counts.
        """
        fsDirReader = IndexReader.open(self.dir, True)
        try:
            self.assertEqual(len(self.keywords), fsDirReader.maxDoc())
            self.assertEqual(len(self.keywords), fsDirReader.numDocs())

            ramDir = RAMDirectory(self.dir)
            try:
                ramDirReader = IndexReader.open(ramDir, True)
                try:
                    self.assertEqual(fsDirReader.maxDoc(),
                                     ramDirReader.maxDoc())
                    self.assertEqual(fsDirReader.numDocs(),
                                     ramDirReader.numDocs())
                finally:
                    # The in-memory reader is closed before its directory.
                    ramDirReader.close()
            finally:
                ramDir.close()
        finally:
            # Always executed, so a failing assertion cannot leak the reader.
            fsDirReader.close()
Exemplo n.º 4
0
    def setUp(self):
        """Index one document for "elwood" and one for "jake" in memory.

        Every document carries a stored, un-analyzed "owner" field and a
        stored, analyzed "keywords" field.  The resulting directory is kept
        in self.directory.
        """
        self.directory = RAMDirectory()
        index_writer = IndexWriter(self.directory, WhitespaceAnalyzer(), True,
                                   IndexWriter.MaxFieldLength.UNLIMITED)

        # (owner, keywords) pairs, indexed in this order.
        records = (
            ("elwood", "elwoods sensitive info"),
            ("jake", "jakes sensitive info"),
        )
        for owner, keywords in records:
            entry = Document()
            entry.add(Field("owner", owner, Field.Store.YES,
                            Field.Index.NOT_ANALYZED))
            entry.add(Field("keywords", keywords, Field.Store.YES,
                            Field.Index.ANALYZED))
            index_writer.addDocument(entry)

        index_writer.close()
Exemplo n.º 5
0
    def setUp(self):
        """Index two sentences in field "f" and prepare span-term queries.

        Sets self.directory, self.analyzer, self.searcher and self.reader,
        plus one SpanTermQuery attribute per word of interest (self.quick,
        self.brown, self.red, self.fox, self.lazy, self.sleepy, self.dog,
        self.cat).
        """
        self.directory = RAMDirectory()
        self.analyzer = WhitespaceAnalyzer()

        index_writer = IndexWriter(self.directory, self.analyzer, True,
                                   IndexWriter.MaxFieldLength.UNLIMITED)
        sentences = (
            "the quick brown fox jumps over the lazy dog",
            "the quick red fox jumps over the sleepy cat",
        )
        for sentence in sentences:
            entry = Document()
            entry.add(Field("f", sentence,
                            Field.Store.YES, Field.Index.ANALYZED))
            index_writer.addDocument(entry)
        index_writer.close()

        self.searcher = IndexSearcher(self.directory, True)
        self.reader = IndexReader.open(self.directory, True)

        # Each word becomes an attribute of the same name holding its query.
        words = ("quick", "brown", "red", "fox",
                 "lazy", "sleepy", "dog", "cat")
        for word in words:
            setattr(self, word, SpanTermQuery(Term("f", word)))
Exemplo n.º 6
0
    def setUp(self):
        """Prepare an in-memory directory and a file-system directory.

        The file-system index lives in "fs-index" under the value of the
        "java.io.tmpdir" system property ("tmp" when the property is unset);
        that path is passed to self.rmdir() before the directory is opened.
        """
        tmp_root = System.getProperty("java.io.tmpdir", "tmp")
        index_path = os.path.join(tmp_root, "fs-index")
        self.rmdir(index_path)

        self.ramDir = RAMDirectory()
        self.fsDir = SimpleFSDirectory(File(index_path))
Exemplo n.º 7
0
    def setUp(self):
        """Index a single sentence in "contents" using self.porterAnalyzer.

        The index is written to a new RAMDirectory stored in self.directory.
        """
        self.directory = RAMDirectory()
        index_writer = IndexWriter(self.directory, self.porterAnalyzer, True,
                                   IndexWriter.MaxFieldLength.UNLIMITED)

        entry = Document()
        contents = Field("contents",
                         "The quick brown fox jumps over the lazy dogs",
                         Field.Store.YES, Field.Index.ANALYZED)
        entry.add(contents)

        index_writer.addDocument(entry)
        index_writer.close()
Exemplo n.º 8
0
    def setUp(self):
        """Split 26 animal names across two in-memory indexes.

        Names whose lower-cased first letter sorts before "n" go to the
        first index, all others to the second.  self.searchers holds one
        IndexSearcher per index, in that order.
        """
        species = [
            "aardvark", "beaver", "coati", "dog", "elephant", "frog",
            "gila monster", "horse", "iguana", "javelina", "kangaroo",
            "lemur", "moose", "nematode", "orca", "python", "quokka", "rat",
            "scorpion", "tarantula", "uromastyx", "vicuna", "walrus",
            "xiphias", "yak", "zebra",
        ]

        whitespace = WhitespaceAnalyzer()

        first_half_dir = RAMDirectory()
        second_half_dir = RAMDirectory()

        first_half_writer = IndexWriter(first_half_dir, whitespace, True,
                                        IndexWriter.MaxFieldLength.UNLIMITED)
        second_half_writer = IndexWriter(second_half_dir, whitespace, True,
                                         IndexWriter.MaxFieldLength.UNLIMITED)

        for name in species:
            entry = Document()
            entry.add(Field("animal", name, Field.Store.YES,
                            Field.Index.NOT_ANALYZED))

            # Route by first letter: before "n" -> first index.
            before_n = name[0].lower() < "n"
            target = first_half_writer if before_n else second_half_writer
            target.addDocument(entry)

        first_half_writer.close()
        second_half_writer.close()

        self.searchers = [IndexSearcher(index_dir)
                          for index_dir in (first_half_dir, second_half_dir)]
Exemplo n.º 9
0
    def setUp(self):
        """Index one sample sentence in "field" and open a searcher on it.

        The directory is local to this method; only self.searcher is kept.
        """
        ram_dir = RAMDirectory()
        index_writer = IndexWriter(ram_dir, WhitespaceAnalyzer(), True,
                                   IndexWriter.MaxFieldLength.UNLIMITED)

        # The single sample document.
        sample = Document()
        sentence = Field("field",
                         "the quick brown fox jumped over the lazy dog",
                         Field.Store.YES, Field.Index.ANALYZED)
        sample.add(sentence)
        index_writer.addDocument(sample)
        index_writer.close()

        self.searcher = IndexSearcher(ram_dir)
Exemplo n.º 10
0
    def setUp(self):
        """Index 500 documents whose "id" field is NumberUtils.pad(1..500).

        Sets self.analyzer and self.directory; the writer is closed before
        returning.
        """
        self.analyzer = WhitespaceAnalyzer()
        self.directory = RAMDirectory()

        index_writer = IndexWriter(self.directory, self.analyzer, True,
                                   IndexWriter.MaxFieldLength.LIMITED)

        # Numbers 1 through 500 inclusive.
        for number in xrange(1, 501):
            entry = Document()
            id_field = Field("id", NumberUtils.pad(number),
                             Field.Store.YES, Field.Index.NOT_ANALYZED)
            entry.add(id_field)
            index_writer.addDocument(entry)

        index_writer.close()
Exemplo n.º 11
0
    def setUp(self):
        """Index four "restaurant" points and prepare a searcher and query.

        Each entry is added through self.addPoint(); self.query matches the
        term "restaurant" in the "type" field.
        """
        self.directory = RAMDirectory()
        index_writer = IndexWriter(self.directory, WhitespaceAnalyzer(), True,
                                   IndexWriter.MaxFieldLength.UNLIMITED)

        # (name, x, y) -- the two numbers are presumably coordinates;
        # confirm against addPoint().
        restaurants = (
            ("El Charro", 1, 2),
            ("Cafe Poca Cosa", 5, 9),
            ("Los Betos", 9, 6),
            ("Nico's Taco Shop", 3, 8),
        )
        for name, x, y in restaurants:
            self.addPoint(index_writer, name, "restaurant", x, y)

        index_writer.close()

        self.searcher = IndexSearcher(self.directory, True)
        self.query = TermQuery(Term("type", "restaurant"))
Exemplo n.º 12
0
    def setUp(self):
        """Index one part record and open a searcher on it.

        The document has an un-analyzed "partnum" field and an analyzed
        "description" field, both stored; SimpleAnalyzer is the writer's
        analyzer.
        """
        self.directory = RAMDirectory()
        index_writer = IndexWriter(self.directory, SimpleAnalyzer(), True,
                                   IndexWriter.MaxFieldLength.UNLIMITED)

        part = Document()
        part_number = Field("partnum", "Q36", Field.Store.YES,
                            Field.Index.NOT_ANALYZED)
        part.add(part_number)
        description = Field("description", "Illidium Space Modulator",
                            Field.Store.YES, Field.Index.ANALYZED)
        part.add(description)

        index_writer.addDocument(part)
        index_writer.close()

        self.searcher = IndexSearcher(self.directory, True)
Exemplo n.º 13
0
    def someMethod(self):
        """Exercise writer and query-parser calls on an in-memory index.

        Nothing is stored on self and nothing is returned; the method only
        issues the calls.
        """
        ram_dir = RAMDirectory()

        std_analyzer = StandardAnalyzer()
        index_writer = IndexWriter(ram_dir, std_analyzer, True)

        entry = Document()
        entry.add(Field.Text("title", "This is the title"))
        entry.add(Field.UnStored("contents", "...document contents..."))

        # Same document added twice: first without, then with an explicit
        # analyzer argument.
        index_writer.addDocument(entry)
        index_writer.addDocument(entry, std_analyzer)

        query_text = "some query"

        # Parse through the class-level call ...
        parsed = QueryParser.parse(query_text, "contents", std_analyzer)

        # ... and again through a parser instance bound to "contents".
        contents_parser = QueryParser("contents", std_analyzer)
        parsed = contents_parser.parseQuery(query_text)
Exemplo n.º 14
0
    def setUp(self):
        """Index two sentences in "field" and open a searcher on them.

        The directory is local to this method; only self.searcher is kept.
        """
        ram_dir = RAMDirectory()
        index_writer = IndexWriter(ram_dir, WhitespaceAnalyzer(), True,
                                   IndexWriter.MaxFieldLength.UNLIMITED)

        sentences = (
            "the quick brown fox jumped over the lazy dog",
            "the fast fox hopped over the hound",
        )
        for sentence in sentences:
            entry = Document()
            entry.add(Field("field", sentence,
                            Field.Store.YES, Field.Index.ANALYZED))
            index_writer.addDocument(entry)
        index_writer.close()

        self.searcher = IndexSearcher(ram_dir, True)
Exemplo n.º 15
0
    def _createIndex(self, inputDF, colname):
        """Build an in-memory Lucene index from one column of ``inputDF``.

        Every row of ``inputDF`` is visited via ``apply(..., axis=1)`` and
        the row's ``colname`` value is handed to ``self._addDoc`` together
        with the writer.  A WhitespaceAnalyzer is used.

        Args:
            inputDF: table to index (presumably a pandas DataFrame -- it
                only needs the ``apply(func, axis=1)`` call used here).
            colname: key of the column whose values are indexed.

        Returns:
            The RAMDirectory holding the optimized, closed index.
        """
        index_dir = RAMDirectory()
        index_writer = IndexWriter(index_dir, WhitespaceAnalyzer(), True,
                                   IndexWriter.MaxFieldLength.LIMITED)

        def index_row(row):
            # Index the selected column of a single row.
            return self._addDoc(row[colname], index_writer)

        inputDF.apply(index_row, axis=1)

        # Optimize before closing, then hand the directory back.
        index_writer.optimize()
        index_writer.close()
        return index_dir
    def getIndexWriter(self):
        """Return a new IndexWriter on a fresh RAMDirectory.

        The writer uses a StandardAnalyzer and has compound files turned
        off via setUseCompoundFile(False).
        """
        index_writer = IndexWriter(RAMDirectory(), StandardAnalyzer(), True)
        index_writer.setUseCompoundFile(False)
        return index_writer
Exemplo n.º 17
0
    def __init__(self, indexDir):
        """Load the index at ``indexDir`` into memory and open a searcher.

        Args:
            indexDir: path of the on-disk index, wrapped in a File and a
                SimpleFSDirectory before being passed to RAMDirectory.
        """
        on_disk = SimpleFSDirectory(File(indexDir))
        self.directory = RAMDirectory(on_disk)
        self.searcher = IndexSearcher(self.directory)
Exemplo n.º 18
0
 def indexFile(self):
     """Start the Lucene VM and create an in-memory index writer.

     Sets self._th (result of lucene.initVM()), self._analyzer,
     self._dir and self._writer.
     """
     self._th = lucene.initVM()
     self._analyzer = StandardAnalyzer(Version.LUCENE_36)
     self._dir = RAMDirectory()
     # Field-length limit passed to the writer.
     field_limit = IndexWriter.MaxFieldLength(25000)
     self._writer = IndexWriter(self._dir, self._analyzer, True,
                                field_limit)
Exemplo n.º 19
0
 def setUp(self):
     """Give the test a new RAMDirectory in self.directory."""
     ram_dir = RAMDirectory()
     self.directory = ram_dir
Exemplo n.º 20
0
    def setUp(self):
        """Run the parent setUp, then store a new RAMDirectory."""
        super(ScoreTest, self).setUp()

        ram_dir = RAMDirectory()
        self.directory = ram_dir