示例#1
0
    def main(cls, argv):
        """Run one query against the index under several sort orders.

        The query ORs a match-all clause with the parsed text
        ``java OR action`` (default field ``contents``).  The index location
        is read from the ``index.dir`` property via ``System.getProperty``.

        ``argv`` is accepted but not used.
        """
        allBooks = MatchAllDocsQuery()
        parser = QueryParser(Version.LUCENE_CURRENT, "contents",
                             StandardAnalyzer(Version.LUCENE_CURRENT))
        query = BooleanQuery()
        query.add(allBooks, BooleanClause.Occur.SHOULD)
        query.add(parser.parse("java OR action"), BooleanClause.Occur.SHOULD)

        indexDir = System.getProperty("index.dir")
        directory = SimpleFSDirectory(File(indexDir))

        # Release the directory even when one of the searches raises.
        try:
            example = SortingExample(directory)

            # Built-in orderings.
            example.displayResults(query, Sort.RELEVANCE)
            example.displayResults(query, Sort.INDEXORDER)

            # Single-field orderings; the trailing True on the pubmonth field
            # is SortField's "reverse" flag.
            example.displayResults(
                query, Sort(SortField("category", SortField.STRING)))
            example.displayResults(
                query, Sort(SortField("pubmonth", SortField.INT, True)))

            # Multi-field orderings: earlier fields take precedence.
            example.displayResults(
                query,
                Sort([SortField("category", SortField.STRING),
                      SortField.FIELD_SCORE,
                      SortField("pubmonth", SortField.INT, True)]))
            example.displayResults(
                query,
                Sort([SortField.FIELD_SCORE,
                      SortField("category", SortField.STRING)]))
        finally:
            directory.close()
示例#2
0
class LiaTestCase(TestCase):
    """Base test case that opens the index directory around every test.

    The index location is read from the ``index.dir`` property via
    ``System.getProperty`` when the test case is constructed.
    """

    # Version constant passed to the writer configuration and to the
    # default analyzer built by getWriter().
    TEST_VERSION = Version.LUCENE_CURRENT

    def __init__(self, *args):
        super(LiaTestCase, self).__init__(*args)
        self.indexDir = System.getProperty("index.dir")

    def setUp(self):
        """Open the index directory for the test about to run."""
        self.directory = SimpleFSDirectory(File(self.indexDir))

    def tearDown(self):
        """Close the directory opened by setUp()."""
        self.directory.close()

    def getWriter(self, directory=None, analyzer=None, open_mode=None):
        """Return a new IndexWriter.

        Each argument left as None falls back to a default:
        directory -> self.directory,
        analyzer  -> a WhitespaceAnalyzer wrapped in a
                     LimitTokenCountAnalyzer with a limit of 10000,
        open_mode -> IndexWriterConfig.OpenMode.CREATE.
        """
        if analyzer is None:
            # Use TEST_VERSION here too so the analyzer and the config agree.
            analyzer = LimitTokenCountAnalyzer(
                WhitespaceAnalyzer(self.TEST_VERSION), 10000)
        if open_mode is None:
            open_mode = IndexWriterConfig.OpenMode.CREATE
        if directory is None:
            directory = self.directory

        config = IndexWriterConfig(self.TEST_VERSION, analyzer)
        config.setOpenMode(open_mode)
        return IndexWriter(directory, config)

    def getSearcher(self, directory=None, reader=None):
        """Return an IndexSearcher.

        When reader is given it is used directly; otherwise a
        DirectoryReader is opened on directory (default: self.directory).
        """
        if reader is not None:
            return IndexSearcher(reader)

        if directory is None:
            directory = self.directory
        return IndexSearcher(DirectoryReader.open(directory))

    #
    # For troubleshooting
    #
    def dumpHits(self, searcher, scoreDocs):
        """Print "score: title" for every hit, or "No hits" if there are none."""
        if not scoreDocs:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("No hits")
            return

        for scoreDoc in scoreDocs:
            title = searcher.doc(scoreDoc.doc).get('title')
            print("%s: %s" % (scoreDoc.score, title))

    def assertHitsIncludeTitle(self, searcher, scoreDocs, title,
                               fail=False):
        """Fail the test unless some hit's "title" field equals title.

        With fail=True the check is inverted: the test fails if such a hit
        IS found.
        """
        for scoreDoc in scoreDocs:
            doc = searcher.doc(scoreDoc.doc)
            if title == doc.get("title"):
                if fail:
                    self.fail("title '%s' found" % (title,))
                return

        if not fail:
            self.fail("title '%s' not found" % (title,))

    def parseDate(self, s):
        """Parse a ``yyyy-MM-dd`` string with SimpleDateFormat."""
        return SimpleDateFormat("yyyy-MM-dd").parse(s)
示例#3
0
    def index(cls, indexDir, dataDir):
        """Index dataDir into the index at indexDir and return the doc count.

        The actual per-file work is delegated to
        ``cls.indexDirectory(writer, dataDir)``.  The returned number is
        ``writer.numDocs()`` read after that call; the writer is then
        committed and closed.

        Raises IOError if dataDir does not exist or is not a directory.
        """
        # isdir() is already False for a path that does not exist.
        if not os.path.isdir(dataDir):
            # Call form of raise: valid on both Python 2 and Python 3.
            raise IOError("%s does not exist or is not a directory"
                          % (dataDir,))

        directory = SimpleFSDirectory(File(indexDir))
        try:
            # The True argument is the constructor's "create" flag.
            writer = IndexWriter(directory,
                                 StandardAnalyzer(Version.LUCENE_CURRENT),
                                 True, IndexWriter.MaxFieldLength.LIMITED)
            try:
                writer.setUseCompoundFile(False)

                cls.indexDirectory(writer, dataDir)

                numIndexed = writer.numDocs()
                writer.commit()
            finally:
                # Always close the writer so it is not leaked on failure.
                writer.close()
        finally:
            directory.close()

        return numIndexed
class LiaTestCase(TestCase):
    """Base test case that opens the index directory around every test.

    The index location is read from the ``index.dir`` environment variable
    when the test case is constructed (KeyError if it is not set).
    """

    def __init__(self, *args):
        super(LiaTestCase, self).__init__(*args)
        self.indexDir = os.environ["index.dir"]

    def setUp(self):
        """Open the index directory for the test about to run."""
        self.directory = SimpleFSDirectory(self.indexDir)

    def tearDown(self):
        """Close the directory opened by setUp()."""
        self.directory.close()

    #
    # For troubleshooting
    #
    def dumpHits(self, searcher, scoreDocs):
        """Print "score: title" for every hit, or "No hits" if there are none."""
        if not scoreDocs:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("No hits")
            return

        for scoreDoc in scoreDocs:
            title = searcher.doc(scoreDoc.doc).get('title')
            print("%s: %s" % (scoreDoc.score, title))

    def assertHitsIncludeTitle(self, searcher, scoreDocs, title,
                               fail=False):
        """Fail the test unless some hit's "title" field equals title.

        With fail=True the check is inverted: the test fails if such a hit
        IS found.
        """
        for scoreDoc in scoreDocs:
            doc = searcher.doc(scoreDoc.doc)
            if title == doc.get("title"):
                if fail:
                    self.fail("title '%s' found" % (title,))
                return

        if not fail:
            self.fail("title '%s' not found" % (title,))

    def parseDate(self, s):
        """Parse a ``yyyy-MM-dd`` string and return a ``datetime.date``.

        Raises ValueError if s does not match that format.
        """
        # Local, aliased import: works whether the module did
        # ``import datetime`` or ``from datetime import datetime``.
        from datetime import datetime as _datetime

        return _datetime.strptime(s, "%Y-%m-%d").date()
示例#5
0
    def index(cls, indexDir, dataDir):
        """Index dataDir into the index at indexDir and return the doc count.

        The actual per-file work is delegated to
        ``cls.indexDirectory(writer, dataDir)``.  The returned number is
        ``writer.numDocs()`` read after that call; the writer is then
        optimized and closed.

        Raises IOError if dataDir does not exist or is not a directory.
        """
        # isdir() is already False for a path that does not exist.
        if not os.path.isdir(dataDir):
            # Call form of raise: valid on both Python 2 and Python 3.
            raise IOError("%s does not exist or is not a directory"
                          % (dataDir,))

        directory = SimpleFSDirectory(File(indexDir))
        try:
            # The True argument is the constructor's "create" flag.
            writer = IndexWriter(directory,
                                 StandardAnalyzer(Version.LUCENE_CURRENT),
                                 True, IndexWriter.MaxFieldLength.LIMITED)
            try:
                writer.setUseCompoundFile(False)

                cls.indexDirectory(writer, dataDir)

                numIndexed = writer.numDocs()
                writer.optimize()
            finally:
                # Always close the writer so it is not leaked on failure.
                writer.close()
        finally:
            directory.close()

        return numIndexed
示例#6
0
class LiaTestCase(TestCase):
    """Base test case that opens the index directory around every test.

    The index location is read from the ``index.dir`` property via
    ``System.getProperty`` when the test case is constructed.
    """

    def __init__(self, *args):
        super(LiaTestCase, self).__init__(*args)
        self.indexDir = System.getProperty("index.dir")

    def setUp(self):
        """Open the index directory for the test about to run."""
        self.directory = SimpleFSDirectory(File(self.indexDir))

    def tearDown(self):
        """Close the directory opened by setUp()."""
        self.directory.close()

    #
    # For troubleshooting
    #
    def dumpHits(self, searcher, scoreDocs):
        """Print "score: title" for every hit, or "No hits" if there are none."""
        if not scoreDocs:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("No hits")
            return

        for hit in scoreDocs:
            title = searcher.doc(hit.doc).get('title')
            print("%s: %s" % (hit.score, title))

    def assertHitsIncludeTitle(self, searcher, scoreDocs, title, fail=False):
        """Fail the test unless some hit's "title" field equals title.

        With fail=True the check is inverted: the test fails if such a hit
        IS found.
        """
        # any() stops at the first matching hit, like an early return would.
        found = any(title == searcher.doc(hit.doc).get("title")
                    for hit in scoreDocs)

        if found and fail:
            self.fail("title '%s' found" % (title,))
        elif not found and not fail:
            self.fail("title '%s' not found" % (title,))

    def parseDate(self, s):
        """Parse a ``yyyy-MM-dd`` string with SimpleDateFormat."""
        return SimpleDateFormat("yyyy-MM-dd").parse(s)