示例#1
0
def func_pic(command):
    """Search the picture index for ``command`` and return the matches.

    Attaches the calling thread to the Lucene JVM, opens an
    ``IndexSearcher`` over the "index for pic" directory and hands the
    query to ``run_pic``.

    Args:
        command: Query passed through unchanged to ``run_pic``.

    Returns:
        The ``(imgurl, url, urltitle)`` triple produced by ``run_pic``.

    Raises:
        Whatever Lucene or ``run_pic`` raises; the searcher is closed
        before the exception propagates.
    """
    global vm_env
    # The JVM handle is published as a module global, as before.
    vm_env = lucene.getVMEnv()
    vm_env.attachCurrentThread()
    STORE_DIR = "index for pic"
    directory = SimpleFSDirectory(File(STORE_DIR))
    # Second argument is True, as in the original call
    # (the readOnly flag in Lucene 3.x -- see the IndexSearcher javadoc).
    searcher = IndexSearcher(directory, True)
    try:
        analyzer = lucene.WhitespaceAnalyzer(Version.LUCENE_CURRENT)
        imgurl, url, urltitle = run_pic(command, searcher, analyzer)
    finally:
        # Always release the searcher, even when the query fails.
        searcher.close()
    return imgurl, url, urltitle
示例#2
0
def func(command):
    """Search the website index for ``command`` and return the matches.

    Attaches the calling thread to the Lucene JVM, opens an
    ``IndexSearcher`` over the "index for website" directory and hands the
    query to ``run``.

    Args:
        command: Query passed through unchanged to ``run``.

    Returns:
        The ``(title, url, surround)`` triple produced by ``run``.

    Raises:
        Whatever Lucene or ``run`` raises; the searcher is closed before
        the exception propagates.
    """
    global vm_env
    # The JVM handle is published as a module global, as before.
    vm_env = lucene.getVMEnv()
    vm_env.attachCurrentThread()
    STORE_DIR = "index for website"
    directory = SimpleFSDirectory(File(STORE_DIR))
    # Second argument is True, as in the original call
    # (the readOnly flag in Lucene 3.x -- see the IndexSearcher javadoc).
    searcher = IndexSearcher(directory, True)
    try:
        analyzer = lucene.WhitespaceAnalyzer(Version.LUCENE_CURRENT)
        title, url, surround = run(searcher, analyzer, command)
    finally:
        # Always release the searcher, even when the query fails.
        searcher.close()
    return title, url, surround
示例#3
0
        print "Searching for:", command

        query = QueryParser(Version.LUCENE_CURRENT, "contents",
                            analyzer).parse(command)
        #用analyzer来对查询语句进行词法分析和语言处理。
        #QueryParser调用parser进行语法分析,形成查询语法树,放到Query中。
        scoreDocs = searcher.search(query, 50).scoreDocs
        #IndexSearcher调用search对查询语法树Query进行搜索,得到结果
        print "%s total matching documents." % len(scoreDocs), '\n'

        for scoreDoc in scoreDocs:
            doc = searcher.doc(scoreDoc.doc)
            print "------------------------"
            print 'path:', doc.get("path")
            print 'title:', doc.get("title")
            print 'url:', doc.get("url")
            print 'name:', doc.get("name")


if __name__ == '__main__':
    STORE_DIR = "index"
    initVM()
    print 'lucene', VERSION
    directory = SimpleFSDirectory(File(STORE_DIR))  #索引文件存放的位置
    searcher = IndexSearcher(directory, True)  #索引信息读入到内存,创建IndexSearcher准备进行搜索
    analyzer = lucene.WhitespaceAnalyzer(
        Version.LUCENE_CURRENT
    )  #analyzer用来对查询语句进行词法分析和语言处理的,和IndexFiles.py中使用同样的analyzer。
    run(searcher, analyzer)
    searcher.close()
示例#4
0
                                 lucene.Field.Store.YES,
                                 lucene.Field.Index.NOT_ANALYZED))
                qestion.add(
                    lucene.Field("qst_follow", qst_follow,
                                 lucene.Field.Store.YES,
                                 lucene.Field.Index.NOT_ANALYZED))
                qestion.add(
                    lucene.Field("qst_ans", qst_ans, lucene.Field.Store.YES,
                                 lucene.Field.Index.NOT_ANALYZED))
                qestion.add(
                    lucene.Field("qst_num", qst_num, lucene.Field.Store.YES,
                                 lucene.Field.Index.NOT_ANALYZED))
                writer.addDocument(qestion)
            except Exception, e:
                print "Failed in indexDocs:", e
        f.close()


if __name__ == '__main__':

    lucene.initVM()
    print 'lucene', lucene.VERSION
    start = datetime.now()
    try:
        IndexFiles('analyzed_zhihu', "index_qst",
                   lucene.WhitespaceAnalyzer(lucene.Version.LUCENE_CURRENT))
        end = datetime.now()
        print end - start
    except Exception, e:
        print "Failed: ", e
示例#5
0
                                             lucene.Field.Index.ANALYZED))
                        doc.add(lucene.Field("url", url,
                                             lucene.Field.Store.YES,
                                             lucene.Field.Index.NOT_ANALYZED))
                        doc.add(lucene.Field("urltitle", title,
                                             lucene.Field.Store.YES,
                                             lucene.Field.Index.NOT_ANALYZED))
                    writer.addDocument(doc) #IndexWriter调用函数addDocument将索引写到索引文件夹中
                    print "----------------------------------------------------"
                except Exception, e:
                    print "Failed in indexDocs:", e
            else:
                break
        t.close()

if __name__ == '__main__':
##    if len(sys.argv) < 2:
##        print IndexFiles.__doc__
##        sys.exit(1)
    lucene.initVM() #初始化Java虚拟机
    print 'lucene', lucene.VERSION
    start = datetime.now()
    try:
##        IndexFiles(sys.argv[1], "index", lucene.StandardAnalyzer(lucene.Version.LUCENE_CURRENT))
#        IndexFiles('html', "index", lucene.StandardAnalyzer(lucene.Version.LUCENE_CURRENT))
        IndexFiles('html', "index for pic", lucene.WhitespaceAnalyzer(lucene.Version.LUCENE_CURRENT))
        end = datetime.now()
        print end - start
    except Exception, e:
        print "Failed: ", e