示例#1
0
    def reindex():
        """Crawl the configured site and rebuild the on-disk index files.

        Runs a ``Crawler`` from the hard-coded root URL with a fixed limit,
        loads the stop-word list shipped next to this script, and hands the
        crawl result to an ``Indexer`` which builds the reverse index and
        saves both the index and the page digests.

        Returns:
            ``True`` once every step has completed without raising.
        """
        # Indexing limit (N) handed to the crawler.
        limit = 100
        root = "http://lyle.smu.edu/~fmoore/"
        crawler = Crawler(root, limit)
        crawler.startCrawl()

        # The stop-word list is resolved relative to this file's directory,
        # so it is found regardless of the current working directory.
        script_dir = os.path.dirname(os.path.abspath(__file__))
        stop_words_path = os.path.join(script_dir, './assets/stopwords.txt')
        # Context manager guarantees the file handle is closed even if the
        # read fails (the original left the handle open).
        with open(stop_words_path) as stop_words_file:
            stop_words = stop_words_file.read().splitlines()

        # Build the reverse index and persist it together with the page
        # digests; the chain relies on each Indexer method returning an
        # object that exposes the next one.
        # NOTE(review): the output paths are passed as plain relative strings,
        # unlike the stop-word path above — presumably resolved against the
        # working directory unless Indexer does otherwise; confirm.
        (
            Indexer(crawler.getTreeIndex(), stop_words)
            .createReverseIndex()
            .saveIndex('assets/reverse_index.json')
            .savePageDigests('assets/page_digests.json')
        )
        return True