示例#1
0
def main():
    """Drive one indexing / retrieval / evaluation run from ``/config.cfg``.

    The config is read with ``readData('/config.cfg', '=')``; each entry is
    indexed as ``entry[0]`` (a key) and ``entry[1]`` (a value).  Keys handled:

    * ``DOCS``     -- ``readXML(PATH + value)`` yields a mapping; every
      key/value pair of it is handed to ``indexer``.
    * ``QUERIES``  -- rows come from ``readData(value, ';')``; for each row,
      ``row[0]`` with spaces removed is both the first argument to
      ``retriever`` and the key under which its result is stored.
    * ``EXPECTED`` -- ``readData(value, ';')`` is kept as the expected results.

    Entries with any other key are ignored.  At the end the collected data is
    passed to ``executeEvaluator`` together with ``PATH`` and ``'/REPORT.txt'``.

    Side effect: rebinds the module-level name ``luceneSearcher`` to the
    logger named ``'luceneSearcher'``.
    """
    global luceneSearcher

    # Logger setup happens first so later steps can rely on it.
    log('luceneSearcher', PATH + '/luceneSearcher.log')
    luceneSearcher = logging.getLogger('luceneSearcher')
    luceneSearcher.info('Processing Lucene...')

    results = {}
    expectedResultsString = ''

    for entry in readData('/config.cfg', '='):
        key = entry[0]
        # entry[1] is only touched inside a matching branch, so entries with
        # an unrecognised key never need a second field.
        if key == "DOCS":
            documents = readXML(PATH + str(entry[1]).strip())
            for doc_key, doc_value in documents.items():
                indexer(doc_key, doc_value)
        elif key == "QUERIES":
            for row in readData(str(entry[1]).strip(), ';'):
                query_id = row[0].replace(' ', '')
                results[query_id] = retriever(query_id, row[1])
        elif key == "EXPECTED":
            expectedResultsString = readData(str(entry[1]).strip(), ';')

    executeEvaluator(PATH, '/REPORT.txt', expectedResultsString, results)
示例#2
0
	def __init__(self, urllist=None):
		"""Create the spider, indexer and parser and store the URL list.

		urllist -- optional list of URLs, kept as ``self.urllist``.  When it
		           is omitted (or None) the instance gets its own new empty
		           list.
		"""
		# A literal ``[]`` default is created once at definition time and
		# would be shared by every instance built without arguments.
		if urllist is None:
			urllist = []
		self.spider = spider()
		self.indexer = indexer()
		self.parser = parser()
		self.urllist = urllist
示例#3
0
		self.spider = spider()
		self.indexer = indexer()
		self.parser = parser()
		self.urllist = urllist
	
	def start(self):
		"""Hand the URLs, parser and indexer to ``self.spider`` and run it.

		Returns False, without touching the spider, when ``self.urllist`` is
		empty (or None); returns True once ``self.spider.run()`` has returned.
		"""
		if not self.urllist:
			return False
		self.spider.addurllist(self.urllist)
		self.spider.setparser(self.parser)
		self.spider.setindexer(self.indexer)
		# Run the instance that was just configured; the bare name ``spider``
		# is a module-level object, not this object's spider.
		self.spider.run()
		return True

	def cleanup(self):
		"""Call ``closedb()`` on this object's indexer."""
		index = self.indexer
		index.closedb()
	

if __name__ == "__main__":
	# Script entry point: crawl one hard-coded site with a parser and an
	# indexer attached, then close the indexer.
	#
	# The instance gets its own name so that ``spider`` keeps referring to
	# the callable that creates spiders instead of being rebound to one
	# particular instance at module level.
	crawler = spider()
	#crawler.addurl('http://localhost:9080/setest/test.php')
	crawler.addurl('http://hq.booksarefun.com/')
	parserobj = parser()
	indexobj = indexer()
	crawler.setparser(parserobj)
	crawler.setindexer(indexobj)
	try:
		crawler.run()
	finally:
		# Close the indexer even when the crawl raises.
		indexobj.closedb()
	# The parenthesised form prints the same text under Python 2 and is
	# also valid Python 3 syntax.
	print('done!')
示例#4
0
 def __init__(self, urllist=None):
     """Create the spider, indexer and parser and store the URL list.

     urllist -- optional list of URLs, kept as ``self.urllist``.  When it
                is omitted (or None) the instance gets its own new empty
                list.
     """
     # A literal ``[]`` default is created once at definition time and
     # would be shared by every instance built without arguments.
     if urllist is None:
         urllist = []
     self.spider = spider()
     self.indexer = indexer()
     self.parser = parser()
     self.urllist = urllist
示例#5
0
        self.spider = spider()
        self.indexer = indexer()
        self.parser = parser()
        self.urllist = urllist

    def start(self):
        """Hand the URLs, parser and indexer to ``self.spider`` and run it.

        Returns False, without touching the spider, when ``self.urllist`` is
        empty (or None); returns True once ``self.spider.run()`` has returned.
        """
        if not self.urllist:
            return False
        self.spider.addurllist(self.urllist)
        self.spider.setparser(self.parser)
        self.spider.setindexer(self.indexer)
        # Run the instance that was just configured; the bare name ``spider``
        # is a module-level object, not this object's spider.
        self.spider.run()
        return True

    def cleanup(self):
        """Call ``closedb()`` on this object's indexer."""
        index = self.indexer
        index.closedb()


if __name__ == "__main__":
    # Script entry point: crawl one hard-coded site with a parser and an
    # indexer attached, then close the indexer.
    #
    # The instance gets its own name so that ``spider`` keeps referring to
    # the callable that creates spiders instead of being rebound to one
    # particular instance at module level.
    crawler = spider()
    #crawler.addurl('http://localhost:9080/setest/test.php')
    crawler.addurl('http://hq.booksarefun.com/')
    parserobj = parser()
    indexobj = indexer()
    crawler.setparser(parserobj)
    crawler.setindexer(indexobj)
    try:
        crawler.run()
    finally:
        # Close the indexer even when the crawl raises.
        indexobj.closedb()
    # The parenthesised form prints the same text under Python 2 and is
    # also valid Python 3 syntax.
    print('done!')