示例#1
0
def callbackfunc(request,result):
    """Handle one finished download: queue unseen links, then record the result.

    request -- the work request that produced ``result``; not used here.
    result  -- a 3-item sequence ``(res, resource, pagebuf)``.  When
               ``pagebuf`` is None the function returns immediately and
               ``fetchPage.dealwithResult`` is not called.
    """
    res, resource, pagebuf = result
    # Identity test: ``== None`` would dispatch to a custom __eq__ if the
    # buffer object defines one.
    if pagebuf is None:
        return

    for href in fetchPage.parsePage(pagebuf, resource):
        # PAGESCache marks links this callback has already queued; skip them
        # so the same href is not submitted twice from here.
        if PAGESCache.get(href) is not None:
            continue
        PAGESCache[href] = True
        hostname, filename = fetchPage.parse(href)
        # The new request reuses this function as its callback, so links found
        # on the downloaded page are handled the same way.
        main.putRequest(threadpool.WorkRequest(
            fetchPage.downPage,
            args=[hostname, filename],
            kwds={},
            callback=callbackfunc))
    fetchPage.dealwithResult(res, resource)
示例#2
0
文件: run.py 项目: qz2501/miniCrowler
def callbackfunc(request,result):
    """Process a completed page fetch and schedule the links it contains.

    request -- the originating work request; ignored by this callback.
    result  -- unpacked as ``(res, resource, pagebuf)``.  If ``pagebuf`` is
               None nothing is scheduled and ``fetchPage.dealwithResult`` is
               skipped.
    """
    res, resource, pagebuf = result
    # Use ``is`` rather than ``==`` so a buffer type with its own __eq__
    # cannot affect the None check.
    if pagebuf is None:
        return

    hreflist = fetchPage.parsePage(pagebuf, resource)
    for href in hreflist:
        already_seen = PAGESCache.get(href) is not None
        if already_seen:
            continue
        # Mark before queuing so a repeated href later in this list is skipped.
        PAGESCache[href] = True
        hostname, filename = fetchPage.parse(href)
        work = threadpool.WorkRequest(
            fetchPage.downPage,
            args=[hostname, filename],
            kwds={},
            callback=callbackfunc)
        main.putRequest(work)
    fetchPage.dealwithResult(res, resource)
示例#3
0
def usingOneThread(limit):
    urlset = open("input.txt","r")
    start = datetime.datetime.now()
    for u in urlset:
        if limit <= 0 : break
        limit-=1
        hostname , filename = fetchPage.parse(u)
        res= fetchPage.downPage(hostname,filename,0)
        fetchPage.dealwithResult(res)
    end = datetime.datetime.now()
    print "Start at :\t" , start
    print "End at :\t" , end
    print "Total Cost :\t" , end - start
    print 'Total fetched :', statistics.fetched_url
示例#4
0
def callbackfunc(request,result):
    """Forward the first two items of ``result`` to fetchPage.dealwithResult.

    request -- the originating work request; unused.
    result  -- indexable; only items 0 and 1 are read.
    """
    res = result[0]
    resource = result[1]
    fetchPage.dealwithResult(res, resource)