示例#1
0
 def getCompelitedSeListUrl(self, spiderName):
     """Return the number of completed search-engine list pages.

     Counts the documents in the URL collection registered for
     ``spiderName`` whose ``status`` is at most 400 and whose
     ``priority`` is below 999.
     """
     finishedFilter = {
         "status": {"$lte": 400},
         "priority": {"$lt": 999},
     }
     return mongoApt.countByWhere(
         self.urlDbnamekey,
         self.urlCollectionsMap[spiderName],
         whereJson=finishedFilter,
     )
示例#2
0
 def countCrawledUrlsBySpidername(self, spiderName):
     """Crawler diagnostics: count the pages a spider has downloaded.

     A URL is counted when its ``status`` lies strictly between 0 and
     900 in the URL collection registered for ``spiderName``.
     """
     downloadedFilter = {'status': {'$lt': 900, '$gt': 0}}
     return mongoApt.countByWhere(
         self.urlDbnamekey,
         self.urlCollectionsMap[spiderName],
         whereJson=downloadedFilter,
     )
示例#3
0
 def countUncrawlUrlsBySpidername(self, spiderName):
     """Crawler diagnostics: count the pages a spider still has to download.

     A URL is counted when its ``status`` is 900 or greater in the URL
     collection registered for ``spiderName``.
     """
     pendingFilter = {'status': {'$gte': 900}}
     return mongoApt.countByWhere(
         self.urlDbnamekey,
         self.urlCollectionsMap[spiderName],
         whereJson=pendingFilter,
     )
示例#4
0
 def countItemsBySpidername(self, spiderName):
     """Crawler diagnostics: count the item pages a spider has downloaded.

     Queries the ``Page`` collection, using ``spiderName`` itself as the
     first argument to ``mongoApt.countByWhere``, for documents whose
     ``status`` is greater than 0.
     """
     return mongoApt.countByWhere(
         spiderName,
         'Page',
         whereJson={'status': {'$gt': 0}},
     )
示例#5
0
 def countUncrawlUrls(self):
     """Return the total number of pages still waiting to be crawled.

     Sums, over every URL collection registered in
     ``self.urlCollectionsMap``, the documents whose ``status`` is 900
     or greater -- the same "pending" filter that
     ``countUncrawlUrlsBySpidername`` applies to a single spider.
     """
     # $gte (not $gt): the per-spider counter treats status == 900 as
     # pending, and the crawled/error counters stop at ``$lt 900``, so a
     # strict ``$gt`` left status-900 URLs out of every total.
     whereJson = {'status': {'$gte': 900}}
     # The per-spider methods look collections up as
     # ``urlCollectionsMap[spiderName]``, so the map's *values* are the
     # collection names; its keys are spider names.  set() guards against
     # double counting should several spiders share one collection.
     collectionNames = set(self.urlCollectionsMap.values())
     return sum(
         mongoApt.countByWhere(self.urlDbnamekey,
                               colName,
                               whereJson=whereJson)
         for colName in collectionNames
     )
示例#6
0
 def countErrorStatusUrls(self):
     """Return the total number of pages whose download failed.

     Sums, over every URL collection registered in
     ``self.urlCollectionsMap``, the documents whose ``status`` is at
     least 400 and below 900.
     """
     whereJson = {'status': {'$gte': 400, '$lt': 900}}
     # The per-spider methods look collections up as
     # ``urlCollectionsMap[spiderName]``, so the map's *values* are the
     # collection names; its keys are spider names.  set() guards against
     # double counting should several spiders share one collection.
     collectionNames = set(self.urlCollectionsMap.values())
     return sum(
         mongoApt.countByWhere(self.urlDbnamekey,
                               colName,
                               whereJson=whereJson)
         for colName in collectionNames
     )