Python Controller._get_filepath示例

 def scratch(self, url, start_pageindex=1):
     """
     获取给定用户的所有满足条件的微博,并写入文件
     ----------------------------------------
     uid: 待抓取微博数据的用户ID
     start_pageindex: 从第几页开始抓取用户的微博数据
     """
     self._init_(url)
     from controller import Controller
     # print Controller._get_filepath(self.uid)
     if os.path.isfile(Controller._get_filepath(self.uid)):  # 用户微博已下载
         print self.uid, u'用户的微博已下载！'
         return None
     if start_pageindex > self.pagenum:
         return []
     #return self._binary_scratch(uid, start_pageindex)
     return self._sequence_scratch(self.uid, start_pageindex, self.pagenum)

示例#2

显示文件

文件： blogcrawler.py 项目： AugustLONG/sina_weibo_crawler

 def scratch(self, url, start_pageindex=1):
     """
     获取给定用户的所有满足条件的微博,并写入文件
     ----------------------------------------
     uid: 待抓取微博数据的用户ID
     start_pageindex: 从第几页开始抓取用户的微博数据
     """
     self._init_(url)
     from controller import Controller
     # print Controller._get_filepath(self.uid)
     if os.path.isfile(Controller._get_filepath(self.uid)):  # 用户微博已下载
         print self.uid, u'用户的微博已下载！'
         return None
     if start_pageindex > self.pagenum:
         return []
     #return self._binary_scratch(uid, start_pageindex)
     return self._sequence_scratch(self.uid, start_pageindex, self.pagenum)

示例#3

显示文件

文件： blogcrawler.py 项目： coolspiderghy/sina_weibo_crawler

 def scratch(self, url, start_pageindex=1):
     """
     获取给定用户的所有满足条件的微博,并写入文件
     ----------------------------------------
     uid: 待抓取微博数据的用户ID
     start_pageindex: 从第几页开始抓取用户的微博数据
     """
     self._init_(url)
     from controller import Controller
     # print Controller._get_filepath(self.uid)
     if os.path.isfile(Controller._get_filepath(self.uid)):  # 用户微博已下载
         print self.uid, u'用户的微博已下载！'
         return None
     if start_pageindex > self.pagenum:
         return []
     #return self._binary_scratch(uid, start_pageindex)
     # the following two lines are added by haiyang to limit the pages downloaded
     if self.pagenum > 100:
         self.pagenum = 100
     return self._sequence_scratch(self.uid, start_pageindex, self.pagenum)

示例#4

显示文件

文件： blogcrawler.py 项目： coolspiderghy/sina_weibo_crawler

 def scratch(self, url, start_pageindex=1):
     """
     获取给定用户的所有满足条件的微博,并写入文件
     ----------------------------------------
     uid: 待抓取微博数据的用户ID
     start_pageindex: 从第几页开始抓取用户的微博数据
     """
     self._init_(url)
     from controller import Controller
     # print Controller._get_filepath(self.uid)
     if os.path.isfile(Controller._get_filepath(self.uid)):  # 用户微博已下载
         print self.uid, u'用户的微博已下载！'
         return None
     if start_pageindex > self.pagenum:
         return []
     #return self._binary_scratch(uid, start_pageindex)
     # the following two lines are added by haiyang to limit the pages downloaded
     if self.pagenum>100:
         self.pagenum=100
     return self._sequence_scratch(self.uid, start_pageindex,self.pagenum)