def scratch(self, url, start_pageindex=1): """ 获取给定用户的所有满足条件的微博,并写入文件 ---------------------------------------- uid: 待抓取微博数据的用户ID start_pageindex: 从第几页开始抓取用户的微博数据 """ self._init_(url) from controller import Controller # print Controller._get_filepath(self.uid) if os.path.isfile(Controller._get_filepath(self.uid)): # 用户微博已下载 print self.uid, u'用户的微博已下载!' return None if start_pageindex > self.pagenum: return [] #return self._binary_scratch(uid, start_pageindex) return self._sequence_scratch(self.uid, start_pageindex, self.pagenum)
def scratch(self, url, start_pageindex=1): """ 获取给定用户的所有满足条件的微博,并写入文件 ---------------------------------------- uid: 待抓取微博数据的用户ID start_pageindex: 从第几页开始抓取用户的微博数据 """ self._init_(url) from controller import Controller # print Controller._get_filepath(self.uid) if os.path.isfile(Controller._get_filepath(self.uid)): # 用户微博已下载 print self.uid, u'用户的微博已下载!' return None if start_pageindex > self.pagenum: return [] #return self._binary_scratch(uid, start_pageindex) return self._sequence_scratch(self.uid, start_pageindex, self.pagenum)
def scratch(self, url, start_pageindex=1): """ 获取给定用户的所有满足条件的微博,并写入文件 ---------------------------------------- uid: 待抓取微博数据的用户ID start_pageindex: 从第几页开始抓取用户的微博数据 """ self._init_(url) from controller import Controller # print Controller._get_filepath(self.uid) if os.path.isfile(Controller._get_filepath(self.uid)): # 用户微博已下载 print self.uid, u'用户的微博已下载!' return None if start_pageindex > self.pagenum: return [] #return self._binary_scratch(uid, start_pageindex) # the following two lines are added by haiyang to limit the pages downloaded if self.pagenum > 100: self.pagenum = 100 return self._sequence_scratch(self.uid, start_pageindex, self.pagenum)
def scratch(self, url, start_pageindex=1): """ 获取给定用户的所有满足条件的微博,并写入文件 ---------------------------------------- uid: 待抓取微博数据的用户ID start_pageindex: 从第几页开始抓取用户的微博数据 """ self._init_(url) from controller import Controller # print Controller._get_filepath(self.uid) if os.path.isfile(Controller._get_filepath(self.uid)): # 用户微博已下载 print self.uid, u'用户的微博已下载!' return None if start_pageindex > self.pagenum: return [] #return self._binary_scratch(uid, start_pageindex) # the following two lines are added by haiyang to limit the pages downloaded if self.pagenum>100: self.pagenum=100 return self._sequence_scratch(self.uid, start_pageindex,self.pagenum)