Пример #1
0
 def run(self):
     """Thread entry point: drain the shared task pool and crawl blogs.

     Ids are taken from ``Controller.taskpool`` until it is empty.  For
     each id a fixed URL is passed to ``BlogCrawler.scratch`` and the result
     is handed to ``Controller.save_csv`` together with the id.
     ``Controller.finished_count`` is incremented for every id taken,
     whether or not its crawl succeeded.

     A failing task does not stop the worker: the id is printed, the
     traceback is written to stderr, and the loop moves on to the next id.

     NOTE(review): a second ``run`` with the same signature appears later
     in this file view; if both belong to the same class, the later
     definition replaces this one.
     """
     import traceback
     try:
         from Queue import Empty  # Python 2 module name
     except ImportError:
         from queue import Empty  # Python 3 module name

     crawler = BlogCrawler()
     # NOTE(review): every task scrapes this same hard-coded page; the
     # dequeued id is only passed to save_csv. Confirm this is intended.
     url = 'http://weibo.com/u/1340714021'
     while True:
         # A separate empty() check followed by a blocking get() can hang
         # when another worker takes the last item in between, so take the
         # item without blocking and stop once the pool is drained.
         try:
             uid = Controller.taskpool.get_nowait()
         except Empty:
             break
         print("\n已处理 %d 个任务, 还剩 %d 个任务" % (Controller.finished_count,
                                            Controller.taskpool.qsize()))
         try:
             print('task start')
             blogs = crawler.scratch(url)
             Controller.save_csv(blogs, uid)
             print('task end')
         except Exception:
             # Best effort: report which id failed and why, then keep
             # going with the remaining tasks.
             print(uid)
             traceback.print_exc()
         # NOTE(review): no lock is visible around this shared counter;
         # confirm whether exact counts matter when several workers run.
         Controller.finished_count += 1
 def run(self):
     """Thread entry point: drain the shared task pool and crawl blogs.

     Names are taken from ``Controller.taskpool`` until it is empty.  Each
     name is looked up in the first list returned by
     ``get_uns_uids(config.UID_FILEPATH)``; the URL at the same position in
     ``get_urls(<second list>)`` is passed to ``BlogCrawler.scratch`` and
     the result is handed to ``Controller.save_csv`` together with the
     name.  ``Controller.finished_count`` is incremented for every name
     taken, whether or not its crawl succeeded.

     The name/URL lists are loaded once per worker, on the first task, and
     reused afterwards instead of being rebuilt for every task.  If loading
     fails, the failure is handled like any other task failure and loading
     is retried on the next task.

     A failing task does not stop the worker: the name is printed, the
     traceback is written to stderr, and the loop moves on.
     """
     import traceback
     try:
         from Queue import Empty  # Python 2 module name
     except ImportError:
         from queue import Empty  # Python 3 module name

     crawler = BlogCrawler()
     uns = None
     urls = None  # stays None until the lookup lists load successfully
     while True:
         # A separate empty() check followed by a blocking get() can hang
         # when another worker takes the last item in between, so take the
         # item without blocking and stop once the pool is drained.
         try:
             un = Controller.taskpool.get_nowait()
         except Empty:
             break
         print("\n已处理 %d 个任务, 还剩 %d 个任务" % (Controller.finished_count,
                                            Controller.taskpool.qsize()))
         try:
             if urls is None:
                 # Loaded inside the try so a load error is reported for
                 # this task and retried on the next one.
                 loaded = get_uns_uids(config.UID_FILEPATH)
                 uns = loaded[0]
                 urls = get_urls(loaded[1])
             print('task start')
             # Raises ValueError (handled below) when the name is unknown.
             position = uns.index(un)
             print('crawlering %s th bloger....' % (position + 1))
             blogs = crawler.scratch(urls[position])
             Controller.save_csv(blogs, un)
             print('task end')
         except Exception:
             # Best effort: report which name failed and why, then keep
             # going with the remaining tasks.
             print(un)
             traceback.print_exc()
         # NOTE(review): no lock is visible around this shared counter;
         # confirm whether exact counts matter when several workers run.
         Controller.finished_count += 1