def run(self):
     """
     多线程的入口函数
     """
     crawler = BlogCrawler()
     # crawler = UserCrawler()
     while not Controller.taskpool.empty():
         un = Controller.taskpool.get()
         print "\n已处理 %d 个任务, 还剩 %d 个任务" % (Controller.finished_count, Controller.taskpool.qsize())
         # print uns
         try:
             urls = get_urls(get_uns_uids(config.UID_FILEPATH)[1])
             uns = get_uns_uids(config.UID_FILEPATH)[0]
             # print urls,'testing........'
             print "task start"
             # userinfo_dic={'username':userid}
             # url = 'http://weibo.com/u/1340714021'
             # url='http://weibo.com/u/1756439121'
             # url = 'http://weibo.com/caikangyong'
             # url = 'http://weibo.com/u/1704116960'
             # url = 'http://weibo.com/u/1730336902'
             # userinfo = crawler.scratch(un)
             # Controller.save_userinfo(userinfo,un)
             print "crawlering %s th bloger...." % (uns.index(un) + 1)
             blogs = crawler.scratch(urls[uns.index(un)])
             Controller.save_csv(blogs, un)
             print "task end"
         except:
             print un
         Controller.finished_count += 1
# Example #2 (scraper-site artifact: the original dump contained the
# Russian separator "Пример #2" and a stray "0" between code variants)
 def run(self):
     """
     多线程的入口函数
     """
     crawler = BlogCrawler()
     #crawler = UserCrawler()
     while not Controller.taskpool.empty():
         uid = Controller.taskpool.get()
         print "\n已处理 %d 个任务, 还剩 %d 个任务" % (Controller.finished_count,
                                            Controller.taskpool.qsize())
         #print uid
         try:
             print 'task start'
             #userinfo_dic={'username':userid}
             url = 'http://weibo.com/u/1340714021'
             #url='http://weibo.com/u/1756439121'
             #url = 'http://weibo.com/caikangyong'
             #url = 'http://weibo.com/u/1704116960'
             #url = 'http://weibo.com/u/1730336902'
             #userinfo = crawler.scratch(uid)
             blogs = crawler.scratch(url)
             Controller.save_csv(blogs, uid)
             print 'task end'
         except:
             print uid
         Controller.finished_count += 1
    def run(self):
        """
        多线程的入口函数
        """
        crawler = BlogCrawler()
        #crawler = UserCrawler()
        while not Controller.taskpool.empty():
            uid = Controller.taskpool.get()
            print "\n已处理 %d 个任务, 还剩 %d 个任务" % (Controller.finished_count, Controller.taskpool.qsize())
            #print uid
            try:
		print 'task start'
                #userinfo_dic={'username':userid}
                url = 'http://weibo.com/u/1340714021'
                #url='http://weibo.com/u/1756439121'
                #url = 'http://weibo.com/caikangyong'
                #url = 'http://weibo.com/u/1704116960'
                #url = 'http://weibo.com/u/1730336902'
                #userinfo = crawler.scratch(uid)
                blogs = crawler.scratch(url)
                Controller.save_csv(blogs, uid)
		print 'task end'
            except:
                print uid
            Controller.finished_count += 1
 def run(self):
     """
     多线程的入口函数
     """
     crawler = BlogCrawler()
     #crawler = UserCrawler()
     while not Controller.taskpool.empty():
         un = Controller.taskpool.get()
         print "\n已处理 %d 个任务, 还剩 %d 个任务" % (Controller.finished_count,
                                            Controller.taskpool.qsize())
         #print uns
         try:
             urls = get_urls(get_uns_uids(config.UID_FILEPATH)[1])
             uns = get_uns_uids(config.UID_FILEPATH)[0]
             #print urls,'testing........'
             print 'task start'
             #userinfo_dic={'username':userid}
             #url = 'http://weibo.com/u/1340714021'
             #url='http://weibo.com/u/1756439121'
             #url = 'http://weibo.com/caikangyong'
             #url = 'http://weibo.com/u/1704116960'
             #url = 'http://weibo.com/u/1730336902'
             #userinfo = crawler.scratch(un)
             #Controller.save_userinfo(userinfo,un)
             print 'crawlering %s th bloger....' % (uns.index(un) + 1)
             blogs = crawler.scratch(urls[uns.index(un)])
             Controller.save_csv(blogs, un)
             print 'task end'
         except:
             print un
         Controller.finished_count += 1