# NOTE(review): the original indentation of this chunk was lost.  The
# assignments below are the tail of a method whose ``def`` line lies outside
# this chunk; they are kept token-for-token and only re-wrapped.  Each one
# parses a counter out of the page object ``d`` and stores it on ``self.user``.
self.user['answers'] = int(
    d('.profile-navbar a.item').eq(2).find('span.num').text())
self.user['posts'] = int(
    d('.profile-navbar a.item').eq(3).find('span.num').text())
self.user['collections'] = int(
    d('.profile-navbar a.item').eq(4).find('span.num').text())
self.user['logs'] = int(
    d('.profile-navbar a.item').eq(5).find('span.num').text())
self.user['followings_num'] = int(
    d('.zm-profile-side-following a.item').eq(0).find('strong').text())
self.user['followers_num'] = int(
    d('.zm-profile-side-following a.item').eq(1).find('strong').text())
# ``attr`` may yield a falsy value when the link is absent; store '' then.
self.user['weibo'] = d('a.zm-profile-header-user-weibo').attr('href') or ''


def get(self):
    """Crawl the profile and all followings pages, then return ``self.user``.

    Steps, in order:

    1. ``self.get_followings_page()`` and ``self.get_profile()`` are called.
    2. ``self.get_followings_json`` is called repeatedly with the number of
       followings collected so far; every truthy result is handed to
       ``self.get_followings_from_html``.  The loop stops on the first
       falsy result.
    3. Between two fetches the method sleeps a random 0.5-2 seconds
       (presumably to throttle requests -- confirm with the author).

    Returns:
        The ``self.user`` mapping populated by the calls above.
    """
    log = self.logger
    log.warning('Start crawling %s', self.user_domain)
    self.get_followings_page()
    self.get_profile()

    while True:
        # The current count doubles as the offset of the next page.
        # NOTE(review): this relies on get_followings_from_html growing
        # self.user['followings']; otherwise the same offset repeats.
        offset = len(self.user['followings'])
        page = self.get_followings_json(offset)
        if not page:
            break
        self.get_followings_from_html(page)
        time.sleep(random.uniform(0.5, 2))

    log.warning('Finish crawling %s, %s followings',
                self.user_domain, len(self.user['followings']))
    return self.user


if __name__ == '__main__':
    # Manual run: crawl one hard-coded user and dump the result.
    logging.basicConfig(level=logging.INFO)
    crawler = FollowingsCrawler(Session.get(), 'zhang-wen-wen-17')
    print(crawler.get())
def __init__(self):
    """Set up the two collaborators this object keeps on itself.

    ``self.session`` receives whatever ``Session.get()`` returns, and
    ``self.logger`` is the logger named after this module.
    """
    session = Session.get()
    module_logger = logging.getLogger(__name__)
    self.session = session
    self.logger = module_logger
d('.profile-navbar a.item').eq(4).find('span.num').text()) self.user['logs'] = int( d('.profile-navbar a.item').eq(5).find('span.num').text()) self.user['followings_num'] = int( d('.zm-profile-side-following a.item').eq(0).find('strong').text()) self.user['followers_num'] = int( d('.zm-profile-side-following a.item').eq(1).find('strong').text()) self.user['weibo'] = d('a.zm-profile-header-user-weibo').attr( 'href') or '' def get(self): self.logger.warning('Start crawling %s', self.user_domain) self.get_followings_page() self.get_profile() self.logger.warning('Finish crawling profile for %s,', self.user_domain) return self.user if __name__ == '__main__': config.set_config('env', 'dev') logging.basicConfig(level=logging.INFO) s = Session.get() pc = ProfileCrawler(s, 'jonas-lu') user = pc.get() for p in user: print(p, user[p])