def get_followers(self):
    """Yield a ``User`` for every follower link found for this profile.

    Downloads ``self.url + '/followers'``, then appends every fragment
    produced by ``ScrollLoader.run()`` to that page text and scans the
    combined text for anchors carrying ``class="zg-link"``.

    This is a generator: nothing is fetched until the first item is
    requested, and all pages are loaded before the first ``User`` is
    yielded.

    Yields:
        User: one per captured ``href``, in the order ``re.findall``
        returns them.  The captured value is passed to ``User`` as-is and
        duplicates are not filtered out.
    """
    follower_page_url = self.url + '/followers'
    r = requests.get(follower_page_url)
    text = r.text
    soup = BeautifulSoup(text)
    hash_id = get_hash_id(soup)
    _xsrf = get_xsrf(soup)
    # NOTE(review): 20 is presumably the page size expected by
    # ScrollLoader -- confirm against its definition.
    scroll_loader = ScrollLoader(
        "post", "http://www.zhihu.com/node/ProfileFollowersListV2", 20,
        _xsrf, hash_id)

    # Gather every fragment first and join once; repeatedly doing
    # ``text += each`` re-copies the growing string on every step.
    fragments = [text]
    for response in scroll_loader.run():
        fragments.extend(response)
    text = "".join(fragments)

    follower_url_list = re.findall(
        r'<a[^>]+href=\"([^>]*)\"\x20class=\"zg-link\"', text)
    for follower_url in follower_url_list:
        yield User(follower_url)
def get_followeing_topics(self):
    """Yield a ``Topic`` for every topic link found on the topics page.

    Downloads ``self.url + '/topics'``, appends every fragment produced by
    ``ScrollLoader.run()`` to that page text, and scans the combined text
    for anchors carrying ``class="zm-list-avatar-link"``.

    The misspelled method name is kept unchanged so existing callers keep
    working.

    This is a generator: nothing is fetched until the first item is
    requested, and all pages are loaded before the first ``Topic`` is
    yielded.

    Yields:
        Topic: built from ``"http://www.zhihu.com"`` plus each captured
        ``href``.  No de-duplication is performed.
    """
    url = self.url + '/topics'
    r = requests.get(url)
    soup = BeautifulSoup(r.content)
    _xsrf = get_xsrf(soup)
    # NOTE(review): 20 is presumably the page size expected by
    # ScrollLoader -- confirm against its definition.
    scroll_loader = ScrollLoader("post", url, 20, _xsrf=_xsrf, start=0)

    # Gather every fragment first and join once; repeatedly doing
    # ``text += each`` re-copies the growing string on every step.
    fragments = [r.text]
    for response in scroll_loader.run():
        fragments.extend(response)
    text = "".join(fragments)

    topic_list = re.findall(
        r'<a\x20class=\"zm-list-avatar-link\"\x20href=\"([^>]*)\">', text)
    # Kept as a function-level import, as in the original; presumably this
    # avoids a circular import between modules -- confirm before moving it.
    from Topic import Topic
    # A distinct loop name avoids shadowing the page ``url`` above.
    for topic_path in topic_list:
        yield Topic("http://www.zhihu.com" + topic_path)
def get_followers(self):
    """Yield a ``User`` for every follower link found on the followers page.

    Downloads ``self.url + '/followers'``, appends every fragment produced
    by ``ScrollLoader.run()`` to that page text, and scans the combined
    text for anchors whose ``class="zm-item-link-avatar"`` and ``href``
    attributes each sit on their own line (the pattern matches literal
    newlines between them).

    This is a generator: nothing is fetched until the first item is
    requested, and all pages are loaded before the first ``User`` is
    yielded.

    Yields:
        User: built from ``"http://www.zhihu.com"`` plus each captured
        ``href``.  No de-duplication is performed.
    """
    url = self.url + '/followers'
    r = requests.get(url)
    soup = BeautifulSoup(r.content)
    _xsrf = get_xsrf(soup)
    # NOTE(review): 20 is presumably the page size expected by
    # ScrollLoader -- confirm against its definition.
    scroll_loader = ScrollLoader("post", url, 20, _xsrf=_xsrf, start=0)

    # Gather every fragment first and join once; repeatedly doing
    # ``text += each`` re-copies the growing string on every step.
    fragments = [r.text]
    for response in scroll_loader.run():
        fragments.extend(response)
    text = "".join(fragments)

    user_list = re.findall(
        r'<a[^>]*\nclass=\"zm-item-link-avatar\"\nhref=\"([^>]*)\">', text)
    # Kept as a function-level import, as in the original; presumably this
    # avoids a circular import between modules -- confirm before moving it.
    from User import User
    # A distinct loop name avoids shadowing the page ``url`` above.
    for user_path in user_list:
        user_url = "http://www.zhihu.com" + user_path
        yield User(user_url)