示例#1
0
    def generate_user_seeds(self, request_times=1, user_accessed_set=None):
        """Collect user ids suggested by the ``suggest_member`` endpoint.

        The endpoint is POSTed ``request_times`` times.  Each response's
        ``"msg"`` entries are parsed as HTML fragments, and the last path
        segment of every ``a.image-link`` href is kept as a seed id.

        Args:
            request_times: Number of POST requests to issue.
            user_accessed_set: Optional collection of ids to remove from
                the result.

        Returns:
            A ``set`` of id strings, or ``0`` when ``self._url`` is ``None``
            (the integer is kept for backward compatibility with callers).
        """
        if self._url is None:
            print("I'm anonymous user.")
            return 0

        if self.soup is None:
            self.parser()
        soup = self.soup

        seed_set = set()
        if request_times < 1:
            # Nothing to request; do not touch the page at all.
            return seed_set

        # The token and the encoded payload are identical for every request,
        # so build them once instead of once per iteration.
        post_url = "https://www.zhihu.com/lookup/suggest_member"
        _xsrf = soup.find("input", attrs={'name': '_xsrf'})["value"]
        post_data = urlencode({'ids': ",,", '_xsrf': _xsrf})

        for _ in range(request_times):
            r_post = zhihu_util.post(post_url, post_data)
            for member_html in json.loads(r_post)["msg"]:
                link = zhihu_util.get_soup(member_html).find(
                    "a", class_="image-link")
                href = link.get("href") if link is not None else None
                if not href:
                    # A fragment without a profile link must not abort the
                    # whole harvest; ignore it and keep going.
                    continue
                seed_set.add(href.split("/")[-1])

        if user_accessed_set:
            seed_set.difference_update(user_accessed_set)
        return seed_set
示例#2
0
    def test_post_repeatedly(self):
        """Send the same POST three times and require a non-"FAIL" response each time."""
        url = 'https://www.zhihu.com/node/TopicsPlazzaListV2'
        payload = (
            'method=next'
            '&params=%7B%22topic_id%22%3A686%2C%22offset%22%3A80%2C%22hash_id%22%3A%22dced108689287057f5cc3b5e85cb8289%22%7D'
            '&_xsrf=c6946d5914172133e875956a711be3ad'
        )

        for attempt in range(3):
            reply = zhihu_util.post(url, payload)
            print("...post count:%s" % attempt)
            failure_note = "post count is {0}".format(attempt)
            self.assertTrue(reply != "FAIL", failure_note)
示例#3
0
    def get_followees(self):
        """Yield a ``User`` for every account this user follows.

        The first batch (up to 20 entries) is read from the HTML of
        ``self._url + "/followees"``.  Further batches of 20 are requested
        with POSTs to ``ProfileFolloweesListV2``, using the ``_xsrf`` token
        and ``hash_id`` found in that same page.

        Nothing is yielded when ``self._url`` is ``None`` or when
        ``self.get_followees_num()`` returns 0.  Iteration stops, after
        printing a notice, at the first entry that cannot be turned into a
        ``User``.

        Yields:
            ``User`` objects built from each entry's link href and its
            UTF-8 encoded link text.
        """
        if self._url is None:
            print("I'm anonymous user.")
            return

        followees_num = self.get_followees_num()
        if followees_num == 0:
            return

        page_size = 20
        followee_url = self._url + "/followees"
        r = zhihu_util.get_content(followee_url)
        soup = zhihu_util.get_soup(r)

        # First page: entries are embedded in the profile HTML itself.
        user_url_list = soup.find_all("h2", class_="zm-list-content-title")
        for j in range(min(followees_num, page_size)):
            # Only the parsing is guarded.  The yield stays outside the try so
            # that GeneratorExit (generator closed early) or an exception
            # thrown in by the consumer is not misreported as a parse error.
            try:
                user = User(user_url_list[j].a["href"],
                            user_url_list[j].a.string.encode("utf-8"))
            except Exception:
                print("...get followee error ,just skip...")
                return
            yield user

        # Floor division keeps the page count an integer under true division.
        page_count = (followees_num - 1) // page_size + 1
        if page_count < 2:
            return

        # The token and hash come from the already-fetched page and are the
        # same for every follow-up request, so extract them once.
        post_url = "http://www.zhihu.com/node/ProfileFolloweesListV2"
        _xsrf = soup.find("input", attrs={'name': '_xsrf'})["value"]
        hash_id = re.findall("hash_id&quot;: &quot;(.*)&quot;},", r)[0]

        for i in range(1, page_count):
            offset = i * page_size
            params = json.dumps(
                {"offset": offset, "order_by": "created", "hash_id": hash_id})
            data = {
                '_xsrf': _xsrf,
                'method': "next",
                'params': params
            }
            r_post = zhihu_util.post(post_url, urlencode(data))

            followee_list = json.loads(r_post)["msg"]
            for j in range(min(followees_num - offset, page_size)):
                try:
                    followee_soup = zhihu_util.get_soup(followee_list[j])
                    user_link = followee_soup.find(
                        "h2", class_="zm-list-content-title").a
                    user = User(user_link["href"],
                                user_link.string.encode("utf-8"))
                except Exception:
                    print("...get followee error ,just skip...")
                    return
                yield user
示例#4
0
 def test_post(self):
     """Issue a single POST through zhihu_util.post; the response is not checked."""
     zhihu_util.post(
         'https://www.zhihu.com/node/TopicsPlazzaListV2',
         'method=next'
         '&params=%7B%22topic_id%22%3A253%2C%22offset%22%3A60%2C%22hash_id%22%3A%22dced108689287057f5cc3b5e85cb8289%22%7D'
         '&_xsrf=c6946d5914172133e875956a711be3ad',
     )