def test_get_fans(self):
    """Parse a saved fans page and verify the extracted ids and page urls."""
    from page_parse.user import public

    # Open with an explicit encoding so the test does not depend on the
    # platform default locale (the fixture contains non-ASCII text).
    with open('./tests/fans.html', encoding='utf-8') as f:
        cont = f.read()
    # Fix: the original called get_fans_or_follows(cont) twice and threw
    # the first result away; one call is sufficient.
    ids, cur_urls = public.get_fans_or_follows(cont)
    self.assertEqual(len(ids), 9)
    self.assertEqual(len(cur_urls), 5)
def test_get_fans(self):
    """Test extracting fan user ids from a saved fans page."""
    from page_parse.user import public

    with open('./tests/fans.html', encoding='utf-8') as f:
        cont = f.read()
    # Fix: the original invoked the parser twice and discarded the first
    # result; a single call is enough.
    ids = public.get_fans_or_follows(cont)
    self.assertEqual(len(ids), 9)
def get_fans_or_followers_ids(user_id, crawl_type, verify_type):
    """
    Get followers or fans.

    :param user_id: user id
    :param crawl_type: 1 stands for fans, 2 stands for follows
    :param verify_type: 1 stands for 100505 (normal users), 2 stands for
                        100606 (special users, such as writers)
    :return: list of fan or follower ids
    :raises ValueError: if crawl_type or verify_type is not 1 or 2
    """
    # todo deal with conditions that fans and followers more than 5 pages
    # Page-id prefix (100505 vs 100606) and anchor suffix differ between
    # normal and special users; the relate=fans query selects the fan list.
    url_templates = {
        (1, 1): 'http://weibo.com/p/100505{}/follow?relate=fans&page={}#Pl_Official_HisRelation__60',
        (2, 1): 'http://weibo.com/p/100505{}/follow?page={}#Pl_Official_HisRelation__60',
        (1, 2): 'http://weibo.com/p/100606{}/follow?relate=fans&page={}#Pl_Official_HisRelation__47',
        (2, 2): 'http://weibo.com/p/100606{}/follow?page={}#Pl_Official_HisRelation__47',
    }
    try:
        fans_or_follows_url = url_templates[(crawl_type, verify_type)]
    except KeyError:
        # Fix: the original if/elif chain had no else branch, so invalid
        # arguments left the url unbound and crashed later with NameError;
        # fail fast with a clear message instead.
        raise ValueError('crawl_type and verify_type must both be 1 or 2')

    cur_page = 1
    max_page = 6
    user_ids = list()
    while cur_page < max_page:
        url = fans_or_follows_url.format(user_id, cur_page)
        page = get_page(url)
        # The first page tells us how many result pages actually exist.
        if cur_page == 1:
            urls_length = public.get_max_crawl_pages(page)
            if max_page > urls_length:
                max_page = urls_length + 1
        # get ids and store relations
        user_ids.extend(public.get_fans_or_follows(page, user_id, crawl_type))
        cur_page += 1
    return user_ids
def get_fans_or_followers_ids(user_id, crawl_type):
    """
    Collect the ids of a user's fans (crawl_type == 1) or of the users
    they follow (crawl_type == 2).

    :param user_id: user id
    :param crawl_type: 1 stands for fans, 2 stands for follows
    :return: list of fan or follower ids
    """
    # todo: check fans/followers for special users such as writers, and
    # handle accounts whose fans or follows span more than 5 pages
    if crawl_type == 1:
        template = 'http://weibo.com/p/100505{}/follow?relate=fans&page={}#Pl_Official_HisRelation__60'
    else:
        template = 'http://weibo.com/p/100505{}/follow?page={}#Pl_Official_HisRelation__60'

    user_ids = []
    page_no, last_page = 1, 6
    while page_no < last_page:
        html = get_page(template.format(user_id, page_no))
        if page_no == 1:
            # the first page reveals how many result pages really exist
            total = public.get_max_crawl_pages(html)
            if last_page > total:
                last_page = total + 1
        # collect ids (the parser also stores the relations as a side effect)
        user_ids.extend(public.get_fans_or_follows(html, user_id, crawl_type))
        page_no += 1
    return user_ids
def get_fans_or_followers_ids(user_id, crawl_type):
    """
    Get a user's fans or the users they follow.

    :param user_id: user id
    :param crawl_type: 1 means fans, 2 means follows
    :return: list of fan or follow ids
    """
    # todo: verify that writer-type (special) accounts behave the same;
    # handle accounts whose fans/follows exceed 5 pages
    if crawl_type == 1:
        ff_url = 'http://weibo.com/p/100505{}/follow?relate=fans&page={}#Pl_Official_HisRelation__60'
    else:
        ff_url = 'http://weibo.com/p/100505{}/follow?page={}#Pl_Official_HisRelation__60'

    cur_page = 1
    max_page = 6
    user_ids = list()
    while cur_page < max_page:
        url = ff_url.format(user_id, cur_page)
        page = get_page(url)
        if cur_page == 1:
            # the first page tells us how many pages actually exist
            urls_length = public.get_max_crawl_pages(page)
            if max_page > urls_length:
                max_page = urls_length + 1
        # Fix: collect ids on EVERY page, not only inside the cur_page == 1
        # branch — the original extended user_ids only for the first page,
        # so pages 2+ were fetched but their ids were discarded.
        user_ids.extend(public.get_fans_or_follows(page))
        cur_page += 1
    return user_ids
def get_fans_or_followers_ids(user_id, domain, crawl_type):
    """
    Collect the ids of a user's fans or follows.

    :param user_id: user id
    :param domain: prefix inserted before user_id in the page url
                   (presumably the account's page-id domain, e.g. 100505 —
                   TODO confirm against the caller)
    :param crawl_type: 1 stands for fans, 2 stands for follows
    :return: list of fan or follower ids
    """
    # todo: check fans/followers of special users such as writers; handle
    # accounts with more than 5 pages of fans/follows
    if crawl_type == 1:
        fans_or_follows_url = 'https://weibo.com/p/{}{}/follow?relate=fans&page={}#Pl_Official_HisRelation__60'
    else:
        fans_or_follows_url = 'https://weibo.com/p/{}{}/follow?page={}#Pl_Official_HisRelation__60'

    user_ids = []
    cur, limit = 1, 6
    while cur < limit:
        page = get_page(fans_or_follows_url.format(domain, user_id, cur))
        if cur == 1:
            # the first page tells us how many pages are actually available
            available = public.get_max_crawl_pages(page)
            if limit > available:
                limit = available + 1
        # collect ids (the parser also stores the relations as a side effect)
        user_ids.extend(public.get_fans_or_follows(page, user_id, crawl_type))
        cur += 1
    return user_ids
def test_get_fans(self):
    """Parse the fans page served by the test server and count the ids."""
    from page_parse.user import public

    resp = requests.get(TEST_SERVER + 'fans.html')
    resp.encoding = 'utf-8'
    ids = public.get_fans_or_follows(resp.text, '2036911095', 1)
    self.assertEqual(len(ids), 9)
def test_get_fans(self):
    """Check that nine fan ids are extracted from the fans.html fixture."""
    from page_parse.user import public

    response = requests.get(TEST_SERVER + 'fans.html')
    response.encoding = 'utf-8'
    content = response.text
    fan_ids = public.get_fans_or_follows(content, '2036911095', 1)
    self.assertEqual(9, len(fan_ids))