def get_fans_or_follows(html, uid, type):
    """
    Get fans or follows and store their relationships.

    Every ``<script>`` tag of the page is scanned for an ``FM.view(...)``
    payload tagged ``pl.content.followTab.index``. The HTML fragment stored
    under the payload's ``html`` key is parsed, and the ``uid=...&`` value of
    each ``follow_item`` inside its ``follow_box`` is collected when it is
    purely numeric. One ``UserRelation(uid, found_id, type)`` is built per
    collected id and the whole batch is passed to ``save_relations``.

    :param html: current page source; an empty or missing value returns
        an empty list without saving anything
    :param uid: current user id
    :param type: type of relations, 1 stands for fans, 2 stands for follows
    :return: list of fan or follow ids (digit-only strings)
    """
    if not html:
        return []

    # Two separate patterns: the original rebound a single ``pattern`` name
    # inside the loop, so later scripts were searched with the uid regex.
    view_pattern = re.compile(r'FM\.view\((.*)\)')
    uid_pattern = re.compile(r'uid=(.*?)&')

    user_ids = []
    relations = []
    for script in BeautifulSoup(html, 'html.parser').find_all('script'):
        script_text = script.string
        # ``.string`` is None when the tag has no single text child
        # (for example an external ``src`` script), so skip those.
        if not script_text or 'pl.content.followTab.index' not in script_text:
            continue

        view_match = view_pattern.search(script_text)
        if view_match is None:
            continue

        fragment = json.loads(view_match.group(1)).get('html', '')
        follow_box = BeautifulSoup(fragment, 'html.parser').find(
            attrs={'class': 'follow_box'})
        # No follow list in this fragment: nothing to collect from it.
        if follow_box is None:
            continue

        for item in follow_box.find_all(attrs={'class': 'follow_item'}):
            uid_match = uid_pattern.search(str(item))
            if uid_match is None:
                continue
            found_id = uid_match.group(1)
            # filter invalid ids
            if found_id.isdigit():
                user_ids.append(found_id)
                relations.append(UserRelation(uid, found_id, type))

    save_relations(relations)
    return user_ids
def get_fans_or_follows(html, uid, type):
    """
    Get fans or follows and store their relationships.

    Looks through the page's ``<script>`` tags for ``FM.view(...)`` payloads
    tagged ``pl.content.followTab.index``, extracts the numeric user ids from
    the HTML fragment under each payload's ``html`` key, builds one
    ``UserRelation(uid, found_id, type)`` per id and passes them all to
    ``save_relations``.

    :param html: current page source; an empty or missing value returns
        an empty list without saving anything
    :param uid: current user id
    :param type: type of relations, 1 stands for fans, 2 stands for follows
    :return: list of fan or follow ids (digit-only strings)
    """
    if not html:
        return []

    # Kept under its own name for the whole loop; the original overwrote it
    # with the uid pattern after the first matching script.
    view_re = re.compile(r'FM\.view\((.*)\)')

    user_ids = []
    for script in BeautifulSoup(html, 'html.parser').find_all('script'):
        text = script.string
        # ``.string`` is None for tags without a single text child.
        if text is None:
            continue
        payload = view_re.search(text)
        if payload and 'pl.content.followTab.index' in text:
            fragment = json.loads(payload.group(1)).get('html', '')
            user_ids.extend(_numeric_uids_in_fragment(fragment))

    save_relations([UserRelation(uid, found_id, type) for found_id in user_ids])
    return user_ids


def _numeric_uids_in_fragment(fragment):
    """Return the digit-only ``uid=...&`` values of the follow items in *fragment*."""
    follow_box = BeautifulSoup(fragment, 'html.parser').find(
        attrs={'class': 'follow_box'})
    # A fragment without a follow box contributes no ids instead of raising.
    if follow_box is None:
        return []

    uid_re = re.compile(r'uid=(.*?)&')
    found = []
    for item in follow_box.find_all(attrs={'class': 'follow_item'}):
        hit = uid_re.search(str(item))
        # filter invalid ids
        if hit and hit.group(1).isdigit():
            found.append(hit.group(1))
    return found