示例#1
0
def get_fans_or_follows(html, uid, type):
    """
    Get fans or follows and store their relationships.

    Scans every ``<script>`` tag for an ``FM.view(...)`` call whose text
    mentions ``pl.content.followTab.index``, decodes the JSON argument of
    that call, parses its ``html`` field, and collects the numeric ids
    found in the ``uid=...&`` fragment of each ``follow_item`` element
    inside the ``follow_box`` container. One ``UserRelation`` is built per
    id and the whole batch is handed to ``save_relations``.

    :param html: current page source
    :param uid: current user id
    :param type: type of relations, 1 stands for fans, 2 stands for follows
    :return: list of fan or follow ids (as strings) found on the page
    """
    # Nothing to parse (empty string or None): return early without saving.
    if not html:
        return []

    # Two separate patterns: the original rebound a single ``pattern`` name
    # inside the loop, so later scripts were searched with the uid regex.
    view_pattern = re.compile(r'FM.view\((.*)\)')
    uid_pattern = re.compile(r'uid=(.*?)&')

    page = BeautifulSoup(html, "html.parser")

    user_ids = []
    relations = []
    for script in page.find_all('script'):
        text = script.string
        # ``.string`` is None for script tags without a single text child
        # (e.g. ``<script src=...></script>``); skip those instead of
        # passing None to the regex engine.
        if not text or 'pl.content.followTab.index' not in text:
            continue

        view_match = view_pattern.search(text)
        if not view_match:
            continue

        cont = json.loads(view_match.group(1)).get('html', '')
        follow_box = BeautifulSoup(cont, 'html.parser').find(
            attrs={'class': 'follow_box'})
        # The payload may carry no follow list at all.
        if follow_box is None:
            continue

        for follow in follow_box.find_all(attrs={'class': 'follow_item'}):
            uid_match = uid_pattern.search(str(follow))
            if not uid_match:
                continue
            r = uid_match.group(1)
            # filter invalid ids
            if r.isdigit():
                user_ids.append(r)
                relations.append(UserRelation(uid, r, type))

    save_relations(relations)
    return user_ids
示例#2
0
def get_fans_or_follows(html, uid, type):
    """
    Get fans or follows and store their relationships.

    Scans every ``<script>`` tag for an ``FM.view(...)`` call whose text
    mentions ``pl.content.followTab.index``, decodes the JSON argument of
    that call, parses its ``html`` field, and collects the numeric ids
    found in the ``uid=...&`` fragment of each ``follow_item`` element
    inside the ``follow_box`` container. One ``UserRelation`` is built per
    id and the whole batch is handed to ``save_relations``.

    :param html: current page source
    :param uid: current user id
    :param type: type of relations, 1 stands for fans, 2 stands for follows
    :return: list of fan or follow ids (as strings) found on the page
    """
    # Nothing to parse (empty string or None): return early without saving.
    if not html:
        return []

    # Two separate patterns: the original rebound a single ``pattern`` name
    # inside the loop, so later scripts were searched with the uid regex.
    view_pattern = re.compile(r'FM.view\((.*)\)')
    uid_pattern = re.compile(r'uid=(.*?)&')

    page = BeautifulSoup(html, "html.parser")

    user_ids = []
    relations = []
    for script in page.find_all('script'):
        text = script.string
        # ``.string`` is None for script tags without a single text child
        # (e.g. ``<script src=...></script>``); skip those instead of
        # passing None to the regex engine.
        if not text or 'pl.content.followTab.index' not in text:
            continue

        view_match = view_pattern.search(text)
        if not view_match:
            continue

        cont = json.loads(view_match.group(1)).get('html', '')
        follow_box = BeautifulSoup(cont, 'html.parser').find(
            attrs={'class': 'follow_box'})
        # The payload may carry no follow list at all.
        if follow_box is None:
            continue

        for follow in follow_box.find_all(attrs={'class': 'follow_item'}):
            uid_match = uid_pattern.search(str(follow))
            if not uid_match:
                continue
            r = uid_match.group(1)
            # filter invalid ids
            if r.isdigit():
                user_ids.append(r)
                relations.append(UserRelation(uid, r, type))

    save_relations(relations)
    return user_ids