Exemplo n.º 1
0
    def test_parse_user_info(self):
        """
        Check user fields parsed from the pages served by TEST_SERVER.
        """
        from page_parse.user import person, public
        from page_get.user import get_user_detail

        def fetch(page_name):
            # Download one page and force UTF-8 decoding of its body.
            response = requests.get(TEST_SERVER + page_name)
            response.encoding = 'utf-8'
            return response.text

        # writer page: verify type and description
        writer_html = fetch('writer.html')
        writer = person.get_detail(writer_html)
        writer.verify_type = public.get_verifytype(writer_html)
        self.assertEqual(writer.verify_type, 1)
        self.assertEqual(writer.description, '韩寒')

        # person page: follow count
        person_html = fetch('person.html')
        normal_user = get_user_detail('222333312', person_html)
        self.assertEqual(normal_user.follows_num, 539)

        # excp page: birthday
        excp_html = fetch('excp.html')
        excp_user = get_user_detail('1854706423', excp_html)
        self.assertEqual(excp_user.birthday, '1988年2月21日')
Exemplo n.º 2
0
    def test_parse_user_info(self):
        """
        Download three pages from TEST_SERVER and assert on the parsed user fields.
        """
        from page_parse.user import person, public
        from page_get.user import get_user_detail

        def load_page(name):
            # Request the page and return its text decoded as UTF-8.
            reply = requests.get(TEST_SERVER + name)
            reply.encoding = 'utf-8'
            return reply.text

        page = load_page('writer.html')
        parsed = person.get_detail(page)
        parsed.verify_type = public.get_verifytype(page)
        self.assertEqual(parsed.verify_type, 1)
        self.assertEqual(parsed.description, '韩寒')

        page = load_page('person.html')
        parsed = get_user_detail('222333312', page)
        self.assertEqual(parsed.follows_num, 539)

        page = load_page('excp.html')
        parsed = get_user_detail('1854706423', page)
        self.assertEqual(parsed.birthday, '1988年2月21日')
Exemplo n.º 3
0
def get_url_from_web(user_id):
    """
    Fetch, parse and store the info of one user.

    The page is first requested under domain 100505. The domain read from
    that page then selects the parser:
    - 103505 / 100306: the page is requested again under that domain and
      parsed with get_user_detail;
    - 100505: the page already fetched is parsed with get_user_detail, and
      the same-follow page is requested when get_samefollow_uid() is
      non-blank;
    - anything else: the page is parsed with get_enterprise_detail.

    :param user_id: user id
    :return: the stored user entity, or None when user_id is falsy, the page
        is a 404, no user could be parsed, or the parsed name is empty
    """
    if not user_id:
        return None

    html = get_page(BASE_URL.format('100505', user_id), auth_level=1)
    if is_404(html):
        return None

    domain = public.get_userdomain(html)

    if domain in ('103505', '100306'):
        # writers (special users): re-request under their own domain
        html = get_page(BASE_URL.format(domain, user_id))
        user = get_user_detail(user_id, html)
    elif domain == '100505':
        # normal users
        user = get_user_detail(user_id, html)
        raw_uids = get_samefollow_uid()
        if raw_uids.strip():
            uid_list = raw_uids.split(',')
            fan_html = get_page(SAMEFOLLOW_URL.format(user_id), auth_level=2)
            person.get_isFan(fan_html, uid_list, user_id)
    else:
        # enterprise or service
        user = get_enterprise_detail(user_id, html)

    if user is None:
        return None

    # verify_info depends on verify_type, so the order below matters
    user.name = public.get_username(html)
    user.head_img = public.get_headimg(html)
    user.verify_type = public.get_verifytype(html)
    user.verify_info = public.get_verifyreason(html, user.verify_type)
    user.level = public.get_level(html)

    if not user.name:
        return None

    UserOper.add_one(user)
    storage.info('Has stored user {id} info successfully'.format(id=user_id))
    return user
Exemplo n.º 4
0
def get_url_from_web(user_id):
    """
    Crawl one user's page, build the user entity and store it.

    A first request is made under domain 100505; the domain extracted from
    the response decides what happens next:
    - 103505 or 100306: request again under that domain, parse with
      get_user_detail;
    - 100505: parse the first response with get_user_detail and, if
      get_samefollow_uid() returns a non-blank string, also request the
      same-follow page;
    - otherwise: parse the first response with get_enterprise_detail.

    :param user_id: user id
    :return: user entity after it was stored; None if user_id is falsy, the
        page is a 404, parsing yielded None, or the user name is empty
    """
    if not user_id:
        return None

    first_url = BASE_URL.format('100505', user_id)
    html = get_page(first_url, auth_level=1)
    if is_404(html):
        return None

    domain = public.get_userdomain(html)
    is_writer = domain == '103505' or domain == '100306'

    if is_writer:
        # writers (special users)
        writer_url = BASE_URL.format(domain, user_id)
        html = get_page(writer_url)
        user = get_user_detail(user_id, html)
    elif domain == '100505':
        # normal users
        user = get_user_detail(user_id, html)
        configured_uids = get_samefollow_uid()
        if configured_uids.strip() != '':
            uids = configured_uids.split(',')
            samefollow_url = SAMEFOLLOW_URL.format(user_id)
            samefollow_html = get_page(samefollow_url, auth_level=2)
            person.get_isFan(samefollow_html, uids, user_id)
    else:
        # enterprise or service
        user = get_enterprise_detail(user_id, html)

    if user is None:
        return None

    user.name = public.get_username(html)
    user.head_img = public.get_headimg(html)
    user.verify_type = public.get_verifytype(html)
    # the verify reason lookup takes the verify type computed just above
    user.verify_info = public.get_verifyreason(html, user.verify_type)
    user.level = public.get_level(html)

    if user.name:
        UserOper.add_one(user)
        storage.info('Has stored user {id} info successfully'.format(id=user_id))
        return user
    return None
Exemplo n.º 5
0
 def test_parse_user_info(self):
     """
     Check user fields parsed from the local HTML fixtures under ./tests.
     """
     from page_parse.user import person, public
     from page_get.user import get_user_detail
     # The fixtures contain Chinese text; read them explicitly as UTF-8 so the
     # test does not depend on the platform's default locale encoding.
     with open('./tests/writer.html', encoding='utf-8') as f:
         cont = f.read()
     user = person.get_detail(cont)
     user.verify_type = public.get_verifytype(cont)
     self.assertEqual(user.verify_type, 1)
     self.assertEqual(user.description, '韩寒')
     with open('./tests/person.html', encoding='utf-8') as f:
         cont = f.read()
     user = get_user_detail('222333312', cont)
     self.assertEqual(user.follows_num, 539)
Exemplo n.º 6
0
def get_url_from_web(user_id):
    """
    Fetch one user's page, parse it into a user entity and save it.

    The page is requested under domain 100505 first. Depending on the domain
    read from that page:
    - 103505 / 100306: the page is requested once more under that domain and
      parsed with get_user_detail;
    - 100505: the page already fetched is parsed with get_user_detail;
    - anything else: the page is parsed with get_enterprise_detail.

    :param user_id: user id
    :return: the saved user entity, or None when user_id is falsy, the page
        is a 404, no user could be parsed, or the parsed name is empty
    """
    if not user_id:
        return None

    html = get_page(base_url.format('100505', user_id))
    if is_404(html):
        return None

    domain = public.get_userdomain(html)

    if domain in ('103505', '100306'):
        # writers (special users)
        html = get_page(base_url.format(domain, user_id))
        user = get_user_detail(user_id, html)
    elif domain == '100505':
        # normal users
        user = get_user_detail(user_id, html)
    else:
        # enterprise or service
        user = get_enterprise_detail(user_id, html)

    if user is None:
        return None

    # verify_info depends on verify_type, so the order below matters
    user.name = public.get_username(html)
    user.head_img = public.get_headimg(html)
    user.verify_type = public.get_verifytype(html)
    user.verify_info = public.get_verifyreason(html, user.verify_type)
    user.level = public.get_level(html)

    if not user.name:
        return None

    save_user(user)
    storage.info('has stored user {id} info successfully'.format(id=user_id))
    return user
Exemplo n.º 7
0
def get_url_from_web(user_id, domain):
    """
    Fetch a user's page under the given domain, parse it and save the user.

    Pages for domains 103505, 100306 and 100505 are parsed with
    get_user_detail; pages for any other domain are parsed with
    get_enterprise_detail.

    :param user_id: user id
    :param domain: domain used to build the page url and to pick the parser
    :return: the saved user entity, or None when user_id is falsy, the page
        is a 404, no user could be parsed, or the parsed name is empty
    """
    if not user_id:
        return None

    html = get_page(base_url.format(domain, user_id))
    if is_404(html):
        return None

    if domain in ('103505', '100306', '100505'):
        # writers (special users) and normal users share the same parser
        user = get_user_detail(user_id, html)
    else:
        # enterprise or service
        user = get_enterprise_detail(user_id, html)

    if user is None:
        return None

    # verify_info depends on verify_type, so the order below matters
    user.name = public.get_username(html)
    user.head_img = public.get_headimg(html)
    user.verify_type = public.get_verifytype(html)
    user.verify_info = public.get_verifyreason(html, user.verify_type)
    user.level = public.get_level(html)

    if not user.name:
        return None

    save_user(user)
    storage.info(
        'has stored user {id} info successfully'.format(id=user_id))
    return user
Exemplo n.º 8
0
def get_url_from_web(user_id):
    """
    Get a user's profile by user id.

    If the user's domain is 100505 the detailed profile is returned directly.
    If it is 103505 or 100306 one more request is needed, because the
    base_url form only lands on the user's home page rather than the detail
    page. For enterprise and service accounts, base_url also redirects to the
    home page; their detail pages are of little value, so they are not
    requested.

    :param user_id: user id
    :return: user entity, or None when user_id is falsy, the page is a 404,
        or no user could be parsed
    """
    if not user_id:
        return None

    html = get_page(base_url.format('100505', user_id))
    if is_404(html):
        return None

    domain = public.get_userdomain(html)

    if domain in ('103505', '100306'):
        # writers
        html = get_page(base_url.format(domain, user_id))
        user = get_user_detail(user_id, html)
    elif domain == '100505':
        # normal users
        user = get_user_detail(user_id, html)
    else:
        # everything else is treated as an enterprise account
        user = get_enterprise_detail(user_id, html)

    if user is None:
        return None

    # verify_info depends on verify_type, so the order below matters
    user.name = public.get_username(html)
    user.head_img = public.get_headimg(html)
    user.verify_type = public.get_verifytype(html)
    user.verify_info = public.get_verifyreason(html, user.verify_type)
    user.level = public.get_level(html)

    # save the user info to the database
    save_user(user)
    storage.info('已经成功保存ID为{id}的用户信息'.format(id=user_id))

    return user
Exemplo n.º 9
0
 def test_parse_user_info(self):
     """
     Test the page parsing functions against the local HTML fixtures.
     """
     from page_parse.user import person, public
     from page_get.user import get_user_detail

     def read_fixture(path):
         # Read a whole fixture file as UTF-8 text.
         with open(path, encoding='utf-8') as fixture:
             return fixture.read()

     # writer page: verify type and description
     writer_html = read_fixture('./tests/writer.html')
     writer = person.get_detail(writer_html)
     writer.verify_type = public.get_verifytype(writer_html)
     self.assertEqual(writer.verify_type, 1)
     self.assertEqual(writer.description, '韩寒')

     # person page: follow count
     person_html = read_fixture('./tests/person.html')
     normal_user = get_user_detail('222333312', person_html)
     self.assertEqual(normal_user.follows_num, 539)

     # excp page: birthday
     excp_html = read_fixture('./tests/excp.html')
     excp_user = get_user_detail('1854706423', excp_html)
     self.assertEqual(excp_user.birthday, '1988年2月21日')