示例#1
0
    def __parse_account_info(self, data, req_url):

        __biz = tools.get_param(req_url, "__biz")

        regex = 'id="nickname">(.*?)</strong>'
        account = tools.get_info(data, regex, fetch_one=True).strip()

        regex = 'profile_avatar">.*?<img src="(.*?)"'
        head_url = tools.get_info(data, regex, fetch_one=True)

        regex = 'class="profile_desc">(.*?)</p>'
        summary = tools.get_info(data, regex, fetch_one=True).strip()

        # 认证信息(关注的账号直接点击查看历史消息,无认证信息)
        regex = '<i class="icon_verify success">.*?</i>(.*?)</span>'
        verify = tools.get_info(data, regex, fetch_one=True)
        verify = verify.strip() if verify else ""

        # 二维码
        regex = 'var username = "" \|\| "(.*?)";'  # ||  需要转译
        qr_code = tools.get_info(data, regex, fetch_one=True)
        qr_code = "http://open.weixin.qq.com/qr/code?username="******"__biz": __biz,
            "account": account,
            "head_url": head_url,
            "summary": summary,
            "qr_code": qr_code,
            "verify": verify,
            "spider_time": tools.get_current_date(),
        }

        if account_data:
            data_pipeline.save_account(account_data)
示例#2
0
    def __parse_account_info(self, data, req_url):
        '''
        @summary:
        ---------
        @param data:
        ---------
        @result:
        '''
        __biz = tools.get_param(req_url, '__biz')

        regex = 'id="nickname">(.*?)</strong>'
        account = tools.get_info(data, regex, fetch_one=True).strip()

        regex = 'profile_avatar">.*?<img src="(.*?)"'
        head_url = tools.get_info(data, regex, fetch_one=True)

        regex = 'class="profile_desc">(.*?)</p>'
        summary = tools.get_info(data, regex, fetch_one=True).strip()

        # 认证信息(关注的账号直接点击查看历史消息,无认证信息)
        regex = '<i class="icon_verify success">.*?</i>(.*?)</span>'
        verify = tools.get_info(data, regex, fetch_one=True)
        verify = verify.strip() if verify else ''

        # 二维码
        regex = 'var username = "" \|\| "(.*?)";'  # ||  需要转译
        qr_code = tools.get_info(data, regex, fetch_one=True)
        qr_code = 'http://open.weixin.qq.com/qr/code?username='******'__biz': __biz,
            'account': account,
            'head_url': head_url,
            'summary': summary,
            'qr_code': qr_code,
            'verify': verify,
            'spider_time': tools.get_current_date()
        }

        if account_data:
            data_pipeline.save_account(account_data)