Пример #1
0
    def _prepare(self, data=None):
        """Run the base preparation, then build the driver/requests coordinator.

        When the task state holds a serialized driver under ``'driver_data'``
        it is deserialized with ``dill``, removed from the state and handed to
        the coordinator; otherwise the coordinator receives ``None``.
        """
        super()._prepare(data)

        # NOTE(review): dill.loads can execute arbitrary code; this presumes
        # self.state only ever holds trusted data -- confirm.
        restored_driver = None
        if 'driver_data' in self.state:
            restored_driver = dill.loads(self.state['driver_data'])
            del self.state['driver_data']

        coordinator_kwargs = dict(
            d=restored_driver,
            create_session=self._create_session,
            create_driver=self._create_driver,
        )
        self.dsc = DriverRequestsCoordinator(**coordinator_kwargs)
Пример #2
0
    def _prepare(self, data=None):
        """Restore state and initialise the result containers.

        Calls the base implementation, resets ``result_data['baseInfo']`` to
        an empty dict and creates the driver/requests coordinator.
        """
        super()._prepare(data)

        # Start from an empty base-info section.
        self.result_data['baseInfo'] = {}

        # TODO: restore from self.state
        # TODO: restore from self.result

        coordinator = DriverRequestsCoordinator(
            s=self.s,
            create_driver=self._create_driver,
        )
        self.dsc = coordinator
Пример #3
0
 def _prepare(self, data=None):
     """Run the base preparation and create the driver/requests coordinator."""
     super()._prepare(data)

     # The coordinator shares the task's session and builds drivers on demand
     # through self._create_driver.
     coordinator = DriverRequestsCoordinator(
         s=self.s,
         create_driver=self._create_driver,
     )
     self.dsc = coordinator
Пример #4
0
class Task(AbsFetchTask):
    """Sample fetch task: log in to www.bjgjj.gov.cn with a web driver, then fetch.

    The login unit fills the site's login form through the driver obtained
    from ``self.dsc``; the fetch unit parses the page saved after login and
    downloads one linked page through ``self.s``.
    """

    task_info = {
        'task_name': '测试selenium登录[fast]',
        'help': '测试selenium登录[fast]'
    }

    def _get_common_headers(self):
        """Return the common request headers (only the User-Agent)."""
        return {'User-Agent': USER_AGENT}

    def _prepare(self, data=None):
        """Run the base preparation and create the driver/requests coordinator."""
        super()._prepare(data)

        self.dsc = DriverRequestsCoordinator(s=self.s,
                                             create_driver=self._create_driver)

    def _create_driver(self):
        """Create a web driver and open the site's home page with it.

        The captcha image pattern is passed to ``new_driver`` as
        ``js_re_ignore``.
        """
        driver = new_driver(user_agent=USER_AGENT,
                            js_re_ignore='/PicCheckCode1/g')

        driver.get('http://www.bjgjj.gov.cn/')

        return driver

    def _setup_task_units(self):
        """Register the login unit, then the fetch unit together with the login unit."""
        self._add_unit(self._unit_login)
        self._add_unit(self._unit_fetch, self._unit_login)

    def _query(self, params: dict):
        """Answer a query; ``t == 'vc'`` returns a fresh captcha, anything else None."""
        if params.get('t') == 'vc':
            return self._new_vc()
        return None

    def _new_vc(self):
        """Download a captcha image through the requests session.

        Returns:
            dict with ``cls``, the raw image ``content`` and its
            ``content_type`` (``None`` when the header is absent).
        """
        # A millisecond timestamp is appended to the captcha URL.
        vc_url = VC_IMAGE_URL + str(int(time.time() * 1000))
        resp = self.s.get(vc_url)
        return dict(cls='data:image',
                    content=resp.content,
                    content_type=resp.headers.get('Content-Type'))

    def _params_handler(self, params: dict):
        """Fill missing login fields in ``params`` from ``self.prepared_meta``.

        Nothing is filled when the task is starting and ``params`` is empty.
        Missing fields that are also absent from the meta are set to ``None``.
        """
        if self.is_start and not params:
            return params

        meta = self.prepared_meta
        for key in ('身份证号', '查询密码'):
            if key not in params:
                params[key] = meta.get(key)
        return params

    def _param_requirements_handler(self, param_requirements, details):
        """Drop requirements whose value is already present in ``self.prepared_meta``.

        ``details`` is currently unused.
        """
        meta = self.prepared_meta
        res = []
        for pr in param_requirements:
            # TODO: inspect `details` as well
            if pr['key'] in ('身份证号', '查询密码') and pr['key'] in meta:
                continue
            res.append(pr)
        return res

    def _check_login_params(self, params):
        """Validate that all login fields are present.

        Raises:
            AssertionError: when ``params`` is None or a field is missing.
                Raised explicitly (not via ``assert``) so the check also
                runs under ``python -O``.
        """
        if params is None:
            raise AssertionError('缺少参数')
        required = (
            ('身份证号', '缺少身份证号'),
            ('查询密码', '缺少查询密码'),
            ('vc', '缺少验证码'),
        )
        for key, message in required:
            if key not in params:
                raise AssertionError(message)

        # TODO: validate the ID number
        # TODO: validate the password
        # TODO: validate the captcha

    def _unit_login(self, params=None):
        """Log in with ``params``; ask for the login fields when that is not possible.

        On success the result key and meta are updated and the method returns.

        Raises:
            AskForParamsError: when ``params`` is empty/None or the login
                attempt was rejected; carries the error message, if any.
        """
        err_msg = None
        if params:
            try:
                self._check_login_params(params)
                username = params['身份证号']
                password = params['查询密码']
                vc = params['vc']

                self._do_login(username, password, vc)
                # Login succeeded.
                self.result_key = username
                self.result_meta.update({'身份证号': username, '查询密码': password})
                return
            except (AssertionError, InvalidParamsError) as e:
                err_msg = str(e)

        # `params` defaults to None; use an empty mapping so pre-filling the
        # form below cannot fail with AttributeError.
        previous = params or {}
        # NOTE(review): the '查询密码' field is labelled '个人编号' -- confirm
        # this label is intended.
        raise AskForParamsError([
            dict(key='身份证号',
                 name='身份证号',
                 cls='input',
                 value=previous.get('身份证号', '')),
            dict(key='查询密码',
                 name='个人编号',
                 cls='input',
                 value=previous.get('查询密码', '')),
            dict(key='vc', name='验证码', cls='data:image', query={'t': 'vc'}),
        ], err_msg)

    def _do_login(self, username, password, vc):
        """Simulate the login through the web driver.

        Raises:
            InvalidParamsError: when the browser does not end up on the
                post-login page; the message is the alert text if one can
                be read, otherwise a generic message.
        """
        with self.dsc.get_driver_ctx() as driver:
            # Open the login page.
            driver.get(LOGIN_PAGE_URL)
            # Wait until the "lk" element satisfies value_is_number.
            WebDriverWait(driver, 10).until(
                value_is_number((By.XPATH, '//*[@id="lk"]')))

            for entry in driver.get_log('browser'):
                print(entry)

            # Switch to the "login by ID number" tab.
            driver.find_element_by_xpath(
                '/html/body/table[2]/tbody/tr[3]/td/table/tbody/tr/td/div/form/div[1]/ul/li[3]/a'
            ).click()

            username_input = driver.find_element_by_xpath('//*[@id="bh1"]')
            password_input = driver.find_element_by_xpath('//*[@id="mm1"]')
            vc_input = driver.find_element_by_xpath(
                '//*[@id="login_tab_2"]/div/div[3]/input')
            submit_btn = driver.find_element_by_xpath(
                '//*[@id="login_tab_2"]/div/div[4]/input[1]')

            # User name
            username_input.clear()
            username_input.send_keys(username)

            # Password and captcha
            password_input.clear()
            password_input.send_keys(password)
            vc_input.clear()
            vc_input.send_keys(vc)
            # Submit
            submit_btn.click()

            if driver.current_url != 'http://www.bjgjj.gov.cn/wsyw/wscx/gjjcx-choice.jsp':
                # FIXME: debug output
                for entry in driver.get_log('browser'):
                    print(entry)
                print(driver.get_cookies())

                err_msg = '登录失败,请检查输入'
                try:
                    # Best effort: reading the alert fails when none is
                    # open; the generic message is used in that case.
                    err_msg = driver.switch_to.alert.text
                except Exception:
                    pass
                raise InvalidParamsError(err_msg)

            # Login succeeded: keep the page for the fetch unit to parse.
            self.g.login_page_html = driver.find_element_by_tag_name(
                'html').get_attribute('innerHTML')
            self.g.current_url = driver.current_url

    def _unit_fetch(self):
        """Follow the second link of the saved post-login page and store the result.

        Raises:
            PreconditionNotSatisfiedError: when an InvalidConditionError is
                raised while fetching.
        """
        try:
            # TODO:
            soup = bs4.BeautifulSoup(self.g.login_page_html, 'html.parser')
            a = soup.select('a')[1]
            # The target is the first double-quoted string in the onclick handler.
            link = a.attrs['onclick'].split('"')[1]
            link = parse.urljoin(self.g.current_url, link)

            resp = self.s.get(link)
            self.result_data.update({
                'xxx': a.text,
                'link': link,
                'content': html.unescape(resp.text)
            })
            self.result_identity.update(
                {'task_name': self.task_info['task_name']})
        except InvalidConditionError as e:
            raise PreconditionNotSatisfiedError(e) from e
Пример #5
0
 def _prepare(self, data=None):
     """Run the base preparation, pick a proxy and set up result/coordinator."""
     super()._prepare(data)

     # Proxy address obtained once during preparation.
     proxy_address = get_proxy_ip()
     self.proxy = proxy_address

     # Start from an empty base-info section.
     self.result_data['baseInfo'] = {}

     coordinator = DriverRequestsCoordinator(
         s=self.s,
         create_driver=self._create_driver,
     )
     self.dsc = coordinator
Пример #6
0
class Task(AbsFetchTask):
    """Fetch task for the Shanghai social-insurance site (www.12333sh.gov.cn).

    The login unit fills the login form through a proxied web driver; the
    fetch unit downloads the payment page through ``self.s`` and stores the
    per-month records and a base-info summary in the task result.
    """

    task_info = dict(city_name="上海",
                     help="""<li>用户名:为参保人身份证号</li>
        <li>密码:一般为6位数字;</li>
        <li>首次申请密码或遗忘网上登录密码,本人需携带有效身份证件至就近接到社区事务受理中心或就近社保分中心自助机申请办理。</li>
        """,
                     developers=[{
                         'name': '程菲菲',
                         'email': '*****@*****.**'
                     }])

    def _get_common_headers(self):
        """Return the common request headers."""
        return {
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36',
            'Accept-Encoding': 'gzip, deflate',
            'Host': 'www.12333sh.gov.cn',
        }

    def _prepare(self, data=None):
        """Run the base preparation, pick a proxy and set up result/coordinator."""
        super()._prepare(data)
        self.proxy = get_proxy_ip()
        self.result_data['baseInfo'] = {}
        self.dsc = DriverRequestsCoordinator(s=self.s,
                                             create_driver=self._create_driver)

    def _create_driver(self):
        """Create a proxied web driver and open a page on the target host."""
        # Raw string: the pattern contains backslashes. The original
        # 'sbsjb\wzb' could never match the 'sbsjb/wzb/' path used by every
        # URL of this site, so the slash is escaped here.
        driver = new_driver(user_agent=USER_AGENT,
                            js_re_ignore=r'/sbsjb\/wzb\/Bmblist12.jpg/g')

        # Work on a copy: DesiredCapabilities.PHANTOMJS is a shared
        # class-level dict and must not be mutated in place.
        capabilities = webdriver.DesiredCapabilities.PHANTOMJS.copy()
        proxy = webdriver.Proxy()
        proxy.proxy_type = ProxyType.MANUAL
        proxy.http_proxy = get_proxy_ip()
        proxy.add_to_capabilities(capabilities)
        driver.start_session(capabilities)

        # driver.get(url) has been seen to hang forever without an error;
        # a page-load timeout avoids that.
        driver.set_page_load_timeout(30)
        # 30-second script timeout.
        driver.set_script_timeout(30)

        # Visit any address on the same host so cookies can be set later.
        driver.get('http://www.12333sh.gov.cn/xxxx')
        return driver

    def _query(self, params: dict):
        """Answer a task query; ``t == 'vc'`` returns a fresh captcha, else None."""
        if params.get('t') == 'vc':
            return self._new_vc()
        return None

    def _new_vc(self):
        """Download a captcha image through the proxied requests session.

        Returns:
            dict with the raw image ``content`` and its ``content_type``.
        """
        proxies = {"http": "http://" + self.proxy}
        # The login page is requested first; its response is not used.
        self.s.get("http://www.12333sh.gov.cn/sbsjb/wzb/229.jsp",
                   timeout=10,
                   proxies=proxies)
        resp = self.s.get(VC_URL, timeout=10, proxies=proxies)
        return dict(content=resp.content,
                    content_type=resp.headers['Content-Type'])

    def _setup_task_units(self):
        """Register the login unit, then the fetch unit together with the login unit."""
        self._add_unit(self._unit_login)
        self._add_unit(self._unit_fetch, self._unit_login)

    def _check_login_params(self, params):
        """Validate the user name and password in ``params``.

        Raises:
            AssertionError: when ``params`` is None or a field is missing.
                Raised explicitly (not via ``assert``) so the check also
                runs under ``python -O``.
            InvalidParamsError: when a value is empty or too short.
        """
        if params is None:
            raise AssertionError('缺少参数')
        if '用户名' not in params:
            raise AssertionError('缺少用户名')
        if '密码' not in params:
            raise AssertionError('缺少密码')

        # _params_handler may have stored None for a missing value; treat it
        # as empty instead of failing in len().
        username = params['用户名'] or ''
        password = params['密码'] or ''

        if len(username) == 0:
            raise InvalidParamsError('用户名为空,请输入用户名')
        elif len(username) < 15:
            raise InvalidParamsError('用户名不正确,请重新输入')

        if len(password) == 0:
            raise InvalidParamsError('密码为空,请输入密码!')
        elif len(password) < 6:
            raise InvalidParamsError('密码不正确,请重新输入!')

    def _params_handler(self, params: dict):
        """Fill missing login fields in ``params`` from ``self.prepared_meta``.

        Nothing is filled when the task is starting and ``params`` is empty.
        """
        if self.is_start and not params:
            return params

        meta = self.prepared_meta
        for key in ('用户名', '密码'):
            if key not in params:
                params[key] = meta.get(key)
        return params

    def _param_requirements_handler(self, param_requirements, details):
        """Drop requirements whose value is already present in ``self.prepared_meta``.

        ``details`` is currently unused.
        """
        meta = self.prepared_meta
        res = []
        for pr in param_requirements:
            # TODO: inspect `details` as well
            if pr['key'] in ('用户名', '密码') and pr['key'] in meta:
                continue
            res.append(pr)
        return res

    def _unit_login(self, params: dict):
        """Log in with ``params``; ask for the login fields when that is not possible.

        Raises:
            AskForParamsError: when ``params`` is empty/None or the login
                attempt was rejected; carries the error message, if any.
        """
        err_msg = None
        if params:
            try:
                self._check_login_params(params)

                id_num = params.get("用户名")
                account_pass = params.get("密码")
                vc = params.get("vc")

                self._do_login(id_num, account_pass, vc)

                # Set the result key and remember the credentials in meta.
                self.result_key = params.get('用户名')
                self.result_meta['用户名'] = params.get('用户名')
                self.result_meta['密码'] = params.get('密码')
                return
            except (AssertionError, InvalidParamsError) as e:
                err_msg = str(e)

        # `params` may be None; use an empty mapping so pre-filling the form
        # below cannot fail with AttributeError.
        previous = params or {}
        raise AskForParamsError([
            dict(key='用户名',
                 name='用户名',
                 cls='input',
                 placeholder='请输入身份证号',
                 value=previous.get('用户名', '')),
            dict(key='密码',
                 name='密码',
                 cls='input:password',
                 value=previous.get('密码', '')),
            dict(key='vc', name='验证码', cls='data:image', query={'t': 'vc'}),
        ], err_msg)

    def _do_login(self, username, password, vc):
        """Simulate the login through the web driver.

        Raises:
            InvalidParamsError: when the browser does not end up on the
                expected post-login URL.
        """
        with self.dsc.get_driver_ctx() as driver:
            # Open the login page.
            driver.get(LOGIN_URL)
            time.sleep(10)
            driver.get("http://www.12333sh.gov.cn/sbsjb/wzb/229.jsp")

            username_input = driver.find_element_by_xpath('//*[@id="userid"]')
            password_input = driver.find_element_by_xpath('//*[@id="userpw"]')
            vc_input = driver.find_element_by_xpath('//*[@id="userjym"]')

            # User name
            username_input.clear()
            username_input.send_keys(username)

            # Password
            password_input.clear()
            password_input.send_keys(password)

            # Captcha
            vc_input.clear()
            vc_input.send_keys(vc)

            # Submit through the page's own checkForm() function.
            driver.execute_script('checkForm()')
            time.sleep(10)

            if driver.current_url != "http://www.12333sh.gov.cn/sbsjb/wzb/helpinfo.jsp?id=0":
                raise InvalidParamsError('登录失败,请重新登录!')

    def _collect_payments(self, result_key, details, dt, personal_tag):
        """Store one insurance type's records under ``result_data[result_key]``.

        Records are grouped as ``data[year][month] -> list``; the personal
        payment is read from the ``personal_tag`` child of each detail node.
        """
        self.result_data[result_key] = {"data": {}}
        bucket = self.result_data[result_key]["data"]
        for detail in details:
            period = detail.find('jsjs1').text
            # The period's first four characters are the year, the next two the month.
            month_records = bucket.setdefault(period[0:4], {}).setdefault(
                period[4:6], [])
            month_records.append({
                '缴费时间': period,
                '缴费单位': self._match_commapy(period, dt),
                '缴费基数': detail.find('jsjs3').text,
                '缴费类型': '',
                '公司缴费': '',
                '个人缴费': detail.find(personal_tag).text,
            })

    def _unit_fetch(self):
        """Download the payment page and fill the task result from it.

        Raises:
            PreconditionNotSatisfiedError: when an InvalidConditionError is
                raised while fetching.
        """
        try:
            # TODO: raise PermissionError when the session is not logged in

            resp = self.s.get(
                "http://www.12333sh.gov.cn/sbsjb/wzb/sbsjbcx12.jsp",
                proxies={"http": "http://" + self.proxy},
                timeout=30)
            soup = BeautifulSoup(resp.content, 'html.parser')
            details = soup.find('xml', {
                'id': 'dataisxxb_sum2'
            }).findAll("jsjs")
            summary = soup.find('xml', {'id': 'dataisxxb_sum4'})

            # Payment duration: the summary's value when present, otherwise
            # the number of detail rows.
            months_node = summary.find('jsjs2')
            if months_node is not None:
                moneyTime = months_node.text
            else:
                moneyTime = len(details)

            dt = soup.findAll("jfdwinfo")

            # Pension (personal payment in jsjs4)
            self._collect_payments('old_age', details, dt, 'jsjs4')
            personmoney = 0.00
            for detail in details:
                personmoney += float(detail.find('jsjs4').text)

            # Medical (jsjs6)
            self._collect_payments('medical_care', details, dt, 'jsjs6')

            # Unemployment (jsjs8)
            self._collect_payments('unemployment', details, dt, 'jsjs8')

            # Work injury, maternity and serious illness stay empty.
            self.result_data['injuries'] = {"data": {}}
            self.result_data['maternity'] = {"data": {}}
            self.result_data["serious_illness"] = {"data": {}}

            self.result_identity.update({
                "task_name": "上海",
                "target_name": soup.find('xm').text,
                "target_id": self.result_meta['用户名'],
                "status": ""
            })

            # Accumulated personal pension payment: the summary's value when
            # present, otherwise the sum computed from the detail rows.
            total_node = summary.find('jsjs3')
            if total_node is not None:
                personOldMoney = total_node.text
            else:
                personOldMoney = personmoney

            startTime = ""
            recentTime = ""
            if details:
                startTime = details[0].find('jsjs1').text
                recentTime = details[-1].find('jsjs1').text

            self.result['data']['baseInfo'] = {
                '姓名': soup.find('xm').text,
                '身份证号': self.result_meta['用户名'],
                '更新时间': time.strftime("%Y-%m-%d", time.localtime()),
                '城市名称': '上海市',
                '城市编号': '310100',
                '缴费时长': moneyTime,
                '最近缴费时间': recentTime,
                '开始缴费时间': startTime,
                '个人养老累计缴费': personOldMoney,
                '个人医疗累计缴费': '',
                '账户状态': ''
            }

            return
        except InvalidConditionError as e:
            raise PreconditionNotSatisfiedError(e) from e

    def _match_money(self, dtime1, dtime2, fmoney):
        """Return ``fmoney`` when both times are equal, otherwise an empty string."""
        if dtime1 == dtime2:
            return fmoney
        return ""

    def _match_commapy(self, dtime, dt):
        """Return the employer whose payment period contains ``dtime``.

        Each entry of ``dt`` carries a ``jfsj`` period of the form
        ``start-end`` and a ``jfdw`` employer name; bounds are compared as
        strings. The last matching entry wins; ``""`` when none matches.
        """
        rescom = ""
        if dt is not None:
            for unit in dt:
                bounds = unit.find('jfsj').text.split('-')
                if bounds[0] <= dtime <= bounds[1]:
                    rescom = unit.find('jfdw').text

        return rescom
Пример #7
0
class Task(AbsFetchTask):
    task_info = dict(city_name="广州",
                     help="""
        <li>个人用户第一次忘记密码,需要到各办事窗口办理;在办事窗口补充完整相关信息(如电子邮箱地址)以后,忘记密码功能才能使用。</li>
        <li>由于目前缴费历史的查询量较多,为减轻广州社保系统压力,限制每人每天只能查询5次,敬请谅解!</li>
        """,
                     developers=[{
                         'name': '程菲菲',
                         'email': '*****@*****.**'
                     }])

    def _get_common_headers(self):
        """Return the common request headers (only the User-Agent)."""
        headers = {}
        headers['User-Agent'] = USER_AGENT
        return headers

    def _prepare(self, data=None):
        """Restore state and initialise the result containers.

        Calls the base implementation, resets ``result_data['baseInfo']`` to
        an empty dict and creates the driver/requests coordinator.
        """
        super()._prepare(data)

        # Start from an empty base-info section.
        self.result_data['baseInfo'] = {}

        # TODO: restore from self.state
        # TODO: restore from self.result

        coordinator = DriverRequestsCoordinator(
            s=self.s,
            create_driver=self._create_driver,
        )
        self.dsc = coordinator

    def _create_chrome_driver(self):
        """Create and return a Chrome-type driver with the task's user agent."""
        return new_driver(
            user_agent=USER_AGENT,
            driver_type=DriverType.CHROME,
        )

    def _create_driver(self):
        """Create a web driver, set its timeouts and open a page on the target host.

        Returns:
            The driver created by ``new_driver``.
        """
        # Raw string: '\/' is not a valid Python escape sequence and warns in
        # a normal literal; the resulting pattern text is unchanged.
        driver = new_driver(user_agent=USER_AGENT,
                            js_re_ignore=r'/cas\/captcha.jpg/g')
        # driver.get(url) has been seen to hang forever without an error;
        # a page-load timeout avoids that.
        driver.set_page_load_timeout(20)
        # 20-second script timeout.
        driver.set_script_timeout(20)
        # Visit any address on the same host so cookies can be set later.
        driver.get('http://gzlss.hrssgz.gov.cn/xxxx')
        return driver

    def _query(self, params: dict):
        """任务状态查询"""
        t = params.get('t')
        if t == 'vc':
            return self._new_vc()
            # pass

    def _new_vc(self):
        """Download a captcha image through the requests session.

        Returns:
            dict with the raw image ``content`` and its ``content_type``.
        """
        response = self.s.get(VC_URL)
        captcha = {
            'content': response.content,
            'content_type': response.headers['Content-Type'],
        }
        return captcha

    def _params_handler(self, params: dict):
        if not (self.is_start and not params):
            meta = self.prepared_meta
            if '账号' not in params:
                params['账号'] = meta.get('账号')
            if '密码' not in params:
                params['密码'] = meta.get('密码')
        return params

    def _param_requirements_handler(self, param_requirements, details):
        meta = self.prepared_meta
        res = []
        for pr in param_requirements:
            # TODO: 进一步检查details
            if pr['key'] == '账号' and '账号' in meta:
                continue
            elif pr['key'] == '密码' and '密码' in meta:
                continue
            res.append(pr)
        return res

    def _setup_task_units(self):
        """设置任务执行单元"""
        self._add_unit(self._unit_login)
        self._add_unit(self._unit_fetch, self._unit_login)

    def _check_login_params(self, params):
        assert params is not None, '缺少参数'
        assert '账号' in params, '缺少账号'
        assert '密码' in params, '缺少密码'
        # other check
        账号 = params['账号']
        密码 = params['密码']
        if len(密码) < 4:
            raise InvalidParamsError('账号或密码错误')
        if len(账号) < 15:
            raise InvalidParamsError('账号或密码错误')

    def _loadJs(self):
        """Experimental CAS login over plain HTTP requests.

        Loads the login page and the site's scripts, evaluates
        ``RSAUtils.getKeyPair`` with ``execjs``, posts the login form
        without credentials and always raises with the response body.

        Raises:
            InvalidParamsError: always, carrying the text of the POST response.
        """
        import execjs

        # The value passed to getKeyPair is scraped from the third <script>
        # of the login page (presumably the RSA modulus -- confirm).
        first_page = self.s.get("http://gzlss.hrssgz.gov.cn/cas/login")
        script_text = BeautifulSoup(first_page.content).findAll('script')[2].text
        modulus = script_text.split('=')[3].split(';')[0].replace('"', '')

        jquery_js = self.s.get(
            "http://gzlss.hrssgz.gov.cn/cas/third/jquery-1.5.2.min.js")
        security_js = self.s.get(
            "http://gzlss.hrssgz.gov.cn/cas/third/security.js")
        js_ctx = execjs.compile(security_js.text + jquery_js.text)
        # The key pair is computed but not used yet.
        js_ctx.call("RSAUtils.getKeyPair", '010001', '', modulus)

        # Fetch the login page again to read the hidden "lt" form field.
        second_page = self.s.get("http://gzlss.hrssgz.gov.cn/cas/login")
        lt_field = BeautifulSoup(second_page.content,
                                 'html.parser').find('input', {'name': 'lt'})
        form = {
            'usertype': "2",
            'lt': lt_field['value'],
            '_eventId': 'submit'
        }

        reply = self.s.post(
            "http://gzlss.hrssgz.gov.cn/cas/login?service=http://gzlss.hrssgz.gov.cn:80/gzlss_web/business/tomain/main.xhtml",
            form)
        raise InvalidParamsError(reply.text)

    def _unit_login(self, params=None):
        err_msg = None
        if params:
            try:
                self._check_login_params(params)

                id_num = params['账号']
                pass_word = params['密码']
                vc = params['vc']

                self._do_login(id_num, pass_word, vc)

                #  登录成功
                # 保存到meta
                self.result_key = id_num
                self.result_meta['账号'] = id_num
                self.result_meta['密码'] = pass_word

                return
            except (AssertionError, InvalidParamsError) as e:
                err_msg = str(e)

        raise AskForParamsError([
            dict(key='账号', name='账号', cls='input', value=params.get('账号', '')),
            dict(key='密码',
                 name='密码',
                 cls='input:password',
                 value=params.get('密码', '')),
            dict(key='vc', name='验证码', cls='data:image', query={'t': 'vc'}),
        ], err_msg)

    def _do_login(self, username, password, vc):
        """Simulate the login through the web driver.

        Raises:
            InvalidParamsError: when the browser is still on the CAS login
                URL after submitting; the message is the page's error text
                if it can be read, otherwise a generic message.
        """
        with self.dsc.get_driver_ctx() as driver:
            # Open the login page.
            driver.get(LOGIN_URL)

            username_input = driver.find_element_by_xpath(
                '//*[@id="loginName"]')
            password_input = driver.find_element_by_xpath(
                '//*[@id="loginPassword"]')
            vc_input = driver.find_element_by_xpath('//*[@id="validateCode"]')
            user_type = driver.find_element_by_xpath('//*[@id="usertype2"]')

            # User name
            username_input.clear()
            username_input.send_keys(username)

            # Password
            password_input.clear()
            password_input.send_keys(password)

            # Captcha
            vc_input.clear()
            vc_input.send_keys(vc)

            user_type.click()

            # Submit
            driver.find_element_by_xpath('//*[@id="submitbt"]').click()

            if driver.current_url.startswith(
                    'http://gzlss.hrssgz.gov.cn/cas/login'):
                err_msg = '登录失败,请重新登录!'
                try:
                    err_msg = driver.find_element_by_xpath(
                        '//*[@id="*.errors"]').text
                except Exception:
                    # Best effort: keep the generic message when the error
                    # element cannot be read. Unlike a `raise` inside
                    # `finally`, this lets KeyboardInterrupt/SystemExit
                    # propagate.
                    pass
                raise InvalidParamsError(err_msg)

            # Login succeeded.

    def _to_replace(self, con):
        res = con.replace('\r', '').replace('\n', '').replace('\t', '')
        return res

    def _unit_fetch(self):
        try:
            # TODO: 执行任务,如果没有登录,则raise PermissionError
            s = json.loads(self.s.get(User_BaseInfo).text)  # 个人信息导航
            s2 = s[0]['url']
            res = self.s.get("http://gzlss.hrssgz.gov.cn/gzlss_web" +
                             s2)  # 个人基础信息

            if (len(
                    BeautifulSoup(res.text, 'html.parser').findAll(
                        'table', {'class': 'comitTable'})) <= 0):
                raise TaskNotAvailableError("网络异常,请重新登录")
                return

            redata = BeautifulSoup(res.text, 'html.parser').findAll(
                'table', {'class': 'comitTable'})[0]  # 姓名等信息
            redata2 = BeautifulSoup(res.text, 'html.parser').findAll(
                'table', {'class': 'comitTable'})[1]  # 民族等信息

            # 社保明细
            userNum = BeautifulSoup(
                self.s.get(Search_URL).text,
                'html.parser').find('select', {
                    'id': 'aac001'
                }).text.replace('\n', '')  # 员工编号
            sixian = BeautifulSoup(
                self.s.get(Sixian_URL + userNum).text,
                'html.parser').find('table').findAll(
                    "tr", {'class': 'table_white_data'})

            # 医疗保险明细
            permedicalTotal = 0.0
            HmoneyCount = 0
            paraURL = "&startStr=199001&endStr=" + time.strftime(
                '%Y%m', time.localtime()) + ""  # 医疗保险地址参数
            yiliao = BeautifulSoup(
                self.s.get(Yiliao_URL + userNum + paraURL).text, 'html.parser')
            a = yiliao.find('table', {
                'id': 'tableDataList'
            }).find('script').text
            if "请明天再查" in a:
                raise TaskNotAvailableError("您今天的缴费历史查询已经达到5次,请明天再查。")
            elif "找不到相关数据" in a:
                raise TaskNotAvailableError("抱歉,找不到相关数据。")
            elif "非法操作" in a:
                raise TaskNotAvailableError("非法操作,无法查询。")

            self.result_data['medical_care'] = {"data": {}}
            dataBaseH = self.result_data['medical_care']["data"]
            modelH = {}

            si_status = ""
            sidata = yiliao.find('table', {'id': 'tableDataList'})
            if 'alert' not in sidata.text:
                if len(sidata.findAll("tr")) > 1:
                    si_status = self._to_replace(
                        sidata.findAll("tr")[1].findAll("td")[10].text)[
                            0:2]  # 缴存状态
                    si_com = self._to_replace(
                        sidata.findAll("tr")[2].findAll("td")[3].text)  # 缴费单位
                    yiliaoData = sidata.findAll("tr", {'temp': '职工社会医疗保险'})

                    for a in range(len(yiliaoData)):
                        td = yiliaoData[a].findAll("td")
                        permedicalTotal += float(
                            re.findall(r"\d+\.?\d*", td[7].text)[0])

                        yearH = self._to_replace(td[1].text)[0:4]
                        monthH = self._to_replace(td[1].text)[4:6]
                        rangNum = int(self._to_replace(td[3].text))
                        HmoneyCount += rangNum
                        for a1 in range(-1, rangNum - 1):
                            nowtime = datetime.date(
                                int(yearH) + (int(monthH) + a1) // 12,
                                (int(monthH) + a1) % 12 + 1,
                                1).strftime('%Y%m')
                            modelH = {
                                '缴费单位':
                                si_com,
                                '缴费类型':
                                si_status,
                                '缴费时间':
                                nowtime,
                                '缴费基数':
                                self._to_replace(td[9].text),
                                '政府资助':
                                re.findall(r"\d+\.?\d*", td[8].text)[0],
                                '公司缴费':
                                float(re.findall(r"\d+\.?\d*",
                                                 td[6].text)[0]) / rangNum,
                                '个人缴费':
                                float(re.findall(r"\d+\.?\d*", td[7].text)[0])
                                / rangNum
                            }
                            dataBaseH.setdefault(nowtime[0:4], {})
                            dataBaseH[nowtime[0:4]].setdefault(
                                nowtime[4:6], [])
                            dataBaseH[nowtime[0:4]][nowtime[4:6]].append(
                                modelH)
                else:
                    raise TaskNotImplementedError("未查询到数据!")
            else:
                errormsg2 = sidata.text.split('(')[1].split(')')[0]
                raise TaskNotImplementedError(errormsg2)

            # 养老保险明细
            self.result_data['old_age'] = {"data": {}}
            dataBaseE = self.result_data['old_age']["data"]
            modelE = {}
            peroldTotal = 0.0
            for b in range(len(sixian) - 3):
                td2 = sixian[b].findAll("td")
                if (td2[5].text.strip() != ''):
                    peroldTotal += float(td2[5].text)

                    yearE = td2[0].text[0:4]
                    monthE = td2[0].text[4:6]
                    rangNumE = int(td2[2].text)
                    for b1 in range(-1, rangNumE - 1):
                        nowtime2 = datetime.date(
                            int(yearE) + (int(monthE) + b1) // 12,
                            (int(monthE) + b1) % 12 + 1, 1).strftime('%Y%m')
                        modelE = {
                            '缴费单位': td2[11].text,
                            '缴费类型': td2[12].text,
                            '缴费时间': nowtime2,
                            '缴费基数': td2[3].text,
                            '公司缴费': float(td2[4].text) / rangNumE,
                            '个人缴费': float(td2[5].text) / rangNumE
                        }
                        dataBaseE.setdefault(nowtime2[0:4], {})
                        dataBaseE[nowtime2[0:4]].setdefault(nowtime2[4:6], [])
                        dataBaseE[nowtime2[0:4]][nowtime2[4:6]].append(modelE)

            # 失业保险明细
            self.result_data['unemployment'] = {"data": {}}
            dataBaseI = self.result_data['unemployment']["data"]
            modelI = {}
            for c in range(len(sixian) - 3):
                td3 = sixian[c].findAll("td")
                if (td3[0].text.strip() != ""):
                    yearI = td3[0].text[0:4]
                    monthI = td3[0].text[4:6]
                    rangNumI = int(td3[2].text)
                    for c1 in range(-1, rangNumI - 1):
                        nowtime3 = datetime.date(
                            int(yearI) + (int(monthI) + c1) // 12,
                            (int(monthI) + c1) % 12 + 1, 1).strftime('%Y%m')
                        modelI = {
                            '缴费单位': td3[11].text,
                            '缴费类型': td3[12].text,
                            '缴费时间': nowtime3,
                            '缴费基数': td3[3].text,
                            '公司缴费': float(td3[6].text) / rangNumI,
                            '个人缴费': float(td3[7].text) / rangNumI
                        }
                        dataBaseI.setdefault(nowtime3[0:4], {})
                        dataBaseI[nowtime3[0:4]].setdefault(nowtime3[4:6], [])
                        dataBaseI[nowtime3[0:4]][nowtime3[4:6]].append(modelI)

            # 工伤保险明细
            self.result_data['injuries'] = {"data": {}}
            dataBaseC = self.result_data['injuries']["data"]
            modelC = {}
            for d in range(len(sixian) - 3):
                td4 = sixian[d].findAll("td")
                if (td4[0].text.strip() != ""):
                    yearC = td4[0].text[0:4]
                    monthC = td4[0].text[4:6]
                    rangNumC = int(td4[2].text)
                    for d1 in range(-1, rangNumC - 1):
                        nowtime4 = datetime.date(
                            int(yearC) + (int(monthC) + d1) // 12,
                            (int(monthC) + d1) % 12 + 1, 1).strftime('%Y%m')
                        modelC = {
                            '缴费单位': td4[11].text,
                            '缴费类型': td4[12].text,
                            '缴费时间': nowtime4,
                            '缴费基数': td4[3].text,
                            '公司缴费': float(td4[8].text) / rangNumC,
                            '个人缴费': ''
                        }
                        dataBaseC.setdefault(nowtime4[0:4], {})
                        dataBaseC[nowtime4[0:4]].setdefault(nowtime4[4:6], [])
                        dataBaseC[nowtime4[0:4]][nowtime4[4:6]].append(modelC)

            # 生育保险明细
            self.result_data['maternity'] = {"data": {}}
            dataBaseB = self.result_data['maternity']["data"]
            modelB = {}
            for f in range(len(sixian) - 3):
                td5 = sixian[f].findAll("td")
                if (td5[0].text.strip() != ""):
                    yearB = td5[0].text[0:4]
                    monthB = td5[0].text[4:6]
                    rangNumB = int(td5[2].text)
                    for f1 in range(-1, rangNumB - 1):
                        nowtime5 = datetime.date(
                            int(yearB) + (int(monthB) + f1) // 12,
                            (int(monthB) + f1) % 12 + 1, 1).strftime('%Y%m')
                        modelB = {
                            '缴费单位': td5[11].text,
                            '缴费类型': td5[12].text,
                            '缴费时间': nowtime5,
                            '缴费基数': td5[3].text,
                            '公司缴费': float(td5[9].text) / rangNumB,
                            '个人缴费': ''
                        }
                        dataBaseB.setdefault(nowtime5[0:4], {})
                        dataBaseB[nowtime5[0:4]].setdefault(nowtime5[4:6], [])
                        dataBaseB[nowtime5[0:4]][nowtime5[4:6]].append(modelB)

            # 大病保险明细
            dabingData = sidata.findAll("tr", {'temp': '重大疾病医疗补助'})
            self.result_data['serious_illness'] = {"data": {}}
            dataBaseQ = self.result_data['serious_illness']["data"]
            modelQ = {}

            if (len(dabingData) > 0):
                for q in range(len(dabingData)):
                    td6 = dabingData[q].findAll("td")
                    if (td6[0].text.strip() != ""):
                        yearQ = self._to_replace(td[1].text)[0:4]
                        monthQ = self._to_replace(td[1].text)[4:6]
                        rangNumQ = int(self._to_replace(td[3].text))

                        for a1 in range(-1, rangNumQ - 1):
                            nowtime6 = datetime.date(
                                int(yearQ) + (int(monthQ) + a1) // 12,
                                (int(monthQ) + a1) % 12 + 1,
                                1).strftime('%Y%m')
                            modelQ = {
                                '缴费单位':
                                si_com,
                                '缴费类型':
                                si_status,
                                '缴费时间':
                                nowtime6,
                                '缴费基数':
                                self._to_replace(td6[9].text),
                                '政府资助':
                                re.findall(r"\d+\.?\d*", td6[8].text)[0],
                                '公司缴费':
                                float(
                                    re.findall(r"\d+\.?\d*", td6[6].text)[0]) /
                                rangNum,
                                '个人缴费':
                                float(
                                    re.findall(r"\d+\.?\d*", td6[7].text)[0]) /
                                rangNum
                            }
                            dataBaseQ.setdefault(nowtime6[0:4], {})
                            dataBaseQ[nowtime6[0:4]].setdefault(
                                nowtime6[4:6], [])
                            dataBaseQ[nowtime6[0:4]][nowtime6[4:6]].append(
                                modelQ)

            sixiantype = ""
            if (len(sixian) >= 4):
                sixiantype = sixian[len(sixian) - 4].findAll("td")[12].text
            social_status = {
                '医疗': si_status,
                '养老': sixiantype,
                '失业': sixiantype,
                '工伤': sixiantype,
                '生育': sixiantype
            }

            # 缴费时长
            EmoneyCount = sixian[len(sixian) - 3].findAll("td")[1].text
            EmoneyCount2 = sixian[len(sixian) - 3].findAll("td")[2].text
            EmoneyCount3 = sixian[len(sixian) - 3].findAll("td")[3].text
            EmoneyCount4 = sixian[len(sixian) - 3].findAll("td")[4].text
            rescount = [EmoneyCount, EmoneyCount2, EmoneyCount3, EmoneyCount4]
            moneyCount = max(rescount)

            # 个人基本信息
            self.result_data['baseInfo'] = {
                '姓名': redata.find('input', {'id': 'aac003ss'})['value'],
                '身份证号': redata.find('input', {'id': 'aac002ss'})['value'],
                '更新时间': time.strftime("%Y-%m-%d", time.localtime()),
                '城市名称': '广州市',
                '城市编号': '440100',
                '缴费时长': moneyCount,
                '最近缴费时间': sixian[len(sixian) - 4].findAll("td")[1].text,
                '开始缴费时间': sixian[0].findAll("td")[0].text,
                '个人养老累计缴费': peroldTotal,
                '个人医疗累计缴费': permedicalTotal,
                '五险状态': social_status,
                '账户状态': social_status['养老'],
                '个人编号': redata.find('input', {'id': 'aac001'})['value'],
                # '性别': redata.find('input', {'id': 'aac004ss'})['value'],
                # '民族': redata2.find('select', {'id': 'aac005'}).find(selected="selected").text.replace('\r', '').replace('\n', '').replace('\t', ''),
                # '户口性质': redata.find('input', {'id': 'aac009ss'})['value'],
                # '出生日期': redata.find('input', {'id': 'aac006ss'})['value'],
                # '单位名称': redata.find('input', {'id': 'aab069ss'})['value'],
                # '地址': redata2.find('input', {'id': 'bab306'})['value'],
                # '电子邮箱': redata2.find('input', {'id': 'bbc019'})['value']
            }

            # identity信息
            self.result_identity.update({
                "task_name":
                "广州",
                "target_name":
                redata.find('input', {'id': 'aac003ss'})['value'],
                "target_id":
                self.result_meta['账号'],
                "status":
                social_status['养老']
            })

            # 暂时不用代码
            # siresp=self.s.get("http://gzlss.hrssgz.gov.cn/gzlss_web"+s[1]['url'])  # 四险导航
            # sdata=BeautifulSoup(siresp.text,'html.parser')   # 四险find信息
            # hs = json.loads(self.s.get(Medical_URL).text)  # 医疗保险信息
            # medDetailURL=hs[0]['url']         # 医疗
            # hresp=self.s.get("http://gzlss.hrssgz.gov.cn/gzlss_web"+medDetailURL)
            # hdata = BeautifulSoup(hresp.text, 'html.parser')  # 医疗find信息

            return
        except PermissionError as e:
            raise PreconditionNotSatisfiedError(e)
Пример #8
0
class Task(AbsFetchTask):
    """Selenium login test task for the gzlss.hrssgz.gov.cn site.

    Logs in through a web driver obtained from a DriverRequestsCoordinator
    and then reads the user's name from the portal's main page.
    """

    # Task metadata; both values read "test selenium login [fast]".
    task_info = {
        'task_name': '测试selenium登录[fast]',
        'help': '测试selenium登录[fast]'
    }

    def _get_common_headers(self):
        """Return the common headers: a single User-Agent set to USER_AGENT."""
        headers = dict()
        headers['User-Agent'] = USER_AGENT
        return headers

    def _prepare(self, data=None):
        """Run the base preparation, then create the driver/requests coordinator.

        The coordinator is built around the session ``self.s`` and receives
        ``_create_driver`` as its driver factory.
        """
        super()._prepare(data)

        coordinator_options = {
            's': self.s,
            # ``self._create_chrome_driver`` can be passed here instead.
            'create_driver': self._create_driver,
        }
        self.dsc = DriverRequestsCoordinator(**coordinator_options)

    def _create_chrome_driver(self):
        """Build a Chrome-type driver configured with USER_AGENT."""
        return new_driver(driver_type=DriverType.CHROME,
                          user_agent=USER_AGENT)

    def _create_driver(self):
        """Create the default web driver and park it on the target host.

        The driver is created with USER_AGENT and a ``js_re_ignore`` pattern
        naming the captcha image path.

        Returns:
            The driver returned by ``new_driver``, after it has been pointed
            at a URL on gzlss.hrssgz.gov.cn.
        """
        # Raw string: ``\/`` is not a valid escape in a normal literal and
        # triggers an invalid-escape warning. The resulting value is the same.
        driver = new_driver(user_agent=USER_AGENT,
                            js_re_ignore=r'/cas\/captcha.jpg/g')

        # Visit an arbitrary address on the same host so that cookies can be
        # set on the driver afterwards.
        driver.get('http://gzlss.hrssgz.gov.cn/xxxx')

        return driver

    def _setup_task_units(self):
        self._add_unit(self._unit_login)
        self._add_unit(self._unit_fetch, self._unit_login)

    def _query(self, params: dict):
        t = params.get('t')
        if t == 'vc':
            return self._new_vc()

    def _new_vc(self):
        """Download a captcha image and wrap it as a ``data:image`` payload.

        A fresh random number is appended to VC_IMAGE_URL for every request.
        """
        nonce = str(random.random())
        response = self.s.get(VC_IMAGE_URL + nonce)
        payload = {
            'cls': 'data:image',
            'content': response.content,
            'content_type': response.headers.get('Content-Type'),
        }
        return payload

    def _params_handler(self, params: dict):
        if not (self.is_start and not params):
            meta = self.prepared_meta
            if '账号' not in params:
                params['账号'] = meta.get('账号')
            if '密码' not in params:
                params['密码'] = meta.get('密码')
        return params

    def _param_requirements_handler(self, param_requirements, details):
        meta = self.prepared_meta
        res = []
        for pr in param_requirements:
            # TODO: 进一步检查details
            if pr['key'] == '账号' and '账号' in meta:
                continue
            elif pr['key'] == '密码' and '密码' in meta:
                continue
            res.append(pr)
        return res

    def _check_login_params(self, params):
        assert params is not None, '缺少参数'
        assert '账号' in params, '缺少账号'
        assert '密码' in params, '缺少密码'
        # other check
        账号 = params['账号']
        密码 = params['密码']
        if len(密码) < 4:
            raise InvalidParamsError('账号或密码错误')
        if len(账号) < 4:
            raise InvalidParamsError('账号或密码错误')

    def _unit_login(self, params=None):
        """Log in with the supplied credentials, or ask the user for them.

        On success the account becomes ``self.result_key`` and the account
        and password are stored in ``self.result_meta``.

        Args:
            params: optional mapping with '账号', '密码' and 'vc' (captcha).

        Raises:
            AskForParamsError: no params were given, or validation/login
                failed; carries the input form and the error message (None
                when nothing was attempted).
        """
        err_msg = None
        if params:
            try:
                self._check_login_params(params)
                username = params['账号']
                password = params['密码']
                vc = params['vc']

                self._do_login(username, password, vc)
                # Login succeeded.
                self.result_key = username
                self.result_meta.update({'账号': username, '密码': password})
                return
            except (AssertionError, InvalidParamsError) as e:
                err_msg = str(e)

        # ``params`` is None on a call without arguments; use an empty mapping
        # so the form can be pre-filled without raising AttributeError.
        previous = params or {}
        raise AskForParamsError([
            dict(key='账号', name='账号', cls='input',
                 value=previous.get('账号', '')),
            dict(key='密码',
                 name='密码',
                 cls='input:password',
                 value=previous.get('密码', '')),
            dict(key='vc', name='验证码', cls='data:image', query={'t': 'vc'}),
        ], err_msg)

    def _do_login(self, username, password, vc):
        """Simulate the login flow with the web driver.

        Args:
            username: text typed into the ``loginName`` field.
            password: text typed into the ``loginPassword`` field.
            vc: captcha text typed into the ``validateCode`` field.

        Raises:
            InvalidParamsError: the browser is still on the CAS login URL
                after submitting. The message is the page's error text when
                it can be read, otherwise a generic one.
        """
        with self.dsc.get_driver_ctx() as driver:
            # Open the login page.
            driver.get(LOGIN_PAGE_URL)

            # FIXME: debug
            for entry in driver.get_log('browser'):
                print(entry)

            username_input = driver.find_element_by_xpath(
                '//*[@id="loginName"]')
            password_input = driver.find_element_by_xpath(
                '//*[@id="loginPassword"]')
            vc_input = driver.find_element_by_xpath('//*[@id="validateCode"]')
            user_type_input = driver.find_element_by_xpath(
                '//*[@id="usertype2"]')

            # Account
            username_input.clear()
            username_input.send_keys(username)

            # Password
            password_input.clear()
            password_input.send_keys(password)

            # Captcha
            vc_input.clear()
            vc_input.send_keys(vc)

            # Select the user type.
            user_type_input.click()

            # Submit the form.
            driver.find_element_by_xpath('//*[@id="submitbt"]').click()

            # FIXME: debug
            for entry in driver.get_log('browser'):
                print(entry)

            if driver.current_url.startswith(
                    'http://gzlss.hrssgz.gov.cn/cas/login'):
                err_msg = '登录失败,请检查输入'
                try:
                    err_msg = driver.find_element_by_xpath(
                        '//*[@id="*.errors"]').text
                except Exception:
                    # Best effort: keep the generic message when the error
                    # element cannot be read. Unlike ``finally: raise`` this
                    # no longer swallows KeyboardInterrupt/SystemExit.
                    pass
                raise InvalidParamsError(err_msg)

    def _unit_fetch(self):
        """Fetch the portal main page and record the user's name.

        The name is the text of the first node matched by a fixed XPath; the
        task name from ``task_info`` is copied into the identity result.

        Raises:
            PreconditionNotSatisfiedError: wraps an InvalidConditionError
                raised while fetching.
        """
        main_page_url = (
            'http://gzlss.hrssgz.gov.cn/gzlss_web/business/tomain/main.xhtml')
        name_xpath = '/html/body/div[1]/div[3]/span/font[1]'
        try:
            page = etree.HTML(self.s.get(main_page_url).text)
            matches = page.xpath(name_xpath)
            self.result_data.update({'姓名': matches[0].text})
            task_name = self.task_info['task_name']
            self.result_identity.update({'task_name': task_name})
        except InvalidConditionError as err:
            raise PreconditionNotSatisfiedError(err)
Пример #9
0
class Task(AbsFetchTask):
    """Fetch task for the Chengdu (成都) portal at gr.cdhrss.gov.cn:442.

    Logs in through a web driver and then collects payment details, the
    per-insurance status and basic personal information.
    """

    # Task metadata: the city name, help text written as HTML list items
    # (it says a co-branded card has two passwords, a bank query password
    # and a housing-fund query service password, and that the service
    # password can be changed through hotline 12329), and developer contact.
    task_info = dict(city_name="成都",
                     help="""
        <li>联名卡有两个密码,一个是银行查询密码,一个是公积金查询服务密码</li>
        <li>如若查询服务密码,可拨打服务热线12329修改</li>
        """,
                     developers=[{
                         'name': '程菲菲',
                         'email': '*****@*****.**'
                     }])

    def _get_common_headers(self):
        return {
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36',
            'Accept-Encoding': 'gzip, deflate, br',
            'Host': 'gr.cdhrss.gov.cn:442',
            'X-Requested-With': 'XMLHttpRequest',
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6'
        }

    def _prepare(self, data=None):
        """Run the base preparation, reset ``baseInfo`` and build the coordinator.

        The coordinator wraps the session ``self.s`` and receives
        ``_create_driver`` as its driver factory.
        """
        super()._prepare(data)
        self.result_data['baseInfo'] = dict()

        coordinator_options = {
            's': self.s,
            'create_driver': self._create_driver,
        }
        self.dsc = DriverRequestsCoordinator(**coordinator_options)

    def _create_driver(self):
        """Create a PhantomJS-capability driver with a direct proxy and timeouts.

        Returns:
            The driver returned by ``new_driver`` after a session was started
            with the PhantomJS capabilities, both timeouts were set to 13
            seconds and a URL on gr.cdhrss.gov.cn:442 was opened.
        """
        # Raw string: ``\/`` is not a valid escape in a normal literal and
        # triggers an invalid-escape warning. The resulting value is the same.
        driver = new_driver(user_agent=USER_AGENT,
                            js_re_ignore=r'/cdwsjb\/CaptchaImg.png/g')
        proxy = webdriver.Proxy()
        proxy.proxy_type = ProxyType.DIRECT
        # NOTE(review): this writes into the shared DesiredCapabilities.PHANTOMJS
        # object rather than a copy - confirm that is intended.
        proxy.add_to_capabilities(webdriver.DesiredCapabilities.PHANTOMJS)
        driver.start_session(webdriver.DesiredCapabilities.PHANTOMJS)
        # driver.get(url) has been seen to hang forever without raising, which
        # stalls the program; setting timeouts works around that.
        driver.set_page_load_timeout(13)
        # Script timeout, also 13 seconds.
        driver.set_script_timeout(13)
        # Visit an arbitrary address on the same host so that cookies can be
        # set on the driver afterwards.
        driver.get('https://gr.cdhrss.gov.cn:442/xxxx')
        return driver

    def _query(self, params: dict):
        """任务状态查询"""
        t = params.get('t')
        if t == 'vc':
            return self._new_vc()
            # pass

    def _new_vc(self):
        """Download the captcha from VC_URL and wrap it as a ``data:image`` payload."""
        response = self.s.get(VC_URL)
        payload = {
            'cls': 'data:image',
            'content': response.content,
            'content_type': response.headers['Content-Type'],
        }
        return payload

    def _setup_task_units(self):
        """设置任务执行单元"""
        self._add_unit(self._unit_login)
        self._add_unit(self._unit_fetch, self._unit_login)

    def _check_login_params(self, params):
        assert params is not None, '缺少参数'
        assert '用户名' in params, '缺少用户名'
        assert '密码' in params, '缺少密码'
        # other check
        用户名 = params['用户名']
        密码 = params['密码']

        if len(用户名) == 0:
            raise InvalidParamsError('用户名为空,请输入用户名')
        elif len(用户名) < 4:
            raise InvalidParamsError('用户名不正确,请重新输入')

        if len(密码) == 0:
            raise InvalidParamsError('密码为空,请输入密码!')
        elif len(密码) < 6:
            raise InvalidParamsError('密码不正确,请重新输入!')

    def _params_handler(self, params: dict):
        if not (self.is_start and not params):
            meta = self.prepared_meta
            if '用户名' not in params:
                params['用户名'] = meta.get('用户名')
            if '密码' not in params:
                params['密码'] = meta.get('密码')
        return params

    def _param_requirements_handler(self, param_requirements, details):
        meta = self.prepared_meta
        res = []
        for pr in param_requirements:
            # TODO: 进一步检查details
            if pr['key'] == '用户名' and '用户名' in meta:
                continue
            elif pr['key'] == '密码' and '密码' in meta:
                continue
            res.append(pr)
        return res

    def _unit_login(self, params=None):
        """Log in with the supplied credentials, or ask the user for them.

        On success the user name becomes ``self.result_key`` and the user
        name and password are stored in ``self.result_meta``.

        Args:
            params: optional mapping with '用户名', '密码' and 'vc' (captcha).

        Raises:
            AskForParamsError: no params were given, or validation/login
                failed; carries the input form and the error message (None
                when nothing was attempted).
        """
        err_msg = None
        if params:
            try:
                self._check_login_params(params)

                id_num = params.get("用户名")
                account_pass = params.get("密码")
                vc = params.get("vc")

                # The login goes through the web driver.
                self._do_login(id_num, account_pass, vc)

                # Save to meta.
                self.result_key = id_num
                self.result_meta['用户名'] = id_num
                self.result_meta['密码'] = account_pass
                return
            except (AssertionError, InvalidParamsError) as e:
                err_msg = str(e)

        # ``params`` is None on a call without arguments; use an empty mapping
        # so the form can be pre-filled without raising AttributeError.
        previous = params or {}
        raise AskForParamsError([
            dict(key='用户名',
                 name='用户名',
                 cls='input',
                 placeholder='请输入登录号|社会保障号|社保卡号',
                 value=previous.get('用户名', '')),
            dict(key='密码',
                 name='密码',
                 cls='input:password',
                 value=previous.get('密码', '')),
            dict(key='vc', name='验证码', cls='data:image', query={'t': 'vc'}),
        ], err_msg)

    def _do_login(self, username, password, vc):
        """Simulate the login flow with the web driver.

        Fills the user name, password and captcha fields of the login page,
        clicks the login button and waits five seconds.

        Raises:
            InvalidParamsError: the browser is still on the login page URL
                after the wait.
        """
        login_page = "https://gr.cdhrss.gov.cn:442/cdwsjb/login.jsp"
        with self.dsc.get_driver_ctx() as browser:
            # Open the login page.
            browser.get(login_page)

            user_box = browser.find_element_by_xpath('//*[@id="c_username"]')

            # The script switches the password box to inline-block display
            # before the box is looked up.
            show_password_js = 'document.getElementById("c_password").style.display="inline-block";'
            browser.execute_script(show_password_js)
            pass_box = browser.find_element_by_xpath('//*[@id="c_password"]')
            code_box = browser.find_element_by_xpath('//*[@id="checkCode"]')

            # User name, password and captcha, in that order.
            entries = ((user_box, username), (pass_box, password),
                       (code_box, vc))
            for box, text in entries:
                box.clear()
                box.send_keys(text)

            # Log in.
            submit_button = browser.find_element_by_xpath(
                '//*[@id="loginbox"]/div[5]/div[1]/input')
            submit_button.click()
            time.sleep(5)

            still_on_login_page = browser.current_url.startswith(
                'https://gr.cdhrss.gov.cn:442/cdwsjb/login.jsp')
            if still_on_login_page:
                raise InvalidParamsError('登录失败,请重新登录!')

    def _convert_type(self, num):
        resinfo = ""
        if (num == "1"):
            resinfo = "正常"
        else:
            resinfo = "停缴"
        return resinfo

    def _fetch_payment_rows(self, result_key, insurance_code, start, end,
                            personal_field=None):
        """Query one insurance type's payment list and file it in the result.

        ``self.result['data'][result_key]`` is reset to ``{"data": {}}`` and
        every returned row is appended under ``data[year][month]``, where the
        year and month are sliced from the row's 'aae002' value.

        Args:
            result_key: key in ``self.result['data']`` to fill.
            insurance_code: value sent as ``dto['aae140']``.
            start: value sent as ``dto['aae041']``.
            end: value sent as ``dto['aae042']``.
            personal_field: row field copied into '个人缴费'; when omitted the
                entry is stored as an empty string.

        Returns:
            The row list from the response, or an empty list when the
            response has no 'lists' entry.
        """
        self.result['data'][result_key] = {"data": {}}
        by_period = self.result['data'][result_key]["data"]

        resp = self.s.get(Detail_URL + "?dto['aae041']=" + start +
                          "&dto['aae042']=" + end +
                          "&dto['aae140_md5list']=&dto['aae140']=" +
                          insurance_code)
        # Parse the body once and reuse it for both the check and the rows.
        payload = json.loads(resp.text)
        if 'lists' not in payload:
            return []

        rows = payload['lists']['dg_payment']['list']
        for row in rows:
            period = str(row['aae002'])
            record = {
                '缴费单位': row['aab004'],
                '缴费时间': row['aae002'],
                '缴费类型': '',
                '缴费基数': row['yac004'],
                '公司缴费': row['dwjfje'],
                '个人缴费': row[personal_field] if personal_field else '',
            }
            months = by_period.setdefault(period[0:4], {})
            months.setdefault(period[4:6], []).append(record)
        return rows

    def _unit_fetch(self):
        """Collect payment details, insurance status and basic information.

        Queries the personal home data, then the payment list of each
        insurance type (old age, medical care, unemployment, injuries,
        maternity, serious illness) from 199001 up to the current month,
        then the per-insurance status page, and finally fills
        ``self.result_data['baseInfo']`` and ``self.result['identity']``.

        Raises:
            TaskNotImplementedError: the personal home request did not return
                status 200.
            PreconditionNotSatisfiedError: wraps an InvalidConditionError
                raised while fetching.
        """
        try:
            # TODO: run the task; raise PermissionError when not logged in
            # Personal information.
            res = self.s.get(
                "https://gr.cdhrss.gov.cn:442/cdwsjb/personal/personalHomeAction!query.do"
            )
            if res.status_code != 200:
                raise TaskNotImplementedError("网络错误,请稍后再试!")

            s = json.loads(res.text)["fieldData"]

            # Query window for the payment details.
            query_start = "199001"
            query_end = time.strftime("%Y%m", time.localtime())

            # Payment details, requested in the same order as before:
            # old age, medical care, unemployment, injuries, maternity,
            # serious illness.
            old_age_rows = self._fetch_payment_rows(
                "old_age", "110", query_start, query_end, 'grjfje')
            peroldTotal = sum(
                (float(row['grjfje']) for row in old_age_rows), 0.0)

            medical_rows = self._fetch_payment_rows(
                "medical_care", "310", query_start, query_end, 'hrzhje')
            permedicalTotal = sum(
                (float(row['hrzhje']) for row in medical_rows), 0.0)

            unemployment_rows = self._fetch_payment_rows(
                "unemployment", "210", query_start, query_end, 'grjfje')
            injuries_rows = self._fetch_payment_rows(
                "injuries", "410", query_start, query_end)
            maternity_rows = self._fetch_payment_rows(
                "maternity", "510", query_start, query_end)
            self._fetch_payment_rows(
                "serious_illness", "330", query_start, query_end)

            # Status of the six insurance types.
            stype = self.s.get(
                "https://gr.cdhrss.gov.cn:442/cdwsjb/personal/query/queryCZInsuranceInfoAction.do"
            )
            stypes = BeautifulSoup(stype.text, 'html.parser').find(
                'div', {'id': 'SeInfo'})
            # NOTE(review): the list is cut out of the element text by a
            # fixed split index (40); presumably tied to the current page
            # layout - verify if the page changes.
            stype2 = json.loads(
                stypes.text.split('data')[40].split(';')[0].replace(
                    '=', ''))['list']

            # Insurance code -> raw state; "0" when the page lists no entry.
            raw_state = {
                code: "0"
                for code in ("110", "310", "210", "410", "510", "330")
            }
            for item in stype2:
                if item['aae140'] in raw_state:
                    raw_state[item['aae140']] = item['aac031']

            social_Type = {
                '养老': self._convert_type(raw_state["110"]),
                '医疗': self._convert_type(raw_state["310"]),
                '大病': self._convert_type(raw_state["330"]),
                '失业': self._convert_type(raw_state["210"]),
                '工伤': self._convert_type(raw_state["410"]),
                '生育': self._convert_type(raw_state["510"])
            }

            # Basic personal information.
            if s['aac031'] == "参保缴费":
                status = "正常"
            else:
                status = "异常"

            # Payment duration: the longest of the five lists, minus one
            # (kept exactly as the original computation).
            moneyCount = max(
                len(rows) - 1
                for rows in (old_age_rows, medical_rows, unemployment_rows,
                             injuries_rows, maternity_rows))

            # The most recent and the first payment period are taken from the
            # first and last old-age rows.
            recentTime = ""
            startTime = ""
            if len(old_age_rows) > 0:
                recentTime = old_age_rows[0]['aae002']
                startTime = old_age_rows[len(old_age_rows) - 1]['aae002']

            self.result_data['baseInfo'] = {
                '姓名': s['aac003'],
                '身份证号': s['aac002'],
                '更新时间': time.strftime("%Y-%m-%d", time.localtime()),
                '城市名称': '成都',
                '城市编号': '510100',
                '缴费时长': moneyCount,
                '最近缴费时间': recentTime,
                '开始缴费时间': startTime,
                '个人养老累计缴费': peroldTotal,
                '个人医疗累计缴费': permedicalTotal,
                '五险状态': social_Type,
                '账户状态': status,
                '个人编号': s['aac001'],
            }

            self.result['identity'] = {
                "task_name": "成都",
                "target_name": s['aac003'],
                "target_id": self.result['meta']["用户名"],
                "status": status
            }
        except InvalidConditionError as e:
            raise PreconditionNotSatisfiedError(e)
Пример #10
0
class Task(AbsFetchTask):
    # Static task metadata: the city name, an HTML help snippet (list items)
    # and the list of developer contacts.
    task_info = dict(city_name="深圳",
                     help="""<li>若您尚未激活或者没有在网上查询过您的社保卡,请点击激活社保账号</li>
        <li>如果您曾经激活过社保卡,但忘记密码,请点击忘记密码</li>
        <li>如办理社保卡时,没有登记手机号码或者更换手机号码,请本人携带身份证原件和新手机到社保分中心柜台办理注册手机变更业务。</li>
        """,
                     developers=[{
                         'name': '卜圆圆',
                         'email': '*****@*****.**'
                     }])

    def _get_common_headers(self):
        return {
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36'
        }

    def _query(self, params: dict):
        """任务状态查询"""
        t = params.get('t')
        if t == 'vc':
            return self._new_vc()
        pass

    def _prepare(self, data=None):
        """Run the base-class preparation, then attach a DriverRequestsCoordinator.

        The coordinator is built from the task's session ``self.s`` and uses
        ``self._create_driver`` as its driver factory; it is stored on
        ``self.dsc``.
        """
        super()._prepare(data)
        coordinator = DriverRequestsCoordinator(
            s=self.s,
            create_driver=self._create_driver,
        )
        self.dsc = coordinator

    def _create_driver(self):
        """Create a web driver, start a PhantomJS session with a DIRECT proxy and open the login page.

        Returns:
            The driver returned by ``new_driver`` after ``LOGIN_PAGE_URL`` has
            been requested.
        """
        # Raw string: the pattern contains ``\/``, which is an invalid escape
        # sequence in an ordinary literal (the resulting value is unchanged).
        driver = new_driver(user_agent=USER_AGENT,
                            js_re_ignore=r'/web\/ImageCheck.jpg/g')
        proxy = webdriver.Proxy()
        proxy.proxy_type = ProxyType.DIRECT
        # Work on a copy so the process-wide DesiredCapabilities.PHANTOMJS dict
        # is not modified as a side effect of building one driver.
        capabilities = webdriver.DesiredCapabilities.PHANTOMJS.copy()
        proxy.add_to_capabilities(capabilities)
        driver.start_session(capabilities)
        # driver.get(url) has been observed to hang forever without raising;
        # a page-load timeout keeps the task from getting stuck.
        driver.set_page_load_timeout(20)
        # Script timeout, 20 seconds as well.
        driver.set_script_timeout(20)
        driver.get(LOGIN_PAGE_URL)

        return driver

    def _create_chrome_driver(self):
        """Return a new driver of type ``DriverType.CHROME`` created with ``USER_AGENT``."""
        return new_driver(user_agent=USER_AGENT, driver_type=DriverType.CHROME)

    def _setup_task_units(self):
        """设置任务执行单元"""
        self._add_unit(self._unit_login)
        self._add_unit(self._unit_fetch_userinfo, self._unit_login)
        self._add_unit(self._unit_fetch, self._unit_login)

    def _check_login_params(self, params):
        assert params is not None, '缺少参数'
        assert '用户名' in params, '缺少用户名'
        assert '密码' in params, '缺少密码'
        # other check
        用户名 = params['用户名']
        密码 = params['密码']

        if len(用户名) == 0:
            raise InvalidParamsError('用户名为空,请输入用户名')
        elif len(用户名) < 5:
            raise InvalidParamsError('用户名不正确,请重新输入')

        if len(密码) == 0:
            raise InvalidParamsError('密码为空,请输入密码!')
        elif len(密码) < 6:
            raise InvalidParamsError('密码不正确,请重新输入!')

    def _params_handler(self, params: dict):
        if not (self.is_start and not params):
            meta = self.prepared_meta
            if '用户名' not in params:
                params['用户名'] = meta.get('用户名')
            if '密码' not in params:
                params['密码'] = meta.get('密码')
        return params

    def _param_requirements_handler(self, param_requirements, details):
        meta = self.prepared_meta
        res = []
        for pr in param_requirements:
            # TODO: 进一步检查details
            if pr['key'] == '用户名' and '用户名' in meta:
                continue
            elif pr['key'] == '密码' and '密码' in meta:
                continue
            elif pr['key'] == 'other':
                continue
            res.append(pr)
        return res

    def get_js(self):
        """Return the full text of ``ceshi.js`` from the current working directory.

        Returns:
            str: The file content, decoded as UTF-8.

        Raises:
            OSError: If the file cannot be opened.
        """
        # The context manager closes the handle, which the former readline
        # loop never did; read() yields the same concatenated text.
        with open("ceshi.js", 'r', encoding='UTF-8') as script_file:
            return script_file.read()

    def _unit_login(self, params: dict):
        """Log in to the Shenzhen social-insurance site and collect the basic account info.

        When ``params`` is truthy the method:

        1. validates ``params`` and reads '用户名', '密码' and 'vc';
        2. requests the index page until the response carries the 'skey'
           cookie, sleeping one second between tries and raising
           InvalidParamsError after the fourth failed try;
        3. downloads ``encrypt.js`` and runs its ``encrypt`` and
           ``stringToHex`` functions through ``execjs`` to encode the password;
        4. POSTs the credentials to ``LOGIN_URL``; a response flag of '-1'
           raises InvalidParamsError with the site's message;
        5. issues several POSTs to ``USERINFO_URL`` and copies the returned
           fields into ``self.result_data['baseInfo']`` and
           ``self.result_identity``;
        6. stores user name and password in ``self.result_meta`` and the user
           name in ``self.result_key``.

        Raises:
            AskForParamsError: When ``params`` is falsy, or when an
                AssertionError / InvalidParamsError was raised above. It
                carries the field descriptions for user name, password and
                captcha plus the error message (``None`` if there was none).
        """
        err_msg = None
        if params:
            try:
                self._check_login_params(params)
                username = params['用户名']
                password = params['密码']
                # NOTE(review): a missing 'vc' key raises KeyError, which the
                # except clause below does not catch.
                vc = params['vc']

                #skeys=resp.cookies._cookies['seyb.szsi.gov.cn']['/web/ggfw/app']['skey'].value
                # Poll the index page until the host's cookies are present.
                i = 0
                while (True):
                    resp = self.s.get(
                        'https://seyb.szsi.gov.cn/web/ggfw/app/index.html',
                        timeout=30)
                    skeys = resp.cookies._cookies.get("seyb.szsi.gov.cn")
                    if skeys:
                        skeys = skeys['/web/ggfw/app']['skey'].value
                        break
                    else:
                        i = i + 1
                        time.sleep(1)
                        if i > 3:
                            raise InvalidParamsError('网络异常,请重新刷新!')

                # Run the site's own encrypt.js to encode the password with
                # the skey obtained above.
                jsstrs = self.s.get(
                    "https://seyb.szsi.gov.cn/web/js/comm/fw/encrypt.js",
                    timeout=20)
                ctx = execjs.compile(jsstrs.content.decode("utf-8"))
                mmmm = ctx.call('encrypt', skeys, password)
                mmjm = ctx.call('stringToHex', mmmm)

                # jsstr = self.get_js()
                # ctxs = execjs.compile(jsstr)
                # mmmms = ctx.call('encrypt',skeys,password)
                # mmjms = ctx.call('stringToHex', mmmms)
                resp = self.s.post(LOGIN_URL,
                                   data=dict(r=random.random(),
                                             LOGINID=username,
                                             PASSWORD=mmjm,
                                             IMAGCHECK=vc,
                                             OPERTYPE2=3,
                                             ISBIND='false',
                                             now=time.strftime(
                                                 '%a %b %d %Y %H:%M:%S',
                                                 time.localtime()),
                                             callback=''))
                soup = BeautifulSoup(resp.content, 'html.parser')
                # Every parenthesis is stripped from the response text before
                # it is parsed as JSON.
                jsonread = json.loads(
                    soup.text.replace('(', '').replace(')', ''))
                flag = jsonread['flag']
                errormsg = jsonread['message']
                if flag == '-1':
                    raise InvalidParamsError(errormsg)

                #self._do_login(username, password, vc)

                # Login succeeded: remember the Token cookie on the session.
                self.s.Token = self.s.cookies._cookies['seyb.szsi.gov.cn'][
                    '/']['Token'].value

                # Query (equivalent of clicking "query").
                # NOTE(review): the 'Accept' and 'Connection' header values in
                # the requests below contain spaces; confirm this is intended.
                strr = '?r=' + str(random.random())
                resp = self.s.post(
                    USERINFO_URL + strr,
                    data=dict(
                        _isModel='true',
                        params=
                        '{"oper":"FrontPageAction.queryNavDimension","params":{},"datas":{"@tmpGtDatas":{"业务类型":"5"}}}'
                    ),
                    headers={
                        'X-Requested-With': 'XMLHttpRequest',
                        'Content-Type':
                        'application/x-www-form-urlencoded; charset=UTF-8',
                        'Accept': 'application / json, text / plain, * / *',
                        'Token': self.s.Token,
                        'Connection': 'keep - alive'
                    },
                    timeout=15)
                self.s.Token = resp.cookies._cookies['seyb.szsi.gov.cn']['/'][
                    'Token'].value

                self.result_data["baseInfo"] = {
                    '城市名称': '深圳',
                    '城市编号': '440300',
                    '更新时间': time.strftime("%Y-%m-%d", time.localtime())
                }
                # Query (clicking the business query issues three requests).
                # The bare string below is a no-op statement reading "first".
                '''第一次'''
                strr = '?r=' + str(random.random())
                resp = self.s.post(
                    USERINFO_URL + strr,
                    data=dict(
                        _isModel='true',
                        params=
                        '{"oper":"UnitHandleCommAction.insertLogRecord","params":{},"datas":{"@tmpGtDatas":{"rightId":"500101","rightName":"参保基本信息查询","recordType":"1"}}}'
                    ),
                    headers={
                        'X-Requested-With': 'XMLHttpRequest',
                        'Content-Type':
                        'application/x-www-form-urlencoded; charset=UTF-8',
                        'Accept': 'application / json, text / plain, * / *',
                        'Token': self.s.Token,
                        'Connection': 'keep - alive'
                    },
                    timeout=15)

                # This token goes to self.g (not self.s); _unit_fetch_userinfo
                # reads self.g.Token for the third request.
                self.g.Token = resp.cookies._cookies['seyb.szsi.gov.cn']['/'][
                    'Token'].value
                # No-op string statement reading "second".
                '''第二次'''
                datass = dict(
                    _isModel='true',
                    params=
                    '{"oper":"CbjbxxcxAction.queryGrcbjbxx","params":{},"datas":{"ncm_gt_用户信息":{"params":{}},"ncm_gt_参保状态":{"params":{}},"ncm_gt_缴纳情况":{"params":{}}}}'
                )
                strrs = USERINFO_URL + '?r=' + str(random.random())
                resps = self.s.post(
                    strrs,
                    datass,
                    headers={
                        'X-Requested-With': 'XMLHttpRequest',
                        'Accept-Language': 'zh-CN,zh;q=0.8',
                        'Accept-Encoding': 'gzip, deflate, br',
                        'Connection': 'keep - alive',
                        'Content-Type':
                        'application/x-www-form-urlencoded; charset=UTF-8',
                        'Accept': 'application/json,text/plain, */*',
                        'Token': self.s.Token,
                        'Referer':
                        'https://seyb.szsi.gov.cn/web/ggfw/app/index.html',
                        'Origin': 'https://seyb.szsi.gov.cn',
                        'Host': 'seyb.szsi.gov.cn'
                    },
                    timeout=15)
                # print(resps.text)
                soup = BeautifulSoup(resps.content, 'html.parser')
                self.s.Token = resps.cookies._cookies['seyb.szsi.gov.cn']['/'][
                    'Token'].value
                jsonread = json.loads(soup.text)
                if jsonread['flag'] != '-1':
                    userinfo = jsonread['datas']
                    # Insurance status per type, keyed by the first two
                    # characters of the field name.
                    fivedic = {}
                    for k, v in userinfo['ncm_gt_用户信息']['params'].items():
                        # ``> 0`` means a key that starts with '参保状态'
                        # (index 0) is not treated as a status field.
                        if k.find('参保状态') > 0:
                            fivedic.setdefault(k[:2], v)
                        else:
                            if k == '户籍类别':
                                self.result_data["baseInfo"].setdefault(
                                    '户口性质', v)
                            else:
                                self.result_data["baseInfo"].setdefault(k, v)
                            if k == '姓名':
                                self.result_identity['target_name'] = v
                            if k == '身份证号':
                                self.result_identity['target_id'] = v

                    # Largest value among the '...保险累计月数' fields.
                    monthnum = 0
                    for k, v in userinfo['ncm_gt_缴纳情况']['params'].items():
                        if k == '养老保险累计月数':
                            self.result_data["baseInfo"].setdefault(
                                '养老实际缴费月数', v)
                        elif k == '失业保险累计月数':
                            self.result_data["baseInfo"].setdefault(
                                '失业实际缴费月数', v)
                        else:
                            self.result_data["baseInfo"].setdefault(k, v)
                        if k.find('保险累计月数') > -1:
                            if (monthnum < int(v)):
                                monthnum = int(v)

                    self.result_data["baseInfo"].setdefault('缴费时长', monthnum)
                    self.result_data["baseInfo"].setdefault('五险状态', fivedic)
                    # Status is '正常' when at least one insurance type has
                    # the value '参加', otherwise '停缴'.
                    if '参加' in fivedic.values():
                        self.result_identity['status'] = '正常'
                    else:
                        self.result_identity['status'] = '停缴'
                else:
                    raise InvalidParamsError(
                        '请您登录社保官网输入社保个人电脑号完成身份认证后,再做查询操作。')

                self.result_key = username

                # Save to meta.
                # NOTE(review): the password is stored here in plain text.
                self.result_meta['用户名'] = username
                self.result_meta['密码'] = password

                self.result_identity['task_name'] = '深圳'

                return
            except (AssertionError, InvalidParamsError) as e:
                err_msg = str(e)

        # Reached when params is falsy or validation/login failed: ask for
        # the parameters again, pre-filled with the previous values.
        # NOTE(review): if ``params`` is None, ``params.get`` below raises
        # AttributeError.
        raise AskForParamsError([
            dict(key='用户名',
                 name='用户名',
                 cls='input',
                 value=params.get('用户名', '')),
            dict(key='密码',
                 name='密码',
                 cls='input:password',
                 value=params.get('密码', '')),
            dict(key='vc',
                 name='验证码',
                 cls='data:image',
                 query={'t': 'vc'},
                 value=params.get('vc', '')),
        ], err_msg)

    def _do_login(self, username, password, vc):
        """Simulate the login flow with the web driver.

        Opens ``LOGIN_PAGE_URL`` in the coordinator's driver, fills in user
        name, password and captcha, submits the form, waits five seconds and
        inspects the resulting page HTML.

        Raises:
            InvalidParamsError: When the second ``.ng-binding`` element's text
                is exactly 16 characters long; the message is the text of the
                third ``.ng-binding`` element.

        NOTE(review): the only call to this method visible in ``_unit_login``
        is commented out.
        """
        with self.dsc.get_driver_ctx() as driver:
            # Open the login page.
            driver.get(LOGIN_PAGE_URL)
            # Wait for the lk request.
            # WebDriverWait(driver, 10).until(value_is_number((By.XPATH, '//*[@id="lk"]')))

            # Choose login by ID-card number.
            driver.find_element_by_xpath(
                '/html/body/div[2]/div/div/div/div/div/div[2]/div[2]/div/div[1]/a'
            ).click()

            username_input = driver.find_element_by_xpath(
                '//*[@id="div_dialog_login"]/div/div/div/form/div[4]/div/div[1]/div/input'
            )
            password_input = driver.find_element_by_xpath(
                '//*[@id="div_dialog_login"]/div/div/div/form/div[4]/div/div[2]/div/input'
            )
            vc_input = driver.find_element_by_xpath(
                '//*[@id="div_dialog_login"]/div/div/div/form/div[4]/div/div[3]/div/input'
            )
            submit_btn = driver.find_element_by_xpath(
                '//*[@id="div_dialog_login"]/div/div/div/form/div[5]/input[1]')

            # User name.
            username_input.clear()
            username_input.send_keys(username)

            # Password.
            password_input.clear()
            password_input.send_keys(password)

            # Captcha.
            vc_input.clear()
            vc_input.send_keys(vc)
            # ``s`` is never read afterwards in this method.
            s = driver.find_element_by_tag_name('html').get_attribute(
                'innerHTML')

            # Image.open(io.BytesIO(driver.get_screenshot_as_png())).show()
            # Submit.
            submit_btn.click()
            # Fixed five-second pause before reading the page.
            time.sleep(5)
            login_page_html = driver.find_element_by_tag_name(
                'html').get_attribute('innerHTML')
            soup = BeautifulSoup(login_page_html, 'html.parser')
            # WebDriverWait(driver, 10).until(
            #     lambda driver:
            #         EC.invisibility_of_element_located((By.XPATH, 'html/body/div[2]/div/div/div/div[1]/div/div[2]/div[2]/div/div[1]/a[1]'))(driver)
            #     or EC.element_to_be_clickable((By.XPATH, '//*[@id="div_dialog_login"]/div/div/div/form/div[5]/input[1]'))(driver))
            #
            # login_btn = driver.find_element_by_xpath(
            #     'html/body/div[2]/div/div/div/div[1]/div/div[2]/div[2]/div/div[1]/a[1]')
            #
            # s = login_btn.get_attribute('style')
            # Image.open(io.BytesIO(driver.get_screenshot_as_png())).show()
            # if not s:
            #     # failed
            #     err_msg = driver.find_element_by_xpath('//*[@id="div_dialog_login"]/div/div/div/form/div[3]/font').text
            #     raise InvalidParamsError(err_msg)
            #     # TODO
            # NOTE(review): failure is detected by a text length of 16 on the
            # second .ng-binding element -- presumably the length of a fixed
            # prompt shown when login did not succeed; confirm.
            if len(soup.select('.ng-binding')
                   [1].text) == 16:  # len(soup.findAll('a')[13].attrs)
                err_msg = soup.select('.ng-binding')[2].text
                raise InvalidParamsError(err_msg)
            else:
                # success
                print('success')
                # Image.open(io.BytesIO(driver.get_screenshot_as_png())).show()

                # Save the post-login page content for the fetch unit to parse.
                # login_page_html = driver.find_element_by_tag_name('html').get_attribute('innerHTML')
                #
                # # print(login_page_html[login_page_html.find('欢迎')-5:login_page_html.find('欢迎')+15])
                # # if login_page_html.find('<a ng-show="!ncUser" ng-click="login()" style="display: none;">')==-1:
                # resp = self.s.post(LOGIN_URL, data=dict(
                #     r=random.random(),
                #     LOGINID=username,
                #     PASSWORD=login_page_html[login_page_html.find('PASSWORD='******'&amp;IMAGCHECK=')],
                #     IMAGCHECK=vc,
                #     OPERTYPE2=3,
                #     ISBIND='false',
                #     now=time.strftime('%a %b %d %Y %H:%M:%S', time.localtime()),
                #     callback=''
                # ))
                # soup = BeautifulSoup(resp.content, 'html.parser')
                # jsonread = json.loads(soup.text.replace('(','').replace(')',''))
                # flag=jsonread['flag']
                # errormsg = jsonread['message']
                # if flag=='-1':
                #     raise InvalidParamsError(errormsg)

    def _unit_fetch_userinfo(self):
        """User-info unit: send the ``QfzscxAction.queryQfzs`` request and refresh the token.

        The request is authenticated with ``self.g.Token``; the 'Token' cookie
        of the response is stored on ``self.s.Token``. A PermissionError is
        re-raised as PreconditionNotSatisfiedError.
        """
        try:
            # Third request of the query sequence.
            cache_buster = str(random.random())
            form = dict(
                _isModel='true',
                params='{"oper":"QfzscxAction.queryQfzs","params":{},"datas":{"ncm_gt_欠费总数":{"params":{}}}}',
            )
            request_headers = {
                'X-Requested-With': 'XMLHttpRequest',
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                'Accept': 'application / json, text / plain, * / *',
                'Token': self.g.Token,
                'Connection': 'keep - alive',
            }
            response = self.s.post(USERINFO_URL + '?r=' + cache_buster,
                                   data=form,
                                   headers=request_headers,
                                   timeout=15)
            host_cookies = response.cookies._cookies['seyb.szsi.gov.cn']
            self.s.Token = host_cookies['/']['Token'].value

            # TODO: run the task; raise PermissionError when not logged in
        except PermissionError as e:
            raise PreconditionNotSatisfiedError(e)

    def _unit_fetch(self):
        """Fetch the payment details of the five insurance types.

        After an ``insertLogRecord`` request, the method queries the paginated
        payment details for each entry of ``arrtype`` and stores them under
        ``self.result_data[<type>]['data'][<year>][<month>]``. It also sums the
        personal contributions for 'old_age' and 'medical_care' and records
        the earliest and latest payment month in
        ``self.result_data['baseInfo']`` (all via ``setdefault``).

        Raises:
            PreconditionNotSatisfiedError: Wrapping a PermissionError raised
                inside the body.
        """
        try:
            strr = USERINFO_URL + '?r=' + str(random.random())
            # NOTE(review): unlike the other requests, this one has no timeout.
            resp = self.s.post(
                strr,
                data=dict(
                    _isModel='true',
                    params=
                    '{"oper":"UnitHandleCommAction.insertLogRecord","params":{},"datas":{"@tmpGtDatas":{"rightId":"500201","rightName":"参保缴费明细查询","recordType":"1"}}}'
                ),
                headers={
                    'X-Requested-With': 'XMLHttpRequest',
                    'Content-Type':
                    'application/x-www-form-urlencoded; charset=UTF-8',
                    'Accept': 'application / json, text / plain, * / *',
                    'Connection': 'keep - alive',
                    'Token': self.s.Token
                })
            self.g.Token = resp.cookies._cookies['seyb.szsi.gov.cn']['/'][
                'Token'].value

            # strr = USERINFO_URL + '?r=' + str(random.random())
            # resp = self.s.post(strr, datas, headers={'X-Requested-With': 'XMLHttpRequest',
            #                                          'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
            #                                          'Accept': 'application/json,text/plain,*/*',
            #                                          'Accept-Encoding':'gzip,deflate,br',
            #                                          'Accept-Language':'zh-CN,zh;q=0.8',
            #                                          'Connection': 'keep-alive', 'Token': self.s.Token,
            #                                          'Host': 'seyb.szsi.gov.cn', 'Origin': 'https://seyb.szsi.gov.cn',
            #                                          'Referer': 'https://seyb.szsi.gov.cn/web/ggfw/app/index.html'})
            # NOTE(review): this overwrites the token taken from the response
            # cookie just above with the session's previous token.
            self.g.Token = self.s.Token
            # Details (several insurance types). Former mapping:
            # arrtype={'01': pension, '02': unemployment, '03': medical,
            #          '04': work injury, '05': maternity}
            # Maps the suffix of the remote action name to the result key.
            arrtype = {
                'Yl': 'old_age',
                'Shiye': 'unemployment',
                'Yil': 'medical_care',
                'Gs': 'injuries',
                'Sy': 'maternity'
            }
            # Dataset names, in the same order as the entries of arrtype
            # (paired through the running index ``ii``).
            arrmingxi = [
                'ncm_glt_养老缴费明细', 'ncm_glt_失业缴费明细', 'ncm_glt_医疗缴费明细',
                'ncm_glt_工伤缴费明细', 'ncm_glt_生育缴费明细'
            ]
            # Earliest / latest payment month (YYYYMM) over all types.
            statetime = ''
            endtime = ''
            ii = 0
            for k, v in arrtype.items():
                self.result_data[v] = {}
                self.result_data[v]['data'] = {}
                # Year / month of the record handled last for this type.
                years = ''
                months = ''
                # Sum of personal contributions, reset per insurance type.
                personjfsum = 0.00
                # First page of this type's payment details.
                datas = dict(
                    _isModel='true',
                    params='{"oper": "CbjfmxcxAction.queryCbjfmx' + k +
                    '", "params": {}, "datas": {"' + arrmingxi[ii] +
                    '": {"params": {"pageSize": 10, "curPageNum": 1}, "dataset": [], "heads": [],"heads_change": []}}}'
                )
                strr = USERINFO_URL + '?r=' + str(random.random())
                resp = self.s.post(
                    strr,
                    datas,
                    headers={
                        'X-Requested-With':
                        'XMLHttpRequest',
                        'Content-Type':
                        'application/x-www-form-urlencoded; charset=UTF-8',
                        'Accept':
                        'application / json, text / plain, * / *',
                        'Connection':
                        'keep - alive',
                        'Token':
                        self.g.Token,
                        'Host':
                        'seyb.szsi.gov.cn',
                        'Origin':
                        'https://seyb.szsi.gov.cn',
                        'Referer':
                        'https://seyb.szsi.gov.cn/web/ggfw/app/index.html'
                    },
                    timeout=15)
                # Each response carries the token for the next request.
                self.g.Token = resp.cookies._cookies['seyb.szsi.gov.cn']['/'][
                    'Token'].value
                pagearr = json.loads(resp.text)
                # The bare string below is a no-op statement ("get paging").
                """获取分页"""
                if pagearr['flag'] != '-1':
                    if 'datas' in pagearr.keys():
                        pagesize = pagearr["datas"][
                            arrmingxi[ii]]['params']['pageSize']
                        rowsCount = pagearr["datas"][
                            arrmingxi[ii]]['params']['rowsCount']
                        # Number of pages = ceil(rowsCount / pagesize).
                        pagenum = rowsCount / pagesize
                        pagenums = rowsCount // pagesize
                        if pagenum > pagenums:
                            pagenums = pagenums + 1
                        for i in range(1, pagenums + 1):
                            # Page 1 reuses the response fetched above; later
                            # pages are requested here.
                            if i != 1:
                                datas = dict(
                                    _isModel='true',
                                    params=
                                    '{"oper": "CbjfmxcxAction.queryCbjfmx' +
                                    k + '", "params": {}, "datas": {"' +
                                    arrmingxi[ii] +
                                    '": {"params": {"pageSize": 10, "curPageNum": '
                                    + str(i) +
                                    ',"maxPageSize":50,"rowsCount":' +
                                    str(rowsCount) +
                                    ',"Total_showMsg":null,"Total_showMsgCell":null,"Total_Cols":[]},"heads":[],"heads_change":[],"dataset":[]}}}'
                                )
                                strr = USERINFO_URL + '?r=' + str(
                                    random.random())
                                resp = self.s.post(
                                    strr,
                                    datas,
                                    headers={
                                        'X-Requested-With':
                                        'XMLHttpRequest',
                                        'Content-Type':
                                        'application/x-www-form-urlencoded; charset=UTF-8',
                                        'Accept':
                                        'application / json, text / plain, * / *',
                                        'Connection':
                                        'keep - alive',
                                        'Token':
                                        self.g.Token,
                                        'Host':
                                        'seyb.szsi.gov.cn',
                                        'Origin':
                                        'https://seyb.szsi.gov.cn',
                                        'Referer':
                                        'https://seyb.szsi.gov.cn/web/ggfw/app/index.html'
                                    },
                                    timeout=15)
                                self.g.Token = resp.cookies._cookies[
                                    'seyb.szsi.gov.cn']['/']['Token'].value
                            mx = json.loads(resp.text)["datas"]
                            # The inner loop reuses the name ``i``; the outer
                            # ``range`` iteration is not affected by that.
                            for i in range(0,
                                           len(mx[arrmingxi[ii]]['dataset'])):
                                # NOTE(review): ``arr`` is recreated for every
                                # record and assigned to the month entry at the
                                # end of the iteration, so a month keeps only
                                # the record processed last for it.
                                arr = []
                                if v == 'old_age' or v == 'medical_care':
                                    personjfsum = personjfsum + float(
                                        mx[arrmingxi[ii]]['dataset'][i]['个人缴'])
                                    # enterjfsum=enterjfsum+float(mx['dataset'][i]['单位缴'])
                                # Strip the year/month characters; a 5-char
                                # result gets a '0' inserted before its last
                                # character to form YYYYMM.
                                yearmonth = mx[arrmingxi[ii]]['dataset'][i][
                                    '缴费年月'].replace('年', '').replace('月', '')
                                if len(yearmonth) == 5:
                                    yearmonth = yearmonth[:4] + '0' + yearmonth[
                                        -1:]
                                # Track the overall earliest / latest month.
                                if statetime == '':
                                    statetime = yearmonth
                                elif int(statetime) > int(yearmonth):
                                    statetime = yearmonth
                                if endtime == '':
                                    endtime = yearmonth
                                elif int(endtime) < int(yearmonth):
                                    endtime = yearmonth
                                # A new year starts a fresh (empty) year dict.
                                if years == '' or years != yearmonth[:4]:
                                    years = yearmonth[:4]
                                    self.result_data[v]['data'][years] = {}
                                    # Same month number as the previous record
                                    # but a different year: the month check
                                    # below will not fire, so create the month
                                    # entry here.
                                    if len(months) > 0:
                                        if months == yearmonth[-2:]:
                                            self.result_data[v]['data'][years][
                                                months] = {}
                                if months == '' or months != yearmonth[-2:]:
                                    months = yearmonth[-2:]
                                    self.result_data[v]['data'][years][
                                        months] = {}
                                # Normalised record for this payment row.
                                mxdic = {
                                    '缴费时间': yearmonth,
                                    '缴费类型': '',
                                    '缴费基数':
                                    mx[arrmingxi[ii]]['dataset'][i]['缴费工资'],
                                    '公司缴费':
                                    mx[arrmingxi[ii]]['dataset'][i]['单位缴'],
                                    '个人缴费':
                                    mx[arrmingxi[ii]]['dataset'][i]['个人缴'],
                                    '缴费单位':
                                    mx[arrmingxi[ii]]['dataset'][i]['单位名称'],
                                    '单位编号':
                                    mx[arrmingxi[ii]]['dataset'][i]['单位编号'],
                                    '缴费合计':
                                    mx[arrmingxi[ii]]['dataset'][i]['缴费合计'],
                                    '备注': mx[arrmingxi[ii]]['dataset'][i]['备注']
                                }
                                arr.append(mxdic)
                                self.result_data[v]['data'][years][
                                    months] = arr

                        # Accumulated personal contributions for the pension
                        # and medical types.
                        if v == 'old_age':
                            self.result_data["baseInfo"].setdefault(
                                '个人养老累计缴费', personjfsum)
                        if v == 'medical_care':
                            self.result_data["baseInfo"].setdefault(
                                '个人医疗累计缴费', personjfsum)
                ii = ii + 1
            self.result_data["baseInfo"].setdefault('最近缴费时间', endtime)
            self.result_data["baseInfo"].setdefault('开始缴费时间', statetime)
            # TODO: run the task; raise PermissionError when not logged in
            return
        except PermissionError as e:
            raise PreconditionNotSatisfiedError(e)

    # Refresh the captcha image
    def _new_vc(self):
        """Request a new captcha image from ``VC_URL``.

        Returns a dict with ``cls`` set to 'data:image', the raw response body
        as ``content`` and the response's Content-Type header as
        ``content_type``.
        """
        response = self.s.get(VC_URL, timeout=20)
        return {
            'cls': 'data:image',
            'content': response.content,
            'content_type': response.headers.get('Content-Type'),
        }
Пример #11
0
class Task(AbsFetchTask):
    """Test task that logs in through a Selenium web driver.

    The login form is driven in the web driver; on success the driver's
    cookies are synced to the session through ``self.dsc`` and the fetch unit
    continues with plain HTTP requests on ``self.s``.
    """

    task_info = {
        'task_name': '测试selenium登录',
        'help': '测试selenium登录'
    }

    def _get_common_headers(self):
        """Return the header dict (a fixed User-Agent) supplied by this task."""
        return {
            'User-Agent': "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"
        }

    def _prepare(self, data=None):
        """Run the base preparation and build the driver/session coordinator.

        If ``self.state`` holds a serialized driver under ``'driver_data'`` it
        is restored with ``dill`` and the entry is removed from the state.
        """
        super()._prepare(data)

        _driver = None
        if 'driver_data' in self.state:
            # NOTE(review): dill.loads can execute arbitrary code while
            # unpickling; this is only safe if ``self.state`` can never carry
            # data from an untrusted source -- confirm.
            _driver = dill.loads(self.state['driver_data'])
            del self.state['driver_data']
        self.dsc = DriverRequestsCoordinator(
            d=_driver,
            create_session=self._create_session,
            create_driver=self._create_driver)

    def _create_driver(self):
        """Create a driver and open the login page in it."""
        driver = create_driver()
        driver.get(LOGIN_PAGE_URL)
        return driver

    def _create_session(self):
        """Return the task's existing session (``self.s``)."""
        return self.s

    def _update_session_data(self):
        """Persist session data; also serialize the driver once it exists."""
        super()._update_session_data()
        if self.dsc.d_is_created:
            self.state['driver_data'] = dill.dumps(self.dsc.d)

    def _setup_task_units(self):
        """Register the login unit and the fetch unit that depends on it."""
        self._add_unit(self._unit_login)
        self._add_unit(self._unit_fetch, self._unit_login)

    def _query(self, params: dict):
        """Answer an auxiliary query; ``t == 'vc'`` returns a new captcha."""
        t = params.get('t')
        if t == 'vc':
            return self._new_vc()

    def _new_vc(self):
        """Ensure driver and session exist, then download a captcha image.

        Returns:
            dict: ``cls='data:image'``, the raw image bytes in ``content`` and
            the response's ``Content-Type`` header in ``content_type``.
        """
        self.dsc.create_driver()
        self.dsc.create_session()

        vc_url = VC_IMAGE_URL + str(int(time.time() * 1000))
        resp = self.s.get(vc_url)
        return dict(cls='data:image', content=resp.content, content_type=resp.headers.get('Content-Type'))

    def _params_handler(self, params: dict):
        """Fill missing login parameters from ``self.prepared_meta``.

        Nothing is filled when the task is starting with no parameters.
        """
        if not (self.is_start and not params):
            meta = self.prepared_meta
            if '身份证号' not in params:
                params['身份证号'] = meta.get('身份证号')
            if '查询密码' not in params:
                # _unit_login stores the password under '密码', so fall back
                # to that key when '查询密码' is absent.
                # NOTE(review): presumably prepared_meta is restored from an
                # earlier run's result_meta -- confirm.
                params['查询密码'] = meta.get('查询密码', meta.get('密码'))
        return params

    def _param_requirements_handler(self, param_requirements, details):
        """Drop requirements whose value is already available in the meta.

        Returns:
            list: the entries of ``param_requirements`` that still have to be
            asked for, in their original order.
        """
        meta = self.prepared_meta
        res = []
        for pr in param_requirements:
            # TODO: inspect ``details`` further
            key = pr['key']
            if key == '身份证号' and '身份证号' in meta:
                continue
            # Accept either key for the password, mirroring _params_handler.
            if key == '查询密码' and ('查询密码' in meta or '密码' in meta):
                continue
            res.append(pr)
        return res

    def _check_login_params(self, params):
        """Check that every login parameter is present.

        Raises:
            AssertionError: with a user-facing message for the first missing
                item. Raised explicitly instead of via ``assert`` so the
                checks are not stripped under ``python -O``.
        """
        if params is None:
            raise AssertionError('缺少参数')
        required = (
            ('身份证号', '缺少身份证号'),
            ('查询密码', '缺少查询密码'),
            ('vc', '缺少验证码'),
        )
        for key, message in required:
            if key not in params:
                raise AssertionError(message)

    def _unit_login(self, params=None):
        """Log in with ``params`` or ask the caller for the login parameters.

        On success the result key and result meta are recorded and the method
        returns ``None``.

        Raises:
            AskForParamsError: when ``params`` is empty/``None`` or validation
                or login failed; carries the error message of the failure, if
                any.
        """
        err_msg = None
        if params:
            try:
                self._check_login_params(params)
                username = params['身份证号']
                password = params['查询密码']
                vc = params['vc']

                self._do_login(username, password, vc)
                # login succeeded
                self.result_key = username
                self.result_meta.update({
                    '身份证号': username,
                    '密码': password
                })
                return
            except (AssertionError, InvalidParamsError) as e:
                err_msg = str(e)

        # ``params`` defaults to None; read the prefill values from an empty
        # dict in that case instead of failing with AttributeError on ``.get``.
        entered = params or {}
        raise AskForParamsError([
            dict(key='身份证号', name='身份证号', cls='input', value=entered.get('身份证号', '')),
            dict(key='查询密码', name='个人编号', cls='input', value=entered.get('查询密码', '')),
            dict(key='vc', name='验证码', cls='data:image', query={'t': 'vc'}),
        ], err_msg)

    def _do_login(self, username, password, vc):
        """Simulate the login in the web driver.

        Raises:
            InvalidParamsError: if the browser does not end up on the
                post-login page after submitting the form.
        """
        with self.dsc.get_driver_ctx(excepted_exceptions=(InvalidParamsError,)) as driver:
            # switch to the "log in with ID number" tab
            driver.find_element_by_xpath('/html/body/table[2]/tbody/tr[3]/td/table/tbody/tr/td/div/form/div[1]/ul/li[3]/a').click()

            username_input = driver.find_element_by_xpath('//*[@id="bh1"]')
            password_input = driver.find_element_by_xpath('//*[@id="mm1"]')
            vc_input = driver.find_element_by_xpath('//*[@id="login_tab_2"]/div/div[3]/input')
            submit_btn = driver.find_element_by_xpath('//*[@id="login_tab_2"]/div/div[4]/input[1]')

            # user name
            username_input.clear()
            username_input.send_keys(username)

            # password and captcha
            password_input.clear()
            password_input.send_keys(password)
            vc_input.clear()
            vc_input.send_keys(vc)
            # submit
            submit_btn.click()

            if driver.current_url != 'http://www.bjgjj.gov.cn/wsyw/wscx/gjjcx-choice.jsp':
                raise InvalidParamsError('登录失败,请检查输入')

            # login succeeded: sync the driver's cookies to the session
            self.dsc.create_session()
            self.dsc.inc_and_sync_d_cookies()

            # keep the post-login page for the fetch unit to parse
            self.g.login_page_html = driver.find_element_by_tag_name('html').get_attribute('innerHTML')
            self.g.current_url = driver.current_url

    def _unit_fetch(self):
        """Follow the second link of the post-login page and record it.

        Raises:
            PreconditionNotSatisfiedError: when an InvalidConditionError is
                raised while fetching.
        """
        from urllib.parse import urljoin

        try:
            # TODO:
            soup = bs4.BeautifulSoup(self.g.login_page_html, 'html.parser')
            a = soup.select('a')[1]
            link = a.attrs['onclick'].split('"')[1]
            # Resolve against the page URL with URL semantics; os.path.join
            # mishandles root-relative/absolute links and is OS dependent.
            link = urljoin(self.g.current_url, link)

            resp = self.s.get(link)
            print(html.unescape(resp.text))
            self.result_data.update({
                'xxx': a.text,
                'link': link
            })
            self.result_identity.update({
                'task_name': self.task_info['task_name']
            })
        except InvalidConditionError as e:
            raise PreconditionNotSatisfiedError(e)
Пример #12
0
class Task(AbsFetchTask):
    # Static task metadata: the city name, an ``expect_time`` value, the help
    # text (an HTML <li> snippet) and the developer contact list.
    task_info = dict(
        city_name="北京市",
        expect_time=10,
        help=
        """<li>首次登陆查询功能,验证方式必须选择联名卡号;初始密码为身份证后四位阿拉伯数字+00。为了保证您的个人信息安全,请您及时修改初始密码,如有问题请拨打“住房公积金热线12329”咨询</li>
            """,
        developers=[{
            'name': '赵伟',
            'email': '*****@*****.**'
        }])

    def _get_common_headers(self):
        """Build the header dict for this task: only the module's User-Agent."""
        headers = {}
        headers['User-Agent'] = USER_AGENT
        return headers

    def _prepare(self, data=None):
        """Run the base preparation, then create the coordinator on ``self.dsc``.

        The coordinator receives the task's session and the driver factory.
        """
        super()._prepare(data)

        coordinator_options = {
            's': self.s,
            'create_driver': self._create_driver,
        }
        self.dsc = DriverRequestsCoordinator(**coordinator_options)

    def _create_driver(self):
        """Create the web driver used for the login.

        The driver is started with a direct (no proxy) connection and given
        page-load and script timeouts.

        Returns:
            The configured driver.
        """
        driver = new_driver(user_agent=USER_AGENT,
                            js_re_ignore='/PicCheckCode1/g')
        # Work on a private copy: DesiredCapabilities.PHANTOMJS is a shared
        # class-level dict and add_to_capabilities() writes into the dict it
        # is given, which would otherwise leak the proxy setting to every
        # other user of that dict in the process.
        capabilities = webdriver.DesiredCapabilities.PHANTOMJS.copy()
        proxy = webdriver.Proxy()
        proxy.proxy_type = ProxyType.DIRECT
        proxy.add_to_capabilities(capabilities)
        driver.start_session(capabilities)
        # driver.get(url) has been seen to hang forever without raising; a
        # page-load timeout keeps the program from getting stuck.
        driver.set_page_load_timeout(13)
        # Same 13 second limit for script execution.
        driver.set_script_timeout(13)
        return driver

    def _setup_task_units(self):
        self._add_unit(self._unit_login)
        self._add_unit(self._unit_fetch, self._unit_login)

    def _query(self, params: dict):
        t = params.get('t')
        if t == 'vc':
            return self._new_vc()

    def _new_vc(self):
        """Download a new captcha image.

        Returns:
            dict: ``cls='data:image'``, the raw image bytes in ``content`` and
            the response's ``Content-Type`` header in ``content_type``.
        """
        # Millisecond timestamp appended to the URL. The multiplication has
        # to happen before int(): int(time.time()) * 1000 only changes once
        # per second, so two requests within the same second would ask for
        # the identical URL.
        vc_url = VC_IMAGE_URL + str(int(time.time() * 1000))
        # NOTE(review): verify=False presumably turns off TLS certificate
        # verification for this request -- confirm it is still required.
        resp = self.s.get(vc_url, verify=False)
        return dict(cls='data:image',
                    content=resp.content,
                    content_type=resp.headers.get('Content-Type'))

    def _params_handler(self, params: dict):
        if not (self.is_start and not params):
            meta = self.prepared_meta
            if 'bh1' not in params:
                params['bh1'] = meta.get('账号')
            if 'mm1' not in params:
                params['mm1'] = meta.get('密码')
            if 'bh5' not in params:
                params['bh5'] = meta.get('账号')
            if 'mm5' not in params:
                params['mm5'] = meta.get('密码')
            if 'other' not in params:
                params['other'] = meta.get('类型Code')
        return params

    def _param_requirements_handler(self, param_requirements, details):
        meta = self.prepared_meta
        res = []
        for pr in param_requirements:
            # 身份证
            if meta['类型Code'] == '3':
                if pr['key'] == 'bh5':
                    continue
                if pr['key'] == 'mm5':
                    continue
                if pr['key'] == 'bh1' and '账号' in meta:
                    continue
                elif pr['key'] == 'mm1' and '密码' in meta:
                    continue
                res.append(pr)
            # 联名卡
            elif meta['类型Code'] == '1':
                if pr['key'] == 'bh1':
                    continue
                if pr['key'] == 'mm1':
                    continue
                if pr['key'] == 'bh5' and '账号' in meta:
                    continue
                elif pr['key'] == 'mm5' and '密码' in meta:
                    continue
                res.append(pr)
            else:
                res.append(pr)
        return res

    def _check_login_parama(self, params):
        assert params is not None, '缺少参数'
        assert 'other' in params, '请选择登录方式'
        if params["other"] == "1":
            assert 'bh5' in params, '缺少联名卡号'
            assert 'mm5' in params, '缺少密码'
        elif params["other"] == "3":
            assert 'bh1' in params, '缺少身份证号码'
            assert 'mm1' in params, '缺少密码'
            r = r'(^\d{15}$)|(^\d{18}$)|(^\d{17}(\d|X|x)$)'
            assert re.findall(r, params['bh1']), '请输入有效的身份证编号'

        assert 'vc' in params, '缺少验证码'

        # TODO: 检验身份证
        # TODO: 检验密码
        # TODO: 检验验证码

    def _unit_login(self, params=None):
        err_msg = None
        if params:
            try:
                self._check_login_parama(params)
                if params["other"] == "3":
                    code = "1"
                elif params["other"] == "1":
                    code = "5"
                else:
                    code = "1"
                username = params['bh' + code]
                password = params['mm' + code]
                vc = params['vc']

                self._do_login(username, password, vc, params["other"])
                # 登录成功
                self.result_key = username
                self.result_meta.update({
                    '账号': username,
                    '密码': password,
                    '类型Code': params["other"]
                })
                return
            except (AssertionError, InvalidParamsError) as e:
                err_msg = str(e)
        raise AskForParamsError([
            dict(
                key='other',
                name=
                '[{"tabName":"身份证号","tabCode":"3","isEnable":"1"},{"tabName":"联名卡号","tabCode":"1","isEnable":"1"}]',
                cls='tab',
                value=params.get('类型Code', '')),
            dict(key='bh1',
                 name='身份证号',
                 cls='input',
                 tabCode="3",
                 value=params.get('账号', '')),
            dict(key='mm1',
                 name='密码',
                 cls='input:password',
                 tabCode="3",
                 value=params.get('密码', '')),
            dict(key='bh5',
                 name='联名卡号',
                 cls='input',
                 tabCode="1",
                 value=params.get('账号', '')),
            dict(key='mm5',
                 name='密码',
                 cls='input:password',
                 tabCode="1",
                 value=params.get('密码', '')),
            dict(key='vc',
                 name='验证码',
                 cls='data:image',
                 query={'t': 'vc'},
                 tabCode="[3,1]",
                 value=''),
        ], err_msg)

    def _do_login(self, username, password, vc, type):
        """Simulate the login in the web driver and keep the landing page.

        After submitting the form the page HTML and URL are stored on
        ``self.g`` for the fetch unit.

        Args:
            username: text typed into the account field.
            password: text typed into the password field.
            vc: captcha text typed into the captcha field.
            type: login tab code, "1" or "3"; it selects which tab ``<li>`` is
                clicked and which form fields are filled.

        Raises:
            InvalidParamsError: if ``type`` is not a known tab code, if the
                browser lands on neither ``LOGINED_URL`` nor a URL containing
                ``RESULT_URL``, or if the landing page has no company table
                but shows an error message.
        """
        # tab code -> (suffix of the bh*/mm* field ids, login_tab_* index)
        tab_targets = {"1": ("5", "0"), "3": ("1", "2")}
        if type not in tab_targets:
            # An unknown code used to fall through to the ``id``/``input``
            # builtins and fail with a TypeError while building the XPath.
            raise InvalidParamsError('请选择登录方式')
        field_suffix, tab_index = tab_targets[type]

        with self.dsc.get_driver_ctx() as driver:
            # open the login page
            driver.get(LOGIN_PAGE_URL)
            # wait until the "lk" element satisfies value_is_number
            WebDriverWait(driver, 10).until(
                value_is_number((By.XPATH, '//*[@id="lk"]')))

            # switch to the requested login tab
            driver.find_element_by_xpath(
                '/html/body/table[2]/tbody/tr[3]/td/table/tbody/tr/td/div/form/div[1]/ul/li['
                + type + ']/a').click()

            tab_xpath = '//*[@id="login_tab_' + tab_index + '"]'
            username_input = driver.find_element_by_xpath(
                '//*[@id="bh' + field_suffix + '"]')
            password_input = driver.find_element_by_xpath(
                '//*[@id="mm' + field_suffix + '"]')
            vc_input = driver.find_element_by_xpath(
                tab_xpath + '/div/div[3]/input')
            submit_btn = driver.find_element_by_xpath(
                tab_xpath + '/div/div[4]/input[1]')

            # user name
            username_input.clear()
            username_input.send_keys(username)

            # password
            password_input.clear()
            password_input.send_keys(password)

            # captcha
            vc_input.clear()
            vc_input.send_keys(vc)

            # submit
            submit_btn.click()

            # keep the post-login page for the fetch unit to parse
            self.g.login_page_html = driver.find_element_by_tag_name(
                'html').get_attribute('innerHTML')
            # Read the URL once so the stored value and the checks agree.
            landed_url = driver.current_url
            self.g.current_url = landed_url
            if landed_url != LOGINED_URL:
                # Sometimes the site skips the company list and goes straight
                # to a result page; wrap that URL in a one-row company table
                # so the fetch unit can treat both cases alike.
                if RESULT_URL in landed_url:
                    self.g.login_page_html = '''
                    <table id="new-mytable">
                        <tbody><tr style="background-color: rgb(241, 241, 241);">
                        <th class="style21"><div align="center">开户登记号</div></th>
                        <th class="style21"><div align="center">单位名称</div></th>
                        <th class="style21"><div align="center">缴存状态</div></th>
                        </tr>
                        <script>mnbbc='3';</script>
                        <tr style="background-color: rgb(241, 241, 241);">
                        <td class="style21"><div align="center">000000</div></td>
                        <td class="style21"><div><a href="#" onclick='javascript:window.open("''' + landed_url + '''","","top=0,left=0,toolbar=no,location=no,status=no,menubar=no,scrollbars=yes,resizable=yes,width=550,height=500");'></a></div></td>
                        <td class="style21"><div align="center">缴存</div></td>
                        </tr>
                        </tbody>
                    </table>
                    '''
                else:
                    raise InvalidParamsError('登录失败,请检查输入')
            else:
                # Some accounts reach the post-login URL but only get a
                # warning (e.g. insecure password) instead of the list.
                soup = bs4.BeautifulSoup(self.g.login_page_html, 'html.parser')
                companyList = soup.findAll("table", {"id": "new-mytable"})
                error = soup.find("span", {"class": "tittle1"})
                if not companyList and error:
                    errorInfo = error.text.replace("\n", "")
                    raise InvalidParamsError(errorInfo)

    def _unit_fetch(self):
        try:
            # 初始化抓取信息
            self.result_data["baseInfo"] = {}
            self.result_data["companyList"] = []
            self.result_data["detail"] = {"data": {}}

            # 渲染登录后页面
            soup = bs4.BeautifulSoup(self.g.login_page_html, 'html.parser')
            companyList = soup.findAll("table", {"id": "new-mytable"})
            name = ''
            target_id = ''
            paymentStart = ''

            # 累计汇缴月数
            payMonth = []
            last_income = {"date": "", "data": ""}

            if len(companyList) > 0:
                trs = companyList[0].findAll("tr")
                i = 0
                trs.reverse()
                for tr in trs:
                    tds = tr.findAll("td")
                    if tr != trs[len(trs) - 1]:
                        a = tds[1].findAll("a")[0]
                        link = a.attrs['onclick'].split('"')[1]
                        link = parse.urljoin(self.g.current_url, link)
                        resp = self.s.get(link, verify=False)
                        try:
                            result = bs4.BeautifulSoup(resp.text,
                                                       'html.parser')
                            if result:
                                table = result.findAll("table")[1]
                                if table:
                                    _tds = table.findAll("td")
                                    name = _tds[27].text
                                    target_id = _tds[33].text
                                    paymentStart = _tds[45].text

                                    self.result_data["baseInfo"] = {
                                        "姓名":
                                        _tds[27].text,
                                        "证件号":
                                        _tds[33].text,
                                        "证件类型":
                                        _tds[31].text,
                                        "个人登记号":
                                        _tds[29].text,
                                        "更新时间":
                                        datetime.datetime.now().strftime(
                                            '%Y-%m-%d'),
                                        '城市名称':
                                        '北京市',
                                        '城市编号':
                                        '110100',
                                        '最近汇款日期':
                                        '',
                                        '最近汇款金额':
                                        0.0,
                                        '累计汇款次数':
                                        0.0
                                    }
                                    self.result_data["companyList"].append({
                                        "最后业务日期":
                                        re.sub('\s', '', _tds[53].text),
                                        "单位名称":
                                        _tds[37].text,
                                        "单位登记号":
                                        _tds[35].text,
                                        "所属管理部编号":
                                        _tds[39].text,
                                        "所属管理部名称":
                                        _tds[41].text,
                                        "当前余额":
                                        re.sub('\s', '',
                                               _tds[43].text).replace("元", ""),
                                        "帐户状态":
                                        _tds[45].text,
                                        "当年缴存金额":
                                        re.sub('\s', '',
                                               _tds[47].text).replace("元", ""),
                                        "当年提取金额":
                                        re.sub('\s', '',
                                               _tds[49].text).replace("元", ""),
                                        "上年结转余额":
                                        re.sub('\s', '',
                                               _tds[51].text).replace("元", ""),
                                        "转出金额":
                                        re.sub('\s', '',
                                               _tds[55].text).replace("元", "")
                                    })
                                detail_tag = result.findAll(
                                    "span", {"class": "style2"})
                                # 20177月份后数据不太准确
                                temp_detail = result.findAll(
                                    "table", {"id": "tab-style"})

                                temp_all_date = []
                                if len(detail_tag) > 0:
                                    detail_a = detail_tag[1].findAll("a")[0]
                                    detail_link = detail_a.attrs[
                                        'onclick'].split("'")[1]
                                    detail_link = parse.urljoin(
                                        self.g.current_url, detail_link)
                                    detail_resp = self.s.get(detail_link,
                                                             verify=False)
                                    detail_result = bs4.BeautifulSoup(
                                        detail_resp.content, 'html.parser')
                                    detail_table = detail_result.find(
                                        "table", {"id": "new-mytable3"})
                                    if detail_table:
                                        detail_trs = detail_table.findAll("tr")

                                        for detail_tr in detail_trs:
                                            detail_tds = detail_tr.findAll(
                                                "td")
                                            if detail_tds.__len__() == 0:
                                                continue
                                            if detail_tr != detail_trs[0]:
                                                date = re.sub(
                                                    '\s', '',
                                                    detail_tds[0].text)
                                                try:
                                                    self.result_data["detail"][
                                                        "data"][date[0:4]]
                                                except KeyError:
                                                    self.result_data["detail"][
                                                        "data"][
                                                            date[0:4]] = {}
                                                try:
                                                    self.result_data["detail"][
                                                        "data"][date[0:4]][
                                                            date[4:6]]
                                                except KeyError:
                                                    self.result_data["detail"][
                                                        "data"][date[0:4]][
                                                            date[4:6]] = []

                                                hj = re.sub(
                                                    '\s', '',
                                                    detail_tds[2].text)
                                                sr = re.sub(
                                                    '\s', '', detail_tds[3].
                                                    text).replace(",", "")
                                                try:
                                                    if (hj.find("汇缴")
                                                            or hj.find("补缴")
                                                        ) and Decimal(sr) > 0:
                                                        if last_income[
                                                                "date"] == "":
                                                            last_income.update(
                                                                {
                                                                    "date":
                                                                    date,
                                                                    "data": sr
                                                                })
                                                        else:
                                                            if int(last_income[
                                                                    "date"]
                                                                   ) < int(
                                                                       date):
                                                                last_income.update(
                                                                    {
                                                                        "date":
                                                                        date,
                                                                        "data":
                                                                        sr
                                                                    })
                                                        if date[0:
                                                                6] not in payMonth:
                                                            payMonth.append(
                                                                date[0:6])
                                                except:
                                                    pass
                                                temp_all_date.append(date)
                                                self.result_data["detail"][
                                                    "data"][date[0:4]][
                                                        date[4:6]].append({
                                                            "时间":
                                                            date[0:4] + "-" +
                                                            date[4:6] + "-" +
                                                            date[6:],
                                                            "类型":
                                                            re.sub(
                                                                '\s', '',
                                                                detail_tds[2].
                                                                text),
                                                            "汇缴年月":
                                                            re.sub(
                                                                '\s', '',
                                                                detail_tds[1].
                                                                text),
                                                            "收入":
                                                            re.sub(
                                                                '\s', '',
                                                                detail_tds[3].
                                                                text),
                                                            "支出":
                                                            re.sub(
                                                                '\s', '',
                                                                detail_tds[4].
                                                                text),
                                                            "余额":
                                                            re.sub(
                                                                '\s', '',
                                                                detail_tds[5].
                                                                text),
                                                            "单位名称":
                                                            _tds[37].text
                                                        })
                                if len(temp_detail) > 0:
                                    detail_trs = temp_detail[0].findAll("tr")
                                    for detail_tr in detail_trs:
                                        detail_tds = detail_tr.findAll("td")
                                        if detail_tds.__len__() == 0:
                                            continue
                                        if detail_tr != detail_trs[0]:
                                            date = re.sub(
                                                '\s', '', detail_tds[0].text)
                                            if date not in temp_all_date:
                                                try:
                                                    self.result_data["detail"][
                                                        "data"][date[0:4]]
                                                except KeyError:
                                                    self.result_data["detail"][
                                                        "data"][
                                                            date[0:4]] = {}
                                                try:
                                                    self.result_data["detail"][
                                                        "data"][date[0:4]][
                                                            date[4:6]]
                                                except KeyError:
                                                    self.result_data["detail"][
                                                        "data"][date[0:4]][
                                                            date[4:6]] = []

                                                hj = re.sub(
                                                    '\s', '',
                                                    detail_tds[2].text)
                                                sr = re.sub(
                                                    '\s', '', detail_tds[3].
                                                    text).replace(",", "")
                                                try:
                                                    if (hj.find("汇缴")
                                                            or hj.find("补缴")
                                                        ) and Decimal(sr) > 0:
                                                        if last_income[
                                                                "date"] == "":
                                                            last_income.update(
                                                                {
                                                                    "date":
                                                                    date,
                                                                    "data": sr
                                                                })
                                                        else:
                                                            if int(last_income[
                                                                    "date"]
                                                                   ) < int(
                                                                       date):
                                                                last_income.update(
                                                                    {
                                                                        "date":
                                                                        date,
                                                                        "data":
                                                                        sr
                                                                    })
                                                        if date[0:
                                                                6] not in payMonth:
                                                            payMonth.append(
                                                                date[0:6])
                                                except:
                                                    pass
                                                self.result_data["detail"][
                                                    "data"][date[0:4]][
                                                        date[4:6]].append({
                                                            "时间":
                                                            date[0:4] + "-" +
                                                            date[4:6] + "-" +
                                                            date[6:],
                                                            "类型":
                                                            re.sub(
                                                                '\s', '',
                                                                detail_tds[2].
                                                                text),
                                                            "汇缴年月":
                                                            re.sub(
                                                                '\s', '',
                                                                detail_tds[1].
                                                                text),
                                                            "收入":
                                                            re.sub(
                                                                '\s', '',
                                                                detail_tds[3].
                                                                text),
                                                            "支出":
                                                            re.sub(
                                                                '\s', '',
                                                                detail_tds[4].
                                                                text),
                                                            "余额":
                                                            re.sub(
                                                                '\s', '',
                                                                detail_tds[5].
                                                                text),
                                                            "单位名称":
                                                            _tds[37].text
                                                        })
                        except:
                            pass
                        i = i + 1
            if paymentStart == "繳存":
                paymentStart = "繳存"
            else:
                paymentStart = "封存"
            self.result_identity.update({
                'task_name':
                self.task_info['city_name'],
                'target_name':
                name,
                'target_id':
                target_id,
                'status':
                paymentStart
            })
            try:
                self.result_data["baseInfo"].update({
                    "最近汇款日期":
                    last_income["date"][0:4] + "-" + last_income["date"][4:6],
                    "最近汇款金额":
                    float(last_income["data"]),
                    "累计汇款次数":
                    payMonth.__len__(),
                })
            except:
                pass
        except InvalidConditionError as e:
            raise PreconditionNotSatisfiedError(e)
Пример #13
0
class Task(AbsFetchTask):
    """Fetch task for Yantai (烟台).

    The help text shown to users refers to the Yantai social-insurance
    online service platform (registration and password recovery).
    """
    # noinspection PyAttributeOutsideInit
    # Static task description: city name, HTML help snippet (<li> items)
    # and the developer contact list.
    task_info = dict(
        city_name="烟台",
        help="""<li>如您未在社保网站查询过您的社保信息,请到烟台社保网上服务平台完成“注册”然后再登录。</li>
                <li>如您忘记密码,可使用注册时绑定的手机号或者电子邮箱进行密码找回;当不能通过手机和电子邮箱找回密码,需去社保机构现场重置密码。</li>""",
        developers=[{
            'name': '卜圆圆',
            'email': '*****@*****.**'
        }])

    def _get_common_headers(self):
        return {
            'User-Agent':
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36'
        }

    def _setup_task_units(self):
        self._add_unit(self._unit_login)
        self._add_unit(self._unit_fetch_name, self._unit_login)

    def _query(self, params: dict):
        t = params.get('t')
        if t == 'vc':
            return self._new_vc()

    def _prepare(self, data=None):
        """Run the base-class preparation, then build the coordinator.

        ``self.dsc`` is a ``DriverRequestsCoordinator`` created from the
        task's ``self.s`` object and the ``_create_driver`` factory.
        """
        super()._prepare(data)
        coordinator = DriverRequestsCoordinator(
            s=self.s,
            create_driver=self._create_driver,
        )
        self.dsc = coordinator

    def _create_driver(self):
        """Create a web driver and load ``MAIN_URL`` in it.

        Returns:
            The driver returned by ``new_driver`` after navigating to
            ``MAIN_URL``.
        """
        # Raw string: ``\/`` is not a valid Python escape sequence, so the
        # non-raw literal triggers an invalid-escape warning on modern
        # interpreters. The runtime value is unchanged.
        driver = new_driver(user_agent=USER_AGENT,
                            js_re_ignore=r'/web\/ImageCheck.jpg/g')
        driver.get(MAIN_URL)
        return driver

    # noinspection PyMethodMayBeStatic
    def _check_login_params(self, params):
        assert params is not None, '缺少参数'
        assert '身份证号' in params, '缺少身份证号'
        assert '密码' in params, '缺少密码'
        assert 'vc' in params, '缺少验证码'
        # other check
        身份证号 = params['身份证号']
        密码 = params['密码']

        if len(身份证号) == 0:
            raise InvalidParamsError('身份证号为空,请输入身份证号')
        elif len(身份证号) < 15:
            raise InvalidParamsError('身份证号不正确,请重新输入')

        if len(密码) == 0:
            raise InvalidParamsError('密码为空,请输入密码!')
        elif len(密码) < 6:
            raise InvalidParamsError('密码不正确,请重新输入!')

    def _params_handler(self, params: dict):
        if not (self.is_start and not params):
            meta = self.prepared_meta
            if '身份证号' not in params:
                params['身份证号'] = meta.get('身份证号')
            if '密码' not in params:
                params['密码'] = meta.get('密码')
        return params

    def _param_requirements_handler(self, param_requirements, details):
        meta = self.prepared_meta
        res = []
        for pr in param_requirements:
            # TODO: 进一步检查details
            if pr['key'] == '身份证号' and '身份证号' in meta:
                continue
            elif pr['key'] == '密码' and '密码' in meta:
                continue
            res.append(pr)
        return res

    def _unit_login(self, params=None):
        """Log in with the supplied credentials or ask the caller for them.

        When parameters are available they are validated, the web-driver
        login is performed, and on success the session uuid, result key,
        metadata and identity fields are stored on the task.

        Args:
            params: mapping with '身份证号', '密码' and 'vc', or None/empty on
                the first call.

        Raises:
            AskForParamsError: no parameters were given, or validation/login
                failed; carries the input-field descriptions and the error
                message (None when nothing was attempted).
        """
        err_msg = None
        if not self.is_start or params:
            # Either a retry, or the first call already carried parameters.
            try:
                self._check_login_params(params)
                id_num = params['身份证号']
                password = params['密码']
                vc = params['vc']
                # An earlier direct-HTTP login (writeMM2Temp / doLogon with an
                # md5-hashed password) was replaced by the web-driver flow.
                self._do_login(id_num, password, vc)

                page_text = self.s.soup.text
                if page_text:
                    if len(page_text) > 20:
                        # SECURITY: this text comes from the remote page. The
                        # original code passed it to eval(); literal_eval
                        # accepts the same dict literal but cannot execute
                        # arbitrary expressions.
                        import ast
                        session_info = ast.literal_eval(
                            page_text.replace('true', '"true"'))
                        self.g.usersession_uuid = session_info[
                            '__usersession_uuid']
                    else:
                        # Short text is treated as the site's error message.
                        raise InvalidParamsError(page_text)

                    self.result_key = id_num
                    # Persist the credentials in the task metadata.
                    # NOTE(review): the password is stored in plain text.
                    self.result_meta['身份证号'] = id_num
                    self.result_meta['密码'] = password

                    self.result_identity['task_name'] = '烟台'
                    self.result_identity['target_id'] = id_num

                return
            except (AssertionError, InvalidParamsError) as e:
                err_msg = str(e)

        # ``params`` defaults to None; without this guard the very first call
        # crashed with AttributeError instead of asking for input.
        params = params or {}
        raise AskForParamsError([
            dict(key='身份证号',
                 name='身份证号',
                 cls='input',
                 value=params.get('身份证号', '')),
            dict(key='密码',
                 name='密码',
                 cls='input:password',
                 value=params.get('密码', '')),
            dict(key='vc', name='验证码', cls='data:image', query={'t': 'vc'}),
        ], err_msg)

    def _do_login(self, username, password, vc):
        """Simulate the login through the web driver.

        Fills the user name, password and captcha fields on ``MAIN_URL``,
        triggers the page's ``onLogin`` script and waits a fixed 8 seconds.
        If the driver has left ``MAIN_URL`` the resulting page HTML is parsed
        and stored in ``self.s.soup`` for the following steps.

        Args:
            username: value typed into the ``yhmInput`` field.
            password: value typed into the ``mmInput`` field.
            vc: captcha answer typed into the ``authcode_result`` field.

        Raises:
            InvalidParamsError: the driver is still on ``MAIN_URL`` after the
                wait; the message is the alert text when one can be read,
                otherwise a generic failure message.
        """
        with self.dsc.get_driver_ctx() as driver:
            # Open the login page.
            driver.get(MAIN_URL)

            username_input = driver.find_element_by_xpath(
                '//*[@id="yhmInput"]')
            password_input = driver.find_element_by_xpath('//*[@id="mmInput"]')
            vc_input = driver.find_element_by_xpath(
                '//*[@id="authcode_result"]')

            # User name, password, captcha -- in that order.
            for field, text in ((username_input, username),
                                (password_input, password),
                                (vc_input, vc)):
                field.clear()
                field.send_keys(text)

            # A leftover debugging call that opened a screenshot in an image
            # viewer on every login was removed here.

            # Submit through the page's own login script.
            driver.execute_script(
                'onLogin("1.0.68","105","mainFrame.jsp?","1","")')
            time.sleep(8)

            if driver.current_url != MAIN_URL:
                print('登录成功')
                # Keep the post-login page so later steps can parse it.
                login_page_html = driver.find_element_by_tag_name(
                    'html').get_attribute('innerHTML')
                self.s.soup = BeautifulSoup(login_page_html, 'html.parser')
                return

            # Still on the login page: report the alert text if available.
            err_msg = '登录失败,请检查输入'
            try:
                # Accessing the alert may itself fail (e.g. when no alert is
                # open), so it must sit inside the try block.
                err_msg = driver.switch_to.alert.text
            except Exception:
                # Best effort only: fall back to the generic message.
                pass
            raise InvalidParamsError(err_msg)

    def _unit_fetch_name(self):
        """Fetch basic info and the payment history of every insurance type.

        Fills ``self.result_data['baseInfo']`` and
        ``self.result_identity`` from the main-info query, then stores one
        payment history per insurance type under the keys ``old_age``,
        ``medical_care``, ``injuries``, ``maternity``, ``unemployment`` and
        ``serious_illness``. Each history has the shape
        ``result_data[key]['data'][YYYY][MM] -> list of record dicts``.

        Raises:
            PreconditionNotSatisfiedError: wraps a ``PermissionError`` raised
                while fetching or parsing.
        """
        try:
            data = self.result_data

            # ---- basic info ----
            soup = self._post_query_soup(INFO_URL, 'returnMain')
            inputs = soup.findAll('input')

            def value_of(index):
                return inputs[index].attrs['value']

            def status_of(index):
                return '正常参保' if value_of(index) == '参保缴费' else '停缴'

            # Enrolment start values of the six insurance rows on the page.
            start_dates = [value_of(i) for i in (6, 10, 14, 18, 22, 26)]
            insurance_status = {
                '养老': status_of(7),
                '医疗': status_of(11),
                '失业': status_of(15),
                '工伤': status_of(19),
                '生育': status_of(23),
                # The sixth insurance name is taken from the page itself.
                value_of(25): status_of(27),
            }
            data['baseInfo'] = {
                '姓名': value_of(0),
                '身份证号': value_of(1),
                '手机号码': value_of(2),
                '家庭住址': value_of(3),
                '通讯地址': value_of(4),
                '五险状态': insurance_status,
                '开始缴费时间': min(start_dates),
                "更新时间": datetime.datetime.now().strftime('%Y-%m-%d'),
                '城市名称': '烟台',
                '城市编号': '370600'
            }
            self.result_identity['target_name'] = value_of(0)
            # "Normal" as soon as any single insurance is being paid.
            if '正常参保' in insurance_status.values():
                self.result_identity['status'] = '正常参保'
            else:
                self.result_identity['status'] = '停缴'

            # ---- pension ----
            soup = self._post_query_soup(YL_URL, 'queryAgedPayHis')
            summary = soup.findAll('span')[1].text.split(',')
            data['baseInfo']['缴费时长'] = int(
                summary[0].replace('共缴费', '').replace('个月', ''))
            data['baseInfo']['最近缴费时间'] = summary[3].replace(
                '缴费年月为', '').replace('。', '')
            total = self._store_payment_history(soup, 'old_age',
                                                sum_personal=True)
            if total is not None:
                data['baseInfo']['个人养老累计缴费'] = total

            # ---- medical ----
            soup = self._post_query_soup(YIL_URL, 'queryMediPayHis')
            total = self._store_payment_history(soup, 'medical_care',
                                                sum_personal=True)
            if total is not None:
                data['baseInfo']['个人医疗累计缴费'] = total

            # ---- work injury / maternity / unemployment ----
            for url, method, result_key in (
                    (GS_URL, 'queryHarmPayHis', 'injuries'),
                    (SHY_URL, 'queryBirthPayHis', 'maternity'),
                    (SY_URL, 'queryLostPayHis', 'unemployment')):
                soup = self._post_query_soup(url, method)
                self._store_payment_history(soup, result_key)

            # ---- serious illness (different column layout) ----
            soup = self._post_query_soup(YIL_URL, 'queryEmpJfxxZzCxDe')
            self._store_payment_history(soup, 'serious_illness',
                                        serious_illness=True)
        except PermissionError as e:
            raise PreconditionNotSatisfiedError(e)

    def _post_query_soup(self, url, method):
        """POST one AJAX query for the logged-in session and parse the reply."""
        resp = self.s.post(
            url,
            data=dict(method=method,
                      __usersession_uuid=self.g.usersession_uuid,
                      _random=random.random()),
            headers={
                'Content-Type':
                'application/x-www-form-urlencoded;charset=UTF-8',
                'X-Requested-With': 'XMLHttpRequest'
            })
        return BeautifulSoup(resp.content, 'html.parser')

    def _store_payment_history(self,
                               soup,
                               result_key,
                               sum_personal=False,
                               serious_illness=False):
        """Parse the second table of ``soup`` into ``result_data[result_key]``.

        Args:
            soup: parsed reply of a payment-history query.
            result_key: key under ``self.result_data`` to (re)create.
            sum_personal: also add up ``float`` of the personal-payment
                column (input index 4) of every data row.
            serious_illness: use the serious-illness column layout, which has
                a payment-type column and no company-payment column.

        Returns:
            ``None`` when the page has at most one ``<option>`` (nothing is
            stored, as before); otherwise the personal-payment total, which
            stays ``0.0`` unless ``sum_personal`` is set.
        """
        # The table is parsed only when the page carries more than one
        # <option>. It is parsed exactly once: re-parsing the same table per
        # option used to multiply the accumulated sum.
        if len(soup.findAll('option')) <= 1:
            return None

        history = {}
        self.result_data[result_key] = {'data': history}
        total = 0.00
        for row in soup.findAll('table')[1].findAll('tr'):
            tds = row.find_all('td')
            # Data rows start with an empty <td>; rows without any <td> are
            # skipped instead of raising IndexError.
            if not tds or tds[0].text != '':
                continue
            cell = [field.attrs['value'] for field in row.find_all('input')]
            year_month = cell[1]
            if sum_personal:
                total += float(cell[4])
            if serious_illness:
                record = {
                    '险种': cell[0],
                    '缴费时间': cell[1],
                    '缴费类型': cell[6],
                    '缴费基数': cell[2].replace(',', ''),
                    '公司缴费': '',
                    '个人缴费': cell[3],
                    '单位编号': cell[4],
                    '缴费单位': cell[5]
                }
            else:
                record = {
                    '险种': cell[0],
                    '缴费时间': cell[1],
                    '缴费类型': '',
                    '缴费基数': cell[2].replace(',', ''),
                    '公司缴费': cell[3],
                    '个人缴费': cell[4],
                    '单位编号': cell[5],
                    '缴费单位': cell[6]
                }
            # Append rather than overwrite so several rows of the same month
            # (or a year that reappears later) no longer lose records.
            history.setdefault(year_month[:4], {}).setdefault(
                year_month[-2:], []).append(record)
        return total

    def _new_vc(self):
        """Fetch a new arithmetic captcha and compose it into one PNG.

        ``VC_URL`` returns JSON holding four base64-encoded images (left
        number, operator, right number, equals sign). They are pasted side by
        side onto a white 110x50 canvas.

        Returns:
            dict with ``cls='data:image'`` and ``content`` set to the PNG
            bytes of the composed image.
        """
        payload = json.loads(self.s.get(VC_URL).text)
        encoded_parts = [
            payload['numLeftBase64'],
            payload['operatorBase64'],
            payload['numRightBase64'],
            payload['equalsBase64'],
        ]

        canvas = Image.new('RGB', (110, 50), (255, 255, 255))
        for index, encoded in enumerate(encoded_parts):
            part = Image.open(io.BytesIO(base64.b64decode(encoded)))
            if part.mode == "P":
                # convert() returns a new image; the result used to be
                # discarded, leaving the palette image unconverted.
                part = part.convert("RGB")
            canvas.paste(part, (index * 22 + 15, 10))

        buffer = io.BytesIO()
        canvas.save(buffer, "PNG")
        return dict(cls='data:image', content=buffer.getvalue())
Пример #14
0
class Task(AbsFetchTask):
    task_info = dict(city_name="重庆",
                     help="""<li>初始密码为公积金账号后四位+00;可登录重庆住房公积金管理中心官网后进行修改。</li>
        <li>未验证注册用户首次登录时需进行身份验证,具体验证方式如下:用户通过输入公积金联名卡后六位(若用户未办理公积金联名卡的须输入个人公积金账号)验证登录。</li>""",
                     developers=[{
                         'name': '卜圆圆',
                         'email': '*****@*****.**'
                     }])

    def _get_common_headers(self):
        return {
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36'
        }

    def _query(self, params: dict):
        """任务状态查询"""
        t = params.get('t')
        if t == 'vc':
            return self._new_vc()

    def _prepare(self, data=None):
        """Run the base preparation, then attach a driver/requests coordinator.

        The coordinator is stored on ``self.dsc`` and is given the task's
        session together with ``_create_driver`` as its driver factory.
        """
        super()._prepare(data)
        coordinator = DriverRequestsCoordinator(
            s=self.s,
            create_driver=self._create_driver,
        )
        self.dsc = coordinator

    def _create_driver(self):
        """Create the web driver and open the site's entry page.

        The driver is configured for a direct (no proxy) PhantomJS session
        with 13-second page-load and script timeouts.

        :return: the driver, already navigated to the entry URL.
        """
        # Raw string: '\/' is not a valid escape sequence in a normal literal
        # and triggers a warning on modern Python; the value is unchanged.
        driver = new_driver(user_agent=USER_AGENT,
                            js_re_ignore=r'/web\/ImageCheck.jpg/g')
        proxy = webdriver.Proxy()
        proxy.proxy_type = ProxyType.DIRECT
        proxy.add_to_capabilities(webdriver.DesiredCapabilities.PHANTOMJS)
        driver.start_session(webdriver.DesiredCapabilities.PHANTOMJS)
        # driver.get(url) has been seen to hang forever without raising;
        # a page-load timeout keeps the task from getting stuck.
        driver.set_page_load_timeout(13)
        # Script timeout, also 13 seconds.
        driver.set_script_timeout(13)
        driver.get('https://www.cqgjj.cn/xxx')
        return driver

    def _setup_task_units(self):
        """设置任务执行单元"""
        self._add_unit(self._unit_login)
        self._add_unit(self._unit_fetch, self._unit_login)

    def _check_login_params(self, params):
        assert params is not None, '缺少参数'
        assert '账号' in params, '缺少账号'
        assert '密码' in params, '缺少密码'
        assert 'vc' in params, '缺少验证码'
        # other check
        账号 = params['账号']
        密码 = params['密码']
        if len(密码) < 4:
            raise InvalidParamsError('账号或密码错误')
        if 账号.isdigit():
            if len(账号) < 5:
                raise InvalidParamsError('账号错误')
            return
        raise InvalidParamsError('账号或密码错误')

    def _params_handler(self, params: dict):
        if not (self.is_start and not params):
            meta = self.prepared_meta
            if '账号' not in params:
                params['账号'] = meta.get('账号')
            if '密码' not in params:
                params['密码'] = meta.get('密码')
        return params

    def _param_requirements_handler(self, param_requirements, details):
        meta = self.prepared_meta
        res = []
        for pr in param_requirements:
            # TODO: 进一步检查details
            if pr['key'] == '账号' and '账号' in meta:
                continue
            elif pr['key'] == '密码' and '密码' in meta:
                continue
            res.append(pr)
        return res

    def _unit_login(self, params: dict):
        """Log in by posting the site's login form through the requests session.

        On success the account and password are stored in ``result_meta``,
        ``result_key`` is set to the account and the method returns ``None``.

        :param params: login values (``'账号'``, ``'密码'``, ``'vc'``); may be
            empty or ``None`` on the first call.
        :raises AskForParamsError: when no parameters were given, validation
            failed, or the site reported an error; carries the form
            description and the error message (``None`` if there is none).
        """
        err_msg = None
        if params:
            try:
                self._check_login_params(params)
                id_num = params['账号']
                password = params['密码']
                vc = params['vc']
                # The hidden ASP.NET state fields of the login page have to be
                # echoed back in the POST.
                page = self.s.get(LOGIN_PAGE_URL, timeout=10)
                soup = BeautifulSoup(page.content, 'html.parser')
                VIEWSTATE = soup.select('#__VIEWSTATE')[0].attrs['value']
                VIEWSTATEGENERATOR = soup.select(
                    '#__VIEWSTATEGENERATOR')[0].attrs['value']
                EVENTVALIDATION = soup.select(
                    '#__EVENTVALIDATION')[0].attrs['value']
                resp = self.s.post(LOGIN_PAGE_URL,
                                   data=dict(
                                       __VIEWSTATE=VIEWSTATE,
                                       __VIEWSTATEGENERATOR=VIEWSTATEGENERATOR,
                                       __EVENTVALIDATION=EVENTVALIDATION,
                                       HiddenField1=id_num,
                                       txt_loginname=id_num,
                                       txt_pwd=password,
                                       txt_code=vc,
                                       loginBtn=''),
                                   timeout=25)
                soup = BeautifulSoup(resp.content, 'html.parser')
                errors = soup.select('.error')
                # NOTE(review): an error is only read when MORE than one
                # ``.error`` element exists -- presumably the page always
                # contains one; confirm against the live page.
                if len(errors) > 1:
                    message = errors[0].text.replace(' ', '').replace('\n', '')
                    if message:
                        raise InvalidParamsError(message)
                print("登录成功!")

                self.result_key = params.get('账号')
                # Remember the credentials in meta.
                self.result_meta['账号'] = params.get('账号')
                self.result_meta['密码'] = params.get('密码')
                self.result_identity['task_name'] = '重庆'

                return
            except (AssertionError, InvalidParamsError) as e:
                err_msg = str(e)
        # The captcha is requested before prompting; its return value is not
        # used here (the form fetches the image through the 'vc' query).
        self._new_vc()
        # ``params`` may be None here; fall back to an empty mapping so the
        # form defaults can still be looked up.
        defaults = params or {}
        raise AskForParamsError([
            dict(key='账号',
                 name='账号',
                 cls='input',
                 placeholder='账号/手机',
                 value=defaults.get('账号', '')),
            dict(key='密码',
                 name='密码',
                 cls='input:password',
                 value=defaults.get('密码', '')),
            dict(key='vc', name='验证码', cls='data:image', query={'t': 'vc'}),
        ], err_msg)

    def _do_login(self, username, password, vc):
        """Log in by driving the login page in a web driver.

        Fills in the account, password and captcha inputs, submits the form
        and waits two seconds.  The login counts as successful when the
        browser has left ``LOGIN_PAGE_URL``.

        :raises InvalidParamsError: with the text of the first ``error``
            element when the browser is still on the login page.
        """
        with self.dsc.get_driver_ctx() as browser:
            # Open the login page.
            browser.get(LOGIN_PAGE_URL)

            locate = browser.find_element_by_xpath
            account_box = locate('//*[@id="txt_loginname"]')
            password_box = locate('//*[@id="txt_pwd"]')
            captcha_box = locate('//*[@id="txt_code"]')
            login_button = locate('//*[@id="loginBtn"]')

            # Account
            account_box.clear()
            account_box.send_keys(username)

            # Password: the "readonly" attribute is removed via script before
            # the text is typed.
            password_box.clear()
            browser.execute_script('$("#txt_pwd").removeAttr("readonly")')
            password_box.send_keys(password)

            # Captcha
            captcha_box.clear()
            captcha_box.send_keys(vc)

            # Submit
            login_button.click()
            time.sleep(2)

            if browser.current_url != LOGIN_PAGE_URL:
                print('登录成功')
                return

            # FIXME: try to handle an alert as well
            failure_text = browser.find_elements_by_class_name('error')[0].text
            raise InvalidParamsError(failure_text)

    def _unit_fetch(self):
        """Scrape the account summary and the transaction list.

        Writes ``result_data['baseInfo']``,
        ``result_data['detail']['data'][year][month]`` (a list of records) and
        ``result_data['companyList']``, and sets the name, id and status in
        ``result_identity``.

        :raises PreconditionNotSatisfiedError: when a ``PermissionError`` is
            raised inside the body.
        """
        try:
            # TODO: run the task; raise PermissionError when not logged in.
            # NOTE(review): nothing visible in this method raises
            # PermissionError itself -- confirm where it is expected from.
            # Basic account information
            resp = self.s.get(INFO_URL, timeout=25)
            soup = BeautifulSoup(resp.content, 'html.parser')
            table = soup.find('table')
            data = self.result_data
            data['baseInfo'] = {
                '城市名称': '重庆',
                '城市编号': '500100',
                '证件类型': '身份证',
                '个人登记号': '',
                '更新时间': time.strftime("%Y-%m-%d", time.localtime())
            }
            # Each table row with at least two cells is a label/value pair.
            for tr in table.findAll('tr'):
                cell = [
                    i.text.replace('\n', '').replace('\r', '').replace(
                        '     ', '').replace(':  ', '')
                    for i in tr.find_all('td')
                ]
                if len(cell) > 1:
                    # The label is renamed to the key names used in baseInfo;
                    # dashes and spaces are stripped from the value.
                    # setdefault keeps an already existing key untouched.
                    data['baseInfo'].setdefault(
                        cell[0].replace(' ', '').replace(
                            '身份证号码', '证件号').replace('开户时间', '开户日期').replace(
                                '个人月缴交额(元)', '个人月缴存额').replace(
                                    '单位月缴交额(元)', '单位月缴存额').replace(
                                        '个人公积金帐号', '公积金帐号').replace(
                                            '个人序号', '个人账号').replace(
                                                '当前余额(元)', '当前余额').replace(
                                                    '当前状态',
                                                    '帐户状态').replace(':', ''),
                        cell[1].replace('-', '').replace(' ', ''))

            # These lookups raise KeyError if the page did not provide the
            # corresponding labels.
            self.result_identity['target_name'] = data['baseInfo']['姓名']
            self.result_identity['target_id'] = data['baseInfo']['证件号']
            if '正常' in data['baseInfo']['帐户状态']:
                self.result_identity['status'] = '缴存'
            else:
                self.result_identity['status'] = '封存'

            # Transaction details
            resp = self.s.get(MINGXI_URL, timeout=25)
            soup = BeautifulSoup(resp.content, 'html.parser')
            table = soup.find('table')
            data['detail'] = {}
            data['detail']['data'] = {}
            years = ''  # year of the previously processed record
            months = ''  # month of the previously processed record
            maxtime = ''  # date of the first record in the table
            y = 1  # 1-based record counter
            hjtype = 0  # becomes 1 once hjrq/hjje have been captured
            hjje = ''  # amount of the captured deposit
            hjrq = ''  # deposit period of the captured deposit
            hjcs = 0  # number of records carrying a "[period]" suffix
            # One <tbody> is treated as one transaction record.
            for tb in table.findAll('tbody'):
                dic = {}
                arr = []
                cell = [
                    i.text.replace(' ', '').replace('\r\n', '')
                    for i in tb.find_all('td')
                ]
                # cell[1] looks like "type[period]"; split off the period.
                typedate = cell[1].split('[')
                hj = ''
                lx = cell[1]
                if '结息' in lx:
                    lx = '结息'
                if len(typedate) > 1:
                    hj = typedate[1].replace(']', '')
                    lx = typedate[0]
                if (y == 1):
                    maxtime = cell[0]
                    # NOTE(review): if the first record is a deposit without a
                    # "[period]" suffix, hjrq stays '' and is never updated
                    # later because hjtype is set -- confirm this is intended.
                    if '汇缴' in lx:
                        hjrq = hj
                        hjje = str(float(cell[2]) + float(cell[3]))
                        hjtype = 1
                y = y + 1

                if hj:
                    hjcs = hjcs + 1
                    if hjtype == 0:
                        hjrq = hj
                        hjje = str(float(cell[2]) + float(cell[3]))
                        hjtype = 1

                dic = {
                    '时间': cell[0],
                    '单位名称': '',
                    '支出': 0,
                    '收入': str(float(cell[2]) + float(cell[3])),
                    '汇缴年月': hj,
                    '余额': cell[4],
                    '类型': lx
                }
                # "YYYYMM" taken from the first seven characters of the date.
                times = cell[0][:7].replace('-', '')
                # Group by year, then month.  Only a record that directly
                # follows one of the same year and month is appended to the
                # existing list; otherwise a new list replaces the entry.
                # NOTE(review): this presumably relies on the records being
                # sorted by date -- a year or month that re-appears later
                # would overwrite what was stored before.
                if years != times[:4]:
                    years = times[:4]
                    data['detail']['data'][years] = {}
                    if months != times[-2:]:
                        months = times[-2:]
                        data['detail']['data'][years][months] = {}
                else:
                    if months != times[-2:]:
                        months = times[-2:]
                        data['detail']['data'][years][months] = {}
                    else:
                        arr = data['detail']['data'][years][months]
                arr.append(dic)
                data['detail']['data'][years][months] = arr
            data['baseInfo']['最近汇缴日期'] = hjrq
            data['baseInfo']['最近汇缴金额'] = hjje
            data['baseInfo']['累计汇缴次数'] = hjcs

            # companyList: a single entry built from baseInfo
            data['companyList'] = []
            enterdic = {
                "单位名称": data['baseInfo']['单位名称'],
                # "单位登记号": "",
                # "所属管理部编号": "",
                # "所属管理部名称": "",
                "当前余额": data['baseInfo']['当前余额'],
                "帐户状态": data['baseInfo']['帐户状态'],
                #"当年缴存金额": 0,
                # "当年提取金额": 0,
                #"上年结转余额": 0,
                "最后业务日期": maxtime
                # "转出金额": 0
            }
            data['companyList'].append(enterdic)
            return
        except PermissionError as e:
            raise PreconditionNotSatisfiedError(e)

    def _new_vc(self):
        """Download a captcha image from ``VC_URL`` (20 second timeout).

        :return: dict with the raw image bytes under ``content`` and the
            response's ``Content-Type`` header under ``content_type``.
        """
        image_resp = self.s.get(VC_URL, timeout=20)
        return {
            'content': image_resp.content,
            'content_type': image_resp.headers['Content-Type'],
        }
Пример #15
0
class Task(AbsFetchTask):
    task_info = dict(
        city_name="深圳",
        help="""<li>如您首次在网上查询您的公积金账户,初始密码为身份证后六位,身份证号码有字母的用数字“0”代替。</li>
                <li>如您在公积金官网查询过您的公积金账户,请输入账户信息和密码登录即可。</li>""",
        developers=[{
            'name': '卜圆圆',
            'email': '*****@*****.**'
        }])

    def _get_common_headers(self):
        return {
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
        }

    def _query(self, params: dict):
        """任务状态查询"""
        t = params.get('t')
        if t == 'vc':
            return self._new_vc()

    def _prepare(self, data=None):
        """Run the base preparation, then attach a driver/requests coordinator.

        The coordinator is stored on ``self.dsc`` and is given the task's
        session together with ``_create_driver`` as its driver factory.
        """
        super()._prepare(data)
        coordinator = DriverRequestsCoordinator(
            s=self.s,
            create_driver=self._create_driver,
        )
        self.dsc = coordinator

    def _create_driver(self):
        """Create the web driver and open the site's entry page.

        :return: the driver, already navigated to the entry URL.
        """
        # Raw string: '\/' is not a valid escape sequence in a normal literal
        # and triggers a warning on modern Python; the value is unchanged.
        driver = new_driver(user_agent=USER_AGENT,
                            js_re_ignore=r'/web\/ImageCheck.jpg/g')
        driver.get('https://nbp.szzfgjj.com/xxx')
        return driver

    def _setup_task_units(self):
        """设置任务执行单元"""
        self._add_unit(self._unit_login)
        self._add_unit(self._unit_fetch, self._unit_login)

    def _check_login_params(self, params):
        assert params is not None, '缺少参数'
        assert '公积金账号' in params, '缺少公积金账号'
        assert '密码' in params, '缺少密码'
        assert 'vc' in params, '缺少验证码'
        # other check
        公积金账号 = params['公积金账号']
        密码 = params['密码']

        if len(公积金账号) == 0:
            raise InvalidParamsError('公积金账号为空,请输入公积金账号')
        elif len(公积金账号) != 11:
            raise InvalidParamsError('公积金账号不正确,请重新输入')

        if len(密码) == 0:
            raise InvalidParamsError('密码为空,请输入密码!')
        elif len(密码) < 6:
            raise InvalidParamsError('密码不正确,请重新输入!')

    def _params_handler(self, params: dict):
        if not (self.is_start and not params):
            meta = self.prepared_meta
            if '公积金账号' not in params:
                params['公积金账号'] = meta.get('公积金账号')
            if '密码' not in params:
                params['密码'] = meta.get('密码')
        return params

    def _param_requirements_handler(self, param_requirements, details):
        meta = self.prepared_meta
        res = []
        for pr in param_requirements:
            # TODO: 进一步检查details
            if pr['key'] == '公积金账号' and '公积金账号' in meta:
                continue
            elif pr['key'] == '密码' and '密码' in meta:
                continue
            res.append(pr)
        return res

    def _unit_login(self, params: dict):
        """Log in by posting the login form through the requests session.

        On success the account and password are stored in ``result_meta``,
        ``result_key`` is set to the account and the method returns ``None``.

        :param params: login values (``'公积金账号'``, ``'密码'``, ``'vc'``);
            may be empty or ``None`` on the first call.
        :raises AskForParamsError: when no parameters were given, validation
            failed, or the site rejected the login; carries the form
            description and the error message (``None`` if there is none).
        """
        err_msg = None
        if params:
            try:
                self._check_login_params(params)
                id_num = params['公积金账号']
                vc = params['vc']
                # FIXME(review): 'QryPwd' is a hard-coded value, so the user's
                # password is never sent.  The previous code computed
                # hashlib.md5(password).hexdigest() but did not use it; the
                # encoding the site expects must be confirmed before wiring
                # the real password in.
                data = {
                    'task': 'pri',
                    'transcode': 'card',
                    'ssoLogin': '',
                    'issueName': '',
                    'UserCert': '',
                    'bjcaRanStr': '',
                    'ranStr': '',
                    'CardNo': id_num,
                    'QryPwd': '19ee8550b7b1af89',
                    'identifyCode': vc,
                    'sSignTxt': '',
                    'SUBMIT.x': '91',
                    'SUBMIT.y': '15'
                }
                self.s.post(LOGIN_URL,
                            data=data,
                            headers={
                                'Content-Type':
                                'application/x-www-form-urlencoded',
                                'Cache-Control': 'no-cache'
                            })
                # Presumably the session exposes the parsed body of its last
                # response as ``soup`` -- confirm against the session class.
                soup = self.s.soup
                errormsg = soup.select('.message')[0].text
                # A message equal to the account number is treated as success.
                if errormsg and errormsg != id_num:
                    # InvalidParamsError (not a bare Exception) so the handler
                    # below turns the rejection into a new prompt.
                    raise InvalidParamsError(errormsg)
                print('chengong')

                self.result_key = params.get('公积金账号')
                # Remember the credentials in meta.
                self.result_meta['公积金账号'] = params.get('公积金账号')
                self.result_meta['密码'] = params.get('密码')
                self.result_identity['task_name'] = '深圳'

                return
            except (AssertionError, InvalidParamsError) as e:
                err_msg = str(e)
        # The captcha is requested before prompting; its return value is not
        # used here (the form fetches the image through the 'vc' query).
        self._new_vc()
        # ``params`` may be None here; fall back to an empty mapping so the
        # form defaults can still be looked up.
        defaults = params or {}
        raise AskForParamsError([
            dict(key='公积金账号',
                 name='公积金账号',
                 cls='input',
                 placeholder='公积金账号',
                 value=defaults.get('公积金账号', '')),
            dict(key='密码',
                 name='密码',
                 cls='input:password',
                 value=defaults.get('密码', '')),
            dict(key='vc', name='验证码', cls='data:image', query={'t': 'vc'}),
        ], err_msg)

    def _do_login(self, username, password, vc):
        """Log in by driving the login page in a web driver.

        Fills in the account, password and captcha inputs, submits the form
        and waits two seconds.  When the browser has left ``LOGIN_PAGE_URL``
        the resulting page is parsed and stored on ``self.s.soup``.

        :raises InvalidParamsError: when the browser is still on the login
            page; the message is the alert text if one can be read, otherwise
            a generic failure message.
        """
        with self.dsc.get_driver_ctx() as driver:
            # Open the login page.
            driver.get(LOGIN_PAGE_URL)

            username_input = driver.find_element_by_xpath(
                '//*[@id="pri"]/p[1]/label[2]/input')
            password_input = driver.find_element_by_xpath(
                '//*[@id="pri"]/p[2]/label[2]')
            vc_input = driver.find_element_by_xpath(
                '//*[@name="identifyCode"]')
            submit_btn = driver.find_element_by_xpath(
                '//*[@id="pri"]/p[6]/input[1]')

            # Account
            username_input.clear()
            username_input.send_keys(username)

            # Password
            password_input.clear()
            password_input.send_keys(password)

            # Captcha
            vc_input.clear()
            vc_input.send_keys(vc)

            # The debugging call that opened a screenshot in an image viewer
            # was removed: it has no place in an unattended task.

            # Submit
            submit_btn.click()
            time.sleep(2)

            if driver.current_url != LOGIN_PAGE_URL:
                print('登录成功')
                # Keep the page shown after login for the fetch unit to parse.
                login_page_html = driver.find_element_by_tag_name(
                    'html').get_attribute('innerHTML')
                self.s.soup = BeautifulSoup(login_page_html, 'html.parser')
                return

            # FIXME: try to handle the alert
            err_msg = '登录失败,请检查输入'
            try:
                # Obtaining the alert is part of the best-effort block so that
                # a missing alert also falls back to the generic message.
                err_msg = driver.switch_to.alert.text
            except Exception:
                # Deliberate best effort: keep the generic message.
                pass
            raise InvalidParamsError(err_msg)

    def _unit_fetch(self):
        try:
            # TODO: 执行任务,如果没有登录,则raise PermissionError
            return
        except PermissionError as e:
            raise PreconditionNotSatisfiedError(e)

    def _new_vc(self):
        """Download a captcha image from ``VC_URL``.

        :return: dict with the raw image bytes under ``content`` and the
            response's ``Content-Type`` header under ``content_type``.
        """
        image_resp = self.s.get(VC_URL)
        return {
            'content': image_resp.content,
            'content_type': image_resp.headers['Content-Type'],
        }
Пример #16
0
class Task(AbsFetchTask):
    task_info = dict(
        city_name="宁波",
        help="""<li>如您未在社保网站查询过您的社保信息,请到宁波社保网上服务平台完成“注册”然后再登录。</li>
            <li>如有问题请拨打12333。</li>
            """,
        developers=[{
            'name': '卜圆圆',
            'email': '*****@*****.**'
        }])

    def _get_common_headers(self):
        return {
            'User-Agent':
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36'
        }

    def _setup_task_units(self):
        self._add_unit(self._unit_login)
        self._add_unit(self._unit_fetch_name, self._unit_login)

    def _query(self, params: dict):
        t = params.get('t')
        if t == 'vc':
            return self._new_vc()

    def _prepare(self, data=None):
        """Run the base preparation, then attach a driver/requests coordinator.

        The coordinator is stored on ``self.dsc`` and is given the task's
        session together with ``_create_driver`` as its driver factory.
        """
        super()._prepare(data)
        coordinator = DriverRequestsCoordinator(
            s=self.s,
            create_driver=self._create_driver,
        )
        self.dsc = coordinator

    def _create_driver(self):
        """Create the web driver and open ``MAIN_URL``.

        The driver is configured for a direct (no proxy) PhantomJS session
        with 13-second page-load and script timeouts.

        :return: the driver, already navigated to ``MAIN_URL``.
        """
        # Raw string: '\/' is not a valid escape sequence in a normal literal
        # and triggers a warning on modern Python; the value is unchanged.
        driver = new_driver(user_agent=USER_AGENT,
                            js_re_ignore=r'/web\/ImageCheck.jpg/g')
        proxy = webdriver.Proxy()
        proxy.proxy_type = ProxyType.DIRECT
        proxy.add_to_capabilities(webdriver.DesiredCapabilities.PHANTOMJS)
        driver.start_session(webdriver.DesiredCapabilities.PHANTOMJS)
        # driver.get(url) has been seen to hang forever without raising;
        # a page-load timeout keeps the task from getting stuck.
        driver.set_page_load_timeout(13)
        # Script timeout, also 13 seconds.
        driver.set_script_timeout(13)
        driver.get(MAIN_URL)
        return driver

    def _check_login_params(self, params):
        assert params is not None, '缺少参数'
        assert '身份证号' in params, '缺少身份证号'
        assert '密码' in params, '缺少密码'
        # other check
        身份证号 = params['身份证号']
        密码 = params['密码']

        if len(身份证号) == 0:
            raise InvalidParamsError('身份证号为空,请输入身份证号')
        elif len(身份证号) < 15:
            raise InvalidParamsError('身份证号不正确,请重新输入')

        if len(密码) == 0:
            raise InvalidParamsError('密码为空,请输入密码!')
        elif len(密码) < 6:
            raise InvalidParamsError('密码不正确,请重新输入!')

    def _params_handler(self, params: dict):
        if not (self.is_start and not params):
            meta = self.prepared_meta
            if '身份证号' not in params:
                params['身份证号'] = meta.get('身份证号')
            if '密码' not in params:
                params['密码'] = meta.get('密码')
        return params

    def _param_requirements_handler(self, param_requirements, details):
        meta = self.prepared_meta
        res = []
        for pr in param_requirements:
            # TODO: 进一步检查details
            if pr['key'] == '身份证号' and '身份证号' in meta:
                continue
            elif pr['key'] == '密码' and '密码' in meta:
                continue
            elif pr['key'] == 'other':
                continue
            res.append(pr)
        return res

    def _unit_login(self, params: dict):
        """Log in through the site's JSON endpoints using the requests session.

        The captcha is verified first (``CVC_URL``), then the credentials are
        posted to ``LOGIN_URL``.  On success the access token is stored on
        ``self.g``, ``result_data['baseInfo']`` is initialised from the
        response and the credentials are stored in ``result_meta``.

        :param params: login values (``'身份证号'``, ``'密码'``, ``'vc'``);
            may be empty or ``None`` on the first call.
        :raises AskForParamsError: when no parameters were given, validation
            failed, or the site rejected the captcha or the login; carries
            the form description and the error message (``None`` if there is
            none).
        """
        err_msg = None
        if params:
            try:
                self._check_login_params(params)
                id_num = params['身份证号']
                pwd = params['密码']
                yzm = params['vc']

                # Step 1: verify the captcha.
                resp = self.s.post(CVC_URL,
                                   data=dict(client='NBHRSS_WEB', yzm=yzm))
                soup = BeautifulSoup(resp.content, 'html.parser')
                infors = json.loads(soup.text)
                if infors['result'] != '0':
                    raise InvalidParamsError('验证码错误!')

                # Step 2: submit the credentials.
                resp = self.s.post(LOGIN_URL,
                                   data=dict(id=id_num,
                                             password=pwd,
                                             client='NBHRSS_WEB',
                                             phone=''))
                soup = BeautifulSoup(resp.content, 'html.parser')
                infor = json.loads(soup.text)
                if infor['ret'] != '1':
                    if infor['msg'] == 'E1001':
                        raise InvalidParamsError('请去官网进行账号升级!')
                    raise InvalidParamsError(infor['msg'])

                print("登录成功!")
                # 'result' is itself a JSON document encoded as a string.
                inforr = json.loads(infor['result'])
                self.g.access_token = inforr['access_token']
                self.result_data["baseInfo"] = {
                    '城市名称': '宁波',
                    '城市编号': '330200',
                    '更新时间': time.strftime("%Y-%m-%d",
                                          time.localtime()),
                    '姓名': inforr['xm'],
                    '身份证号': inforr['sfz'],
                    '社会保障卡号码': inforr['sbkh']
                }
                self.result_identity['target_name'] = inforr['xm']

                self.result_key = id_num
                self.result_meta['身份证号'] = id_num
                self.result_meta['密码'] = pwd
                self.result_identity['task_name'] = '宁波'
                self.result_identity['target_id'] = id_num

                return
            except (AssertionError, InvalidParamsError) as e:
                err_msg = str(e)
        # ``params`` may be None here; fall back to an empty mapping so the
        # form defaults can still be looked up.
        defaults = params or {}
        raise AskForParamsError([
            dict(key='身份证号',
                 name='身份证号',
                 cls='input',
                 value=defaults.get('身份证号', '')),
            dict(key='密码',
                 name='密码',
                 cls='input:password',
                 value=defaults.get('密码', '')),
            dict(key='vc',
                 name='验证码',
                 cls='data:image',
                 query={'t': 'vc'},
                 value=defaults.get('vc', '')),
        ], err_msg)

    def _do_login(self, username, password, vc):
        """Log in by driving the main page in a web driver.

        Fills in the id number, password and captcha inputs, submits the form
        and waits eight seconds.  When the browser has reached ``INFO_URL``
        the resulting page is parsed and stored on ``self.s.soup``.

        :raises InvalidParamsError: when the browser did not reach
            ``INFO_URL``; the message is the alert text if one can be read,
            otherwise a generic failure message.
        """
        with self.dsc.get_driver_ctx() as driver:
            # Open the login page.
            driver.get(MAIN_URL)

            username_input = driver.find_element_by_xpath('//*[@id="loginid"]')
            password_input = driver.find_element_by_xpath('//*[@id="pwd"]')
            vc_input = driver.find_element_by_xpath('//*[@id="yzm"]')
            submit_btn = driver.find_element_by_xpath('//*[@id="btnLogin"]')

            # Id number
            username_input.clear()
            username_input.send_keys(username)

            # Password
            password_input.clear()
            password_input.send_keys(password)

            # Captcha
            vc_input.clear()
            vc_input.send_keys(vc)

            # Submit
            submit_btn.click()
            time.sleep(8)

            if driver.current_url == INFO_URL:
                print('登录成功')
                # Keep the page shown after login for the fetch unit to parse.
                login_page_html = driver.find_element_by_tag_name(
                    'html').get_attribute('innerHTML')
                self.s.soup = BeautifulSoup(login_page_html, 'html.parser')
                return

            # FIXME: try to handle the alert
            err_msg = '登录失败,请检查输入'
            try:
                # Obtaining the alert is part of the best-effort block so that
                # a missing alert also falls back to the generic message.
                err_msg = driver.switch_to.alert.text
            except Exception:
                # Deliberate best effort: keep the generic message.
                pass
            raise InvalidParamsError(err_msg)

    def _yanglao(self):
        """Scrape the pension (old-age insurance) page.

        Writes ``result_data['old_age']['data'][year][month]`` (a one-element
        list per month), adds summary fields to ``result_data['baseInfo']``
        and sets the insurance status on ``result_identity`` and
        ``self.g.Fivestatus``.
        """
        with self.dsc.get_driver_ctx() as driver:
            driver.get(YL_URL)
            # Presumably gives the page time to render before it is read.
            time.sleep(3)
            htmls = driver.find_element_by_tag_name('html').get_attribute(
                'innerHTML')
            soupyl = BeautifulSoup(htmls, 'html.parser')
            table_rows = soupyl.select('#content')[0].find_all('tr')

            self.result_data['old_age'] = {}
            self.result_data['old_age']['data'] = {}
            by_year = self.result_data['old_age']['data']

            arrstr = []  # texts of the short (fewer than 3 cells) rows
            years = ''  # year of the previously processed payment row
            maxtime = ''  # payment time of the first payment row
            first_payment_seen = False
            for row in table_rows:
                cell = [i.text for i in row.find_all('td')]
                if len(cell) < 3:
                    arrstr.extend(cell)
                elif len(cell) == 4:
                    yearmonth = cell[0]
                    # A row of a different year than the previous one starts
                    # a fresh mapping for that year.
                    if years == '' or years != yearmonth[:4]:
                        years = yearmonth[:4]
                        by_year[years] = {}
                    if not first_payment_seen:
                        maxtime = cell[0]
                        first_payment_seen = True
                    # Each payment row replaces the month's entry with a
                    # one-element list.
                    by_year[years][yearmonth[-2:]] = [{
                        '缴费时间': cell[0],
                        '缴费类型': '',
                        '缴费基数': cell[1],
                        '公司缴费': '',
                        '个人缴费': cell[2],
                        '缴费单位': '',
                        '到账情况': cell[3]
                    }]

            if len(arrstr) > 2:
                self.result_data["baseInfo"].setdefault(
                    '单位名称', arrstr[2].replace('单位名称:', ''))
            nowyears = time.strftime("%Y", time.localtime())
            # Indexes 9 and 10 are read, so at least 11 entries are needed
            # (the former ``> 8`` guard raised IndexError for 9 or 10).
            if len(arrstr) > 10:
                jfscolder = int(arrstr[10].replace('至本年末实际缴费月数:', ''))
                ljjfolder = float(arrstr[9].replace('至本年末账户累计储存额:', ''))
            else:
                jfscolder = 0
                ljjfolder = 0.00
            # The current year's rows are added on top of the totals read
            # from the page.
            if nowyears in by_year:
                for month_records in by_year[nowyears].values():
                    jfscolder = jfscolder + 1
                    ljjfolder = ljjfolder + float(month_records[0]['个人缴费'])
            self.result_data["baseInfo"].setdefault('缴费时长', jfscolder)
            self.result_data["baseInfo"].setdefault('个人养老累计缴费', ljjfolder)
            self.result_data["baseInfo"].setdefault('最近缴费时间', maxtime)
            if len(by_year) > 0:
                # Smallest year key followed by its smallest month key.
                ksjfsj = min(by_year)
                self.result_data["baseInfo"].setdefault(
                    '开始缴费时间', ksjfsj + min(by_year[ksjfsj]))
            else:
                self.result_data["baseInfo"].setdefault('开始缴费时间', '')

            if len(arrstr) > 3:
                cbzt = arrstr[3].replace('参保状态:', '')
            else:
                cbzt = '未知'
            # Anything other than '参保缴费' (including the unknown case) is
            # reported as stopped.
            if cbzt == '参保缴费':
                cbzt = '正常参保'
            else:
                cbzt = '停缴'
            self.result_identity['status'] = cbzt
            self.g.Fivestatus = {'养老': cbzt}

    def _yiliao(self):
        """Scrape the medical-insurance page: payment history, balance, status.

        Loads ``YIL_URL`` in the shared driver and walks the ``<tr>`` rows of
        the first ``#content`` element:

        * rows with fewer than three cells are flattened into a list of
          summary cells;
        * rows with exactly four cells are treated as payment records and
          stored as ``result_data['medical_care']['data'][year][month]``, a
          one-element list holding the last record seen for that month.

        Afterwards it stores '个人医疗累计缴费' in ``result_data['baseInfo']``
        (the account balance from the summary cells plus this year's personal
        medical payments) and the status under '医疗' in
        ``self.g.Fivestatus``.  Both are written with ``setdefault``, so
        existing values are kept.
        """
        with self.dsc.get_driver_ctx() as driver:
            driver.get(YIL_URL)
            # Fixed pause before reading the DOM; presumably lets the page
            # finish rendering - TODO confirm.
            time.sleep(3)
            htmls = driver.find_element_by_tag_name('html').get_attribute(
                'innerHTML')
            soupyl = BeautifulSoup(htmls, 'html.parser')
            tableinfo = soupyl.select('#content')[0].find_all('tr')

            history = {}
            self.result_data['medical_care'] = {'data': history}
            arrstr = []
            prev_year = None
            for row in tableinfo:
                cell = [i.text for i in row.find_all('td')]
                if len(cell) < 3:
                    arrstr.extend(cell)
                elif len(cell) == 4:
                    year, month = cell[0][:4], cell[0][-2:]
                    # A change of year (re)starts that year's month mapping.
                    if year != prev_year:
                        history[year] = {}
                        prev_year = year
                    history[year][month] = [{
                        '缴费时间': cell[0],
                        '缴费类型': '',
                        '缴费基数': cell[1],
                        '公司缴费': '',
                        '个人缴费': cell[2],
                        '缴费单位': '',
                        '到账情况': cell[3]
                    }]

            nowyears = time.strftime("%Y", time.localtime())
            # Index 11 needs at least 12 collected cells; the previous
            # `> 10` guard raised IndexError when exactly 11 were present.
            if len(arrstr) > 11:
                ljjfolder = float(arrstr[11].replace('个人账户余额:', ''))
            else:
                ljjfolder = 0.00
            # Add this year's personal payments from the medical history built
            # above.  The earlier code read result_data['old_age'] here, which
            # mixed pension payments into the medical total and raised
            # KeyError when the pension data had not been collected.
            for records in history.get(nowyears, {}).values():
                ljjfolder = ljjfolder + float(records[0]['个人缴费'])
            self.result_data["baseInfo"].setdefault('个人医疗累计缴费', ljjfolder)

            if len(arrstr) > 4:
                cbzt = arrstr[4].replace('参保状态:', '')
            else:
                cbzt = '未知'
            # Anything other than '参保缴费' (including '未知') is reported
            # as '停缴'.
            cbzt = '正常参保' if cbzt == '参保缴费' else '停缴'
            self.g.Fivestatus.setdefault('医疗', cbzt)

    def _gongshang(self):
        """Read the work-injury insurance page and record its status.

        Opens ``GS_URL`` in the shared driver, gathers the cell texts of every
        ``<tr>`` inside the first ``#content`` element that has fewer than
        three cells, and derives the status from the fourth gathered cell.
        The result is stored under '工伤' in ``self.g.Fivestatus`` unless that
        key is already present.
        """
        with self.dsc.get_driver_ctx() as driver:
            driver.get(GS_URL)
            # Fixed pause before reading the DOM; presumably lets the page
            # finish rendering - TODO confirm.
            time.sleep(3)
            page_html = driver.find_element_by_tag_name('html').get_attribute(
                'innerHTML')
            content = BeautifulSoup(page_html, 'html.parser').select('#content')
            summary_cells = []
            for tr in content[0].find_all('tr'):
                texts = [td.text for td in tr.find_all('td')]
                if len(texts) < 3:
                    summary_cells.extend(texts)
            raw_status = (summary_cells[3].replace('参保状态:', '')
                          if len(summary_cells) > 3 else '未知')
            # Anything other than '参保缴费' (including '未知') maps to '停缴'.
            status = '正常参保' if raw_status == '参保缴费' else '停缴'
            self.g.Fivestatus.setdefault('工伤', status)

    def _shiye(self):
        """Read the unemployment insurance page and record its status.

        Opens ``SY_URL`` in the shared driver, flattens the cell texts of all
        ``#content`` table rows with fewer than three cells, and converts the
        fourth of those texts into the status stored under '失业' in
        ``self.g.Fivestatus`` (an existing entry is not overwritten).
        """
        with self.dsc.get_driver_ctx() as driver:
            driver.get(SY_URL)
            # Fixed pause before reading the DOM; presumably lets the page
            # finish rendering - TODO confirm.
            time.sleep(3)
            root = driver.find_element_by_tag_name('html')
            document = BeautifulSoup(root.get_attribute('innerHTML'),
                                     'html.parser')
            table_rows = document.select('#content')[0].find_all('tr')
            # Flatten the texts of every "short" row (fewer than three cells).
            short_row_texts = [
                text
                for texts in ([td.text for td in tr.find_all('td')]
                              for tr in table_rows)
                if len(texts) < 3
                for text in texts
            ]
            if len(short_row_texts) <= 3:
                raw_status = '未知'
            else:
                raw_status = short_row_texts[3].replace('参保状态:', '')
            # Only '参保缴费' counts as enrolled; everything else is '停缴'.
            if raw_status != '参保缴费':
                status = '停缴'
            else:
                status = '正常参保'
            self.g.Fivestatus.setdefault('失业', status)

    def _shengyu(self):
        """Read the maternity insurance page and record its status.

        Opens ``SHY_URL`` in the shared driver and scans the ``<tr>`` rows of
        the first ``#content`` element.  Texts from rows with fewer than three
        cells are accumulated; the fourth accumulated text yields the status
        that is stored under '生育' in ``self.g.Fivestatus`` when the key is
        not set yet.
        """
        status_by_raw = {'参保缴费': '正常参保'}
        with self.dsc.get_driver_ctx() as driver:
            driver.get(SHY_URL)
            # Fixed pause before reading the DOM; presumably lets the page
            # finish rendering - TODO confirm.
            time.sleep(3)
            markup = driver.find_element_by_tag_name('html').get_attribute(
                'innerHTML')
            parsed = BeautifulSoup(markup, 'html.parser')
            containers = parsed.select('#content')
            collected = []
            for table_row in containers[0].find_all('tr'):
                cells = table_row.find_all('td')
                values = [c.text for c in cells]
                if len(values) >= 3:
                    continue
                collected.extend(values)
            raw_status = '未知'
            if len(collected) > 3:
                raw_status = collected[3].replace('参保状态:', '')
            # Every value except '参保缴费' (also '未知') collapses to '停缴'.
            self.g.Fivestatus.setdefault(
                '生育', status_by_raw.get(raw_status, '停缴'))

    def _unit_fetch_name(self):
        """Fetch the profile and the five-insurance data via the REST API.

        Requests are issued on ``self.s`` in a fixed order.  Several responses
        are discarded; they are presumably only needed to prime the
        server-side session (TODO confirm), so the sequence is kept as is.

        The method fills:

        * ``result_data['baseInfo']`` - gender, card status, contact fields,
          employer name, payment duration/totals and the '五险状态' mapping;
        * ``result_data['old_age']['data']`` and
          ``result_data['medical_care']['data']`` - ``{year: {month:
          [record]}}``, written only when the matching history call answers
          with ``ret == '1'``;
        * ``result_identity['status']`` - '正常' when any insurance reports
          '参保缴费', otherwise '停缴'.

        Raises:
            PreconditionNotSatisfiedError: when a ``PermissionError`` is
                raised while fetching.
        """
        region = '{"AAB301":"330200"}'
        paged = '{"AAB301":"330200","PAGENO":1,"PAGESIZE":10000}'
        timeout_url = 'https://rzxt.nbhrss.gov.cn/nbsbk-rzxt/rzxt/getTimeOut.action'
        try:
            # ---- personal profile ------------------------------------
            self.s.get(
                'https://rzxt.nbhrss.gov.cn/nbsbk-rzxt/web/pages/query/query-grxx.jsp'
            )
            self._commapi_get('10S006')
            profile = json.loads(self._commapi_json('10S005')['result'])

            base_info = self.result_data['baseInfo']
            # Codes outside the known set fall back to the default; before,
            # such a code left the variable unassigned (UnboundLocalError).
            base_info['性别'] = {'1': '男', '2': '女'}.get(
                profile.get('AAC004'), '未说明性别')
            if 'AZA103' in profile:
                base_info['国籍'] = profile['AZA103']
            base_info['社保卡状态'] = {
                '1': '正常有卡状态',
                '2': '正式挂失状态',
                '4': '临时挂失状态',
            }.get(profile.get('AAZ502'), '')
            for field, label in (('AAE010', '银行账号'),
                                 ('AAZ503', '发卡日期'),
                                 ('AAE004', '手机号'),
                                 ('AAE005', '固定号码'),
                                 ('AAE006', '常住地址'),
                                 ('AAZ220', '邮编')):
                if field in profile:
                    base_info[label] = profile[field]

            # Second 10S005 call; its response is not used.
            self._commapi_get('10S005')

            five_status = {}

            # ---- old-age pension -------------------------------------
            self.s.get(
                'https://rzxt.nbhrss.gov.cn/nbsbk-rzxt/web/pages/query/query-ylbx.jsp'
            )
            self._commapi_get('91S099')
            self.s.post(timeout_url)

            envelope = self._commapi_json('91S001', param=region)
            if envelope['ret'] == '1':
                insured = json.loads(envelope['result'])
                # The raw status is stored: the final check below looks for
                # the literal '参保缴费'.
                five_status['养老'] = insured['AAC008']
                base_info.setdefault('单位名称', insured['AAB004'])

            envelope = self._commapi_json('91S002', param=paged, refresh=False)
            if envelope['ret'] == '1':
                costs = json.loads(envelope['result'])['COSTLIST']['COST']
                pension = self._group_costs_by_month(costs)
                self.result_data['old_age'] = {'data': pension}
                # The first returned record supplies '最近缴费时间'.
                latest = costs[0]['AAE002'] if costs else ''

                this_year = time.strftime("%Y", time.localtime())
                months_paid, pension_total = 0, 0.00
                envelope = self._commapi_json('91S003', param=region)
                if envelope['ret'] == '1':
                    yearly = json.loads(envelope['result'])['COSTLIST']['COST']
                    # NOTE(review): entry 0 is only used when more than one
                    # entry exists; `> 0` may have been intended - confirm.
                    if len(yearly) > 1:
                        months_paid = int(yearly[0]['AAE091'])
                        pension_total = float(yearly[0]['AAE382'])
                # Add the months and personal payments recorded this year.
                for records in pension.get(this_year, {}).values():
                    months_paid += 1
                    pension_total += float(records[0]['个人缴费'])
                base_info.setdefault('缴费时长', months_paid)
                base_info.setdefault('个人养老累计缴费', pension_total)
                base_info.setdefault('最近缴费时间', latest)
                if pension:
                    first_year = min(pension)
                    base_info.setdefault(
                        '开始缴费时间', first_year + min(pension[first_year]))
                else:
                    base_info.setdefault('开始缴费时间', '')

            # ---- medical insurance -----------------------------------
            self.s.get(
                'https://rzxt.nbhrss.gov.cn/nbsbk-rzxt/web/pages/query/query-yilbx.jsp'
            )
            self._commapi_get('91S099')
            self.s.post(timeout_url)

            envelope = self._commapi_json('91S011', param=region)
            # Guarded like the other status calls; previously a failed reply
            # was decoded unconditionally.
            if envelope['ret'] == '1':
                five_status['医疗'] = json.loads(envelope['result'])['AAC008']

            envelope = self._commapi_json('91S012', param=paged, refresh=False)
            if envelope['ret'] == '1':
                costs = json.loads(envelope['result'])['COSTLIST']['COST']
                medical = self._group_costs_by_month(costs)
                self.result_data['medical_care'] = {'data': medical}

                this_year = time.strftime("%Y", time.localtime())
                balance = 0.00
                envelope = self._commapi_json('91S013', param=region)
                if envelope['ret'] == '1':
                    account = json.loads(envelope['result'])
                    if len(account) > 1:
                        balance = float(account['AKC087'])
                # Add this year's personal *medical* payments.  The earlier
                # code read result_data['old_age'] here, which mixed pension
                # payments into this total and raised KeyError whenever the
                # pension history had not been stored.
                for records in medical.get(this_year, {}).values():
                    balance += float(records[0]['个人缴费'])
                base_info.setdefault('个人医疗累计缴费', balance)

            # ---- work injury / maternity / unemployment (status only) -
            for api, label in (('91S018', '工伤'),
                               ('91S019', '生育'),
                               ('91S020', '失业')):
                envelope = self._commapi_json(api, param=region)
                if envelope['ret'] == '1':
                    five_status[label] = json.loads(
                        envelope['result'])['AAC008']

            base_info.setdefault('五险状态', five_status)
            if '参保缴费' in five_status.values():
                self.result_identity['status'] = '正常'
            else:
                self.result_identity['status'] = '停缴'
        except PermissionError as e:
            raise PreconditionNotSatisfiedError(e)

    def _commapi_get(self, api, param=None, refresh=True):
        """GET one ``commapi`` endpoint on ``self.s`` and return the response.

        ``refresh`` adds ``&refresh=true`` and ``param`` (a JSON string) adds
        ``&param=...``; the query keeps the fixed order
        access_token, api, bustype, refresh, param, client.
        """
        url = ('https://app.nbhrss.gov.cn/nbykt/rest/commapi?access_token='
               + self.g.access_token + '&api=' + api + '&bustype=01')
        if refresh:
            url += '&refresh=true'
        if param is not None:
            url += '&param=' + param
        url += '&client=NBHRSS_WEB'
        return self.s.get(url)

    def _commapi_json(self, api, param=None, refresh=True):
        """Call ``_commapi_get`` and decode the body into the JSON envelope."""
        resp = self._commapi_get(api, param=param, refresh=refresh)
        # The body goes through BeautifulSoup first and only its text is
        # JSON-decoded, exactly as before the refactoring.
        return json.loads(BeautifulSoup(resp.content, 'html.parser').text)

    @staticmethod
    def _group_costs_by_month(costs):
        """Turn a list of cost records into ``{year: {month: [record]}}``.

        Year and month are the first four / last two characters of
        ``AAE002``.  Each month keeps a one-element list with the last record
        seen for it, and a change of year between consecutive records
        (re)starts that year's mapping.
        """
        history = {}
        prev_year = None
        for cell in costs:
            period = cell['AAE002']
            year, month = period[:4], period[-2:]
            if year != prev_year:
                history[year] = {}
                prev_year = year
            history[year][month] = [{
                '缴费时间': period,
                '缴费类型': '',
                '缴费基数': cell['AAE180'],
                '公司缴费': '',
                '个人缴费': cell['AAE022'],
                '缴费单位': '',
                '到账情况': cell['AAE078']
            }]
        return history

    def _new_vc(self):
        """Download a fresh image from ``VC_URL`` and wrap it for the caller.

        The current local time, formatted as ``'%a %b %d %Y %H:%M:%S'``, is
        appended to ``VC_URL`` (presumably as a cache-buster - TODO confirm).

        Returns:
            dict: ``cls`` set to ``'data:image'``, ``content`` holding the raw
            response body and ``content_type`` taken from the response's
            ``Content-Type`` header (``None`` when the header is absent).
        """
        stamp = time.strftime('%a %b %d %Y %H:%M:%S', time.localtime())
        image_resp = self.s.get(VC_URL + stamp)
        return {
            'cls': 'data:image',
            'content': image_resp.content,
            'content_type': image_resp.headers.get('Content-Type'),
        }