def _unit_fetch_userinfo(self):
    """Query the user-info endpoint and keep the token cookie it returns.

    Posts the ``QfzscxAction.queryQfzs`` request to ``USERINFO_URL`` (sending
    ``self.g.Token`` as the ``Token`` header) and copies the ``Token`` cookie
    set for ``seyb.szsi.gov.cn`` onto ``self.s.Token``.

    Raises:
        PreconditionNotSatisfiedError: when a ``PermissionError`` is raised
            while the request is being made.
    """
    try:
        # Third request of the flow (per the original inline note). A random
        # "r" query value is appended to the URL.
        random_query = '?r=' + str(random.random())
        form = {
            '_isModel': 'true',
            'params': '{"oper":"QfzscxAction.queryQfzs","params":{},"datas":{"ncm_gt_欠费总数":{"params":{}}}}',
        }
        # NOTE(review): the spaces inside the Accept / Connection values are
        # reproduced exactly as found -- confirm they are intended.
        request_headers = {
            'X-Requested-With': 'XMLHttpRequest',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'Accept': 'application / json, text / plain, * / *',
            'Token': self.g.Token,
            'Connection': 'keep - alive',
        }
        resp = self.s.post(
            USERINFO_URL + random_query,
            data=form,
            headers=request_headers,
            timeout=15)
        site_cookies = resp.cookies._cookies['seyb.szsi.gov.cn']['/']
        self.s.Token = site_cookies['Token'].value
        # TODO: run the task; raise PermissionError when not logged in
        return
    except PermissionError as e:
        raise PreconditionNotSatisfiedError(e)
def _unit_fetch_user_DETAILED(self, url):
    """POST to *url* and return the UTF-8 decoded response parsed as HTML.

    Returns:
        A ``BeautifulSoup`` document built with the ``html.parser`` backend.

    Raises:
        PreconditionNotSatisfiedError: when a ``PermissionError`` is raised.
    """
    try:
        response = self.s.post(url)
        markup = str(response.content, encoding='utf-8')
        return BeautifulSoup(markup, "html.parser")
    except PermissionError as e:
        raise PreconditionNotSatisfiedError(e)
def _unit_fetch_user_info(self):
    """Scrape the basic-information table and initialise the result layout.

    Reads the cells of the 24th ``<table>`` of the ``USER_INFO_URL`` page,
    fills ``data["baseInfo"]``, creates one empty ``{"data": {}}`` section
    per insurance type and updates ``self.result_identity``.

    Raises:
        PreconditionNotSatisfiedError: when a ``PermissionError`` is raised.
    """
    try:
        data = self.result_data
        resp = self.s.post(USER_INFO_URL)
        soup = BeautifulSoup(str(resp.content, 'utf-8'), 'html.parser')
        # The values sit in the even-numbered cells of table index 23.
        cells = soup.findAll('table')[23].findAll('td')

        scraped_fields = (
            ("姓名", 2),
            ("社保编号", 4),
            ("单位名称", 6),
            ("出生日期", 8),
            ("开始缴费时间", 10),
            ("当前账户状态", 12),
        )
        base_info = {label: cells[index].text for label, index in scraped_fields}
        base_info["身份证号"] = self.result['key']
        base_info["更新时间"] = datetime.datetime.now().strftime('%Y-%m-%d')
        base_info["城市名称"] = '昆山'
        base_info["城市编号"] = '320583'
        # Summary fields start out empty.
        base_info.update(dict.fromkeys(
            ("缴费时长", "最近缴费时间", "个人养老累计缴费", "个人医疗累计缴费"), ''))
        data["baseInfo"] = base_info

        # Pension, medical, work injury, maternity, unemployment: each section
        # holds regular payments and back payments.
        for section in ("old_age", "medical_care", "injuries", "maternity",
                        "unemployment"):
            data[section] = {"data": {}}

        # Identity of the crawled account.
        self.result_identity.update({
            'task_name': '昆山',
            'target_name': cells[2].text,
            'target_id': self.result['meta']["身份证号"],
            'status': cells[12].text,
        })
        return
    except PermissionError as e:
        raise PreconditionNotSatisfiedError(e)
def _unit_fetch_user_medical_care(self, start_job):
    """Collect medical-insurance payment records year by year.

    Walks from the current year down to *start_job* (inclusive), requests the
    detail list with code ``"023"`` for each year and stores the paid rows in
    ``self.result['data']["medical_care"]["data"][year][month]``.

    Side effects on ``self``:
        medical_care_month: number of paid basic-medical rows.
        my_self_medical_care: sum of the personal amount (``grjfje``) of
            every returned row.
        medical_care_lately_data: period (``xssj`` without dashes) of the
            most recent paid basic-medical row, ``'199201'`` if none.

    Args:
        start_job: first year to include; anything ``int()`` accepts.

    Raises:
        PreconditionNotSatisfiedError: when ``InvalidConditionError`` is
            raised while fetching.
    """
    data = self.result['data']
    # Number of months actually paid for medical insurance.
    self.medical_care_month = 0
    # Running total of the personal medical contribution.
    self.my_self_medical_care = 0
    # Most recent paid period; '199201' doubles as the "not found yet" marker.
    self.medical_care_lately_data = '199201'
    try:
        current_year = int(time.strftime('%Y', time.localtime(time.time())))
        for year in range(current_year, int(start_job) - 1, -1):
            time.sleep(0.8)
            # Fetch and decode the detail list for this year.
            content = self._unit_fetch_user_DETAILED("023", year)
            result = json.loads(content)
            if result["code"] != '1':
                continue

            year_bucket = {}
            data["medical_care"]["data"][str(year)] = year_bucket
            first_of_year = True
            for item in result["result"]:
                # An empty or missing amount counts as zero instead of
                # crashing float().
                personal_paid = item.get('grjfje', '0')
                self.my_self_medical_care += float(personal_paid or 0)

                record = {
                    "缴费时间": item.get("xssj", ''),
                    "缴费类型": item.get("jflx", ''),
                    "缴费基数": item.get("jfjs", ''),
                    "公司缴费": '',
                    "个人缴费": item.get('grjfje', ''),
                    "缴费单位": item.get("dwmc", ''),
                }

                kind = item["fkkm"]
                if kind not in ("基本医疗保险", "大额医疗保险"):
                    continue
                if item["jfbz"] != "已实缴":
                    continue

                if kind == "基本医疗保险":
                    if ((current_year == year and first_of_year)
                            or self.medical_care_lately_data == "199201"):
                        self.medical_care_lately_data = item["xssj"].replace("-", "")
                        first_of_year = False
                    # Only basic medical rows count towards paid months.
                    self.medical_care_month += 1

                # Several rows may share a month, so each month is a list.
                month_key = str(item["xssj"][5:])
                year_bucket.setdefault(month_key, []).append(record)
    except InvalidConditionError as e:
        raise PreconditionNotSatisfiedError(e) from e
def _unit_fetch_user_DETAILED(self, bizType, year):
    """Request one year of detail rows for an insurance code.

    Posts ``code``, ``year`` and a page size of 200 to ``DETAILED_LIST_URL``.

    Returns:
        The raw response body (``resp.content``).

    Raises:
        PreconditionNotSatisfiedError: when ``InvalidConditionError`` is
            raised.
    """
    try:
        form = {'code': bizType, 'year': year, 'pageSize': 200}
        return self.s.post(DETAILED_LIST_URL, data=form).content
    except InvalidConditionError as e:
        raise PreconditionNotSatisfiedError(e)
def _unit_fetch_name(self):
    """Parse the account page in ``self.html`` into ``baseInfo``.

    Every four-cell row of the third ``<table>`` holds two label/value
    pairs. Labels are renamed to the common field names, values have
    whitespace removed, and the pairs are stored in ``data['baseInfo']``.
    The identity and a one-entry ``companyList`` are then derived from the
    parsed fields.

    Raises:
        PreconditionNotSatisfiedError: when ``AssertionError`` or
            ``InvalidParamsError`` is raised.
    """
    # Ordered rewrites for the first label of a row; order matters.
    left_label_rewrites = (
        ('\n', ''),
        ('账户余额', '当前余额'),
        ('本年支取总额', '当年提取金额'),
        ('本年缴存总额', '当年缴存金额'),
        ('月汇缴金额', '月应缴额'),
        ('个人缴存基数', '缴存基数'),
        (' ', ''),
        ('身份证号', '证件号'),
    )
    # Ordered rewrites for the second label of a row.
    right_label_rewrites = (
        ('\n', ''),
        (' ', ''),
        ('状态', '帐户状态'),
        ('最后汇缴年月', '最后业务日期'),
        ('\r \xa0', ''),
        (' ', ''),
    )

    def rewrite(text, pairs):
        """Apply each (old, new) replacement to *text* in sequence."""
        for old, new in pairs:
            text = text.replace(old, new)
        return text

    def squeeze(text):
        """Drop newlines, spaces, tabs and carriage returns from *text*."""
        return re.sub('[\n \t \n\r]', '', text)

    try:
        data = self.result_data
        data['baseInfo'] = {
            '城市名称': '哈尔滨',
            '城市编号': '230100',
            '更新时间': time.strftime("%Y-%m-%d"),
            '证件类型': '身份证'
        }
        soup = BeautifulSoup(self.html, 'html.parser')
        tables = soup.findAll('table')
        if tables:
            for row in tables[2].find_all('tr'):
                cells = [td.text for td in row.find_all('td')]
                if len(cells) != 4:
                    continue
                left_label, left_value, right_label, right_value = cells
                data['baseInfo'][rewrite(left_label, left_label_rewrites)] = squeeze(
                    left_value.replace('\xa0', ''))
                # Dashes are stripped from the second value as well.
                data['baseInfo'][rewrite(right_label, right_label_rewrites)] = squeeze(
                    right_value.replace('\xa0', '').replace('-', ''))

        # NOTE(review): these lookups raise KeyError when the expected fields
        # were not parsed above -- confirm that is the intended failure mode.
        base_info = data['baseInfo']
        self.result_identity['target_name'] = base_info['姓名']
        self.result_identity['status'] = base_info['帐户状态']

        data['companyList'] = []
        company_fields = ('单位名称', '当前余额', '帐户状态', '当年缴存金额',
                          '当年提取金额', '最后业务日期')
        company = {field: data['baseInfo'][field] for field in company_fields}
        data['companyList'].append(company)
        return
    except (AssertionError, InvalidParamsError) as e:
        raise PreconditionNotSatisfiedError(e)
def _unit_fetch(self):
    """Fetch the insured person's record and fill ``baseInfo`` and identity.

    Reads the ``#token`` field from the login page, posts it to ``INFO_URL``
    and uses the first entry of the returned JSON list.

    Raises:
        PreconditionNotSatisfiedError: when a ``PermissionError`` is raised.
    """
    try:
        # TODO: run the task; raise PermissionError when not logged in
        login_page = self.s.get(
            'http://218.28.166.74:8080/zzsbonline/loginAction', timeout=10)
        token_field = BeautifulSoup(login_page.content,
                                    'html.parser').select('#token')[0]
        tokens = token_field.attrs['value']

        resp = self.s.post(
            INFO_URL, data={'idno': '', 'token': tokens}, timeout=10)
        # The JSON list is read from the text of the parsed response.
        person = json.loads(BeautifulSoup(resp.content, 'html.parser').text)[0]

        # Card status: known codes become labels, others pass through.
        card_status = person['stepflag']
        if card_status == '0':
            card_status = '未办卡'
        elif card_status == '6':
            card_status = '持卡正常'

        # Insurance status: same pass-through rule.
        insured_status = person['insuretype']
        if insured_status == '1':
            insured_status = '正常参保'
        elif insured_status == '2':
            insured_status = '已停保'

        self.result_data["baseInfo"] = {
            '城市名称': '郑州',
            '城市编号': '410100',
            '更新时间': time.strftime("%Y-%m-%d"),
            '姓名': person['name'],
            '身份证号': person['idno'],
            '个人编号': person['personelno'],
            '社会保障卡号码': person['cardno'],
            '社保卡状态': card_status,
            '单位编号': person['companyid'],
            '单位名称': person['companyname'],
            '卡余额': person['balance'],
            '缴费基数': person['payBase'],
            '参保状态': insured_status,
            '缴费时长': 0,
            '最近缴费时间': '',
            '开始缴费时间': '',
            '个人养老累计缴费': 0,
            '个人医疗累计缴费': 0
        }
        self.result_identity['target_name'] = person['name']
        self.result_identity['status'] = (
            '正常' if '正常参保' in insured_status else '停缴')
        return
    except PermissionError as e:
        raise PreconditionNotSatisfiedError(e)
def _unit_fetch_user_DETAILED(self, page_type, year):
    """Fetch one year of a detail page and return it parsed as HTML.

    The URL is ``DETAILED_LIST_URL + page_type`` with ``searchYear`` and a
    millisecond ``time`` query value. Carriage returns, tabs, newlines and
    spaces are removed from the markup before parsing.

    Raises:
        PreconditionNotSatisfiedError: when ``InvalidConditionError`` is
            raised.
    """
    try:
        millis = int(round(time.time() * 1000))
        url = ''.join((DETAILED_LIST_URL, page_type, '?searchYear=',
                       str(year), '&time=', str(millis)))
        markup = str(self.s.post(url).content, 'utf-8')
        # Applied in this exact order.
        cleanups = (
            ('\r', ''),
            ('\t', ''),
            ('\n', ''),
            (' ', ''),
            ('</tr> </tr>', '</tr>'),
        )
        for old, new in cleanups:
            markup = markup.replace(old, new)
        return BeautifulSoup(markup, "html.parser")
    except InvalidConditionError as e:
        raise PreconditionNotSatisfiedError(e)
def _unit_fetch_user_injuries(self, start_job):
    """Collect work-injury insurance payment records year by year.

    Walks from the current year down to *start_job* (inclusive), requests the
    detail list with code ``"052"`` for each year and stores the rows marked
    ``"足额缴费"`` in ``self.result['data']["injuries"]["data"][year][month]``.

    Side effects on ``self``:
        injuries_month: number of fully paid rows.
        injuries_lately_data: period (``xssj`` without dashes) of the most
            recent fully paid row, ``'199201'`` if none.

    Args:
        start_job: first year to include; anything ``int()`` accepts.

    Raises:
        PreconditionNotSatisfiedError: when ``InvalidConditionError`` is
            raised while fetching.
    """
    data = self.result['data']
    # Number of months actually paid for work-injury insurance.
    self.injuries_month = 0
    # Most recent paid period; '199201' doubles as the "not found yet" marker.
    self.injuries_lately_data = '199201'
    try:
        current_year = int(time.strftime('%Y', time.localtime(time.time())))
        for year in range(current_year, int(start_job) - 1, -1):
            time.sleep(0.8)
            # Fetch and decode the detail list for this year.
            content = self._unit_fetch_user_DETAILED("052", year)
            result = json.loads(content)
            if result["code"] != '1':
                continue

            year_bucket = {}
            data["injuries"]["data"][str(year)] = year_bucket
            first_of_year = True
            for item in result["result"]:
                record = {
                    "缴费时间": item.get("xssj", ''),
                    "缴费类型": item.get("jflx", ''),
                    "缴费基数": item.get("jfjs", ''),
                    "公司缴费": '',
                    "个人缴费": item.get('grjfje', ''),
                    "缴费单位": item.get("dwmc", ''),
                }
                # Rows that are not fully paid are ignored.
                if item["jfbj"] != "足额缴费":
                    continue

                if ((current_year == year and first_of_year)
                        or self.injuries_lately_data == "199201"):
                    self.injuries_lately_data = item["xssj"].replace("-", "")
                    first_of_year = False
                self.injuries_month += 1

                # Several rows may share a month, so each month is a list.
                month_key = str(item["xssj"][5:])
                year_bucket.setdefault(month_key, []).append(record)
    except InvalidConditionError as e:
        raise PreconditionNotSatisfiedError(e) from e
def _unit_fetch_user_DETAILED(self, bizType):
    """Fetch the detail fragment for *bizType* and return it parsed as HTML.

    Posts to ``DETAILED_LIST_URL`` with a very large ``pageCount`` and parses
    the ``"content<bizType>"`` entry of the JSON response.

    Raises:
        PreconditionNotSatisfiedError: when a ``PermissionError`` is raised.
    """
    try:
        form = {
            'xz': 2,
            'pageIndex': 1,
            'pageCount': 99999999,
            'nu': bizType,
        }
        resp = self.s.post(DETAILED_LIST_URL, data=form)
        payload = json.loads(str(resp.content, 'utf-8'))
        fragment = payload["content" + str(bizType)]
        return BeautifulSoup(fragment, "html.parser")
    except PermissionError as e:
        raise PreconditionNotSatisfiedError(e)
def _unit_get_payment_details(self):
    """Run the five per-insurance fetchers and summarise them in baseInfo.

    Calls the pension, medical, work-injury, maternity and unemployment
    fetchers in that order, then writes to ``data["baseInfo"]`` (all values
    as strings):

    * payment duration: the largest ``<kind>_month`` counter,
    * latest payment time: the largest stripped ``<kind>_lately_data``,
    * first payment time: the smallest stripped ``<kind>_lately_start_data``,
    * the personal pension and medical totals.

    Raises:
        PreconditionNotSatisfiedError: when a ``PermissionError`` is raised.
    """
    kinds = ('old_age', 'medical_care', 'injuries', 'maternity',
             'unemployment')
    try:
        data = self.result['data']

        # Each fetcher is looked up just before it is called.
        for kind in kinds:
            getattr(self, '_unit_fetch_user_' + kind)()

        # Paid-month counters of the five insurances.
        durations = [getattr(self, kind + '_month') for kind in kinds]
        # Most recent payment period of each insurance.
        latest_periods = [
            getattr(self, kind + '_lately_data').strip() for kind in kinds
        ]
        # Earliest payment period of each insurance.
        earliest_periods = [
            getattr(self, kind + '_lately_start_data').strip()
            for kind in kinds
        ]

        base_info = data["baseInfo"]
        base_info["缴费时长"] = str(max(durations))
        base_info["最近缴费时间"] = str(max(latest_periods))
        base_info["开始缴费时间"] = str(min(earliest_periods))
        base_info["个人养老累计缴费"] = str(self.my_self_old_age)
        base_info["个人医疗累计缴费"] = str(self.my_self_medical_care)
    except PermissionError as e:
        raise PreconditionNotSatisfiedError(e)
def _unit_fetch_x(self):
    """Store ``self.ua.x()`` under ``data['x']`` and fill in the identity.

    The username from ``self.result['meta']`` is used as both the target
    name and the target id.

    Raises:
        PreconditionNotSatisfiedError: when a ``PermissionError`` is raised.
    """
    try:
        # Payload.
        data = self.result['data']
        data['x'] = self.ua.x()

        # Identity.
        identity: dict = self.result['identity']
        username = self.result['meta']['username']
        identity.update(
            task_name='测试simple',
            target_name=username,
            target_id=username,
            status='正常',
        )
        return
    except PermissionError as e:
        raise PreconditionNotSatisfiedError(e)
def _unit_fetch_user_info(self):
    """Read the account holder's name and initialise the result layout.

    The name is cut out of the ``value`` attribute of the ``psMsgBar`` input
    on the ``USER_INFO_URL`` page; the personal number and ID number come
    from ``self.result_meta``. One empty ``{"data": {}}`` section is created
    per insurance type.

    Raises:
        PreconditionNotSatisfiedError: when a ``PermissionError`` is raised.
    """
    try:
        data = self.result_data
        resp = self.s.post(USER_INFO_URL)
        soup = BeautifulSoup(resp.content, 'html.parser')
        message_bar = soup.find('input', {'name': 'psMsgBar'}).attrs['value']
        # Eighth piece after splitting on "td", minus the first character and
        # the last two.
        name = message_bar.split('td')[7][1:-2]
        person_number = self.result_meta["个人编号"]
        id_number = self.result_meta["身份证号"]

        base_info = {
            "姓名": name,
            "社会保障号": person_number,
            "身份证号": id_number,
            "更新时间": datetime.datetime.now().strftime('%Y-%m-%d'),
            '城市名称': '苏州',
            '城市编号': '320500',
        }
        # Summary fields start out empty.
        base_info.update(dict.fromkeys(
            ('缴费时长', '最近缴费时间', '开始缴费时间', '个人养老累计缴费',
             '个人医疗累计缴费'), ''))
        data["baseInfo"] = base_info

        # Pension, medical, work injury, maternity, unemployment: each section
        # holds regular payments and back payments.
        for section in ("old_age", "medical_care", "injuries", "maternity",
                        "unemployment"):
            data[section] = {"data": {}}

        # Identity of the crawled account.
        self.result_identity['target_name'] = name
        self.result_identity['status'] = ''
        return
    except PermissionError as e:
        raise PreconditionNotSatisfiedError(e)
def _unit_fetch_name(self):
    """Request ``MAIN_URL`` and fill in the name and identity.

    The name is currently a hard-coded placeholder; the response of the
    request is not read.

    Raises:
        PreconditionNotSatisfiedError: when a ``PermissionError`` is raised.
    """
    try:
        # Payload.
        data = self.result['data']
        self.s.get(MAIN_URL)
        # FIXME: parse the name from the page instead of hard-coding it:
        # soup = BeautifulSoup(resp.content, 'html.parser')
        # name = soup.select('#kind1 > table > tbody > tr:nth-child(2) > td:nth-child(2)')[0]['value']
        data['name'] = '卜礼祥'

        # Identity.
        identity: dict = self.result['identity']
        identity.update(
            task_name='测试real',
            target_name=data['name'],
            target_id=self.result['meta']['id_num'],
            status='正常',
        )
        return
    except PermissionError as e:
        raise PreconditionNotSatisfiedError(e)
def _unit_fetch_user_medical_treatment(self):
    """Scrape the medical-treatment summary into ``data["medical_treatment"]``.

    Reads the ``<td>`` cells of the ``div.tab`` element on the
    ``MEDICAL_TREATMENT_URL`` page. Cells 4-6 give the fixed fields, cells
    8-17 are five label/value pairs stored under their own whitespace-free
    labels, and two totals are added: outpatient (cells 9 + 11) and
    inpatient (cells 15 + 17).

    Raises:
        PreconditionNotSatisfiedError: when ``InvalidConditionError`` is
            raised.
    """
    data = self.result['data']
    try:
        resp = self.s.post(MEDICAL_TREATMENT_URL)
        soup = BeautifulSoup(str(resp.content, 'utf-8'), "html.parser")
        # Data cells.
        tds = soup.find('div', {'class': 'tab'}).findAll("td")

        def compact(index):
            """Return the text of cell *index* with all whitespace removed."""
            # Raw string: '\s' in a plain literal is an invalid escape.
            return re.sub(r'\s', '', tds[index].text)

        treatment = {
            "单位名称": tds[4].text[5:],
            "姓名": tds[5].text[3:],
            "单位": tds[6].text[3:],
        }
        # Label cells sit at 8, 10, ..., 16 with their value right after.
        for label_index in range(8, 17, 2):
            treatment[compact(label_index)] = compact(label_index + 1)
        treatment["门诊"] = str(float(compact(9)) + float(compact(11)))
        treatment["住院"] = str(float(compact(15)) + float(compact(17)))

        data["medical_treatment"] = treatment
    except InvalidConditionError as e:
        raise PreconditionNotSatisfiedError(e) from e
def _unit_fetch_user_unemployment(self):
    """Collect unemployment-insurance payment rows grouped by year and month.

    Parses the table rows returned by ``self._unit_fetch_user_DETAILED(8)``.
    A row counts as data when its first cell, stripped, is an integer; rows
    with a positive value are stored in
    ``self.result['data']["unemployment"]["data"][year][month]`` in reverse
    page order.

    Side effects on ``self``:
        unemployment_month: number of stored rows.
        unemployment_lately_data: period of the first data row, ``'199201'``
            if there is none.
        unemployment_lately_start_data: period of the data row whose running
            number equals ``len(rows) - 2``, ``'199201'`` if none matches.

    Raises:
        PreconditionNotSatisfiedError: when a ``PermissionError`` is raised.
    """
    data = self.result['data']
    # Number of months actually paid for unemployment insurance.
    self.unemployment_month = 0
    # Most recent insured period.
    self.unemployment_lately_data = '199201'
    # Earliest insured period.
    self.unemployment_lately_start_data = '199201'
    try:
        soup = self._unit_fetch_user_DETAILED(8)
        trs = soup.findAll('tr')

        # year -> records in page order; dict order keeps first appearance.
        records_by_year = {}
        data_row_count = 0
        for tr in trs:
            tds = tr.findAll('td')
            try:
                # The stripped text is used for every slice below, so padding
                # around the period cannot shift the year/month.
                period = tds[0].text.strip()
                period_number = int(period)
            except (IndexError, ValueError):
                # Header rows, empty rows and non-numeric rows are not data.
                continue
            data_row_count += 1
            # Data ids start at 1.
            if period_number <= 0:
                continue

            year = period[0:4]
            month = period[0:4] + "-" + period[4:6]
            # The first data row carries the most recent payment.
            if data_row_count == 1:
                self.unemployment_lately_data = period
            # NOTE(review): this assumes exactly two non-data rows in the
            # table -- confirm against the page layout.
            if len(trs) - 2 == data_row_count:
                self.unemployment_lately_start_data = period

            records_by_year.setdefault(year, []).append({
                "缴费时间": month,
                "缴费类型": tds[2].text.strip(),
                "缴费基数": tds[3].text.strip(),
                "公司缴费": tds[5].text.strip(),
                "个人缴费": tds[6].text.strip(),
                "缴费单位": tds[1].text.strip(),
            })
            self.unemployment_month += 1

        for year, records in records_by_year.items():
            months = {}
            for record in reversed(records):
                months.setdefault(str(record["缴费时间"][5:]), []).append(record)
            data["unemployment"]["data"][str(year)] = months
    except PermissionError as e:
        raise PreconditionNotSatisfiedError(e) from e
def _unit_fetch(self):
    """Fetch the Shijiazhuang social-insurance data into the result.

    Steps, in request order:

    1. GET two report pages (``Half_URL`` + report id) for last year and keep
       their ``<tr>`` rows.
    2. POST the combined store query to ``Main_URL``; ``xzStore`` yields the
       per-insurance status rows, ``sbkxxStore`` the card holder record.
    3. POST the ``yljf`` query; its rows feed the medical, maternity and
       serious-illness sections, filtered by ``AC43_AAE140``.
    4. POST the ``syjf`` query; all of its rows feed the unemployment
       section.
    5. Build ``baseInfo`` and the identity.

    Records are grouped as ``section["data"][year][month] -> list``.

    Raises:
        PreconditionNotSatisfiedError: when ``InvalidConditionError`` is
            raised.
    """

    def search_payload(statement, card):
        """Build the searchStore request body for *statement*."""
        # No pageNumber/pageSize is sent here; an earlier commented-out
        # variant sent pageSize:20 -- presumably dropped to get every row.
        return (
            '{header:{"code": -100, "message": {"title": "", "detail": ""}},body:{dataStores:{searchStore:{rowSet:{"primary":[],"filter":[],"delete":[]},name:"searchStore",recordCount:0,context:{"BUSINESS_ID": "UOA017", "BUSINESS_REQUEST_ID": "REQ-OA-M-013-01", "CUSTOMVPDPARA": ""},statementName:"'
            + statement
            + '",attributes:{"AAC002": ["' + card
            + '", "12"],}}},parameters:{"BUSINESS_ID": "UOA017", "BUSINESS_REQUEST_ID": "REQ-OA-M-013-01", "CUSTOMVPDPARA": "", "PAGE_ID": ""}}}'
        )

    def add_rows(bucket, rows, company, kind, with_personal):
        """Append one record per row to bucket[year][month]."""
        for row in rows:
            period = row['AC43_AAE003']
            months = bucket.setdefault(period[0:4], {})
            months.setdefault(period[4:6], []).append({
                '缴费单位': company,
                '缴费类型': kind,
                '缴费时间': period,
                '缴费基数': row['AC43_AAE018'],
                '公司缴费': row['AC43_AAE022'],
                '个人缴费': row['AC43_AAE021'] if with_personal else '',
            })

    try:
        self.result_data['baseInfo'] = {}
        times = str(int(time.strftime("%Y", time.localtime())) - 1)
        icard = self.result_meta['社保号']

        res = self.s.get(
            Half_URL + 'a5c27955-1489-4f81-9781-18ee9ace9ec3&AS_AAE001=' +
            times + '&AS_AAE135=' + icard)
        soup = BeautifulSoup(res.text, 'html.parser').findAll('tr')
        redetail = self.s.get(
            Half_URL + 'da89388c-5c59-452f-b2bd-8f54effeda33&AS_AAE001=' +
            times + '&AS_AAE135=' + icard)
        soupDetail = BeautifulSoup(redetail.text, 'html.parser').findAll('tr')

        # Combined query: content, insurance status, card and record stores.
        datas = (
            '{header:{"code": -100, "message": {"title": "", "detail": ""}},body:{dataStores:{'
            + 'contentStore:{rowSet:{"primary":[],"filter":[],"delete":[]},name:"contentStore",pageNumber:1,pageSize:2147483647,recordCount:0,statementName:"si.treatment.ggfw.content",attributes:{"AAC002": ["'
            + icard + '", "12"],}},'
            + 'xzStore:{rowSet:{"primary":[],"filter":[],"delete":[]},name:"xzStore",pageNumber:1,pageSize:2147483647,recordCount:0,statementName:"si.treatment.ggfw.xzxx",attributes:{"AAC002": ["'
            + icard + '", "12"],}},'
            + 'sbkxxStore:{rowSet:{"primary":[],"filter":[],"delete":[]},name:"sbkxxStore",pageNumber:1,pageSize:2147483647,recordCount:0,statementName:"si.treatment.ggfw.sbkxx",attributes:{"AAC002": ["'
            + icard + '", "12"],}},'
            + 'grqyjlStore:{rowSet:{"primary":[],"filter":[],"delete":[]},name:"grqyjlStore",pageNumber:1,pageSize:2147483647,recordCount:0,statementName:"si.treatment.ggfw.grqyjlyj",attributes:{"AAE135": ["'
            + icard + '", "12"]}}'
            + '},parameters:{"BUSINESS_ID": "UCI314", "BUSINESS_REQUEST_ID": "REQ-IC-Q-098-60", "CUSTOMVPDPARA": "", "PAGE_ID": ""}}}'
        )
        totalresp = self.s.post(Main_URL, datas)
        # Decoded once and reused for both the xzStore and sbkxxStore reads.
        stores = demjson.decode(totalresp.text)['body']['dataStores']
        totalinfo = stores['xzStore']['rowSet']['primary']

        # Pension: only the summary cells of report row 18 are used.
        pension_cells = soupDetail[18].findAll('td')
        EICount = pension_cells[0].text
        EIMoney = pension_cells[1].text.replace(',', '')
        EIType = totalinfo[0]['AAC008']

        # Medical insurance details.
        self.result['data']["medical_care"] = {"data": {}}
        HIType = totalinfo[1]['AAC008']
        HCompany = totalinfo[1]['AAB004']
        sanxianresp = self.s.post(
            Main_URL, search_payload('si.treatment.ggfw.yljf', icard))
        sanDetail = demjson.decode(
            sanxianresp.text
        )['body']['dataStores']['searchStore']['rowSet']['primary']
        add_rows(
            self.result['data']["medical_care"]["data"],
            [row for row in sanDetail
             if row['AC43_AAE140'] == "城镇职工基本医疗保险"],
            HCompany, HIType, True)

        # Unemployment insurance details: every returned row is stored.
        self.result['data']["unemployment"] = {"data": {}}
        IIType = totalinfo[2]['AAC008']
        IIresp = self.s.post(
            Main_URL, search_payload('si.treatment.ggfw.syjf', icard))
        iiDetail = demjson.decode(
            IIresp.text
        )['body']['dataStores']['searchStore']['rowSet']['primary']
        if iiDetail:
            add_rows(self.result['data']["unemployment"]["data"], iiDetail,
                     totalinfo[2]['AAB004'], IIType, True)

        # Work-injury insurance: no section is produced.

        # Maternity insurance details (no personal contribution).
        self.result['data']["maternity"] = {"data": {}}
        BIType = totalinfo[3]['AAC008']
        add_rows(
            self.result['data']["maternity"]["data"],
            [row for row in sanDetail if row['AC43_AAE140'] == "生育保险"],
            HCompany, BIType, False)

        # Serious-illness insurance details, labelled with the medical type.
        self.result['data']["serious_illness"] = {"data": {}}
        add_rows(
            self.result['data']["serious_illness"]["data"],
            [row for row in sanDetail
             if row['AC43_AAE140'] == "大额医疗费用补助"],
            HCompany, HIType, False)

        # Personal basic information.
        wuxiantype = {
            '养老': EIType,
            '医疗': HIType,
            '失业': IIType,
            '生育': BIType
        }
        status = '正常' if EIType == "正常参保" else '停缴'
        # An empty result list yields '' instead of an IndexError.
        latest_payment = sanDetail[0]['AC43_AAE003'] if sanDetail else ''
        first_payment = iiDetail[-1]['AC43_AAE003'] if iiDetail else ''

        page_name = soup[10].findAll('td')[1].text
        has_page_data = page_name != ''
        if has_page_data:
            name = page_name
            id_number = soup[10].findAll('td')[5].text
            medical_total = soup[40].findAll('td')[4].text
        else:
            # The report page is blank: fall back to the card holder record.
            infoDetial = stores['sbkxxStore']['rowSet']['primary'][0]
            name = infoDetial['FACTNAME']
            id_number = infoDetial['IDCARDNO']
            medical_total = ''

        base_info = {
            '姓名': name,
            '身份证号': id_number,
            '更新时间': time.strftime("%Y-%m-%d", time.localtime()),
            '城市名称': '石家庄',
            '城市编号': '130100',
            '缴费时长': EICount,
            '最近缴费时间': latest_payment,
            '开始缴费时间': first_payment,
            '个人养老累计缴费': EIMoney,
            '个人医疗累计缴费': medical_total,
            '五险状态': wuxiantype,
            '账户状态': status,
        }
        if has_page_data:
            base_info['个人编号'] = soup[11].findAll('td')[1].text
            base_info['单位编号'] = soup[11].findAll('td')[3].text
            base_info['开户日期'] = soup[12].findAll('td')[1].text
        self.result_data['baseInfo'] = base_info

        # Identity: use the same name as baseInfo, so the fallback branch
        # does not publish an empty target_name.
        self.result['identity'] = {
            "task_name": "石家庄",
            "target_name": name,
            "target_id": self.result_meta['社保号'],
            "status": status
        }
        return
    except InvalidConditionError as e:
        raise PreconditionNotSatisfiedError(e) from e
def _unit_fetch(self):
    """Fetch the Nanjing housing-fund account, its records and identity.

    Request order: the profile page, the account command endpoint, the
    detail page (for the two hidden textarea values) and the dynamic table
    endpoint for the last ten years. Records are grouped as
    ``detail["data"][year][month] -> list``.

    Raises:
        PreconditionNotSatisfiedError: when ``InvalidConditionError`` is
            raised.
    """

    def as_percent(ratio):
        """Turn a ratio such as '0.08' into a whole-number percent string."""
        return str(int(float(ratio) * 100)) + '%'

    try:
        self.result_data['baseInfo'] = {}
        self.result_data['detail'] = {"data": {}}
        self.result_data['companyList'] = []

        # Basic information.
        profile_resp = self.s.get(
            "http://www.njgjj.com/init.summer?_PROCID=80000003")
        profile_page = BeautifulSoup(profile_resp.content, 'html.parser')
        account_number = profile_page.find('input', {'id': 'accnum'})['value']
        username = profile_page.find('input', {'id': 'accname'})['value']
        person_id = profile_page.find('input', {'id': 'certinum'})['value']
        # First 13 characters of the millisecond timestamp.
        uuid = str(time.time() * 1000)[0:13]
        account_form = {
            'accname': username,
            'accnum': account_number,
            'prodcode': 1,
            '_PROCID': '80000003',
            '_PAGEID': 'step1',
            'certinum': person_id,
        }
        account_resp = self.s.post(
            "http://www.njgjj.com/command.summer?uuid=" + uuid, account_form)
        account = json.loads(account_resp.text)['data']
        status = '缴存' if account['indiaccstate'] == "0" else '封存'

        # Payment details: the hidden textareas must be echoed back.
        detail_resp = self.s.get(
            "http://www.njgjj.com/init.summer?_PROCID=70000002")
        detail_page = BeautifulSoup(detail_resp.content, 'html.parser')
        ghosts = detail_page.find('textarea', {'name': 'DATAlISTGHOST'}).text
        pools = detail_page.find('textarea', {'name': '_DATAPOOL_'}).text
        table_form = {
            'dynamicTable_page': '/ydpx/70000002/700002_01.ydpx',
            'dynamicTable_id': 'datalist2',
            'dynamicTable_currentPage': 0,
            'dynamicTable_nextPage': 1,
            'dynamicTable_pageSize': 10,
            'dynamicTable_paging': 'true',
            'DATAlISTGHOST': ghosts,
            '_DATAPOOL_': pools,
            'dynamicTable_configSqlCheck': '0',
            'errorFilter': '1=1',
            # Same month and day, ten years back.
            'begdate': str(int(time.strftime("%Y")) - 10) + time.strftime("-%m-%d"),
            'enddate': time.strftime("%Y-%m-%d"),
            '_PROCID': '70000002',
            '_CHANNEL': 1,
            '_APPLY': 0,
            'accname': username,
            'accnum': account_number,
        }
        table_resp = self.s.post(
            "http://www.njgjj.com/dynamictable?uuid=" + uuid + "", table_form)
        records = json.loads(table_resp.text)['data']['data']
        by_period = self.result_data["detail"]["data"]

        # Date and amount of the first remittance record in the list.
        first_remittance = next(
            (row for row in records if '汇缴' in row['reason']), None)
        if first_remittance is None:
            last_time, last_money = "", ""
        else:
            last_time = first_remittance['transdate']
            last_money = first_remittance['basenum']

        remittance_count = 0  # number of remittance records
        for row in records:
            year = row['transdate'][0:4]
            month = row['transdate'][5:7]
            # Loan repayments are an expense, everything else income.
            is_repayment = '还贷' in row['reason']
            entry = {
                '时间': row['transdate'],
                '类型': self._converType(row['reason']),
                '汇缴年月': '',
                '收入': '' if is_repayment else row['basenum'],
                '支出': row['basenum'] if is_repayment else '',
                '余额': row['payvouamt'],
                '单位名称': row['unitaccname']
            }
            if '汇缴' in row['reason']:
                remittance_count += 1
            by_period.setdefault(year, {}).setdefault(month, []).append(entry)

        # Personal basic information.
        self.result_data['baseInfo'] = {
            '姓名': username,
            '证件号': person_id,
            '证件类型': '身份证',
            '个人编号': account_number,
            '公积金帐号': account['cardnocsp'],
            '单位缴存比例': as_percent(account['unitprop']),
            '个人缴存比例': as_percent(account['indiprop']),
            '开户日期': account['opnaccdate'],
            '手机号': account['linkphone'],
            '月应缴额': account['amt2'],
            '更新时间': time.strftime("%Y-%m-%d"),
            '城市名称': '南京市',
            '城市编号': '320100',
            '最近汇款日期': last_time,
            '最近汇款金额': last_money,
            '累计汇款次数': remittance_count,
        }

        # Company list.
        self.result_data['companyList'].append({
            "单位名称": account['_UNITACCNAME'],
            "单位登记号": account['unitaccnum'],
            "所属管理部编号": "",
            "所属管理部名称": "",
            "当前余额": account['amt1'],
            "帐户状态": status,
            "当年缴存金额": "",
            "当年提取金额": "",
            "上年结转余额": "",
            "最后业务日期": account['lpaym'],
            "转出金额": ""
        })

        # Identity.
        self.result['identity'] = {
            "task_name": "南京",
            "target_name": username,
            "target_id": self.result_meta['证件号码'],
            "status": status
        }
        return
    except InvalidConditionError as e:
        raise PreconditionNotSatisfiedError(e)
def _unit_fetch(self):
    """Fetch the Jinan housing-fund account, company and detail records.

    Request order: the account page (``INFOR_URL``), the detail page
    (``MX_URL``), the command endpoint with the page's own parameters, and
    the list endpoint (``LIST_URL``) for records since 2014-01-01. Records
    are grouped as ``detail["data"][year][month] -> list``. The last
    remittance seen in the list supplies the "latest remittance" fields.

    Raises:
        PreconditionNotSatisfiedError: when a ``PermissionError`` is raised.
    """
    # Operation code -> (label, counts as income). Unknown codes keep their
    # code as the label and are booked as an expense.
    operations = {
        '2037': ('年度结息', True),
        '1015': ('汇缴', True),
        '2001': ('个人开户', True),
        '2024': ('住房提取', False),
    }

    try:
        # TODO: run the task; raise PermissionError when not logged in
        # Basic information.
        resp = self.s.get(INFOR_URL)
        info_page = BeautifulSoup(resp.content, 'html.parser')
        # Not used further, but fails with IndexError when the form is absent.
        info_page.select('#ct_form')[0]
        data = self.result_data

        def field(selector):
            """Return the value attribute of the first match on the page."""
            return info_page.select(selector)[0].attrs['value']

        def chosen_option(selector):
            """Text of the last <option> carrying exactly two attributes."""
            # NOTE(review): two attributes presumably means value + selected
            # -- confirm against the page markup.
            chosen = ''
            for option in info_page.select(selector)[0].findAll('option'):
                if len(option.attrs) == 2:
                    chosen = option.text
            return chosen

        # Account status.
        grzh = chosen_option('#PerAccState')
        self.result_identity['status'] = '缴存' if grzh == '正常' else '封存'
        # Freeze reason.
        FrzRsn = chosen_option('#FrzRsn')

        data['baseInfo'] = {
            '城市名称': '济南',
            '城市编号': '370100',
            '证件号': field('#CertiNum'),
            '证件类型': '身份证',
            '个人账号': field('#AccNum'),
            '姓名': field('#AccName'),
            '帐户状态': grzh,
            '冻结原因': FrzRsn,
            '开户日期': field('#OpenDate'),
            '月应缴额': field('#MonPaySum'),
            '单位缴存比例': field('#UnitProp') + '%',
            '个人缴存比例': field('#IndiProp') + '%',
            '联名卡号': field('#CardNo'),
            '开户银行': field('#AccBankName'),
            '最近6个月的平均缴存基数': field('#avgbasenumber'),
            '连续缴存月数': field('#paynum'),
            '缴存基数': field('#BaseNumber'),
            '更新时间': time.strftime("%Y-%m-%d", time.localtime()),
            '最后汇缴月': field('#LastPayDate')
        }
        self.result_identity['target_name'] = field('#AccName')

        data['companyList'] = []
        data['companyList'].append({
            '单位账号': field('#UnitAccNum'),
            '单位名称': field('#UnitAccName'),
            '委托代办单位编号': field('#AgentUnitNo'),
            '当前余额': field('#Balance'),
            '帐户状态': grzh
        })

        # Deposit records.
        resp = self.s.get(MX_URL)
        detail_page = BeautifulSoup(resp.content, 'html.parser')
        inputs = detail_page.findAll('input')
        textareas = detail_page.findAll('textarea')
        # SECURITY(review): eval() runs text scraped from a remote page. It
        # is kept because the expected literal format cannot be confirmed
        # from here; replace it with a real parser once the format is known.
        cscontent = eval(
            detail_page.findAll("script")[8].text.split('=')[1].split(';')[0])
        cscontent['BegDate'] = '2014-01-01'
        cscontent['EndDate'] = time.strftime("%Y-%m-%d", time.localtime())
        # The response is not read; the request is still sent before the list
        # query.
        self.s.post(
            'http://123.233.117.50:801/jnwt/command.summer?uuid=' +
            str(int(time.time() * 1000)),
            data=cscontent)

        datas = {
            'dynamicTable_id': 'datalist',
            'dynamicTable_currentPage': '0',
            'dynamicTable_pageSize': '1000',
            'dynamicTable_nextPage': '1',
            'dynamicTable_page': '/ydpx/60020010/602010_01.ydpx',
            'dynamicTable_paging': 'true',
            'dynamicTable_configSqlCheck': '0',
            'errorFilter': '1 = 1',
            'BegDate': '2014-01-01',
            'EndDate': time.strftime("%Y-%m-%d", time.localtime()),
            '_APPLY': inputs[2].attrs['value'],
            '_CHANNEL': inputs[3].attrs['value'],
            '_PROCID': inputs[4].attrs['value'],
            '_LoginType': inputs[5].attrs['value'],
            'DATAlISTGHOST': textareas[0].text,
            '_DATAPOOL_': textareas[1].text
        }
        # NOTE(review): several header names and values contain embedded
        # spaces; they are reproduced exactly as found -- confirm intent.
        resp = self.s.post(
            LIST_URL + str(int(time.time() * 1000)),
            data=datas,
            headers={
                'X-Requested-With': 'XMLHttpRequest',
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                'Accept': 'application / json,text / javascript, * / *; q=0.01',
                'Accept - Encoding': 'gzip,deflate',
                'Accept - Language': 'zh - CN, zh;q = 0.8',
                'Connection': 'keep - alive',
                'Host': '123.233.117.50:801',
                'Origin': 'http: // 123.233.117.50: 801',
                'Referer': 'http: // 123.233.117.50: 801 / jnwt / init.summer?_PROCID = 60020010'
            })
        listinfo = json.loads(BeautifulSoup(resp.content, 'html.parser').text)
        mingxiinfo = listinfo['data']
        mxlist = mingxiinfo['data']

        data['detail'] = {}
        data['detail']['data'] = {}
        by_period = data['detail']['data']
        hjcs = 0   # number of remittances
        hjje = ''  # amount of the last remittance seen
        hjrq = ''  # period of the last remittance seen
        # Only one page (up to 1000 rows) is fetched, so the slice stops at
        # the rows actually present even if totalCount is larger.
        for mxdic in mxlist[:int(mingxiinfo['totalCount'])]:
            oper, is_income = operations.get(mxdic['oper'],
                                             (mxdic['oper'], False))
            sr = mxdic['amt1'] if is_income else ''
            zc = '' if is_income else mxdic['amt1']
            dic = {
                '时间': mxdic['transdate'],
                '单位名称': '',
                '支出': zc,
                '收入': sr,
                '汇缴年月': mxdic['begindatec'],
                '余额': mxdic['amt2'],
                '类型': oper
            }
            if oper == '汇缴':
                hjcs += 1
                hjje = sr
                hjrq = mxdic['begindatec']

            # "YYYY-MM-..." -> "YYYYMM". setdefault keeps earlier rows even
            # when dates are not contiguous.
            times = mxdic['transdate'][:7].replace('-', '')
            year, month = times[:4], times[-2:]
            by_period.setdefault(year, {}).setdefault(month, []).append(dic)

        data['baseInfo']['最近汇缴日期'] = hjrq
        data['baseInfo']['最近汇缴金额'] = hjje
        data['baseInfo']['累计汇缴次数'] = hjcs
        return
    except PermissionError as e:
        raise PreconditionNotSatisfiedError(e) from e
def _unit_fetch(self):
    """Scrape the Guangzhou social-insurance site into ``self.result_data``.

    Fills the ``medical_care``, ``old_age``, ``unemployment``, ``injuries``,
    ``maternity`` and ``serious_illness`` sections, each shaped as
    ``{"data": {year: {month: [record, ...]}}}``, then ``baseInfo`` and
    ``self.result_identity``.

    Raises:
        TaskNotAvailableError: the profile page has no ``comitTable`` table,
            or the history page reports a quota / no-data / illegal-operation
            message.
        TaskNotImplementedError: the medical history table carries an alert
            or has no data rows.
        PreconditionNotSatisfiedError: wraps a ``PermissionError``.
    """

    def first_number(text):
        # First decimal number found in a cell's text.
        return re.findall(r"\d+\.?\d*", text)[0]

    def month_keys(first_year, first_month, count):
        # Yield ``count`` consecutive 'YYYYMM' keys starting at the given
        # month; a row covering several months becomes one key per month.
        for offset in range(-1, count - 1):
            yield datetime.date(
                int(first_year) + (int(first_month) + offset) // 12,
                (int(first_month) + offset) % 12 + 1, 1).strftime('%Y%m')

    def store(section, period, record):
        # File ``record`` under section[year][month].
        section.setdefault(period[0:4], {}).setdefault(
            period[4:6], []).append(record)

    def collect_rows(section_key, rows, company_col, personal_col=None):
        # Build one section from the shared four-insurance table. Rows with
        # an empty first cell are skipped; amounts are split evenly over the
        # months a row covers.
        self.result_data[section_key] = {"data": {}}
        section = self.result_data[section_key]["data"]
        for row in rows:
            cells = row.findAll("td")
            if cells[0].text.strip() == "":
                continue
            span = int(cells[2].text)
            for period in month_keys(cells[0].text[0:4],
                                     cells[0].text[4:6], span):
                store(section, period, {
                    '缴费单位': cells[11].text,
                    '缴费类型': cells[12].text,
                    '缴费时间': period,
                    '缴费基数': cells[3].text,
                    '公司缴费': float(cells[company_col].text) / span,
                    '个人缴费': ('' if personal_col is None else
                             float(cells[personal_col].text) / span),
                })

    try:
        # TODO: perform the task; raise PermissionError if not logged in
        nav = json.loads(self.s.get(User_BaseInfo).text)
        # Personal-information page, reached through the first navigation URL.
        res = self.s.get(
            "http://gzlss.hrssgz.gov.cn/gzlss_web" + nav[0]['url'])
        profile_tables = BeautifulSoup(res.text, 'html.parser').findAll(
            'table', {'class': 'comitTable'})
        if len(profile_tables) <= 0:
            raise TaskNotAvailableError("网络异常,请重新登录")
        # First table holds the name / ID inputs. The page exposes further
        # fields (gender, ethnicity, address, e-mail, ...) that are not
        # exported.
        redata = profile_tables[0]

        # Employee number, needed by the detail queries.
        userNum = BeautifulSoup(
            self.s.get(Search_URL).text, 'html.parser').find(
                'select', {'id': 'aac001'}).text.replace('\n', '')
        # Rows of the shared pension/unemployment/injury/maternity table.
        sixian = BeautifulSoup(
            self.s.get(Sixian_URL + userNum).text, 'html.parser').find(
                'table').findAll("tr", {'class': 'table_white_data'})
        # The last three rows are not payment rows (the third-from-last is
        # read below as the month-count summary).
        detail_rows = sixian[:max(len(sixian) - 3, 0)]

        # ---- medical insurance -------------------------------------------
        permedicalTotal = 0.0
        paraURL = "&startStr=199001&endStr=" + time.strftime(
            '%Y%m', time.localtime()) + ""
        yiliao = BeautifulSoup(
            self.s.get(Yiliao_URL + userNum + paraURL).text, 'html.parser')
        sidata = yiliao.find('table', {'id': 'tableDataList'})
        notice = sidata.find('script').text
        if "请明天再查" in notice:
            raise TaskNotAvailableError("您今天的缴费历史查询已经达到5次,请明天再查。")
        elif "找不到相关数据" in notice:
            raise TaskNotAvailableError("抱歉,找不到相关数据。")
        elif "非法操作" in notice:
            raise TaskNotAvailableError("非法操作,无法查询。")

        self.result_data['medical_care'] = {"data": {}}
        dataBaseH = self.result_data['medical_care']["data"]
        if 'alert' in sidata.text:
            # The message is the text between the first pair of parentheses.
            errormsg2 = sidata.text.split('(')[1].split(')')[0]
            raise TaskNotImplementedError(errormsg2)
        medical_rows = sidata.findAll("tr")
        if len(medical_rows) <= 1:
            raise TaskNotImplementedError("未查询到数据!")
        # Payment status and paying company, shared by every medical record.
        si_status = self._to_replace(
            medical_rows[1].findAll("td")[10].text)[0:2]
        si_com = self._to_replace(medical_rows[2].findAll("td")[3].text)

        for row in sidata.findAll("tr", {'temp': '职工社会医疗保险'}):
            td = row.findAll("td")
            permedicalTotal += float(first_number(td[7].text))
            yearH = self._to_replace(td[1].text)[0:4]
            monthH = self._to_replace(td[1].text)[4:6]
            rangNum = int(self._to_replace(td[3].text))
            for nowtime in month_keys(yearH, monthH, rangNum):
                store(dataBaseH, nowtime, {
                    '缴费单位': si_com,
                    '缴费类型': si_status,
                    '缴费时间': nowtime,
                    '缴费基数': self._to_replace(td[9].text),
                    '政府资助': first_number(td[8].text),
                    '公司缴费': float(first_number(td[6].text)) / rangNum,
                    '个人缴费': float(first_number(td[7].text)) / rangNum,
                })

        # ---- pension -----------------------------------------------------
        # Handled separately: rows are filtered on the personal-amount cell
        # and that amount is accumulated into the personal total.
        self.result_data['old_age'] = {"data": {}}
        dataBaseE = self.result_data['old_age']["data"]
        peroldTotal = 0.0
        for row in detail_rows:
            td2 = row.findAll("td")
            if td2[5].text.strip() == '':
                continue
            peroldTotal += float(td2[5].text)
            rangNumE = int(td2[2].text)
            for nowtime2 in month_keys(td2[0].text[0:4], td2[0].text[4:6],
                                       rangNumE):
                store(dataBaseE, nowtime2, {
                    '缴费单位': td2[11].text,
                    '缴费类型': td2[12].text,
                    '缴费时间': nowtime2,
                    '缴费基数': td2[3].text,
                    '公司缴费': float(td2[4].text) / rangNumE,
                    '个人缴费': float(td2[5].text) / rangNumE,
                })

        # ---- unemployment / work injury / maternity ----------------------
        collect_rows('unemployment', detail_rows, 6, 7)
        collect_rows('injuries', detail_rows, 8)
        collect_rows('maternity', detail_rows, 9)

        # ---- serious illness ---------------------------------------------
        self.result_data['serious_illness'] = {"data": {}}
        dataBaseQ = self.result_data['serious_illness']["data"]
        for row in sidata.findAll("tr", {'temp': '重大疾病医疗补助'}):
            td6 = row.findAll("td")
            if td6[0].text.strip() == "":
                continue
            # Period and month count come from this row itself (td6), not
            # from the last row of the medical loop.
            yearQ = self._to_replace(td6[1].text)[0:4]
            monthQ = self._to_replace(td6[1].text)[4:6]
            rangNumQ = int(self._to_replace(td6[3].text))
            for nowtime6 in month_keys(yearQ, monthQ, rangNumQ):
                store(dataBaseQ, nowtime6, {
                    '缴费单位': si_com,
                    '缴费类型': si_status,
                    '缴费时间': nowtime6,
                    '缴费基数': self._to_replace(td6[9].text),
                    '政府资助': first_number(td6[8].text),
                    '公司缴费': float(first_number(td6[6].text)) / rangNumQ,
                    '个人缴费': float(first_number(td6[7].text)) / rangNumQ,
                })

        # ---- insurance status --------------------------------------------
        sixiantype = ""
        if len(sixian) >= 4:
            sixiantype = sixian[len(sixian) - 4].findAll("td")[12].text
        social_status = {
            '医疗': si_status,
            '养老': sixiantype,
            '失业': sixiantype,
            '工伤': sixiantype,
            '生育': sixiantype
        }

        # Payment duration: largest of the four counts in the summary row.
        # NOTE(review): the cells are compared as strings, so "96" ranks
        # above "120"; confirm whether a numeric comparison is intended.
        summary_cells = sixian[len(sixian) - 3].findAll("td")
        moneyCount = max([summary_cells[1].text, summary_cells[2].text,
                          summary_cells[3].text, summary_cells[4].text])

        # ---- basic personal information ----------------------------------
        self.result_data['baseInfo'] = {
            '姓名': redata.find('input', {'id': 'aac003ss'})['value'],
            '身份证号': redata.find('input', {'id': 'aac002ss'})['value'],
            '更新时间': time.strftime("%Y-%m-%d", time.localtime()),
            '城市名称': '广州市',
            '城市编号': '440100',
            '缴费时长': moneyCount,
            '最近缴费时间': sixian[len(sixian) - 4].findAll("td")[1].text,
            '开始缴费时间': sixian[0].findAll("td")[0].text,
            '个人养老累计缴费': peroldTotal,
            '个人医疗累计缴费': permedicalTotal,
            '五险状态': social_status,
            '账户状态': social_status['养老'],
            '个人编号': redata.find('input', {'id': 'aac001'})['value'],
        }

        # ---- identity ----------------------------------------------------
        self.result_identity.update({
            "task_name": "广州",
            "target_name": redata.find('input', {'id': 'aac003ss'})['value'],
            "target_id": self.result_meta['账号'],
            "status": social_status['养老']
        })
        return
    except PermissionError as e:
        raise PreconditionNotSatisfiedError(e)
def _unit_fetch(self):
    """Scrape the Shanghai social-insurance page into the task result.

    Fills the ``old_age``, ``medical_care`` and ``unemployment`` sections
    (``{"data": {year: {month: [record, ...]}}}``) from the monthly
    ``<jsjs>`` records, creates empty ``injuries``, ``maternity`` and
    ``serious_illness`` sections, and writes ``result_identity`` and
    ``baseInfo``.

    Raises:
        PreconditionNotSatisfiedError: wraps an ``InvalidConditionError``.
    """
    try:
        # TODO: perform the task; raise PermissionError if not logged in
        resp = self.s.get(
            "http://www.12333sh.gov.cn/sbsjb/wzb/sbsjbcx12.jsp",
            proxies={"http": "http://" + self.proxy},
            timeout=30)
        soup = BeautifulSoup(resp.content, 'html.parser')
        details = soup.find('xml', {'id': 'dataisxxb_sum2'}).findAll("jsjs")
        # Summary node, looked up once; it supplies the month count and the
        # accumulated personal pension amount when those tags are present.
        summary = soup.find('xml', {'id': 'dataisxxb_sum4'})
        months_node = summary.find('jsjs2')
        if months_node is not None:
            moneyTime = months_node.text
        else:
            # Fall back to the number of monthly records.
            moneyTime = len(details)

        # Employer records, matched to each month by _match_commapy.
        dt = soup.findAll("jfdwinfo")

        def collect(section_key, personal_tag):
            # Build one section from the monthly records; ``personal_tag``
            # names the child tag carrying the personal contribution.
            # Returns the personal-contribution texts in record order.
            self.result_data[section_key] = {"data": {}}
            section = self.result_data[section_key]["data"]
            personal_amounts = []
            for item in details:
                period = item.find('jsjs1').text
                bucket = section.setdefault(period[0:4], {}).setdefault(
                    period[4:6], [])
                record = {
                    '缴费时间': item.find('jsjs1').text,
                    '缴费单位': self._match_commapy(
                        item.find('jsjs1').text, dt),
                    '缴费基数': item.find('jsjs3').text,
                    '缴费类型': '',
                    '公司缴费': '',
                    '个人缴费': item.find(personal_tag).text,
                }
                personal_amounts.append(record['个人缴费'])
                bucket.append(record)
            return personal_amounts

        # Pension; personal contributions are also summed as a fallback
        # total.
        personmoney = 0.00
        for amount in collect('old_age', 'jsjs4'):
            personmoney += float(amount)
        # Medical and unemployment.
        collect('medical_care', 'jsjs6')
        collect('unemployment', 'jsjs8')
        # Work injury, maternity and serious illness stay empty.
        self.result_data['injuries'] = {"data": {}}
        self.result_data['maternity'] = {"data": {}}
        self.result_data["serious_illness"] = {"data": {}}

        self.result_identity.update({
            "task_name": "上海",
            "target_name": soup.find('xm').text,
            "target_id": self.result_meta['用户名'],
            "status": ""
        })

        total_node = summary.find('jsjs3')
        if total_node is not None:
            personOldMoney = total_node.text
        else:
            personOldMoney = personmoney

        startTime = ""
        recentTime = ""
        if len(details) != 0:
            startTime = details[0].find('jsjs1').text
            recentTime = details[len(details) - 1].find('jsjs1').text

        self.result['data']['baseInfo'] = {
            '姓名': soup.find('xm').text,
            '身份证号': self.result_meta['用户名'],
            '更新时间': time.strftime("%Y-%m-%d", time.localtime()),
            '城市名称': '上海市',
            '城市编号': '310100',
            '缴费时长': moneyTime,
            '最近缴费时间': recentTime,
            '开始缴费时间': startTime,
            '个人养老累计缴费': personOldMoney,
            '个人医疗累计缴费': '',
            '账户状态': ''
        }
        return
    except InvalidConditionError as e:
        raise PreconditionNotSatisfiedError(e)
def _unit_fetch(self):
    """Scrape the Wuxi social-insurance pages into the task result.

    Reads the ``<li>`` items of the ``condition_box`` block on the main page
    and the ``<dd>`` items of the insured-status page, then writes
    ``baseInfo`` into ``self.result_data`` and replaces
    ``self.result['identity']``.

    Raises:
        PreconditionNotSatisfiedError: wraps an ``InvalidConditionError``.
    """

    def without_breaks(node):
        # Node text with carriage returns and line feeds removed.
        return node.text.replace('\r', '').replace('\n', '')

    try:
        # Basic information: list items of the main page.
        main_page = self.s.get(Main_URL)
        items = BeautifulSoup(main_page.content, 'html.parser').find(
            'div', {'class': 'condition_box'}).findAll('li')

        # Status of the five insurances, read by fixed position.
        status_page = self.s.get(
            "http://218.90.158.61/person/personCBInfo.html")
        entries = BeautifulSoup(
            status_page.content, 'html.parser').findAll('dd')
        wuxianType = {
            '养老': without_breaks(entries[33]),
            '医疗': without_breaks(entries[38]),
            '失业': without_breaks(entries[34]),
            '工伤': without_breaks(entries[37]),
            '生育': without_breaks(entries[35]),
        }
        # The account status follows the pension entry.
        if without_breaks(entries[33]) == "正常参保":
            account_state = '正常'
        else:
            account_state = '停缴'

        # Basic personal information.
        self.result_data['baseInfo'] = {
            '姓名': items[20].text,
            '身份证号': items[57].text,
            '更新时间': time.strftime("%Y-%m-%d", time.localtime()),
            '城市名称': '无锡',
            '城市编号': '320200',
            '缴费时长': 0,
            '最近缴费时间': '',
            '开始缴费时间': '',
            '个人养老累计缴费': 0,
            '个人医疗累计缴费': 0,
            '五险状态': wuxianType,
            '账户状态': account_state,
            '个人编码': items[19].text,
            '民族': without_breaks(items[58]),
            '出生日期': items[21].text,
            '性别': without_breaks(items[22]),
            '户口性质': without_breaks(items[60]),
            '个人状态': without_breaks(items[24]),
            '工作日期': items[27].text,
            '手机号码': items[75].text,
            '户口所在地': items[82].text,
            '现居住地址': items[83].text
        }

        # Identity block.
        self.result['identity'] = {
            "task_name": "无锡",
            "target_name": items[20].text,
            "target_id": self.result_meta['身份证'],
            "status": account_state
        }
        return
    except InvalidConditionError as err:
        raise PreconditionNotSatisfiedError(err)
def _unit_fetch(self): try: # TODO: 执行任务,如果没有登录,则raise PermissionError return except PermissionError as e: raise PreconditionNotSatisfiedError(e)
def _unit_fetch(self):
    """Scrape the Chengdu social-insurance site into the task result.

    Queries the payment-detail endpoint once per insurance code (110
    pension, 310 medical, 210 unemployment, 410 work injury, 510 maternity,
    330 serious illness), stores each as
    ``{"data": {year: {month: [record, ...]}}}`` under
    ``self.result['data']``, reads the per-insurance status list, and writes
    ``baseInfo`` and ``identity``.

    Raises:
        TaskNotImplementedError: the personal-information request does not
            return HTTP 200.
        PreconditionNotSatisfiedError: wraps an ``InvalidConditionError``.
    """
    try:
        # TODO: perform the task; raise PermissionError if not logged in
        # Personal information.
        res = self.s.get(
            "https://gr.cdhrss.gov.cn:442/cdwsjb/personal/personalHomeAction!query.do"
        )
        if res.status_code != 200:
            raise TaskNotImplementedError("网络错误,请稍后再试!")
        s = json.loads(res.text)["fieldData"]

        # Query window for every detail request.
        query_start = "199001"
        query_end = time.strftime("%Y%m", time.localtime())

        def fetch_payments(section_key, insurance_code, personal_field=None):
            # Query one insurance type and file its rows under
            # result['data'][section_key]. ``personal_field`` names the row
            # key holding the personal contribution; None stores ''.
            # Returns the raw rows, or [] when the reply has no 'lists'.
            self.result['data'][section_key] = {"data": {}}
            section = self.result['data'][section_key]["data"]
            reply = json.loads(self.s.get(
                Detail_URL + "?dto['aae041']=" + query_start +
                "&dto['aae042']=" + query_end +
                "&dto['aae140_md5list']=&dto['aae140']=" +
                insurance_code).text)
            if 'lists' not in reply:
                return []
            rows = reply['lists']['dg_payment']['list']
            for row in rows:
                period = str(row['aae002'])
                section.setdefault(period[0:4], {}).setdefault(
                    period[4:6], []).append({
                        '缴费单位': row['aab004'],
                        '缴费时间': row['aae002'],
                        '缴费类型': '',
                        '缴费基数': row['yac004'],
                        '公司缴费': row['dwjfje'],
                        '个人缴费': (row[personal_field]
                                 if personal_field else ''),
                    })
            return rows

        # Pension, with the accumulated personal contribution.
        sEI = fetch_payments('old_age', '110', 'grjfje')
        peroldTotal = 0.0
        for row in sEI:
            peroldTotal += float(row['grjfje'])

        # Medical, with the accumulated personal contribution.
        sHI = fetch_payments('medical_care', '310', 'hrzhje')
        permedicalTotal = 0.0
        for row in sHI:
            permedicalTotal += float(row['hrzhje'])

        # Unemployment, work injury, maternity, serious illness.
        sII = fetch_payments('unemployment', '210', 'grjfje')
        sCI = fetch_payments('injuries', '410')
        sBI = fetch_payments('maternity', '510')
        fetch_payments('serious_illness', '330')

        # Status of the six insurances, embedded as JSON in the page script.
        stype = self.s.get(
            "https://gr.cdhrss.gov.cn:442/cdwsjb/personal/query/queryCZInsuranceInfoAction.do"
        )
        stypes = BeautifulSoup(stype.text, 'html.parser').find(
            'div', {'id': 'SeInfo'})
        stype2 = json.loads(
            stypes.text.split('data')[40].split(';')[0].replace(
                '=', ''))['list']
        # Last status seen per insurance code; "0" when a code is absent.
        status_by_code = {}
        for entry in stype2:
            if entry['aae140'] in ("110", "310", "210", "410", "510", "330"):
                status_by_code[entry['aae140']] = entry['aac031']
        social_Type = {
            '养老': self._convert_type(status_by_code.get("110", "0")),
            '医疗': self._convert_type(status_by_code.get("310", "0")),
            '大病': self._convert_type(status_by_code.get("330", "0")),
            '失业': self._convert_type(status_by_code.get("210", "0")),
            '工伤': self._convert_type(status_by_code.get("410", "0")),
            '生育': self._convert_type(status_by_code.get("510", "0"))
        }

        # Basic personal information.
        status = "正常" if s['aac031'] == "参保缴费" else "异常"
        # Payment duration: largest (row count - 1) over the five main
        # insurances.
        moneyCount = max(
            len(rows) - 1 for rows in (sEI, sHI, sII, sCI, sBI))
        recentTime = ""
        startTime = ""
        if len(sEI) > 0:
            # The first pension row is taken as the most recent and the last
            # row as the earliest.
            recentTime = sEI[0]['aae002']
            startTime = sEI[len(sEI) - 1]['aae002']

        self.result_data['baseInfo'] = {
            '姓名': s['aac003'],
            '身份证号': s['aac002'],
            '更新时间': time.strftime("%Y-%m-%d", time.localtime()),
            '城市名称': '成都',
            '城市编号': '510100',
            '缴费时长': moneyCount,
            '最近缴费时间': recentTime,
            '开始缴费时间': startTime,
            '个人养老累计缴费': peroldTotal,
            '个人医疗累计缴费': permedicalTotal,
            '五险状态': social_Type,
            '账户状态': status,
            '个人编号': s['aac001'],
        }
        self.result['identity'] = {
            "task_name": "成都",
            "target_name": s['aac003'],
            "target_id": self.result['meta']["用户名"],
            "status": status
        }
    except InvalidConditionError as e:
        raise PreconditionNotSatisfiedError(e)
def _unit_fetch_name(self):
    """Scrape the Hangzhou housing-fund site into ``self.result_data``.

    Writes ``baseInfo`` (from the profile form inputs), ``companyList``
    (one entry per company row, sorted by last business date, newest first),
    ``detail`` (yearly statements from 1998 to the current year, grouped as
    ``{"data": {year: {month: [record, ...]}}}``) and the
    ``status`` / ``target_name`` / ``target_id`` identity fields.

    Raises:
        PreconditionNotSatisfiedError: wraps a ``PermissionError``.
    """
    try:
        # TODO: perform the task; raise PermissionError if not logged in
        # ---- basic information -------------------------------------------
        resp = self.s.get(INFOR_URL, timeout=25)
        soup = BeautifulSoup(resp.content, 'html.parser')
        table = soup.select('table')[0].findAll('input')
        data = self.result_data
        data['baseInfo'] = {
            '城市名称': '杭州',
            '城市编号': '330100',
            '证件类型': '身份证',
            '证件号': table[3].attrs['value'],
            '更新时间': time.strftime("%Y-%m-%d", time.localtime()),
            '手机号': table[0].attrs['value'],
            '个人账号': table[1].attrs['value'],
            '姓名': table[2].attrs['value'],
            '邮箱': table[12].attrs['value'],
            '用户名': table[8].attrs['value'],
            '地址': table[13].attrs['value'],
            '邮编': table[14].attrs['value']
        }

        # ---- companies ---------------------------------------------------
        resp = self.s.get(ENRER_URL, timeout=25)
        soup = BeautifulSoup(resp.content, 'html.parser')
        table = soup.select('table')[0]
        links = table.findAll('a')
        statearr = []   # account status of every company row
        y = 1           # index of the current row's detail link; advances by 2
        timeenter = {}  # detail date -> company name
        maxtimes = []   # last business date of every company
        data['companyList'] = []
        for tr in table.findAll('tr'):
            cell = [i.text.replace(' ', '') for i in tr.find_all('td')]
            if not len(cell):
                continue
            statearr.append(cell[7])
            dictenter = {'单位名称': cell[3], '当前余额': 0, '帐户状态': cell[7]}
            if y <= len(links):
                urlinfo = 'http://www.hzgjj.gov.cn:8080' + links[y].attrs[
                    'href']
                resps = self.s.get(urlinfo, timeout=25)
                soups = BeautifulSoup(resps.content, 'html.parser')
                tables = soups.select('table')[0]
                timearr = []
                for detail_tr in tables.findAll('tr'):
                    cells = [
                        i.text.replace(' ', '')
                        for i in detail_tr.find_all('td')
                    ]
                    if len(cells):
                        timearr.append(cells[1])
                        timeenter.setdefault(cells[1], cell[3])
                y = y + 2
                dictenter.setdefault('最后业务日期', max(timearr))
                maxtimes.append(max(timearr))
            data['companyList'].append(dictenter)
        data['companyList'].sort(
            key=operator.itemgetter('最后业务日期'), reverse=True)
        if '正常' in statearr:
            self.result_identity['status'] = '缴存'
        else:
            self.result_identity['status'] = '封存'
        self.result_identity['target_name'] = data['baseInfo']['姓名']

        # ---- statement ---------------------------------------------------
        resp = self.s.get(YE_URL, timeout=25)
        soup = BeautifulSoup(resp.content, 'html.parser')
        yuer = soup.findAll('td')[3].text
        data['baseInfo'].setdefault('当前余额', yuer)
        data['companyList'][0]['当前余额'] = yuer
        axx = soup.find('a').attrs['href']
        urlinfo = 'http://www.hzgjj.gov.cn:8080' + axx
        resps = self.s.get(urlinfo, timeout=25)
        soups = BeautifulSoup(resps.content, 'html.parser')
        tables = soups.select('input')
        if len(tables) > 3:
            data['baseInfo']['证件号'] = tables[3].attrs['value']
            self.result_identity['target_id'] = tables[3].attrs['value']
        data['detail'] = {}
        data['detail']['data'] = {}
        detail = data['detail']['data']
        hjje = ''
        hjrq = ''
        hjcs = 0
        # Loop-invariant. maxtimes is non-empty here: the sort and the [0]
        # access above already required a dated company entry.
        latest_business = max(maxtimes)
        for i in range(1998, int(datetime.datetime.now().year + 1)):
            datas = {
                'check_ym': i,
                'button1': '',
                'acct_no': tables[1].attrs['value'],
                'cacct_no': tables[2].attrs['value'],
                'cert_code': tables[3].attrs['value'],
                'fund_type': tables[4].attrs['value'],
                'cname': tables[5].attrs['value'],
                'flag': tables[6].attrs['value'],
                'begin_date': str(i) + '0101',
                'end_date': str(i) + '1231'
            }
            resp = self.s.post(DZD_URL, data=datas, timeout=25)
            soup = BeautifulSoup(resp.content, 'html.parser')
            table = soup.select('table')[0]
            for tr in table.findAll('tr'):
                cell = [i.text.replace(' ', '') for i in tr.find_all('td')]
                if not (len(cell) > 2 and cell[1]):
                    continue
                hj = ''
                hjdw = ''
                lx = cell[2]
                if '汇缴' in lx:
                    # Contribution rows end with the contribution month;
                    # the type keeps only its first two characters.
                    hj = lx[-6:]
                    lx = lx[:2]
                    if latest_business == hj:
                        hjrq = hj
                        hjje = cell[3]
                if hj:
                    hjcs = hjcs + 1
                    hjdw = timeenter[hj]
                dic = {
                    '时间': cell[1],
                    '单位名称': hjdw,
                    '支出': cell[4],
                    '收入': cell[3],
                    '汇缴年月': hj,
                    '余额': cell[5],
                    '类型': lx
                }
                times = cell[1][0:6]
                # setdefault keeps records already filed under a year or
                # month even if that period shows up again later.
                detail.setdefault(times[:4], {}).setdefault(
                    times[-2:], []).append(dic)
        data['baseInfo']['最近汇缴日期'] = hjrq
        data['baseInfo']['最近汇缴金额'] = hjje
        data['baseInfo']['累计汇缴次数'] = hjcs
        return
    except PermissionError as e:
        raise PreconditionNotSatisfiedError(e)
def _unit_fetch(self):
    """Fetch the five insurance payment histories from the JSON endpoint.

    For each transaction code, posts to ``FIVER_URL`` and groups the rows of
    the reply's ``output`` list into
    ``self.result_data[section]['data'][year][month]``. Then fills the
    payment totals, duration, and first / most-recent payment month of
    ``baseInfo`` via ``setdefault``, so existing values are kept.

    Raises:
        PreconditionNotSatisfiedError: wraps a ``PermissionError``.
    """
    try:
        # TODO: perform the task; raise PermissionError if not logged in
        data = self.result_data
        # Transaction code -> (section, base, payment-time, personal,
        # company) row keys.
        sections = (
            ('yhsi0001003', 'old_age',
             ('aic020', 'dwjfrq', 'grjfje', 'dwjfje')),
            ('yhsi0002002', 'unemployment',
             ('aic020', 'yae204', 'ajc030', 'ajc031')),
            ('yhsi0003002', 'medical_care',
             ('akc010', 'yae204', 'grsj', 'dwsj')),
            ('yhsi0004002', 'injuries',
             ('amc001', 'yae205', 'ymc244', 'ymc237')),
            ('yhsi0005002', 'maternity',
             ('amc001', 'yae204', 'ymc244', 'ymc237')),
        )
        ylsum = 0.00    # accumulated personal pension contribution
        yilsum = 0.00   # accumulated personal medical contribution
        arrMaxtime = []   # latest YYYYMM of every non-empty section
        arrMintime = []   # earliest YYYYMM of every non-empty section
        arrLenMonth = []  # number of distinct months per section
        for k, v, (base_key, time_key, personal_key,
                   company_key) in sections:
            data[v] = {}
            data[v]['data'] = {}
            section = data[v]['data']
            datas = {
                'jybh': k,
                'aac001': self.g.aac001,
                'startrow': 1,
                'endrow': 10000,
                'access_token': ''
            }
            resp = self.s.post(FIVER_URL, data=datas, timeout=20)
            soup = BeautifulSoup(resp.content, 'html.parser')
            ylinfo = json.loads(soup.text)
            if not ylinfo:
                continue
            lenmonth = 0
            for cell in ylinfo['output']:
                yearkeys = cell['aae002']
                dic = {
                    '险种名称': cell['jfbz'],
                    '缴费单位': cell['aab004'],
                    '缴费类型': cell['aae143']
                }
                years = yearkeys[:4]
                months = yearkeys[-2:]
                dic.setdefault('缴费基数', cell[base_key])
                dic.setdefault('缴费时间', cell[time_key])
                dic.setdefault('个人缴费', cell[personal_key])
                dic.setdefault('公司缴费', cell[company_key])
                if v == 'old_age':
                    ylsum = ylsum + float(cell[personal_key])
                elif v == 'medical_care':
                    yilsum = yilsum + float(cell[personal_key])
                year_bucket = section.setdefault(years, {})
                if months not in year_bucket:
                    # First record of this month: counts towards duration.
                    lenmonth = lenmonth + 1
                    year_bucket[months] = []
                year_bucket[months].append(dic)
            if v == 'old_age':
                data['baseInfo'].setdefault('个人养老累计缴费', "%.2f" % ylsum)
            if v == 'medical_care':
                data['baseInfo'].setdefault('个人医疗累计缴费', "%.2f" % yilsum)
            if len(section) > 0:
                last_year = max(section)
                first_year = min(section)
                arrMaxtime.append(last_year + max(section[last_year]))
                arrMintime.append(first_year + min(section[first_year]))
            arrLenMonth.append(lenmonth)
        if len(arrLenMonth) > 0:
            data['baseInfo'].setdefault('缴费时长', max(arrLenMonth))
        else:
            data['baseInfo'].setdefault('缴费时长', 0)
        if len(arrMintime) > 0:
            data['baseInfo'].setdefault('开始缴费时间', min(arrMintime))
        else:
            data['baseInfo'].setdefault('开始缴费时间', '')
        if len(arrMaxtime) > 0:
            # NOTE(review): this takes the *earliest* of the per-section
            # latest months; confirm whether max() was intended.
            data['baseInfo'].setdefault('最近缴费时间', min(arrMaxtime))
        else:
            data['baseInfo'].setdefault('最近缴费时间', '')
        return
    except PermissionError as e:
        raise PreconditionNotSatisfiedError(e)
def _unit_fetch(self): try: return except InvalidConditionError as e: raise PreconditionNotSatisfiedError(e)
def _unit_fetch(self):
    """Scrape the Qingdao housing-fund JSON endpoints into the task result.

    Writes ``baseInfo`` and the identity fields from ``INFO_URL``, one
    ``companyList`` entry from ``ENTER_URL``, and the contribution detail
    from ``MINGXI_URL`` grouped as
    ``{"data": {year: {month: [record, ...]}}}``.

    Raises:
        InvalidParamsError: the account-info reply is shorter than 18
            characters (treated as "first login, change the password on the
            official site").
        PreconditionNotSatisfiedError: wraps a ``PermissionError``.
    """
    try:
        # ---- basic information -------------------------------------------
        resp = self.s.get(INFO_URL, timeout=15)
        soup = BeautifulSoup(resp.content, 'html.parser')
        if len(soup.text) < 18:
            raise InvalidParamsError('第一次登录,请去官网修改密码!')
        info = json.loads(soup.text)
        data = self.result_data
        data['baseInfo'] = {
            '城市名称': '青岛',
            '城市编号': '370200',
            '证件号': info['sfz'],
            '证件类型': '身份证',
            '个人账号': info['khh'],
            '姓名': info['hm'],
            '帐户状态': info['zt'],
            '手机号': info['sjhm'],
            '开户日期': info['khrq'].replace('-', ''),
            '月应缴额': info['gze'],
            '单位缴存比例': info['dwjcbl'] + '%',
            '个人缴存比例': info['grjcbl'] + '%',
            '当前余额': info['zhye'],
            '联名卡号': info['kh'],
            '联名卡发卡行': info['hb'],
            '联名卡登记日期': info['djrq'],
            '单位月缴存额': info['dwyhjje'],
            '个人月缴存额': info['gryhjje'],
            '更新时间': time.strftime("%Y-%m-%d", time.localtime())
        }
        self.result_identity['target_name'] = data['baseInfo']['姓名']
        self.result_identity['target_id'] = data['baseInfo']['证件号']
        if '正常' in data['baseInfo']['帐户状态']:
            self.result_identity['status'] = '缴存'
        else:
            self.result_identity['status'] = '封存'

        # ---- company -----------------------------------------------------
        resp = self.s.get(ENTER_URL, timeout=15)
        soup = BeautifulSoup(resp.content, 'html.parser')
        enterinfo = json.loads(soup.text)
        data['companyList'] = []
        entdic = {
            "单位名称": enterinfo['hm'],
            "单位编号": enterinfo['khh'],
            "单位地址": enterinfo['dz'],
            "经办部门": enterinfo['jbbm'],
            "所在市区": enterinfo['szqs'],
            "成立日期": enterinfo['clrq'],
            "组织机构代码": enterinfo['zzdm'],
            "单位性质": enterinfo['dwxz'],
            "营业执照编号": enterinfo['yyzz'],
            "法人资格": enterinfo['frzg'],
            "法人代表": enterinfo['frdb'],
            "发薪日": enterinfo['fxrq'],
            "主管单位": enterinfo['zgdw'],
            "单位传真": enterinfo['cz'],
            "单位邮编": enterinfo['yb'],
            '帐户状态': info['zt'],
            '当前余额': info['zhye']
        }
        data['companyList'].append(entdic)

        # ---- contribution detail -----------------------------------------
        datas = {
            'dt': time.time() * 1000,
            'm': 'grjcmx',
            'start': '1900-01-01',
            'end': time.strftime("%Y-%m-%d", time.localtime()),
            'page': '1',
            'rows': '20000',
            'sort': 'csrq',
            'order': 'desc'
        }
        resp = self.s.post(MINGXI_URL, data=datas, timeout=15)
        soup = BeautifulSoup(resp.content, 'html.parser')
        mingxiinfo = json.loads(soup.text)
        data['detail'] = {}
        data['detail']['data'] = {}
        detail = data['detail']['data']
        latest_recorded = False  # True once the first contribution row is seen
        hjcs = 0
        hjje = ''
        hjrq = ''
        # Process at most ``total`` rows, but never index past the rows that
        # were actually returned.
        total = int(mingxiinfo['total'])
        rows = mingxiinfo['rows'][:total] if total > 0 else []
        for mxdic in rows:
            income = str(float(mxdic['grje']) + float(mxdic['dwje']))
            dic = {
                '时间': mxdic['csrq'],
                '单位名称': mxdic['hm'],
                '支出': 0,
                '收入': income,
                '汇缴年月': mxdic['ssny'],
                '余额': 0,
                '类型': mxdic['jjyyname'],
                '单据状态': mxdic['ztname'],
                '单位金额': mxdic['dwje'],
                '个人金额': mxdic['grje'],
                '结算方式': mxdic['jslxname']
            }
            if mxdic['ssny']:
                hjcs = hjcs + 1
                if not latest_recorded:
                    # Rows are requested newest first, so the first
                    # contribution row is the most recent one.
                    latest_recorded = True
                    hjje = income
                    hjrq = mxdic['ssny']
            times = mxdic['csrq'][:7].replace('-', '')
            # setdefault keeps records already filed under a year or month
            # even if that period shows up again later.
            detail.setdefault(times[:4], {}).setdefault(
                times[-2:], []).append(dic)
        data['baseInfo']['最近汇缴日期'] = hjrq
        data['baseInfo']['最近汇缴金额'] = hjje
        data['baseInfo']['累计汇缴次数'] = hjcs
        return
    except PermissionError as e:
        raise PreconditionNotSatisfiedError(e)
def _unit_fetch(self):
    """Fetch Guiyang housing-fund summary and transaction history.

    Loads the account summary page (``MAIN_URL``) and every page of the
    transaction list, then fills ``self.result_data`` (``baseInfo``,
    ``detail`` and ``companyList``) and replaces ``self.result['identity']``.

    Transactions are grouped as ``detail['data'][YYYY][MM] -> list``.

    Raises:
        PreconditionNotSatisfiedError: wraps an ``InvalidConditionError``
            raised while fetching.
    """
    try:
        self.result_data['baseInfo'] = {}
        self.result_data['detail'] = {"data": {}}
        self.result_data['companyList'] = []

        resp = self.s.get(MAIN_URL)
        datas = BeautifulSoup(resp.content, 'html.parser').select('.table-content')

        def cell(row, col):
            # Text of the col-th <td> inside the row-th '.table-content' node.
            return datas[row].findAll("td")[col].text

        def amount(row, col):
            # Same as cell(), with the currency decorations stripped.
            return cell(row, col).replace('¥', '').replace('元', '')

        list_url = "http://zxcx.gygjj.gov.cn/PersonAccountsList.do?method=list"
        today = time.strftime("%Y-%m-%d", time.localtime())

        def fetch_page(page_no):
            # The query is assembled from separate pieces so that the
            # ampersand in front of "currentPage" can never be mistaken
            # for an HTML entity and mangled; the page number must reach
            # the server as its own parameter.
            query = "&".join((
                "aaxmlrequest=true",
                "currentPage=" + str(page_no),
                "startTime_a=2000-01-01",
                "startTime_b=" + today,
            ))
            page_resp = self.s.get(list_url + "&" + query)
            # The response is parsed twice on purpose: the text of the
            # outer document is itself parsed as the HTML to inspect.
            outer = BeautifulSoup(page_resp.text, 'html.parser')
            return BeautifulSoup(outer.text, 'html.parser')

        first_page = fetch_page(1)
        tables = first_page.findAll('table')
        company = tables[0].findAll('td')
        page_count = int(tables[5].findAll('b')[1].text)
        company_name = company[1].text.split(':')[1]

        detail = self.result_data["detail"]["data"]
        last_time = ""      # date of the last remittance/back-payment row seen
        last_money = ""     # income amount of that row
        remit_count = 0     # number of remittance/back-payment rows

        for page_no in range(1, page_count + 1):
            # Page 1 was already downloaded to learn the page count.
            page_doc = first_page if page_no == 1 else fetch_page(page_no)
            table = page_doc.find('table', {'id': 'extjsp_div_data_table_0'})
            for tr in table.findAll("tr"):
                tds = tr.findAll("td")
                when = tds[0].text
                kind = tds[1].text
                income = tds[3].text.replace(',', '')
                entry = {
                    '时间': when.replace('.', '-'),
                    '类型': self._short_type(kind),
                    '汇缴年月': tds[2].text.replace('.', ''),
                    '收入': income,
                    '支出': tds[4].text.replace(',', ''),
                    '余额': tds[5].text.replace(',', ''),
                    '单位名称': company_name,
                }
                if '汇缴' in kind or '补缴' in kind:
                    last_time = when.replace('.', '-')
                    last_money = income
                    remit_count += 1
                year, month = when[0:4], when[5:7]
                detail.setdefault(year, {}).setdefault(month, []).append(entry)

        status = "缴存" if cell(8, 1) == "正常汇缴" else "封存"

        # NOTE: the original mapping also listed, commented out, further
        # cells that are not exported: remittance status (8,1), monthly
        # amount due (3,3), first contribution month (7,3), employee
        # remittance month (7,5), managing department (8,3), frozen flag
        # (9,1), loan flag (9,5), employer handler (11,1), employer legal
        # representative (11,3) and employer address (12,1).
        self.result_data['baseInfo'] = {
            '姓名': cell(0, 3),
            '证件号': cell(0, 5),
            '证件类型': '身份证',
            '公积金帐号': cell(0, 1),
            '性别': cell(1, 1),
            '手机号': cell(1, 3),
            '公积金卡号': cell(1, 5),
            '缴存基数': amount(3, 1),
            '单位缴存比例': cell(4, 1),
            '个人缴存比例': cell(4, 3),
            '单位月缴存额': amount(5, 1),
            '个人月缴存额': amount(5, 3),
            '开户日期': cell(7, 1),
            '更新时间': time.strftime("%Y-%m-%d", time.localtime()),
            '城市名称': '贵阳市',
            '城市编号': '520100',
            '最近汇款日期': last_time,
            '最近汇款金额': last_money,
            '累计汇款次数': remit_count,
        }

        self.result_data['companyList'].append({
            "单位名称": company_name,
            "单位登记号": company[0].text.split(":")[1],
            "所属管理部编号": "",
            "所属管理部名称": "",
            "当前余额": amount(9, 3),
            "帐户状态": status,
            "当年缴存金额": "",
            "当年提取金额": "",
            "上年结转余额": "",
            "最后业务日期": cell(7, 5),
            "转出金额": "",
        })

        # Identity information.
        self.result['identity'] = {
            "task_name": "贵阳",
            "target_name": cell(0, 3),
            "target_id": self.result_meta['身份证号'],
            "status": status,
        }
        return
    except InvalidConditionError as e:
        raise PreconditionNotSatisfiedError(e) from e
def _unit_fetch(self):
    """Fetch Xiamen housing-fund account info and transaction details.

    Posts to ``INFO_URL`` for the account record and to ``MX_URL`` for the
    transaction list, then fills ``self.result_data`` (``baseInfo``,
    ``companyList`` and ``detail``) and sets the holder name and deposit
    status on ``self.result_identity``.

    Transactions are grouped as ``detail['data'][YYYY][MM] -> list``.

    Raises:
        PreconditionNotSatisfiedError: wraps a ``PermissionError`` raised
            while fetching.
    """
    try:
        # Dates arrive with 年/月/日 markers; deleting them leaves the digits.
        date_markers = str.maketrans('', '', '年月日')

        data = self.result_data

        # --- Basic account information ---
        resp = self.s.post(INFO_URL, timeout=5)
        soup = BeautifulSoup(resp.content, 'html.parser')
        person = json.loads(soup.text)['Person']

        base_info = {
            '城市名称': '厦门',
            '城市编号': '350200',
            '更新时间': time.strftime("%Y-%m-%d", time.localtime()),
            '证件类型': '身份证',
            '证件号': self.s.idnum,
            '个人账号': person['custAcct'],
            '姓名': person['custName'],
            '开户银行': person['aaa103'],
            '开户网点': person['bankOrgName'],
            '开户日期': person['openDate'].translate(date_markers),
        }
        data['baseInfo'] = base_info
        self.result_identity['target_name'] = base_info['姓名']

        # An acctStatus of '0' is mapped to "depositing"; anything else to
        # "sealed".
        acct_status = '缴存' if person['acctStatus'] == '0' else '封存'
        self.result_identity['status'] = acct_status

        data['companyList'] = [{
            '单位名称': person['compName'],
            '当前余额': person['bal'],
            '帐户状态': acct_status,
            '最后业务日期': person['lastAutoAcctDate'].translate(date_markers),
        }]

        # --- Transaction details ---
        datas = {
            'custAcct': base_info['个人账号'],
            'startDate': base_info['开户日期'],
            'endDate': time.strftime("%Y%m%d", time.localtime()),
        }
        resp = self.s.post(MX_URL, data=datas, timeout=5)
        soup = BeautifulSoup(resp.content, 'html.parser')
        infos = json.loads(soup.text)

        by_period = {}
        data['detail'] = {'data': by_period}

        # Honour the reported total, but never index past the records that
        # were actually returned.
        total = int(infos['totalRecords'])
        records = infos['list'][:total] if total > 0 else []

        for record in records:
            entry = {
                '时间': record['bankAcctDate'],
                '单位名称': '',
                '支出': 0,
                '收入': float(record['creditAmt']),
                '汇缴年月': '',
                '余额': record['saveBal'],
                '类型': record['bankSumy'],
            }
            stamp = record['bankAcctDate'].translate(date_markers)
            year, month = stamp[:4], stamp[4:6]
            # setdefault keeps earlier entries even if records for the same
            # year/month are not contiguous in the response.
            by_period.setdefault(year, {}).setdefault(month, []).append(entry)
        return
    except PermissionError as e:
        raise PreconditionNotSatisfiedError(e) from e