class Query(object):
    """Download the simple credit report and hand its HTML to the parser."""

    def __init__(self):
        # Object returned by Singleton.GetInstance(); only its .post() is used here.
        self.request = Singleton.GetInstance()
        # The Parser class itself is stored (not an instance); see query().
        self.parser = Parser
        self.Re = Re()

    def query(self, trade_code='xubjjc'):
        """Request the report page, check for a rejected query code, then parse it.

        Args:
            trade_code: Value sent as the ``tradeCode`` form field. Defaults to
                the value that used to be hard-coded in this method.

        Raises:
            SystemExit: When the response contains the "wrong query code"
                message (after printing a short notice).
        """
        # `exit()` is injected by the `site` module and is not guaranteed to
        # exist in every runtime; `sys.exit()` is the supported equivalent.
        import sys

        credit_url = 'https://ipcrs.pbccrc.org.cn/simpleReport.do?method=viewReport'
        credit_headers = {
            'Referer': 'https://ipcrs.pbccrc.org.cn/reportAction.do?method=queryReport',
            'User-Agent': 'Mozilla/5.0(Windows NT 6.1;WOW64;rv:52.0)Gecko/20100101Firefox/52.0',
        }
        credit_data = {
            'counttime': '',
            'reportformat': '21',
            'tradeCode': trade_code,
        }
        response = self.request.post(
            credit_url, headers=credit_headers, data=credit_data, verify=False)
        # TODO: a copy of this raw report should also be stored in the raw-data database.
        report_html = response.content.decode('gbk')

        code_error = self.Re.reFind(report_html, r'(查询码输入错误,请重新输入)')
        if code_error:
            print('查询码输入错误')
            sys.exit()

        # Parser.parser is called through the class, so this Query instance is
        # passed explicitly as its first argument.
        self.parser.parser(self, report_html)
        print(type(report_html))
        print('')
class Apply(object):
    """Submit a credit-report application, including the KBA question form."""

    # Report options ticked on the application form. A dict literal cannot
    # repeat a key, so the three values are sent as one list-valued field.
    _APPLICATION_OPTIONS = ['25', '24', '21']
    # The submit payload covers five questions, each with these ten scraped
    # values, in this order.
    _KBA_QUESTION_COUNT = 5
    _KBA_FIELDS = (
        'derivativecode', 'businesstype', 'questionno', 'kbanum', 'question',
        'options1', 'options2', 'options3', 'options4', 'options5',
    )

    def __init__(self):
        # Object returned by Singleton.GetInstance(); used for .get() and .post().
        self.request = Singleton.GetInstance()
        self.Re = Re()

    @staticmethod
    def _build_submit_data(param):
        """Build the submitKBA form payload from the scraped ``value="..."`` list.

        ``param[0]`` is used as the Struts token; ``param[1:51]`` are consumed
        ten per question, in the order given by ``_KBA_FIELDS``.
        """
        data = {
            'org.apache.struts.taglib.html.TOKEN': param[0],
            'method': '',
            'authtype': '2',
            'ApplicationOption': list(Apply._APPLICATION_OPTIONS),
        }
        for question in range(Apply._KBA_QUESTION_COUNT):
            prefix = 'kbaList[%d].' % question
            base = 1 + question * len(Apply._KBA_FIELDS)
            for offset, field in enumerate(Apply._KBA_FIELDS):
                data[prefix + field] = param[base + offset]
            data[prefix + 'answerresult'] = '1'
            # NOTE(review): the original payload had the `options` entry for
            # question 0 commented out; that omission is preserved here.
            if question != 0:
                data[prefix + 'options'] = '1'
        return data

    def apply(self):
        """Run the application flow: open the form, fetch the questions, submit.

        Prints the scraped answer options and any recognised result message.
        Never raises for ordinary errors: any ``Exception`` is printed instead.
        """
        user_agent = 'Mozilla/5.0(Windows NT 6.1;WOW64;rv:52.0)Gecko/20100101Firefox/52.0'
        try:
            # Fetch the first application page and read its token.
            first_url = 'https://ipcrs.pbccrc.org.cn/reportAction.do?method=applicationReport'
            first_headers = {
                'Referer': 'https://ipcrs.pbccrc.org.cn/menu.do',
                'User-Agent': user_agent,
            }
            first_page = self.request.get(
                first_url, headers=first_headers, verify=False).content.decode('gbk')
            first_token = self.Re.reFind(first_page, r'TOKEN" value="(.*?)"')

            # Request the question page.
            ask_url = 'https://ipcrs.pbccrc.org.cn/reportAction.do?method=checkishasreport'
            ask_headers = {
                'Referer': 'https://ipcrs.pbccrc.org.cn/reportAction.do?method=applicationReport',
                'User-Agent': user_agent,
            }
            ask_data = {
                'org.apache.struts.taglib.html.TOKEN': first_token,
                'method': 'checkishasreport',
                'authtype': '2',
                'ApplicationOption': list(self._APPLICATION_OPTIONS),
            }
            ask_page = self.request.post(
                ask_url, headers=ask_headers, data=ask_data, verify=False).content.decode('gbk')

            # Collect the text of every <span> inside every <li> (the answer options).
            soup = BeautifulSoup(ask_page, 'lxml')
            answers = [
                span.text
                for item in soup.find_all('li')
                for span in item.find_all('span')
            ]
            # TODO: write these to the database and wait for the customer's answers.
            print (answers)

            param = self.Re.reFindAll(ask_page, r'value="(.*?)">')
            required = 1 + self._KBA_QUESTION_COUNT * len(self._KBA_FIELDS)
            if len(param) < required:
                # Without all scraped values the payload cannot be built; stop
                # here instead of failing later with an IndexError.
                print('获取失败')
                return

            submit_url = 'https://ipcrs.pbccrc.org.cn/reportAction.do?method=submitKBA'
            submit_headers = {
                'Referer': 'https://ipcrs.pbccrc.org.cn/reportAction.do?method=checkishasreport',
                'User-Agent': user_agent,
            }
            submit_page = self.request.post(
                submit_url,
                headers=submit_headers,
                data=self._build_submit_data(param),
                verify=False,
            ).content.decode('gbk')

            pending_message = self.Re.reFind(submit_page, r'(您于.*?申请正在受理,请耐心等待。)')
            result_message = self.Re.reFind(submit_page, r'(您的查询.*?获取结果)')
            if pending_message:
                print(pending_message)
            if result_message:
                print(result_message)
        except Exception as e:
            print(e)
class Login(object):
    """Log in to ipcrs.pbccrc.org.cn, retrying while the captcha is rejected."""

    _LOGIN_URL = 'https://ipcrs.pbccrc.org.cn/login.do'

    def __init__(self):
        # Object returned by Singleton.GetInstance(); used for .get() and .post().
        self.request = Singleton.GetInstance()
        self.getcpatcha = getCaptcha()
        self.Re = Re()

    @staticmethod
    def _build_login_data(token, username, password, captcha):
        """Return the login form payload; the ``data`` field is the current time in ms."""
        return {
            'org.apache.struts.taglib.html.TOKEN': token,
            'method': 'login',
            'data': time.time() * 1000,
            'loginname': username,
            'password': password,
            '_@IMGRC@_': captcha,
        }

    def login(self, username, password):
        """Submit the login form and report the outcome on stdout.

        Args:
            username: Value sent as ``loginname``.
            password: Value sent as ``password``.

        Raises:
            SystemExit: When the first response is not HTTP 200, or when any
                response contains the lock-out or wrong-credentials message.

        Any other ``Exception`` raised while logging in is printed, not propagated.
        """
        # `exit()` is injected by the `site` module and is not guaranteed to
        # exist in every runtime; `sys.exit()` is the supported equivalent.
        import sys

        first_headers = {
            'User-Agent': 'Mozilla / 5.0(Windows NT 6.1;WOW64;rv:52.0) Gecko / 20100101 Firefox / 52.0',
            'Referer': 'https://ipcrs.pbccrc.org.cn/index1.do'
        }
        # Read the token from the first page.
        first_url = 'https://ipcrs.pbccrc.org.cn/login.do?method=initLogin'
        first_page = self.request.get(
            first_url, headers=first_headers, verify=False).content.decode('gbk')
        token = self.Re.reFind(first_page, r'TOKEN" value="(.*?)"')

        # Download the captcha and predict its text.
        captcha = self.getcpatcha.predict()

        login_headers = {
            'User-Agent': 'Mozilla/5.0(Windows NT 6.1;WOW64;rv:52.0)Gecko/20100101Firefox/52.0',
            'Referer': 'https://ipcrs.pbccrc.org.cn/page/login/loginreg.jsp'
        }
        try:
            response = self.request.post(
                self._LOGIN_URL,
                headers=login_headers,
                data=self._build_login_data(token, username, password, captcha),
                verify=False)
            if response.status_code != 200:
                # TODO: store the failure reason in the database.
                sys.exit()

            page = response.content.decode('gbk')
            while True:
                # Check every response, not just the first one. Otherwise a
                # captcha retry that comes back as "wrong password" or
                # "locked out" would be reported as a successful login.
                lockout_message = self.Re.reFind(page, r'(因登录名与密码.*?分钟。)')
                credentials_message = self.Re.reFind(page, r'(登录名或密码错误)')
                if lockout_message:
                    # Too many failed logins.
                    # TODO: store the status and reason in the database before exiting.
                    print(lockout_message)
                    sys.exit()
                elif credentials_message:
                    # Wrong login name or password.
                    print(credentials_message)
                    sys.exit()

                if not self.Re.reFind(page, r'(验证码输入错误)'):
                    break

                # Captcha rejected: predict a new one and resubmit.
                captcha = self.getcpatcha.predict()
                response = self.request.post(
                    self._LOGIN_URL,
                    headers=login_headers,
                    data=self._build_login_data(token, username, password, captcha),
                    verify=False)
                page = response.content.decode('gbk')

            print('登陆成功')
        except Exception as e:
            print(e)
def parser(self, html):
    """Parse the credit-report HTML into a nested dict, print it and return it.

    Args:
        html: Decoded HTML text of the report page.

    Returns:
        dict with the keys '基本信息', '信贷记录', '信用卡', '公共记录' and
        '查询记录'. Sections whose nodes are missing come back empty instead
        of raising IndexError.
    """
    def clean_texts(nodes):
        # Run every text node through removeSymbol.removesymbol and drop
        # those that come back as the empty string.
        cleaned = (removeSymbol.removesymbol(self, node) for node in nodes)
        return [text for text in cleaned if text != '']

    credit = {}
    selector = etree.HTML(html)

    # Basic information: "label:value" strings from the first two tables.
    basic_info = {}
    for text in selector.xpath(r'//tr[2]/td/table[1]//td/strong/text()'):
        parts = text.split(':')
        if len(parts) < 2:
            # No "label:value" pair to record.
            continue
        basic_info[parts[0]] = parts[1]
    for text in selector.xpath(r'//tr[2]/td/table[2]//td/strong/text()'):
        parts = text.split(':')
        if len(parts) == 1:
            # A bare value in this table is stored under the '婚否' key.
            basic_info['婚否'] = parts[0]
        else:
            basic_info[parts[0]] = parts[1]
    credit['基本信息'] = basic_info

    # Credit records.
    loan_info = {}
    notes = selector.xpath(r'//tr[2]/td/table[3]//td/strong/text()')
    loan_info['注释'] = notes[1].replace('\xa0', '') if len(notes) > 1 else ''
    summary_nodes = selector.xpath(r'//tr[2]/td/table[4]//td/text()')
    loan_info['信息概要'] = ['信息概要'] + clean_texts(summary_nodes)
    overdue_nodes = selector.xpath(r'//tr[2]/td/table[4]//tr[1]/td[2]//span/text()')
    loan_info['逾期记录'] = ''.join(clean_texts(overdue_nodes))
    credit['信贷记录'] = loan_info

    # Credit cards: the first non-empty text is the section key, the rest
    # are one entry per card.
    card_texts = clean_texts(selector.xpath(r'//div/div/table//tr[2]/td/ol[1]//text()'))
    credit_card = {}
    if card_texts:
        section_key = card_texts[0]
        cards = []
        for text in card_texts[1:]:
            # The original cleans each entry a second time; kept as-is.
            text = removeSymbol.removesymbol(self, text)
            if text != '':
                card = {}
                card['发卡时间'] = Re.reFind(self, text, r'(\d+年\d+月\d+日)')
                card['发卡行'] = Re.reFind(self, text, r'日(.*?)截')
                card['截止时间'] = Re.reFind(self, text, r'截至(.*?),信用')
                print(Re.reFind(self, text, r'信用额度(.*?),'))
                card['信用额度'] = Re.reFind(self, text, r'信用额度(.*?),')
                card['已使用额度'] = Re.reFind(self, text, r'0,(.*?)。')
                cards.append(card)
        credit_card = {section_key: cards}
    credit['信用卡'] = credit_card
    print(credit)

    # Public records: the first cleaned entry is dropped.
    credit['公共记录'] = clean_texts(selector.xpath(r'//tr[2]/td/table[5]//text()'))[1:]

    # Query records.
    credit['查询记录'] = clean_texts(selector.xpath(r'//tr[2]/td/table[7]//text()'))

    print (credit)
    return credit
class Register(object):
    """Walk through the user-registration pages on ipcrs.pbccrc.org.cn."""

    _REG_URL = 'https://ipcrs.pbccrc.org.cn/userReg.do'
    _SPACED_USER_AGENT = 'Mozilla / 5.0(Windows NT 6.1;WOW64;rv:52.0) Gecko / 20100101 Firefox / 52.0'

    def __init__(self):
        # Object returned by Singleton.GetInstance(); used for .get() and .post().
        self.request = Singleton.GetInstance()
        self.Re = Re()
        self.getcpatcha = getCaptcha()
        self.namecheck = nameCheck()

    def register(self, name='王大丽', cert_no='532129198806123369',
                 login_name='yy80188815', mobile='18980920233'):
        """Submit the identity check and request an activation code.

        The defaults are the values that used to be hard-coded for the first
        identity submission. A captcha retry now resubmits the same identity.

        Args:
            name: Value sent as ``userInfoVO.name``.
            cert_no: Value sent as ``userInfoVO.certNo``.
            login_name: Value passed to ``self.namecheck.namecheck``.
            mobile: Value sent as ``mobileTel`` when requesting the activation code.

        Returns:
            None. The flow stops early when the first or second page does not
            answer with HTTP 200.
        """
        # Page 1: read the initial token.
        first_url = 'https://ipcrs.pbccrc.org.cn/login.do?method=initLogin'
        first_headers = {
            'User-Agent': self._SPACED_USER_AGENT,
            'Referer': 'https://ipcrs.pbccrc.org.cn/index1.do'
        }
        first_response = self.request.get(first_url, headers=first_headers, verify=False)
        if first_response.status_code != 200:
            return
        first_token = self.Re.reFind(
            first_response.content.decode('gbk'), r'TOKEN" value="(.*?)"')
        print('第一个页面请求成功')

        # Page 2: open the registration form and read its token.
        second_headers = {
            'User-Agent': self._SPACED_USER_AGENT,
            'Referer': 'https://ipcrs.pbccrc.org.cn/page/login/loginreg.jsp'
        }
        second_data = {
            'org.apache.struts.taglib.html.TOKEN': first_token,
            'method': 'initReg'
        }
        second_response = self.request.post(
            self._REG_URL, headers=second_headers, data=second_data, verify=False)
        if second_response.status_code != 200:
            return
        second_token = self.Re.reFind(
            second_response.content.decode('gbk'), r'TOKEN" value="(.*?)"')
        print('第二个页面请求成功')

        # Page 3: identity check, repeated with a fresh captcha while the
        # server reports a captcha error.
        third_headers = {
            'Referer': 'https://ipcrs.pbccrc.org.cn/userReg.do',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
        }
        while True:
            third_data = {
                'org.apache.struts.taglib.html.TOKEN': second_token,
                'method': 'checkIdentity',
                'userInfoVO.name': name,
                'userInfoVO.certType': '0',
                'userInfoVO.certNo': cert_no,
                '_@IMGRC@_': self.getcpatcha.predict(),
                '1': 'on'
            }
            third_page = self.request.post(
                self._REG_URL, headers=third_headers, data=third_data,
                verify=False).content.decode('gbk')
            if not self.Re.reFind(third_page, r'(验证码输入错误)'):
                break
        print('')

        # The return value was never used by the original code either.
        self.namecheck.namecheck(login_name)

        # Request the activation code for the mobile number.
        activation_headers = {
            'User-Agent': self._SPACED_USER_AGENT,
            'Referer': 'ipcrs.pbccrc.org.cn/userReg.do'
        }
        activation_data = {'method': 'getAcvitaveCode', 'mobileTel': mobile}
        self.request.post(
            self._REG_URL, headers=activation_headers, data=activation_data, verify=False)