Exemplos de Re.reFind em Python, exemplos de Util.Re.Re.reFind em Python

Exemplo n.º 1

0

Exibir arquivo

class Query(object):
    """Fetch the credit report page and pass its HTML to the parser."""

    def __init__(self):
        self.request = Singleton.GetInstance()
        self.parser = Parser
        self.Re = Re()

    def query(self):
        """Request the report, stop on a wrong query code, otherwise parse it.

        Prints '查询码输入错误' and exits the process when the response contains
        the site's "wrong query code" message. Returns None.
        """
        # The raw report should also be stored in the raw-data database.
        report_html = self.request.post(
            'https://ipcrs.pbccrc.org.cn/simpleReport.do?method=viewReport',
            headers={
                'Referer':
                'https://ipcrs.pbccrc.org.cn/reportAction.do?method=queryReport',
                'User-Agent':
                'Mozilla/5.0(Windows NT 6.1;WOW64;rv:52.0)Gecko/20100101Firefox/52.0',
            },
            data={
                'counttime': '',
                'reportformat': '21',
                'tradeCode': 'xubjjc',
            },
            verify=False,
        ).content.decode('gbk')

        # Bail out before parsing when the site rejected the query code.
        if self.Re.reFind(report_html, r'(查询码输入错误，请重新输入)'):
            print('查询码输入错误')
            exit()

        # Hand the page to the parser; this instance is passed as its `self`.
        self.parser.parser(self, report_html)
        print(type(report_html))
        print('')

Exemplo n.º 2

0

Exibir arquivo

Arquivo: Apply.py Projeto: walleleung/SpiderSystem

class Apply(object):
    """Apply for a credit report, including the identity-quiz submission."""

    # The submit form carries five questions: kbaList[0] .. kbaList[4].
    _QUESTION_COUNT = 5
    # Per-question form fields, in the order their values are read from the
    # regex matches on the quiz page.
    _QUESTION_FIELDS = (
        'derivativecode',
        'businesstype',
        'questionno',
        'kbanum',
        'question',
        'options1',
        'options2',
        'options3',
        'options4',
        'options5',
    )
    _USER_AGENT = 'Mozilla/5.0(Windows NT 6.1;WOW64;rv:52.0)Gecko/20100101Firefox/52.0'

    def __init__(self):
        self.request = Singleton.GetInstance()
        self.Re = Re()

    @classmethod
    def _build_submit_data(cls, param):
        """Build the quiz submission form from the values scraped off the page.

        Args:
            param: sequence of strings; ``param[0]`` is the form token and the
                following 50 items are the per-question values (10 for each of
                the 5 questions).

        Returns:
            dict of form fields, in the same key order the form was written in
            originally.
        """
        submitdata = {
            'org.apache.struts.taglib.html.TOKEN': param[0],
            'method': '',
            'authtype': '2',
            # The original literal listed this key three times ('25', '24',
            # '21'); a dict keeps only the last one, so '21' is what was sent.
            'ApplicationOption': '21',
        }
        values = iter(param[1:])
        for index in range(cls._QUESTION_COUNT):
            prefix = 'kbaList[%d].' % index
            for field in cls._QUESTION_FIELDS:
                submitdata[prefix + field] = next(values)
            # Every question is answered with option 1 for now.
            submitdata[prefix + 'answerresult'] = '1'
            submitdata[prefix + 'options'] = '1'
        return submitdata

    def apply(self):
        """Run the application flow: open the form, fetch the quiz, submit it.

        Prints the quiz answer options and whichever result message the site
        returns. Any exception raised along the way is printed rather than
        propagated. Returns None.
        """
        try:
            # Fetch the first application page.
            firsturl = 'https://ipcrs.pbccrc.org.cn/reportAction.do?method=applicationReport'
            firstheader = {
                'Referer': 'https://ipcrs.pbccrc.org.cn/menu.do',
                'User-Agent': self._USER_AGENT,
            }
            firstresponse = self.request.get(
                firsturl, headers=firstheader, verify=False).content.decode('gbk')

            # Extract the token from the first page.
            tokenfirst = self.Re.reFind(firstresponse, r'TOKEN" value="(.*?)"')

            askdata = {
                'org.apache.struts.taglib.html.TOKEN': tokenfirst,
                'method': 'checkishasreport',
                'authtype': '2',
                # Same duplicate-key situation as in _build_submit_data: only
                # the last value, '21', was ever sent.
                'ApplicationOption': '21',
            }
            askurl = 'https://ipcrs.pbccrc.org.cn/reportAction.do?method=checkishasreport'
            askheaders = {
                'Referer': 'https://ipcrs.pbccrc.org.cn/reportAction.do?method=applicationReport',
                'User-Agent': self._USER_AGENT,
            }

            # Fetch the quiz page.
            askresponse = self.request.post(
                askurl, headers=askheaders, data=askdata, verify=False).content.decode('gbk')
            soup = BeautifulSoup(askresponse, 'lxml')
            asklist = []
            for li in soup.find_all('li'):
                asklist.extend(span.text for span in li.find_all('span'))
            # To be written to the database.
            print (asklist)

            # Waiting for the customer's answers would happen here.
            param = self.Re.reFindAll(askresponse, r'value="(.*?)">')
            required = 1 + self._QUESTION_COUNT * len(self._QUESTION_FIELDS)
            if not param or len(param) < required:
                # Without the token and all question fields there is nothing
                # valid to submit; stop instead of failing on an index lookup.
                print('获取失败')
                return
            submitdata = self._build_submit_data(param)
            submitheader = {
                'Referer': 'https://ipcrs.pbccrc.org.cn/reportAction.do?method=checkishasreport',
                'User-Agent': self._USER_AGENT,
            }
            submitUrl = 'https://ipcrs.pbccrc.org.cn/reportAction.do?method=submitKBA'

            submitresponse = self.request.post(
                submitUrl, headers=submitheader, data=submitdata, verify=False)
            submitresponse = submitresponse.content.decode('gbk')
            compileResult = self.Re.reFind(submitresponse, r'(您于.*?申请正在受理，请耐心等待。)')
            Result = self.Re.reFind(submitresponse, r'(您的查询.*?获取结果)')
            if compileResult:
                print(compileResult)
            if Result:
                print(Result)
        except Exception as e:
            # Top-level boundary of the flow: report the failure and return.
            print(e)

Exemplo n.º 3

0

Exibir arquivo

class Login(object):
    """Log in to the credit-reporting site, retrying while the captcha fails."""

    _LOGIN_URL = 'https://ipcrs.pbccrc.org.cn/login.do'

    def __init__(self):
        self.request = Singleton.GetInstance()
        self.getcpatcha = getCaptcha()
        self.Re = Re()

    def login(self, username, password):
        """Attempt to log in with the given credentials.

        Every attempt, including captcha retries, is checked for a non-200
        status, the account-lockout message and the wrong-password message.
        Any of those ends the process via SystemExit (the messages are printed
        first). On success '登陆成功' is printed and None is returned.

        Args:
            username: login name sent as ``loginname``.
            password: password sent as ``password``.

        Raises:
            SystemExit: on a non-200 response, lockout, or wrong credentials.
        """
        firstHeader = {
            'User-Agent':
            'Mozilla / 5.0(Windows NT 6.1;WOW64;rv:52.0) Gecko / 20100101 Firefox / 52.0',
            'Referer': 'https://ipcrs.pbccrc.org.cn/index1.do'
        }
        # Extract the token from the first page.
        firstUrl = 'https://ipcrs.pbccrc.org.cn/login.do?method=initLogin'
        firstresponse = self.request.get(firstUrl,
                                         headers=firstHeader,
                                         verify=False).content.decode('gbk')
        tokenfirst = self.Re.reFind(firstresponse, r'TOKEN" value="(.*?)"')

        # Download the captcha and predict its text.
        captcha = self.getcpatcha.predict()

        loginHeader = {
            'User-Agent':
            'Mozilla/5.0(Windows NT 6.1;WOW64;rv:52.0)Gecko/20100101Firefox/52.0',
            'Referer': 'https://ipcrs.pbccrc.org.cn/page/login/loginreg.jsp'
        }
        try:
            # NOTE(review): the loop has no retry limit, matching the original
            # behaviour; consider bounding it.
            while True:
                loginpostdata = {
                    'org.apache.struts.taglib.html.TOKEN': tokenfirst,
                    'method': 'login',
                    # NOTE(review): key is 'data' with a millisecond timestamp;
                    # confirm whether the site expects 'date'.
                    'data': time.time() * 1000,
                    'loginname': username,
                    'password': password,
                    '_@IMGRC@_': captcha,
                }
                response = self.request.post(self._LOGIN_URL,
                                             headers=loginHeader,
                                             data=loginpostdata,
                                             verify=False)
                if response.status_code != 200:
                    # The failure reason should be stored in the database.
                    raise SystemExit

                page = response.content.decode('gbk')
                compileerror = self.Re.reFind(page, r'(因登录名与密码.*?分钟。)')
                compilePassword = self.Re.reFind(page, r'(登录名或密码错误)')
                if compileerror:
                    # Too many failed logins: the status and reason should be
                    # stored in the database before exiting.
                    print(compileerror)
                    raise SystemExit
                if compilePassword:
                    # Wrong login name or password.
                    print(compilePassword)
                    raise SystemExit

                # Only a captcha error is retried, with a fresh prediction.
                if not self.Re.reFind(page, r'(验证码输入错误)'):
                    break
                captcha = self.getcpatcha.predict()
            print('登陆成功')
        except Exception as e:
            # SystemExit is not an Exception subclass, so exits pass through.
            print(e)

Exemplo n.º 4

0

Exibir arquivo

    def parser(self, html):
      """Parse a credit-report HTML page into a nested dict.

      The result has the keys '基本信息', '信贷记录', '信用卡', '公共记录' and
      '查询记录'. It is printed (as before) and also returned so that callers
      can use it.

      Args:
          html: the report page as a decoded string.

      Returns:
          dict with the parsed sections.
      """
      def cleaned(nodes):
          # Pass every text node through removeSymbol and drop the ones that
          # come back as an empty string.
          texts = (removeSymbol.removesymbol(self, node) for node in nodes)
          return [text for text in texts if text != '']

      Credit = {}
      selector = etree.HTML(html)

      # Basic information
      basicInfo = {}
      for info in selector.xpath(r'//tr[2]/td/table[1]//td/strong/text()'):
          temp = info.split('：')
          # Entries without the '：' separator carry no key/value pair.
          if len(temp) > 1:
              basicInfo[temp[0]] = temp[1]

      for info in selector.xpath(r'//tr[2]/td/table[2]//td/strong/text()'):
          temp = info.split('：')
          if len(temp) == 1:
              # A bare value in this table is stored under '婚否'.
              basicInfo['婚否'] = temp[0]
          else:
              basicInfo[temp[0]] = temp[1]
      Credit['基本信息'] = basicInfo

      # Credit records
      xindai = {}
      infos = selector.xpath(r'//tr[2]/td/table[3]//td/strong/text()')
      # The annotation is the second <strong> text; tolerate its absence.
      xindai['注释'] = infos[1].replace('\xa0','') if len(infos) > 1 else ''
      infos = selector.xpath(r'//tr[2]/td/table[4]//td/text()')
      xindai['信息概要'] = ['信息概要'] + cleaned(infos)
      infos = selector.xpath(r'//tr[2]/td/table[4]//tr[1]/td[2]//span/text()')
      xindai['逾期记录'] = ''.join(cleaned(infos))
      Credit['信贷记录'] = xindai

      # Credit cards
      CreditCard = {}
      infos = selector.xpath(r'//div/div/table//tr[2]/td/ol[1]//text()')
      # Drop the empty items; the first remaining one is the section heading.
      infolist = cleaned(infos)
      if infolist:
          cards = []
          for info in cleaned(infolist[1:]):
              card = {}
              card['发卡时间'] = Re.reFind(self, info, r'(\d+年\d+月\d+日)')
              card['发卡行'] = Re.reFind(self, info, r'日(.*?)截')
              card['截止时间'] = Re.reFind(self, info, r'截至(.*?)，信用')
              card['信用额度'] = Re.reFind(self, info, r'信用额度(.*?)，')
              # NOTE(review): this pattern anchors on a literal '0，'; confirm
              # it matches every credit-limit wording.
              card['已使用额度'] = Re.reFind(self, info, r'0，(.*?)。')
              cards.append(card)
          CreditCard[infolist[0]] = cards
      Credit['信用卡'] = CreditCard

      # Public records (the first cleaned item is skipped)
      infos = selector.xpath(r'//tr[2]/td/table[5]//text()')
      Credit['公共记录'] = cleaned(infos)[1:]

      # Query records
      infos = selector.xpath(r'//tr[2]/td/table[7]//text()')
      Credit['查询记录'] = cleaned(infos)
      print (Credit)
      return Credit

Exemplo n.º 5

0

Exibir arquivo

Arquivo: Register.py Projeto: walleleung/SpiderSystem

class Register(object):
    """Drive the multi-step user registration flow on ipcrs.pbccrc.org.cn."""

    def __init__(self):
        """Create the request object and the helper objects used by register()."""

        self.request = Singleton.GetInstance()
        self.Re = Re()
        self.getcpatcha = getCaptcha()
        self.namecheck = nameCheck()

    def register(self):
        """Walk through the registration pages in order.

        Steps visible here: load the login page for its token, open the
        registration form, submit the identity check (retrying while the
        captcha is rejected), check a login name, and request an activation
        code for a mobile number.
        """
        firstUrl = 'https://ipcrs.pbccrc.org.cn/login.do?method=initLogin'
        firstHeader = {
            'User-Agent':
            'Mozilla / 5.0(Windows NT 6.1;WOW64;rv:52.0) Gecko / 20100101 Firefox / 52.0',
            'Referer': 'https://ipcrs.pbccrc.org.cn/index1.do'
        }

        firstResponse = self.request.get(firstUrl,
                                         headers=firstHeader,
                                         verify=False)
        # NOTE(review): tokenfirst is only bound inside this branch; on a
        # non-200 response the secondData dict below raises NameError.
        if firstResponse.status_code == 200:
            firstResponse = firstResponse.content.decode('gbk')
            tokenfirst = self.Re.reFind(firstResponse, r'TOKEN" value="(.*?)"')
            print('第一个页面请求成功')

        # Fetch the second page.
        secondUrl = 'https://ipcrs.pbccrc.org.cn/userReg.do'
        secondHeader = {
            'User-Agent':
            'Mozilla / 5.0(Windows NT 6.1;WOW64;rv:52.0) Gecko / 20100101 Firefox / 52.0',
            'Referer': 'https://ipcrs.pbccrc.org.cn/page/login/loginreg.jsp'
        }
        secondData = {
            'org.apache.struts.taglib.html.TOKEN': tokenfirst,
            'method': 'initReg'
        }
        secondResponse = self.request.post(secondUrl,
                                           headers=secondHeader,
                                           data=secondData,
                                           verify=False)
        # NOTE(review): same pattern as above; tokensecond stays unbound when
        # the status is not 200.
        if secondResponse.status_code == 200:
            secondResponse = secondResponse.content.decode('gbk')
            tokensecond = self.Re.reFind(secondResponse,
                                         r'TOKEN" value="(.*?)"')
            print('第二个页面请求成功')

        # Fetch the third page.
        thirdUrl = 'https://ipcrs.pbccrc.org.cn/userReg.do'
        captcha = self.getcpatcha.predict()
        thirdHeader = {
            'Referer': 'https://ipcrs.pbccrc.org.cn/userReg.do',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
            #'Accept': 'text/html, application xhtml+xml, application/xml;q=0.9,image/webp,*/*;q=0.8',
            #'Accept-Encoding': 'gzip, deflate, br',
            # 'Accept-Language': 'zh-CN,zh;q = 0.8',
            # 'Cache-Control': 'max-age=0',
            #'Connection': 'keep-alive',
            # 'Content-Length': '203',
            #'Host': 'ipcrs.pbccrc.org.cn',
            'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
            # 'Origin': 'https://ipcrs.pbccrc.org.cn',
            #'Upgrade-Insecure-Requests': '1'
        }
        # NOTE(review): the name and certificate number are hard-coded and look
        # like personal data; presumably these should come from the caller.
        thirdData = {
            'org.apache.struts.taglib.html.TOKEN': tokensecond,
            'method': 'checkIdentity',
            'userInfoVO.name': '王大丽',
            'userInfoVO.certType': '0',
            'userInfoVO.certNo': '532129198806123369',
            '_@IMGRC@_': captcha,
            '1': 'on'
        }

        # thirdData = urllib.parse.urlencode(thirdData)
        thirdResponse = self.request.post(thirdUrl,
                                          headers=thirdHeader,
                                          data=thirdData,
                                          verify=False)
        thirdResponse = thirdResponse.content.decode('gbk')
        compileCaptchaError = self.Re.reFind(thirdResponse, r'(验证码输入错误)')
        # Retry with a freshly predicted captcha for as long as the site
        # reports a captcha error; there is no retry limit.
        while compileCaptchaError:
            captcha = self.getcpatcha.predict()
            # NOTE(review): the retry submits a different name and certificate
            # number than the first attempt above; confirm this is intended.
            thirdData = {
                'org.apache.struts.taglib.html.TOKEN': tokensecond,
                'method': 'checkIdentity',
                'userInfoVO.name': '杨帆',
                'userInfoVO.certType': '0',
                'userInfoVO.certNo': '510184198907130057',
                '_@IMGRC@_': captcha,
                '1': 'on'
            }
            # The retry result goes into `thirdresponse` (lower-case r), a
            # separate variable from `thirdResponse` above.
            thirdresponse = self.request.post(thirdUrl,
                                              headers=thirdHeader,
                                              data=(thirdData),
                                              verify=False)
            thirdresponse = thirdresponse.content.decode('gbk')
            compileCaptchaError = self.Re.reFind(thirdresponse, r'(验证码输入错误)')
            print('')

        # `result` is not used in the code visible here.
        result = self.namecheck.namecheck('yy80188815')

        # dmtUrl is defined but the post below uses secondUrl, which holds the
        # same URL string.
        dmtUrl = 'https://ipcrs.pbccrc.org.cn/userReg.do'
        dtmHeader = {
            'User-Agent':
            'Mozilla / 5.0(Windows NT 6.1;WOW64;rv:52.0) Gecko / 20100101 Firefox / 52.0',
            # NOTE(review): unlike the other Referer values this one has no
            # 'https://' scheme; confirm whether that is intended.
            'Referer': 'ipcrs.pbccrc.org.cn/userReg.do'
        }
        # Request an activation code for the (hard-coded) mobile number.
        dtmData = {'method': 'getAcvitaveCode', 'mobileTel': '18980920233'}
        dtmResponse = self.request.post(secondUrl,
                                        headers=dtmHeader,
                                        data=dtmData,
                                        verify=False)