def readpage(driver):
    """Print every entry of the ``;``-separated list in column 4 of each row.

    Rows of the result table are read by 1-based index.  For each row the
    text and link of column 2 and the text of columns 4 and 5 are fetched;
    every non-empty item of column 4 increments the global ``zzcount`` and is
    printed.  Any exception raised while handling a row ends the scan.

    Returns:
        False once ``zzcount`` exceeds ``int(zznum)`` (module-level value),
        True when the scan ends because a row could not be read.
    """
    global zzcount
    row = 1
    while True:
        row_xpath = ('/html/body/form/div[5]/div[2]/div/table/tbody/tr['
                     + str(row) + ']')
        try:
            name = textXpath(row_xpath + '/td[2]/a', driver)
            href = hrefXpath(row_xpath + '/td[2]/a', driver)
            zz = textXpath(row_xpath + '/td[4]', driver)
            qy = textXpath(row_xpath + '/td[5]', driver)
            for item in zz.split(';'):
                if item != '':
                    zzcount += 1
                    print(zzcount, '|', qy, '|', name, '|', item, '|', href)
                    # NOTE(review): the original indentation was lost; the
                    # limit check is assumed to run after each counted item.
                    if zzcount > int(zznum):
                        return False
        except Exception:
            # Was a bare ``except:``; narrowed so Ctrl-C / SystemExit still
            # propagate.  A failed lookup is treated as "no more rows".
            break
        row += 1
    return True
def readzz():
    """Print the certificate entries shown on the currently open detail page.

    Uses the module-level ``driver``.  Entries are read from
    ``#company_certificates/div[n]/div`` for n = 1, 2, ...; each one
    increments the global ``zzcount`` and is printed with the person name
    and the company name read from the page.

    Returns:
        True immediately when an entry's text is empty or starts with '▪'.
        Otherwise, after a lookup raises, False if ``zzcount`` exceeds
        ``int(zznum)`` and True if not.
    """
    global zzcount
    initwait('/html/body/form/div[6]/div[2]/div/div[1]/h2', driver)
    name = textXpath('//*[@id="ContentPlaceHolder1_PersonName"]', driver)
    qyname = textXpath(
        '//*[@id="company_certificates"]/div[1]/table/tbody/tr[1]/td[2]/a',
        driver)
    index = 1
    while True:
        try:
            zzname = textXpath(
                '//*[@id="company_certificates"]/div[' + str(index) + ']/div',
                driver)
            if zzname.split('▪')[0] == '':
                return True
            index += 1
            zzcount += 1
            print(zzcount, '|', name, '|', zzname, '|', qyname)
        except Exception:
            # Was a bare ``except:``; narrowed so Ctrl-C still interrupts.
            break
    return not zzcount > int(zznum)
def readzz(driver):
    """Print the rows of the first inner table on the current detail page.

    For each row (1-based) columns 1, 5 and 4 are read.  Rows whose first
    column is non-empty increment both global counters ``zzcount`` and
    ``zzallcount`` and are printed together with the two name fields read
    from the page and the current URL.  The scan stops at the first
    exception.

    Returns:
        False (after resetting ``zzcount`` to 0) when ``zzcount`` exceeds
        ``int(zznum)``; True otherwise.
    """
    global zzcount
    global zzallcount
    initwait('//*[@id="Table2"]/tbody/tr[1]/td/strong', driver)
    name = textXpath('//*[@id="FormView1_xmLabel"]', driver)
    qyname = textXpath(
        '//*[@id="FormView1"]/tbody/tr/td/table/tbody/tr[1]/td[4]/a', driver)
    href = driver.current_url
    row = 1
    while True:
        row_xpath = ('/html/body/form/div[3]/table/tbody/tr[4]/td/table/tbody'
                     '/tr/td/table[1]/tbody/tr[' + str(row) + ']')
        try:
            zz = textXpath(row_xpath + '/td[1]', driver)
            zzlx = textXpath(row_xpath + '/td[5]', driver)
            zzdj = textXpath(row_xpath + '/td[4]', driver)
            row += 1
            if zz != '':
                zzcount += 1
                zzallcount += 1
                print(zzallcount, zzlx, qyname, name, zz, zzdj, href)
        except Exception:
            # Was a bare ``except:``; narrowed so Ctrl-C still interrupts.
            break
    if zzcount > int(zznum):
        zzcount = 0
        return False
    return True
def readzz():
    """Print the ``;``-separated entries of every section on the detail page.

    Uses the module-level ``driver``.  If the initial wait raises, the page
    is skipped and True is returned.  Otherwise sections are read by 1-based
    index: the section's ``span`` text and the cell ``tr[3]/td[2]`` of the
    matching table.  Each non-empty entry increments the global ``zzcount``
    and is printed.

    Returns:
        False when ``zzcount`` exceeds ``int(zznum)`` after the scan,
        True otherwise.
    """
    global zzcount
    name_xpath = '//*[@id="form1"]/div[3]/div[2]/div[1]/table/tbody/tr[1]/td[2]'
    try:
        initwait(name_xpath, driver)
    except Exception:
        # Was a bare ``except:``; narrowed so Ctrl-C still interrupts.
        return True
    name = textXpath(name_xpath, driver)
    qys = textXpath('//*[@id="Td2"]', driver)
    section_root = '//*[@id="form1"]/div[3]/div[2]/div[2]/div[3]/div[1]'
    index = 1
    while True:
        try:
            qylx = textXpath(
                section_root + '/div[' + str(index) + ']/span', driver)
            cell = textXpath(
                section_root + '/table[' + str(index) + ']/tbody/tr[3]/td[2]',
                driver)
            for item in cell.split(';'):
                if item != '':
                    zzcount += 1
                    print(zzcount, '|', qys, '|', qylx, '|', name, '|', item)
            index += 1
        except Exception:
            # A failed lookup is treated as "no more sections".
            break
    return not zzcount > int(zznum)
def readpage(driver):
    """Hand every row of the current result table to ``readzzfromurl``.

    The first cell of the table is read once up front with a third argument
    of 120 (presumably a timeout in seconds -- confirm against
    ``textXpath``).  Then, for each row (1-based), the link in column 2 and
    the text of column 5 are passed to ``readzzfromurl``.  Any exception in
    that sequence ends the scan.

    Returns:
        False as soon as ``readzzfromurl`` returns a falsy value (the global
        ``zzcount`` and ``zznum`` are printed first); True when the scan
        ends because of an exception.
    """
    table = '/html/body/div[3]/div/section[1]/table/tbody'
    # init
    textXpath(table + '/tr[1]/td[1]', driver, 120)
    row = 1
    while True:
        try:
            href = hrefXpath(table + '/tr[' + str(row) + ']/td[2]/a', driver, 1)
            qys = textXpath(table + '/tr[' + str(row) + ']/td[5]', driver)
            keep_going = readzzfromurl(href, qys)
            row += 1
            if not keep_going:
                print(zzcount, zznum)
                return False
        except Exception:
            # Was a bare ``except:``; narrowed so Ctrl-C still interrupts.
            break
    return True
def _print_list_items(prefix, name, href, driver):
    """Print each non-empty ``prefix/li[n]`` text, counting it in ``zzcount``."""
    global zzcount
    index = 1
    while True:
        try:
            text = textXpath(prefix + '/li[' + str(index) + ']', driver)
        except Exception:
            # A failed lookup is treated as the end of the list.
            break
        index += 1
        if text != '':
            zzcount += 1
            print(zzcount, name, text, href)


def readzz(driver):
    """Print the list items found in two places on the current detail page.

    First the ``li`` children of ``#zz_new/td[4]`` are printed.  Then rows 3
    onward of ``#form1/table[2]`` are visited; for each row whose third cell
    has non-empty text, that cell's ``li`` children are printed as well.
    Every printed item increments the global ``zzcount``.  Each scan stops
    at the first exception.

    Returns:
        False when ``zzcount`` exceeds ``int(zznum)``, True otherwise.
    """
    initwait('//*[@id="Table2"]/tbody/tr/td/strong', driver)
    name = textXpath('//*[@id="FormView1_qymcLabel"]', driver)
    href = driver.current_url

    _print_list_items('//*[@id="zz_new"]/td[4]', name, href, driver)

    row = 3
    while True:
        cell_xpath = '//*[@id="form1"]/table[2]/tbody/tr[' + str(row) + ']/td[3]'
        try:
            cell_text = textXpath(cell_xpath, driver)
        except Exception:
            # Was a bare ``except:``; narrowed so Ctrl-C still interrupts.
            break
        row += 1
        if cell_text != '':
            _print_list_items(cell_xpath, name, href, driver)

    return not zzcount > int(zznum)
def currentpage(driver, driver2):
    """Call ``readqyname`` once for every row present in ``driver2``'s table.

    Row n (1-based) is probed by reading its column-3 span; the text itself
    is not used.  The loop ends when the probe or ``readqyname`` raises.
    """
    row = 1
    while True:
        try:
            # Only the side effect matters: this raises when row `row` is absent.
            textXpath(
                '//*[@id="app"]/div/div/div[2]/div[3]/div[1]/div[3]/table/tbody/tr['
                + str(row) + ']/td[3]/div/span', driver2)
            readqyname(driver, driver2)
            row += 1
        except Exception:
            # Was a bare ``except:``; narrowed so Ctrl-C still interrupts.
            break
def searchKey(driver, key):
    """Type ``key`` into the search box, submit, and read the result page.

    The text of the first result cell is remembered before the search button
    is clicked ('' if it cannot be read).  The function then polls that cell
    every 3 seconds until its text differs.  If polling raises, the function
    returns without reading anything; otherwise ``readpage`` is called.
    """
    first_cell = ('/html/body/form/div[3]/div/table[5]/tbody/tr/td/table[1]'
                  '/tbody/tr[1]/td[2]')
    sendKeyXpath(
        '/html/body/form/div[3]/div/table[3]/tbody/tr/td/table/tbody/tr[2]/td[6]/input',
        key, driver)
    try:
        previous = textXpath(first_cell, driver)
    except Exception:
        # Was a bare ``except:``; narrowed so Ctrl-C still interrupts.
        previous = ''
    clickXpath(
        '/html/body/form/div[3]/div/table[3]/tbody/tr/td/table/tbody/tr[4]/td/input[1]',
        driver)
    try:
        # The third argument (3) is presumably a timeout -- confirm in textXpath.
        while previous == textXpath(first_cell, driver, 3):
            sleep(3)
    except Exception:
        return
    # NOTE(review): original indentation was lost; assumed to be
    # ``except: return`` followed by a function-level ``readpage(driver)``.
    readpage(driver)
def readlx(startpage, driver):
    """Read result pages one after another, starting from ``startpage``.

    ``jumpage`` is called first.  Then ``readpage`` runs on each page; a
    falsy return stops the loop.  After each page the pager entry labelled
    '下一页' is clicked and the first row's link text is polled every 3
    seconds until it changes.  Any exception during paging ends the loop.
    """
    first_row = '/html/body/div[3]/div/section[1]/table/tbody/tr[1]/td[2]/a'
    jumpage(startpage, driver)
    while True:
        if not readpage(driver):
            break
        try:
            before = textXpath(first_row, driver)
            after = before
            clickforname('/html/body/div[3]/div/section[2]/div/nav/ul/li[', 1,
                         ']/a', '下一页', driver)
            while before == after:
                sleep(3)
                after = textXpath(first_row, driver)
        except Exception:
            # Was a bare ``except:``; narrowed so Ctrl-C still interrupts.
            break
def _click_pager_and_wait(label, driver):
    """Click the pager entry ``label`` and block until the first row changes."""
    first_row = '/html/body/div[3]/div/section[1]/table/tbody/tr[1]/td[2]/a'
    before = textXpath(first_row, driver)
    after = before
    clickforname('/html/body/div[3]/div/section[2]/div/nav/ul/li[', 1, ']/a',
                 label, driver)
    while before == after:
        sleep(3)
        after = textXpath(first_row, driver)


def jumpage(startpage, driver):
    """Advance the pager until an entry labelled ``str(startpage)`` is found.

    Pager entries ``li[1]``, ``li[2]``, ... are read in turn.  When an entry
    cannot be read, the entry labelled with the current jump target (5, then
    10, 15, ...) is clicked, then '下一页', and scanning restarts from the
    first entry.  If that recovery sequence raises, the search is abandoned.
    """
    current_label = ''
    index = 1
    jump_target = 5  # was named `max`, which shadowed the builtin
    while current_label != str(startpage):
        try:
            current_label = textXpath(
                '/html/body/div[3]/div/section[2]/div/nav/ul/li['
                + str(index) + ']/a', driver)
        except Exception:
            # Was a bare ``except:``; narrowed so Ctrl-C still interrupts.
            try:
                _click_pager_and_wait(str(jump_target), driver)
                jump_target += 5
                index = 0  # becomes 1 after the increment below
                _click_pager_and_wait('下一页', driver)
            except Exception:
                break
        index += 1
def readtype(driver):
    """Read every page of the current result list, then reset ``zzcount``.

    ``readpage`` runs on each page.  Afterwards the pager entry labelled
    '下一页' is clicked and the first row's link text is polled every 3
    seconds until it changes.

    Returns:
        False when ``readpage`` returns a falsy value; True when polling for
        the next page raises.  The global ``zzcount`` is reset to 0 on both
        paths.
    """
    global zzcount
    first_row = '/html/body/form/div[5]/div[2]/div/table/tbody/tr[1]/td[2]/a'
    stamp = 1
    while True:
        if not readpage(driver):
            zzcount = 0
            return False
        current = textXpath(first_row, driver)
        clickforname('/html/body/form/div[5]/div[2]/div/div/div/ul/li[', stamp,
                     ']/a', '下一页', driver)
        try:
            # The third argument (30) is presumably a timeout -- confirm.
            while current == textXpath(first_row, driver, 30):
                sleep(3)
        except Exception:
            # Was a bare ``except:``; narrowed so Ctrl-C still interrupts.
            zzcount = 0
            return True
    # The original had an unreachable ``zzcount = 0; return True`` here: the
    # loop above has no ``break``, so it was removed.
def readqyname(driver, driver2):
    """Process each row of ``driver2``'s table through ``getzzfromcompnay``.

    For every row (1-based) the column-3 span text is read from ``driver2``
    and passed to ``searchcompany`` with ``driver``.  The text after the last
    '=' of the returned href is appended to the ``GetEnteZsList`` endpoint,
    and the resulting URL is passed to ``getzzfromcompnay`` with the href.
    The loop ends at the first exception.
    """
    row = 1
    while True:
        try:
            company = textXpath(
                '/html/body/div[1]/div/div/div[2]/div[3]/div[1]/div[3]/table/tbody/tr['
                + str(row) + ']/td[3]/div/span', driver2)
            href = searchcompany(company, driver)
            # Was bound to `id`, which shadowed the builtin.
            company_id = href.split('=')[-1]
            url = ("http://202.61.88.193:89//api/getdata/GetEnteZsList/"
                   + company_id)
            getzzfromcompnay(url, href)
            row += 1
        except Exception:
            # Was a bare ``except:``; narrowed so Ctrl-C still interrupts.
            break
# Step through the options of the filter <select>, starting with option[2],
# pressing the search button for each and waiting for the first result row
# to change.  The loop ends when an option cannot be clicked.
# NOTE(review): this fragment appears truncated (`next` is set but not read
# here); names are left untouched in case later code depends on them.
flag = True
stamp = 2
while flag:
    clickXpath(
        '/html/body/form/div[5]/div/div/div/div[1]/div/fieldset/table/tbody/tr[1]/td[2]/select',
        driver)
    try:
        clickXpath(
            '/html/body/form/div[5]/div/div/div/div[1]/div/fieldset/table/tbody/tr[1]/td[2]/select/option['
            + str(stamp) + ']', driver)
    except Exception:
        # Was a bare ``except:``; narrowed so Ctrl-C still interrupts.
        break
    next = True
    try:
        current = textXpath(
            '/html/body/form/div[5]/div/div/div/div[2]/table/tbody/tr[1]/td[2]/a',
            driver)
    except Exception:
        current = ''
    a = current
    clickXpath('//*[@id="MainContent_Button1"]', driver)
    # Poll every 3 seconds until the first row differs from the old one.
    while a == current:
        sleep(3)
        try:
            a = textXpath(
                '/html/body/form/div[5]/div/div/div/div[2]/table/tbody/tr[1]/td[2]/a',
                driver)
        except Exception:
            next = False
            break
    stamp += 1
stamp += 1 if not stopflag: print(zzcount, zznum) return False except: break return True if __name__ == '__main__': driver= LoginUrl('http://gcxm.hunanjs.gov.cn/dataservice.html?queryType=0&keyword=') print("*1、页面拖动验证条;2、先选好起始爬取页") print("输入爬取资质数:") zznum=input() flag=True stamp=1 while(flag): stopflag=readpage(driver) if not stopflag: break try: current=textXpath('/html/body/div[3]/div/section[1]/table/tbody/tr[1]/td[2]/a',driver) a=current clickforname('/html/body/div[3]/div/section[2]/div/nav/ul/li[',1,']/a','下一页',driver) while(current==a): sleep(3) a=textXpath('/html/body/div[3]/div/section[1]/table/tbody/tr[1]/td[2]/a',driver) except: break print(0)
if __name__ == '__main__':
    # Two browser sessions: `driver` on the provincial site, `driver2` on the
    # national company list that supplies the names to look up.
    driver = LoginUrl("http://jst.sc.gov.cn/xxgx/Enterprise/eList.aspx")
    driver2 = LoginUrl("http://jzsc.mohurd.gov.cn/data/company?complexname=")
    openquanguogw(driver2)
    print('input zznum:')
    zznum = input()  # kept as a string; consumers call int(zznum)
    flag = True
    stamp = 1
    first_row_xpath = (
        '/html/body/div[1]/div/div/div[2]/div[3]/div[1]/div[3]/table/tbody'
        '/tr[1]/td[3]/div/span')
    while flag:
        readqyname(driver, driver2)
        try:
            current = textXpath(first_row_xpath, driver2)
            a = current
            # Click the second pager button, then poll every 3 seconds until
            # the first row changes.
            clickXpath(
                '/html/body/div[1]/div/div/div[2]/div[3]/div[2]/button[2]/i',
                driver2)
            while a == current:
                sleep(3)
                a = textXpath(first_row_xpath, driver2)
        except Exception:
            # Was a bare ``except:``; narrowed so Ctrl-C still interrupts.
            break
        if zzcount > int(zznum):
            flag = False
else: _ = 1 except: flag = False break return True if __name__ == '__main__': url = 'http://dn4.gxzjt.gov.cn:1141/WebInfo/Default.aspx' driver = LoginUrIE(url) print('input zznum:') zznum = input() flag = True stamp = 1 while (flag): if not readpage(driver): break current = textXpath( '//*[@id="ContentPlaceHolder1_List_Datagrid1"]/tbody/tr[2]/td[2]/a', driver) clickforname( '//*[@id="ContentPlaceHolder1_List_Pager"]/table/tbody/tr/td[2]/a[', stamp, ']', '下一页>', driver) while (current == textXpath( '//*[@id="ContentPlaceHolder1_List_Datagrid1"]/tbody/tr[2]/td[2]/a', driver, 3)): sleep(3)
# Ask for the limit, then run the search once per option of the
# #ddlSpecialty drop-down (starting with option[2]) and read the results
# with readtype().  The loop ends when an option cannot be clicked.
print('input zznum:')
zznum = input()  # kept as a string; consumers call int(zznum)
stamp = 2
while True:
    clickXpath('//*[@id="ddlSpecialty"]', driver)
    try:
        clickXpath(
            '/html/body/form/div[4]/div[2]/table[1]/tbody/tr[1]/td[4]/select/option['
            + str(stamp) + ']', driver)
    except Exception:
        # Was a bare ``except:``; narrowed so Ctrl-C still interrupts.
        break
    stamp += 1
    try:
        current = textXpath(
            '/html/body/form/div[5]/div[2]/div/table/tbody/tr[1]/td[2]/a',
            driver, 10)
    except Exception:
        current = ''
    clickXpath('//*[@id="btnSearch"]', driver)
    sleep(3)
    try:
        # Poll until the first result row differs from the previous search.
        while current == textXpath(
                '/html/body/form/div[5]/div[2]/div/table/tbody/tr[1]/td[2]/a',
                driver, 10):
            sleep(3)
    except Exception:
        # `stamp` was already advanced above.  The original incremented it a
        # second time here, which skipped the option after every search that
        # raised while polling.
        continue
    readtype(driver)
driver.switch_to.window(handle) if not flag: return False except: driver.switch_to.window(handle) break return True if __name__ == '__main__': url = 'http://218.69.33.182/epr/Search/C_Common/CE/index.aspx' driver = LoginUrlChrome(url) print("input zznum:") zznum = input() stamp = 5 while (True): flag = readpage(driver) if not flag: break current = textXpath( '/html/body/form/div[3]/div/div/table/tbody/tr/td/table[1]/tbody/tr[1]/td[2]', driver) clickforname( '//*[@id="ASPxGridView2_ctl04"]/tbody/tr/td/table/tbody/tr/td[', stamp, ']', '下一页', driver) while (current == textXpath( '/html/body/form/div[3]/div/div/table/tbody/tr/td/table[1]/tbody/tr[1]/td[2]', driver, 3)): sleep(3) driver.quit()