def readlx(startpage, driver):
    """Jump to ``startpage`` and then read listing pages until told to stop.

    ``jumpage`` is called first to position the pager.  Each iteration then
    hands the current page to ``readpage``; a falsy result ends the run.
    Otherwise the pager entry labelled "下一页" (next page) is clicked and the
    function polls, calling ``sleep(3)`` between polls, until the value
    ``textXpath`` returns for the first table row differs from the value
    captured before the click.

    Args:
        startpage: Page label to start from; forwarded to ``jumpage``.
        driver: Browser driver handle, passed through to the helpers.

    Returns:
        None.  The function ends when ``readpage`` returns a falsy value or
        when looking up / clicking the pager raises an ordinary exception.
    """
    first_row_xpath = '/html/body/div[3]/div/section[1]/table/tbody/tr[1]/td[2]/a'
    pager_prefix = '/html/body/div[3]/div/section[2]/div/nav/ul/li['

    jumpage(startpage, driver)
    while readpage(driver):
        try:
            before = textXpath(first_row_xpath, driver)
            clickforname(pager_prefix, 1, ']/a', '下一页', driver)
            # Always wait at least once, then keep polling until the first
            # row no longer matches what was shown before the click.
            after = before
            while after == before:
                sleep(3)
                after = textXpath(first_row_xpath, driver)
        except Exception:
            # Best-effort paging: any helper failure ends the run.  Only
            # ``Exception`` is caught so Ctrl+C / SystemExit still propagate
            # instead of being mistaken for "no more pages".
            break
def jumpage(startpage, driver):
    """Advance the pager until an entry labelled ``startpage`` is found.

    Pager entries ``li[1]``, ``li[2]``, ... are read one at a time with
    ``textXpath``.  When an entry's text equals ``str(startpage)`` the
    function returns.  When reading an entry raises, the pager is moved
    forward: the entry labelled with the current jump target (5, then 10,
    15, ...) is clicked, then "下一页" (next page) is clicked, and scanning
    restarts from ``li[1]``.  After each click the function polls, calling
    ``sleep(3)`` between polls, until the first table row changes.

    NOTE(review): the loop stops as soon as the target label is *seen*; no
    click on that label happens here.  Confirm that callers do not expect
    the target page to be opened by this function.

    Args:
        startpage: Page label to look for; compared as ``str(startpage)``.
        driver: Browser driver handle, passed through to the helpers.

    Returns:
        None.  Also returns early if moving the pager forward raises an
        ordinary exception.
    """
    first_row_xpath = '/html/body/div[3]/div/section[1]/table/tbody/tr[1]/td[2]/a'
    pager_prefix = '/html/body/div[3]/div/section[2]/div/nav/ul/li['

    def _click_and_wait(label):
        """Click the pager entry named ``label`` and wait for new rows."""
        before = textXpath(first_row_xpath, driver)
        clickforname(pager_prefix, 1, ']/a', label, driver)
        # Always wait at least once, then poll until the first row changes.
        after = before
        while after == before:
            sleep(3)
            after = textXpath(first_row_xpath, driver)

    target = str(startpage)
    label_seen = ''
    slot = 1        # index of the pager <li> currently being inspected
    jump_to = 5     # label of the pager entry used to skip ahead
    while label_seen != target:
        try:
            label_seen = textXpath(pager_prefix + str(slot) + ']/a', driver)
        except Exception:
            # Reading this pager slot failed, so move the pager forward and
            # rescan from the first slot.
            try:
                _click_and_wait(str(jump_to))
                jump_to += 5
                slot = 0  # becomes 1 after the increment below
                _click_and_wait('下一页')
            except Exception:
                # Could not move forward; give up.  Only ``Exception`` is
                # caught so Ctrl+C / SystemExit still propagate.
                break
        slot += 1
def readtype(driver):
    """Read listing pages one after another, resetting ``zzcount`` on exit.

    Each iteration hands the current page to ``readpage``.  A falsy result
    resets the module-level ``zzcount`` to 0 and returns ``False``.
    Otherwise the pager entry labelled "下一页" (next page) is clicked and the
    function polls, calling ``sleep(3)`` between polls, until the first table
    row changes.  If that polling raises, ``zzcount`` is reset to 0 and
    ``True`` is returned.

    Args:
        driver: Browser driver handle, passed through to the helpers.

    Returns:
        ``False`` when ``readpage`` returned a falsy value; ``True`` when
        waiting for the next page raised an ordinary exception.

    Raises:
        Whatever ``textXpath`` / ``clickforname`` raise outside the polling
        step; those calls are not guarded here.
    """
    global zzcount

    first_row_xpath = '/html/body/form/div[5]/div[2]/div/table/tbody/tr[1]/td[2]/a'
    pager_prefix = '/html/body/form/div[5]/div[2]/div/div/div/ul/li['
    pager_start = 1  # first pager <li> index handed to ``clickforname``

    # The loop has no ``break``; every exit path is one of the returns below,
    # so no code is needed after it.
    while True:
        if not readpage(driver):
            zzcount = 0
            return False

        before = textXpath(first_row_xpath, driver)
        clickforname(pager_prefix, pager_start, ']/a', '下一页', driver)
        try:
            # Third argument (30) is presumably a wait timeout in seconds —
            # TODO confirm against ``textXpath``.
            while before == textXpath(first_row_xpath, driver, 30):
                sleep(3)
        except Exception:
            # A failed lookup after clicking "next" ends the run.  Only
            # ``Exception`` is caught so Ctrl+C / SystemExit still propagate.
            zzcount = 0
            return True
driver.switch_to.window(handle) if not flag: return False except: driver.switch_to.window(handle) break return True if __name__ == '__main__': url = 'http://218.69.33.182/epr/Search/C_Common/CE/index.aspx' driver = LoginUrlChrome(url) print("input zznum:") zznum = input() stamp = 5 while (True): flag = readpage(driver) if not flag: break current = textXpath( '/html/body/form/div[3]/div/div/table/tbody/tr/td/table[1]/tbody/tr[1]/td[2]', driver) clickforname( '//*[@id="ASPxGridView2_ctl04"]/tbody/tr/td/table/tbody/tr/td[', stamp, ']', '下一页', driver) while (current == textXpath( '/html/body/form/div[3]/div/div/table/tbody/tr/td/table[1]/tbody/tr[1]/td[2]', driver, 3)): sleep(3) driver.quit()
stamp += 1 if not stopflag: print(zzcount, zznum) return False except: break return True if __name__ == '__main__': driver= LoginUrl('http://gcxm.hunanjs.gov.cn/dataservice.html?queryType=0&keyword=') print("*1、页面拖动验证条;2、先选好起始爬取页") print("输入爬取资质数:") zznum=input() flag=True stamp=1 while(flag): stopflag=readpage(driver) if not stopflag: break try: current=textXpath('/html/body/div[3]/div/section[1]/table/tbody/tr[1]/td[2]/a',driver) a=current clickforname('/html/body/div[3]/div/section[2]/div/nav/ul/li[',1,']/a','下一页',driver) while(current==a): sleep(3) a=textXpath('/html/body/div[3]/div/section[1]/table/tbody/tr[1]/td[2]/a',driver) except: break print(0)
driver.switch_to.window(newhandle) flag = readzz() driver.close() if not flag: return False driver.switch_to.window(handle) return True if __name__ == '__main__': url = 'http://dn4.gxzjt.gov.cn:1141/WebInfo/Person/Person.aspx' driver = LoginUrl(url) print('input zznum:') zznum = input() stamp = 1 '/html/body/form/div[6]/div[2]/div[2]/div/div/table/tbody/tr/td[2]/a[' ']' while (True): flag = readpage(driver) if not flag: break current = textXpath( '/html/body/form/div[6]/div[2]/div[2]/div/table/tbody/tr[2]/td[2]/a', driver) clickforname( '/html/body/form/div[6]/div[2]/div[2]/div/div/table/tbody/tr/td[2]/a[', stamp, ']', '下一页>', driver) while (current == textXpath( '/html/body/form/div[6]/div[2]/div[2]/div/table/tbody/tr[2]/td[2]/a', driver)): sleep(3)
driver.switch_to.window(newhandle) flag = readzz() driver.close() if not flag: return False driver.switch_to.window(handle) return True if __name__ == '__main__': driver = LoginUrl( 'http://110.16.70.26/nmjgpublisher/corpinfo/CorpInfoObtain.aspx#') print('input zznum:') zznum = input() stamp = 1 while (True): flag = readpage(driver) if not flag: break current = textXpath( '/html/body/form/div[4]/div[2]/div[1]/div[2]/div/div/table/tbody/tr[1]/td[2]', driver) clickforname( '/html/body/form/div[4]/div[2]/div[1]/div[2]/div/div/div/div/ul/li[', stamp, ']/a', '下一页', driver) while (current == textXpath( '/html/body/form/div[4]/div[2]/div[1]/div[2]/div/div/table/tbody/tr[1]/td[2]', driver)): sleep(3)
else: _ = 1 except: flag = False break return True if __name__ == '__main__': url = 'http://dn4.gxzjt.gov.cn:1141/WebInfo/Default.aspx' driver = LoginUrIE(url) print('input zznum:') zznum = input() flag = True stamp = 1 while (flag): if not readpage(driver): break current = textXpath( '//*[@id="ContentPlaceHolder1_List_Datagrid1"]/tbody/tr[2]/td[2]/a', driver) clickforname( '//*[@id="ContentPlaceHolder1_List_Pager"]/table/tbody/tr/td[2]/a[', stamp, ']', '下一页>', driver) while (current == textXpath( '//*[@id="ContentPlaceHolder1_List_Datagrid1"]/tbody/tr[2]/td[2]/a', driver, 3)): sleep(3)