if not exist: print u"worker[%d]结束" % pid break tableId, keyword = reuslt pfi.keyword = keyword pfi.tableId = tableId timeoutNum = OVER_TIME_PROCESS if pfi.needInitDriver: timeoutNum += OVER_TIME_INIT_DRIVER if not doByTimeout(getOnWtitch, pfi, timeoutNum): (STATUS_NON, tableId) print u"worker[%d]超时,进行重启" % pid pfi.needInitDriver = True except Exception, e: pfi.needInitDriver = True print 'process error', e, traceback.format_exc() if __name__ == "__main__": __lock = threading.Lock() updateStatus(STATUS_SUCCESS, None) poolNum = PROCESS_NUM arg_list = [] for i in range(poolNum): req = {} req["lock"] = __lock req["pid"] = i arg_list.append(req) doInThread(process, arg_list, poolNum=poolNum)
recoreDate.append([href]) driverContent.quit() while True: retry(lambda: driver.find_elements_by_xpath("//p[@class='title']/a")) aEles = driver.find_elements_by_xpath("//p[@class='title']/a") hrefs = [] for aEle in aEles: href = aEle.get_attribute("href") if href in matchRecordData: print "processd ", href else: hrefs.append(href) if 0 == len(hrefs): print "no new href" else: doInThread(processInNewDriver, hrefs, poolNum=4) write(recoreDate, FILENAME_RECORD) try: driver.find_element_by_xpath( "//a[@id='EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Entrez_Pager.Page' and @sid='3']" ).send_keys(Keys.ENTER) print "下一页" import time time.sleep(1) except Exception, e: print "已到尾页" break