def getAllModels():
    """Fill the global ``allModels`` with the Zyxel model names.

    The names are read from ``zyxel_models.txt`` when that file exists, is
    larger than 2 bytes and was modified less than 12 hours ago.  Otherwise
    they are collected from the model dropdown of the current page and the
    file is rewritten with one name per line.

    Exceptions are handled here and not re-raised: the function breaks into
    ipdb, prints the traceback and saves a screenshot of the browser.
    """
    global driver, allModels
    cacheFile = 'zyxel_models.txt'
    optionCss = 'div.dropdown-menu.open ul li a'

    try:
        cacheIsFresh = (
            path.exists(cacheFile)
            and path.getsize(cacheFile) > 2
            and time.time() - path.getmtime(cacheFile) < 3600*12
        )
        if cacheIsFresh:
            with open(cacheFile, 'r', encoding='utf-8') as fin:
                cached = fin.read()
            # Keep only non-blank lines, without surrounding whitespace.
            allModels = [ln.strip() for ln in cached.splitlines()
                         if ln.strip()]
            return

        # click 'Enter model number here'
        btn = waitClickable('button[data-id=modelName]')
        btn.click()
        time.sleep(0.1)
        inp = waitClickable('.form-control')
        inp.click()

        def pressUpTwice():
            # Two UP key presses with a short pause in between.
            inp.send_keys(Keys.UP)
            time.sleep(0.1)
            inp.send_keys(Keys.UP)

        pressUpTwice()
        seenCount = getNumElem(optionCss)
        # Keep pressing UP until the number of dropdown entries stops
        # changing between two consecutive rounds.
        while True:
            pressUpTwice()
            currentCount = getNumElem(optionCss)
            ulog('numModels=%d' % currentCount)
            if currentCount == seenCount:
                break
            seenCount = currentCount

        labels = (anchor.text.strip() for anchor in getElems(optionCss))
        # Drop blank entries and the 'Enter model ...' placeholder entry.
        allModels = [
            label for label in labels
            if label and not label.lower().startswith('enter model ')
        ]
        ulog('len(allModels)=%d' % len(allModels))
        with open(cacheFile, 'w', encoding='utf-8') as fout:
            fout.write('\n'.join(allModels))
        btn.click()
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
        driver.save_screenshot(
            getScriptName() + '_' + getFuncName() + '_excep.png')
def getAllModels():
    """Load the list of Zyxel model names into the global ``allModels``.

    A cached copy in ``zyxel_models.txt`` is used when the file exists, holds
    more than 2 bytes and is younger than 12 hours.  In every other case the
    names are taken from the model dropdown on the current page and written
    back to that file, one per line.

    Errors do not propagate: the handler opens ipdb, prints the traceback
    and stores a screenshot.
    """
    global driver, allModels
    fname = 'zyxel_models.txt'
    entrySelector = 'div.dropdown-menu.open ul li a'
    try:
        if path.exists(fname) and path.getsize(fname) > 2:
            if time.time() - path.getmtime(fname) < 3600*12:
                with open(fname, 'r', encoding='utf-8') as fin:
                    content = fin.read()
                names = []
                for rawLine in content.splitlines():
                    cleaned = rawLine.strip()
                    if cleaned:
                        names.append(cleaned)
                allModels = names
                return

        # click 'Enter model number here'
        btn = waitClickable('button[data-id=modelName]')
        btn.click()
        time.sleep(0.1)
        inp = waitClickable('.form-control')
        inp.click()
        inp.send_keys(Keys.UP)
        time.sleep(0.1)
        inp.send_keys(Keys.UP)

        # Repeat the UP key presses until the entry count no longer changes.
        lastCount = getNumElem(entrySelector)
        changed = True
        while changed:
            inp.send_keys(Keys.UP)
            time.sleep(0.1)
            inp.send_keys(Keys.UP)
            nowCount = getNumElem(entrySelector)
            ulog('numModels=%d'%nowCount)
            changed = nowCount != lastCount
            lastCount = nowCount

        texts = [elem.text for elem in getElems(entrySelector)]
        names = []
        for text in texts:
            cleaned = text.strip()
            if not cleaned:
                continue
            # Skip the 'Enter model ...' placeholder entry.
            if cleaned.lower().startswith('enter model '):
                continue
            names.append(cleaned)
        allModels = names
        ulog('len(allModels)=%d'%len(allModels))
        with open(fname, 'w', encoding='utf-8') as fout:
            fout.write('\n'.join(allModels))
        btn.click()
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
        screenshotName = getScriptName()+'_'+getFuncName()+'_excep.png'
        driver.save_screenshot(screenshotName)
def harvestPage2():
    """Visit every 'firmware' row of the current page and record its files.

    Rows ``tr#rsq:nth-child(2)`` .. ``tr#rsq:nth-child(numRows)`` are
    examined; rows whose text does not contain 'firmware' (case-insensitive)
    are skipped.  For each remaining row the row is clicked, the model name
    and description are read, and every ``.fn9`` element is written to the
    ``dlink`` table and passed to ``clickDownloadableElem``.  The browser is
    then sent one step back.

    Updates the global ``modelName``; uses the globals ``driver`` and
    ``conn``.
    """
    global modelName, driver, conn

    # File extensions that trigger the "doesn't look like firmware" message.
    docExts = ('.doc', '.docx', '.txt', '.pdf', '.htm', '.html', '.xls')
    descCss = ('.prodtd > table:nth-child(4) > tbody:nth-child(1) '
               '> tr:nth-child(2) > td:nth-child(2)')

    modelName = getText('big > strong')
    print("Page2 modelName=", modelName)
    numRows = getNumElem('tr#rsq')
    if numRows == 0:
        return

    for rowNo in range(2, numRows + 1):
        rowElem = waitClickable('tr#rsq:nth-child(%d)' % rowNo)
        caption = getElemText(rowElem)
        uprint('Row%d %s' % (rowNo, caption))
        if 'firmware' not in caption.lower():
            print(' -- bypass')
            continue

        uprint('Click ' + caption)
        rowElem.click()
        modelName = getText('big > strong')
        print('Page3 modelName=%s' % modelName)
        desc = getText(descCss)
        uprint("Description=" + desc)

        for fileElem in getElems('.fn9'):
            fileName = getElemText(fileElem)
            extension = path.splitext(fileName)[1].lower()
            uprint('filaName="%s"' % fileName)
            if extension in docExts:
                # NOTE(review): only a message is printed here; the file is
                # still recorded and downloaded below.  Confirm whether a
                # `continue` was intended.
                uprint(' -- fileName "%s" doesn\'t look like a firmware file'
                       % fileName)
            cursor = conn.cursor()
            record = {'model': modelName, 'fileName': fileName, 'desc': desc}
            cursor.execute(
                "INSERT OR REPLACE INTO dlink(model,file_name,desc)"
                "VALUES(:model,:fileName,:desc)", record)
            uprint('INSERT OR REPLACE INTO '
                   '"%(model)s","%(fileName)s","%(desc)s"' % record)
            # waitDownloading()
            clickDownloadableElem(fileElem)

        # Leave the page opened by rowElem.click().
        driver.back()
def main():
    """Harvest Linksys firmware download metadata into ``Linksys.sqlite3``.

    Opens the Linksys support sitemap, visits each product link, follows its
    "Download Software" link, expands every ``.article-accordian`` section
    (one per hardware revision) and stores one ``TFiles`` row per "Download"
    link found in the section text.

    Command-line arguments (both optional):
        sys.argv[1]: index of the first sitemap link to visit (default 0).
        sys.argv[2]: index of the first accordion to visit (default 0); it
            is reset to 0 as soon as one accordion has been visited.

    Side effects: assigns the globals ``driver`` and ``conn`` as well as
    ``harvest_utils.driver``, creates the ``TFiles`` table if needed and
    commits after every insert.

    Exceptions raised while harvesting are reported (``IncompleteRead``) or
    handed to ipdb (anything else) and are not re-raised.  The browser is
    always closed when the harvest loop ends, including after a clean run.
    """
    global driver, conn
    startModelIdx = int(sys.argv[1]) if len(sys.argv) > 1 else 0
    startRevisionIdx = int(sys.argv[2]) if len(sys.argv) > 2 else 0
    brand = 'Linksys'

    harvest_utils.driver = getFirefox()
    driver = harvest_utils.driver
    conn = sqlite3.connect('Linksys.sqlite3')
    csr = conn.cursor()
    csr.execute(
        "CREATE TABLE IF NOT EXISTS TFiles("
        "brand TEXT,"
        "model TEXT,"
        "revision TEXT,"  # hardware version
        "fw_date DATE,"
        "fw_ver TEXT,"
        "file_title TEXT,"
        "file_size INTEGER,"
        "href TEXT,"
        "file_sha1 TEXT,"
        "PRIMARY KEY (brand,model,revision,file_title)"
        ");")
    conn.commit()
    driver.get('http://www.linksys.com/us/support/sitemap/')

    try:
        numModels = getNumElem('.item ul li a')
        print('numModels=', numModels)
        for modelIdx in range(startModelIdx, numModels):
            # The link list is queried again on every iteration rather than
            # reused (presumably because elements found before navigating
            # away are no longer valid).
            modelElm = getElems('.item ul li a')[modelIdx]
            modelText = getElemText(modelElm, 5)
            print('modelIdx=', modelIdx)
            uprint('modelText="%s"' % modelText)
            # guess Possible Model
            model = guessModel(modelText)
            print('model=', model)
            modelKey = {'brand': brand, 'model': model}

            # Any existing row for this model means it was handled before.
            rows = csr.execute(
                "SELECT model from TFiles WHERE model=:model", modelKey
            ).fetchall()
            if rows:
                print('model "%s" already in TFiles, bypass!!' % model)
                continue

            modelElm.click()
            # click 'Download Software'
            try:
                waitClickable('a[title="Download Software"]', 40).click()
            except TimeoutException:
                print('No "Download Software" link found, bypass!!')
                # Record the model with an empty revision so the SELECT
                # above skips it on the next run.
                csr.execute(
                    "INSERT INTO TFiles(brand,model,revision)VALUES"
                    "(:brand,:model,'')", modelKey)
                conn.commit()
                print('INSERT model="%s"' % model)
                driver.back()
                continue

            # enumerate all accordians
            accordians = getElems('.article-accordian', 10)
            numAccordians = len(accordians)
            print('numAccordians=', numAccordians)
            print('driver.current_url=', driver.current_url)
            for revisionIdx in range(startRevisionIdx, numAccordians):
                # The resume offset applies only to the first model whose
                # accordions are actually visited.
                startRevisionIdx = 0
                accordians = getElems('.article-accordian')
                accordian = accordians[revisionIdx]
                revisionTxt = getElemText(accordian)
                print('revisionIdx=', revisionIdx)
                uprint('revisionTxt="%s"' % revisionTxt)
                revision = guessRevision(revisionTxt)
                print('revision=', revision)
                divId = accordian.get_attribute('data-collapse-target')
                # expand accordian 'revision'='Hardware Version'
                driver.execute_script(
                    "document.querySelectorAll('.article-accordian')[%d].click()"
                    % (revisionIdx))
                divElm = waitVisible('#' + divId)
                divTxt = getElemTextUntilStabled(divElm, 10, 2.5)
                assert divTxt
                uprint('divTxt="%s"' % divTxt)
                revisionKey = dict(modelKey, revision=revision)

                numDowns = getCount(divTxt, 'Download')
                if numDowns == 0:
                    # No downloads: still record the revision itself.
                    csr.execute(
                        "INSERT INTO TFiles(brand,model,revision)VALUES"
                        "(:brand,:model,:revision)", revisionKey)
                    conn.commit()
                    print('INSERT "%(model)s","%(revision)s"' % revisionKey)
                    continue

                downElms = iter(divElm.find_elements_by_css_selector('a'))
                lastSpanEnd = 0
                for downIdx in range(numDowns):
                    # The text from the end of the previous "Download" line
                    # up to the end of this one describes this file.
                    spanBegin = getNthIndex(divTxt, downIdx, 'Download')
                    spanEnd = divTxt.find('\n', spanBegin + len('Download'))
                    if spanEnd == -1:
                        spanEnd = len(divTxt)
                    # Lines are reversed so those closest to the link come
                    # first for the guess* helpers.
                    foreword = '\n'.join(
                        reversed(divTxt[lastSpanEnd:spanEnd].splitlines()))
                    fwDate = guessDate(foreword)
                    fileSize = guessFileSize(foreword)
                    fwVer = guessVersion(foreword)
                    if fwVer:
                        fileTitle = guessFileTitle(foreword, fwVer)
                    else:
                        fileTitle = guessFileTitle2(foreword)

                    # Advance to the next anchor whose text starts with
                    # 'Download'; other anchors in the section are skipped.
                    while True:
                        downElm = next(downElms)
                        if downElm.text.strip().startswith('Download'):
                            break
                    href = downElm.get_attribute('href')
                    lastSpanEnd = spanEnd

                    record = dict(
                        revisionKey,
                        fwDate=fwDate,
                        fwVer=fwVer,
                        fileTitle=fileTitle,
                        fileSize=fileSize,
                        href=href,
                    )
                    csr.execute(
                        "INSERT OR REPLACE INTO TFiles(brand,model,revision,"
                        "fw_date, fw_ver, file_title, file_size, "
                        "href) VALUES (:brand,:model,:revision,"
                        ":fwDate, :fwVer, :fileTitle,"
                        ":fileSize, :href)", record)
                    conn.commit()
                    # NOTE(review): '%(fileSize)d' needs a number; confirm
                    # guessFileSize never returns None.
                    uprint("INSERT '%(model)s', '%(revision)s', '%(fwDate)s'"
                           ", '%(fwVer)s', '%(fileTitle)s', '%(fileSize)d'"
                           ", '%(href)s'" % record)

            # Two steps back (presumably download page -> product page ->
            # sitemap) before the next model.
            driver.back()
            driver.back()
    except http.client.IncompleteRead as ex:
        print(ex)
        import traceback; traceback.print_exc()
        print('-- Selenium exhausted')
    except Exception as ex:
        import ipdb; ipdb.set_trace()
        print(ex)
        print('driver.current_url=', driver.current_url)
        import traceback; traceback.print_exc()
        print('-- terminate firefox')
    finally:
        # Close the browser on every exit path, not only after an error, so
        # a clean run does not leave a browser process behind.
        driver.quit()
def selectSoftwareType():
    """Walk the "Select ..." page currently shown, recursing into each entry.

    The breadcrumb depth from ``getDepth()`` is compared with
    ``len(prevTrail)``; a positive difference (``jumpedLevels``) means the
    site moved forward past one or more levels by itself, and ``prevTrail``
    is padded with one index per such level.  Start indexes are taken from
    the front of ``startTrail`` while it is non-empty, otherwise 0.

    * If the page has no ``table#imageTableContainer``: every link matched
      by ``div.csWrapper li a`` from ``startIdx`` on is clicked and this
      function is called recursively; afterwards the last breadcrumb link
      is clicked to go back.
    * Otherwise: ``versionWalker()`` is called, then the last breadcrumb
      link is clicked once per jumped level, popping ``prevTrail`` each
      time.

    Original author's note: navigating forward may jump automatically,
    navigating backward does not.

    Exceptions are caught here and not re-raised: the handler opens ipdb,
    prints the exception and traceback, and saves
    ``cisco_selectSoftwareType.png``.
    """
    global startTrail,prevTrail,driver
    try:
        waitText('.csProductSelectorBreadcrumb', 5, 1)
        waitUntilStable('.csProductSelectorBreadcrumb', 1, 0.3)
        depth = getDepth()
        # Number of levels the site advanced beyond what prevTrail records.
        jumpedLevels =depth - len(prevTrail)
        ulog('jumpedLevels=%d'%jumpedLevels)
        assert jumpedLevels>=0
        ulog('depth=%d, but prevTrail=%s'%(depth, prevTrail))
        # Set to True once any start index has been taken from startTrail.
        startIdxFromStartTrail=False
        def getStartIdx()->int:
            # Consume the next resume index from startTrail, or use 0 when
            # startTrail is empty.
            if startTrail:
                nonlocal startIdxFromStartTrail
                startIdxFromStartTrail=True
                return startTrail.pop(0)
            else:
                return 0
        if jumpedLevels>0:
            # Pad prevTrail with one entry per skipped level.
            while depth>len(prevTrail):
                startIdx=getStartIdx()
                prevTrail+=[startIdx]
        else:
            startIdx=getStartIdx()
        assert depth==len(prevTrail)
        ulog('startTrail=%s'%startTrail)
        ulog('prevTrail=%s'%prevTrail)
        ulog('startIdx=%d'%startIdx)
        ulog('url=%s'%driver.current_url)
        crumbs = waitText('.csProductSelectorBreadcrumb')
        uprint('crumbs=%s'%(crumbs.replace('\n',' > ')))
        if not hasElem('table#imageTableContainer', 1.5,0.4):
            if jumpedLevels>0:
                # The indexes popped above went into prevTrail for the
                # skipped levels; fetch a separate start index for this page.
                startIdx=getStartIdx()
            # NOTE(review): depth == len(prevTrail) was asserted above and
            # neither has changed since, so this branch looks unreachable.
            if depth > len(prevTrail):
                prevTrail+=[startIdx]
            sdpBannerTitle=waitText('td.SDPBannerTitle').strip()
            ulog('SDBBannerTitle="%s"'%sdpBannerTitle)
            assert sdpBannerTitle.lower().startswith('select ')
            waitUntil(lambda: getNumElem('div.csWrapper li a') > 0)
            swtypes = getElems('div.csWrapper li a')
            ulog('%s'%[(i,getElemText(_)) for i,_ in enumerate(swtypes)])
            numSwTypes=len(swtypes)
            assert numSwTypes > 0
            for idx in range(startIdx, numSwTypes):
                ulog('goto Trail=%s'%(prevTrail+[idx]))
                # The links are fetched again on every iteration instead of
                # reusing the list obtained before the loop.
                swtypes = getElems('div.csWrapper li a')
                ulog('Click "%s"'% getElemText(swtypes[idx]))
                clickElem(swtypes[idx])
                # Record the choice for the duration of the recursive call.
                prevTrail+=[idx]
                selectSoftwareType()
                prevTrail.pop()
            # Select a Product -> Select a Software type -> Select a Platform
            # https://software.cisco.com/download/type.html?mdfid=277873153&flowid=170&softwareid=283724313
            # Downloads Home >Products >Cisco Interfaces and Modules >WAN Interface Cards >1700/2600/3600/3700 Series 2-Port Analog Modem WAN Interface Card >Analog Firmware Loader >Windows 2000-v6780
            # not auto back to treeWalker
            # go back manually
            crumbs = getElems('.csProductSelectorBreadcrumb a')
            ulog('manually backto "%s"'%getElemText(crumbs[-1]))
            ulog('prevTail=%s'%prevTrail)
            clickElem(crumbs[-1])
            # do I need to pop prevTrail?
            # prevTrail.pop()
        else:
            ulog('auto forward to versionWalker')
            if startIdxFromStartTrail:
                # Put the last index taken from startTrail back at its front
                # before versionWalker() runs.
                startTrail.insert(0, startIdx)
            # Drop up to jumpedLevels entries from the front of startTrail.
            for i in range(jumpedLevels):
                if not startTrail:
                    break
                startTrail.pop(0)
            versionWalker()
            # Click back once per jumped level and drop the matching
            # prevTrail entry added by the padding loop above.
            for i in range(jumpedLevels):
                crumbs = getElems('.csProductSelectorBreadcrumb a')
                ulog('manually backto "%s"'%getElemText(crumbs[-1]))
                ulog('prevTail=%s'%prevTrail)
                clickElem(crumbs[-1])
                prevTrail.pop()
    except Exception as ex:
        ipdb.set_trace()
        print(ex); traceback.print_exc()
        driver.save_screenshot('cisco_selectSoftwareType.png')