def getAllModels():
    """Fill the global ``models`` list with the model names of the search box.

    A cache file ``zyxel_models.txt`` (one model per line, in the current
    working directory) is used when it exists, is non-empty and was modified
    less than seven days ago.  In that case no browser interaction happens.

    Otherwise the search drop-down on ``rootUrl`` is opened and Ctrl+End is
    sent repeatedly until the number of ``#searchDropUl li`` items stops
    growing; the non-empty ``data`` attributes of those items become
    ``models`` and are written back to the cache file.

    Returns:
        None.  The result is published through the global ``models``.

    Any exception is not re-raised: the function drops into ``ipdb``, prints
    the traceback and saves a screenshot named after the script and function.
    """
    global driver, models
    cacheFile = 'zyxel_models.txt'
    cacheMaxAge = 3600 * 24 * 7  # seven days, in seconds

    def numElm(c):
        # Count elements matching CSS selector `c` through the page's `$`
        # function (presumably jQuery -- confirm against the target site).
        return driver.execute_script("return $('%s').length" % c)

    try:
        if (path.exists(cacheFile)
                and path.getsize(cacheFile) > 0
                and time.time() - path.getmtime(cacheFile) < cacheMaxAge):
            with open(cacheFile, 'r', encoding='utf-8') as fin:
                # Iterating a text file keeps each line terminator; strip it
                # so cached entries equal the freshly scraped ones, and skip
                # blank lines (the writer below never emits empty models).
                stripped = (line.rstrip('\n') for line in fin)
                models = [m for m in stripped if m]
            return

        # Only a cache miss needs the browser.
        act = ActionChains(driver)
        goToUrl(rootUrl)
        btn = waitVisible('.search-select button')
        act.move_to_element(btn).click(btn).perform()
        inp = waitVisible('.input-block-level')
        act.move_to_element(inp).click(inp).perform()
        act.send_keys(Keys.DOWN).perform()
        time.sleep(0.1)
        act.send_keys(Keys.LEFT_CONTROL + Keys.END).perform()
        time.sleep(0.1)
        numModels = numElm('#searchDropUl li')
        uprint('numModels=%s' % numModels)
        # Keep jumping to the end of the list until no further items appear.
        while True:
            act.send_keys(Keys.LEFT_CONTROL + Keys.END).perform()
            time.sleep(0.1)
            numModels2 = numElm('#searchDropUl li')
            if numModels == numModels2:
                break
            numModels = numModels2
            uprint('numModels=%s' % numModels)
        uprint('numModels=%s' % numModels)

        scraped = [elm.get_attribute('data')
                   for elm in getElems('#searchDropUl li')]
        models = [m for m in scraped if m]
        uprint('len(models)=%s' % len(models))
        with open(cacheFile, 'w', encoding='utf-8') as fout:
            fout.writelines(m + '\n' for m in models)
    except Exception as ex:
        # Deliberate interactive debugging hook of this harvesting script.
        ipdb.set_trace()
        traceback.print_exc()
        driver.save_screenshot(
            getScriptName() + '_' + getFuncName() + '_excep.png')
def getAllModels():
    """Fill the global ``models`` list with the model names of the search box.

    A cache file ``zyxel_models.txt`` (one model per line, in the current
    working directory) is used when it exists, is non-empty and was modified
    less than seven days ago.  In that case no browser interaction happens.

    Otherwise the search drop-down on ``rootUrl`` is opened and Ctrl+End is
    sent repeatedly until the number of ``#searchDropUl li`` items stops
    growing; the non-empty ``data`` attributes of those items become
    ``models`` and are written back to the cache file.

    Returns:
        None.  The result is published through the global ``models``.

    Any exception is not re-raised: the function drops into ``ipdb``, prints
    the traceback and saves a screenshot named after the script and function.
    """
    global driver, models
    cacheFile = 'zyxel_models.txt'
    cacheMaxAge = 3600 * 24 * 7  # seven days, in seconds

    def numElm(c):
        # Count elements matching CSS selector `c` through the page's `$`
        # function (presumably jQuery -- confirm against the target site).
        return driver.execute_script("return $('%s').length" % c)

    try:
        if (path.exists(cacheFile)
                and path.getsize(cacheFile) > 0
                and time.time() - path.getmtime(cacheFile) < cacheMaxAge):
            with open(cacheFile, 'r', encoding='utf-8') as fin:
                # Iterating a text file keeps each line terminator; strip it
                # so cached entries equal the freshly scraped ones, and skip
                # blank lines (the writer below never emits empty models).
                stripped = (line.rstrip('\n') for line in fin)
                models = [m for m in stripped if m]
            return

        # Only a cache miss needs the browser.
        act = ActionChains(driver)
        goToUrl(rootUrl)
        btn = waitVisible('.search-select button')
        act.move_to_element(btn).click(btn).perform()
        inp = waitVisible('.input-block-level')
        act.move_to_element(inp).click(inp).perform()
        act.send_keys(Keys.DOWN).perform()
        time.sleep(0.1)
        act.send_keys(Keys.LEFT_CONTROL + Keys.END).perform()
        time.sleep(0.1)
        numModels = numElm('#searchDropUl li')
        uprint('numModels=%s'%numModels)
        # Keep jumping to the end of the list until no further items appear.
        while True:
            act.send_keys(Keys.LEFT_CONTROL + Keys.END).perform()
            time.sleep(0.1)
            numModels2 = numElm('#searchDropUl li')
            if numModels == numModels2:
                break
            numModels = numModels2
            uprint('numModels=%s'%numModels)
        uprint('numModels=%s'%numModels)

        scraped = [elm.get_attribute('data')
                   for elm in getElems('#searchDropUl li')]
        models = [m for m in scraped if m]
        uprint('len(models)=%s'%len(models))
        with open(cacheFile, 'w', encoding='utf-8') as fout:
            fout.writelines(m + '\n' for m in models)
    except Exception as ex:
        # Deliberate interactive debugging hook of this harvesting script.
        ipdb.set_trace()
        traceback.print_exc()
        driver.save_screenshot(
            getScriptName()+'_'+getFuncName()+'_excep.png')
def main():
    """Harvest Linksys firmware download metadata into ``Linksys.sqlite3``.

    Command-line arguments (both optional):
        sys.argv[1]: index of the first model link on the sitemap page to
            process (default 0).
        sys.argv[2]: index of the first ``.article-accordian`` (hardware
            revision) to process for that first model only (default 0).

    For every model link on the support sitemap the function opens the model
    page, follows its "Download Software" link, expands each accordion and
    parses the accordion text around every "Download" occurrence into one
    ``TFiles`` row (date, version, title, size, href).  Models already present
    in ``TFiles`` are skipped; models or revisions without downloads get a
    placeholder row.

    Side effects: sets the globals ``driver`` and ``conn`` as well as
    ``harvest_utils.driver``; the browser is always quit before returning.
    Exceptions raised while walking the site are not re-raised:
    ``http.client.IncompleteRead`` is printed, anything else drops into
    ``ipdb``.
    """
    global driver, conn
    startModelIdx = int(sys.argv[1]) if len(sys.argv) > 1 else 0
    startRevisionIdx = int(sys.argv[2]) if len(sys.argv) > 2 else 0
    # NOTE: `brand` and most locals below are bound into the SQL statements
    # by name through locals(); keep the names in sync with the placeholders.
    brand = 'Linksys'
    harvest_utils.driver = getFirefox()
    driver = harvest_utils.driver
    conn = sqlite3.connect('Linksys.sqlite3')
    csr = conn.cursor()
    csr.execute(
        "CREATE TABLE IF NOT EXISTS TFiles("
        "brand TEXT,"
        "model TEXT,"
        "revision TEXT,"  # hardware version
        "fw_date DATE,"
        "fw_ver TEXT,"
        "file_title TEXT,"
        "file_size INTEGER,"
        "href TEXT,"
        "file_sha1 TEXT,"
        "PRIMARY KEY (brand,model,revision,file_title)"
        ");")
    conn.commit()
    driver.get('http://www.linksys.com/us/support/sitemap/')
    try:
        numModels = getNumElem('.item ul li a')
        print('numModels=',numModels)
        for modelIdx in range(startModelIdx, numModels):
            # The revision offset is a resume point for the start model only;
            # every later model is walked from its first accordion, even if
            # the start model was skipped or had no accordions.
            firstRevisionIdx = (
                startRevisionIdx if modelIdx == startModelIdx else 0)
            # Re-query the links each time: navigating away and back
            # invalidates previously fetched elements.
            modelElm = getElems('.item ul li a')[modelIdx]
            modelText = getElemText(modelElm, 5)
            print('modelIdx=',modelIdx)
            uprint('modelText="%s"'%modelText)
            # guess Possible Model
            model = guessModel(modelText)
            print('model=',model)
            rows = csr.execute(
                "SELECT model from TFiles WHERE model=:model",locals()
                ).fetchall()
            if rows:
                print('model "%s" already in TFiles, bypass!!'%model)
                continue
            modelElm.click()
            # click 'Download Software'
            try:
                waitClickable('a[title="Download Software"]', 40).click()
            except TimeoutException:
                print('No "Download Software" link found, bypass!!')
                # Placeholder row so the model is not visited again.
                csr.execute(
                    "INSERT INTO TFiles(brand,model,revision)VALUES"
                    "(:brand,:model,'')", locals())
                conn.commit()
                print('INSERT model="%s"'%model)
                driver.back()
                continue
            # enumerate all accordians
            accordians = getElems('.article-accordian', 10)
            numAccordians = len(accordians)
            print('numAccordians=',numAccordians)
            print('driver.current_url=', driver.current_url)
            for revisionIdx in range(firstRevisionIdx, numAccordians):
                accordians = getElems('.article-accordian')
                # pick the accordian of this revision (zero-based index)
                accordian = accordians[revisionIdx]
                revisionTxt = getElemText(accordian)
                print('revisionIdx=',revisionIdx)
                uprint('revisionTxt="%s"'%revisionTxt)
                revision = guessRevision(revisionTxt)
                print('revision=',revision)
                divId = accordian.get_attribute('data-collapse-target')
                # expand accordian 'revision'='Hardware Version'
                driver.execute_script(
                    "document.querySelectorAll('.article-accordian')[%d].click()"
                    %(revisionIdx))
                divElm = waitVisible('#'+divId)
                divTxt = getElemTextUntilStabled(divElm,10,2.5)
                assert divTxt
                uprint('divTxt="%s"'%divTxt)
                numDowns = getCount(divTxt, 'Download')
                if numDowns == 0:
                    # Placeholder row for a revision without downloads.
                    csr.execute(
                        "INSERT INTO TFiles(brand,model,revision)VALUES"
                        "(:brand,:model,:revision)",locals())
                    conn.commit()
                    print('INSERT "%(model)s","%(revision)s"'%locals())
                    continue
                downElms = iter(divElm.find_elements_by_css_selector('a'))
                lastSpanEnd = 0
                for downIdx in range(numDowns):
                    # The text between the previous "Download" line and this
                    # one describes this file.
                    spanBegin = getNthIndex(divTxt, downIdx, 'Download')
                    spanEnd = divTxt.find('\n', spanBegin+len('Download'))
                    if spanEnd == -1:
                        spanEnd = len(divTxt)
                    # Lines are reversed so those nearest the "Download"
                    # link come first for the guess* helpers.
                    foreword = '\n'.join(
                        reversed(divTxt[lastSpanEnd:spanEnd].splitlines()))
                    fwDate = guessDate(foreword)
                    fileSize = guessFileSize(foreword)
                    fwVer = guessVersion(foreword)
                    if fwVer:
                        fileTitle = guessFileTitle(foreword, fwVer)
                    else:
                        fileTitle = guessFileTitle2(foreword)
                    # Advance to the next anchor whose text starts with
                    # 'Download'; it carries the href of this file.
                    while True:
                        downElm = next(downElms)
                        if downElm.text.strip().startswith('Download'):
                            break
                    href = downElm.get_attribute('href')
                    lastSpanEnd = spanEnd
                    csr.execute(
                        "INSERT OR REPLACE INTO TFiles(brand,model,revision,"
                        "fw_date, fw_ver, file_title, file_size, "
                        "href) VALUES (:brand,:model,:revision,"
                        ":fwDate, :fwVer, :fileTitle,"
                        ":fileSize, :href)", locals())
                    conn.commit()
                    # '%s' for fileSize: identical output for integers, and
                    # no TypeError if no size was parsed (presumably
                    # guessFileSize can come back empty -- confirm).
                    uprint("INSERT '%(model)s', '%(revision)s', '%(fwDate)s'"
                           ", '%(fwVer)s', '%(fileTitle)s', '%(fileSize)s'"
                           ", '%(href)s'" %locals())
            # Two pages were entered (model page, download page): go back
            # twice to return to the sitemap.
            driver.back()
            driver.back()
    except http.client.IncompleteRead as ex:
        print(ex)
        import traceback
        traceback.print_exc()
        print('-- Selenium exhausted')
    except Exception as ex:
        # Deliberate interactive debugging hook of this harvesting script.
        import ipdb
        ipdb.set_trace()
        print(ex)
        print('driver.current_url=',driver.current_url)
        import traceback
        traceback.print_exc()
    finally:
        # Single shutdown point: runs on success, on both handled error
        # paths, and when the debugger session itself raises.
        print('-- terminate firefox')
        driver.quit()
def tableRowWalker(fwVer:str):
    """Record every file listed in the current page's image table.

    The cells of ``table#imageTableContainer`` are read four per file
    (title + file name, release date, size, and one further cell).  For each
    file from the resume index onwards, ``getSha512`` is asked for details;
    if it returns nothing, the values parsed from the table are used with
    ``md5``/``sha512`` set to None.  Each result is upserted into ``TFiles``
    through ``sql``.

    Args:
        fwVer: firmware version label stored with every row of this table.

    Globals:
        startTrail: if non-empty, its first item is popped and used as the
            index of the first file to process (resume point).
        prevTrail: list prefix used to build each row's ``treeTrail``.
        driver: the Selenium driver.

    ``needContract`` is 1 when the title of the file's input element contains
    'cart', 0 when it does not, and -1 ("Deferral") when no input elements
    appear within the timeout.

    Returns:
        None.  Returns early if the table cells cannot be found in time.
        Other exceptions are not re-raised: the function drops into ``ipdb``,
        prints the traceback and saves ``cisco_tableRowWalker.png``.
    """
    global startTrail,prevTrail,driver
    try:
        try:
            cells = getElems('table#imageTableContainer tr td')
        except TimeoutException:
            ulog("Timeout at getElems('table#imageTableContainer tr td'); bypass!")
            return
        assert len(cells)%4==0
        numFiles = len(cells) // 4
        cellTxt = [getElemText(cell) for cell in cells]
        try:
            carts = getElems('table#imageTableContainer tr td input', 10)
            cartsTxt = [getElemAttr(cart,'title').lower().strip()
                        for cart in carts]
            # https://software.cisco.com/download/release.html?mdfid=282822110&flowid=266&softwareid=280805680&release=15.0.2-SE8&relind=AVAILABLE&rellifecycle=MD&reltype=latest
            # fwVer='15.0.2-SE1(ED)' has len(cells)==6 and numFiles==4
            cartsTxt = flatCarts(cartsTxt)
            assert len(cartsTxt)==numFiles
            needContracts = [int('cart' in title) for title in cartsTxt]
        except TimeoutException:
            ulog('needContract= "Deferral"')
            needContracts = [-1]*numFiles  # Deferral
        ulog('needContracts=%s'%needContracts)

        quats = combinePerFour(cellTxt)
        assert len(quats)==len(needContracts)
        # Table-derived description of each file, used as a fallback when
        # getSha512 yields nothing.
        fileDescs = []
        for quat, needContract in zip(quats, needContracts):
            titleLines = quat[0].split('\n')
            fileTitle = titleLines[0].strip()
            fileName = titleLines[1].strip()
            relDate = guessDate(quat[1])
            fileSize = guessFileSize(quat[2])
            fileDescs.append(
                (fileTitle, fileName, relDate, fileSize, needContract))

        model = waitText('td.SDPBannerTitle')
        ulog('model="%s"'%model)
        pageUrl = driver.current_url
        startIdx = startTrail.pop(0) if startTrail else 0
        ulog('startIdx=%d'%startIdx)
        try:
            # Best effort: scroll the list so the resume row is in view
            # (62 pixels per row, as hard-coded by the original author).
            imageRhSide = driver.find_element_by_css_selector('div#imageRhSide')
            driver.execute_script(
                "arguments[0].scroll(0,%d);"%(62*startIdx), imageRhSide)
        except Exception as scrollEx:
            # Still non-fatal, but no longer swallows KeyboardInterrupt /
            # SystemExit and no longer fails silently.
            ulog('scroll div#imageRhSide failed (ignored): %s'%scrollEx)

        spans = getElems('#imageTableContainer tr td span.overlay_img')
        assert len(spans)==numFiles
        for idx in range(startIdx, numFiles):
            treeTrail = str(prevTrail+[idx])
            ulog('getSha512 Trail=%s'%(prevTrail+[idx]))
            infos = getSha512(spans[idx])
            if infos:
                infos.update(dict(needContract=needContracts[idx], model=model,
                                  pageUrl=pageUrl, treeTrail=treeTrail))
                # NOTE(review): getSha512's result schema is not visible
                # here; make sure the :fwVer bind parameter always exists.
                infos.setdefault('fwVer', fwVer)
            else:
                fileTitle,fileName,relDate,fileSize,needContract = fileDescs[idx]
                infos = dict(fileTitle=fileTitle, fileName=fileName,
                             relDate=relDate, fileSize=fileSize,
                             needContract=needContract, pageUrl=pageUrl,
                             treeTrail=treeTrail, md5=None, sha512=None,
                             model=model, fwVer=fwVer)
            sql("INSERT OR REPLACE INTO TFiles(model,"
                " fw_date,fw_ver,file_title,file_name,file_size,"
                " need_contract, page_url, tree_trail, md5, sha512) "
                "VALUES (:model,"
                " :relDate,:fwVer,:fileTitle,:fileName,:fileSize,"
                " :needContract, :pageUrl, :treeTrail, :md5, :sha512)",
                infos)
            ulog('UPSERT "%(model)s", "%(relDate)s", "%(fwVer)s", '
                 '"%(fileTitle)s", "%(fileName)s", %(needContract)d,'
                 ' "%(treeTrail)s", %(pageUrl)s, "%(md5)s", "%(sha512)s"'
                 %infos)
    except Exception as ex:
        # Deliberate interactive debugging hook of this harvesting script.
        ipdb.set_trace()
        print(ex)
        traceback.print_exc()
        driver.save_screenshot('cisco_tableRowWalker.png')