def main():
    """Walk the Netgear download center and record files in ``netgear.sqlite3``.

    The first integer found in each command-line argument becomes one element
    of the global ``startTrail``.  The function creates the ``TFiles`` table if
    needed, opens Firefox via ``harvest_utils.getFirefox()``, loads the
    download-center start page and hands control to ``walkProdCat()``.

    On any exception the traceback is printed, an ipdb session is opened and a
    screenshot is saved.  The browser and the database connection are always
    released on the way out.
    """
    global startTrail, prevTrail, driver, conn
    # Pre-bind both handles so the except/finally clauses can tell how far
    # set-up got; otherwise an early failure (e.g. a non-numeric argument)
    # would raise a second error during cleanup and hide the real one.
    driver = None
    conn = None
    try:
        startTrail = [int(re.search(r'\d+', _).group(0)) for _ in sys.argv[1:]]
        uprint('startTrail=%s'%startTrail)
        conn = sqlite3.connect('netgear.sqlite3')
        sql("CREATE TABLE IF NOT EXISTS TFiles("
            "id INTEGER NOT NULL,"
            "vendor TEXT,"
            "model TEXT,"
            "revision TEXT,"
            "fw_date TEXT,"
            "fw_ver TEXT,"
            "file_name TEXT,"
            "file_size TEXT,"
            "page_url TEXT,"
            "file_url TEXT,"
            "tree_trail TEXT,"
            "file_sha1 TEXT,"
            "PRIMARY KEY (id),"
            "UNIQUE(vendor,model,revision,file_name)"
            ");")
        driver = harvest_utils.getFirefox()
        harvest_utils.driver = driver
        driver.get("http://downloadcenter.netgear.com/")
        prevTrail = []
        # tmr = ClickOutOverlayTimer()
        # tmr.start()
        walkProdCat()
    except Exception as ex:  # `ex` is kept so it can be inspected from ipdb
        traceback.print_exc(); ipdb.set_trace()
        if driver is not None:
            driver.save_screenshot('netgear_exc.png')
    finally:
        if driver is not None:
            driver.quit()
        if conn is not None:
            conn.close()
def main():
    """Walk the TP-Link download center and record files in ``tplink.sqlite3``.

    The first integer found in each command-line argument becomes one element
    of the global ``startTrail``.  The function creates the ``TFiles`` table if
    needed, opens Firefox via ``harvest_utils.getFirefox()``, loads the
    download-center page and hands control to ``marketWalker()``.

    On any exception an ipdb session is opened, the traceback is printed and a
    screenshot named after the script and this function is saved.  The browser
    and the database connection are released in ``finally`` so they are not
    leaked when the walk fails part-way.
    """
    global startTrail, prevTrail, driver, conn
    # Pre-bind both handles so cleanup can tell how far set-up got.
    driver = None
    conn = None
    try:
        startTrail = [int(re.search(r'\d+', _).group(0)) for _ in sys.argv[1:]]
        uprint('startTrail=%s'%startTrail)
        conn = sqlite3.connect('tplink.sqlite3')
        sql(
            "CREATE TABLE IF NOT EXISTS TFiles("
            "id INTEGER NOT NULL,"
            "model TEXT,"
            "revision TEXT,"
            "fw_date DATE,"
            "fw_ver TEXT,"
            "fw_desc TEXT,"
            "file_name TEXT,"
            "file_size INTEGER,"
            "page_url TEXT,"
            "file_url TEXT,"
            "tree_trail TEXT,"
            "file_sha1 TEXT,"
            "PRIMARY KEY (id)"
            "UNIQUE(model,revision,file_name)"
            ");")
        driver = harvest_utils.getFirefox()
        harvest_utils.driver = driver
        driver.get('http://www.tp-link.com/en/download-center.html')
        prevTrail = []
        marketWalker()
    except Exception as ex:  # `ex` is kept so it can be inspected from ipdb
        ipdb.set_trace()
        traceback.print_exc()
        if driver is not None:
            driver.save_screenshot(getScriptName()+'_'+getFuncName()+'_exc.png')
    finally:
        # Previously these ran only on the success path inside ``try``.
        if driver is not None:
            driver.quit()
        if conn is not None:
            conn.close()
def enterElem(e:WebElement, func):
    """Follow the ``href`` of element *e*, then call *func* with the old URL.

    The URL the browser is on before navigating is logged and passed to
    *func* as its single argument; what *func* does with it is up to the
    callee.
    """
    origin = driver.current_url
    uprint('prev_url='+origin)
    target = e.get_attribute('href')
    uprint('next_url='+target)
    driver.get(target)
    func(origin)
def main():
    """Walk the Cisco software download tree and record files in ``cisco.sqlite3``.

    The first integer found in each command-line argument becomes one element
    of the global ``startTrail``.  The function creates the ``TFiles`` table if
    needed, opens Firefox through ``harvest_utils.getFirefox`` with the
    absolute path of ``cisco_files`` as its first argument, sets a 2-second
    implicit wait, loads the download page and runs ``treeWalker()``.
    """
    global startTrail, prevTrail, driver, conn

    startTrail = [int(re.search(r'\d+', arg).group(0)) for arg in sys.argv[1:]]
    uprint('startTrail=%s'%startTrail)

    conn = sqlite3.connect('cisco.sqlite3')
    create_tfiles = (
        "CREATE TABLE IF NOT EXISTS TFiles("
        "id INTEGER NOT NULL,"
        "model TEXT,"
        "fw_date DATE,"
        "fw_ver TEXT,"
        "file_title TEXT,"
        "file_name TEXT,"
        "file_size INTEGER,"
        "need_contract INTEGER,"  # 1=needContract, -1=Deferral
        "page_url TEXT,"
        "tree_trail TEXT,"  # pssub_7_1_1_0_0_0 => 7_1_1_0_0_0
        "file_sha1 TEXT,"
        "PRIMARY KEY (id)"
        "UNIQUE(model,fw_ver,file_name,fw_date)"
        ");"
    )
    sql(create_tfiles)

    download_dir = path.abspath('cisco_files')
    driver = harvest_utils.getFirefox(download_dir, 2, False)
    driver.implicitly_wait(2.0)
    harvest_utils.driver = driver
    driver.get('https://software.cisco.com/download/')

    prevTrail = []
    treeWalker()
    prevTrail.pop()
def enterFrame(iframeId:str):
    """Load the ``src`` of the iframe with id *iframeId* as the top-level page.

    The transition is logged as ``old => new``.  Afterwards the function
    retries ``isReadyState`` via ``retryUntilTrue(isReadyState, 10, 2)``; a
    ``TimeoutException`` from that wait is printed and otherwise ignored.
    """
    origin = driver.current_url
    frame_src = waitVisible('iframe[id=%s]'%iframeId).get_attribute('src')
    ulog('%s => %s'%(origin, frame_src))
    driver.get(frame_src)
    try:
        retryUntilTrue(isReadyState, 10, 2)
    except TimeoutException as ex:
        # Best effort: carry on even if the page never reports ready.
        print(ex)
def main():
    """Iterate the D-Link TSD model selectors and harvest each model's page.

    Optional command-line arguments give the option index to start from in
    the model-prefix selector (argv[1]) and the model-suffix selector
    (argv[2]); both default to 1.  For every prefix/suffix pair the "go"
    link is clicked, ``harvestPage2()`` is run, and the browser goes back to
    re-acquire the selectors.  After the walk the function waits until no
    ``.part`` files remain in ``dlDir`` and then quits the browser.
    """
    global driver, conn

    args = sys.argv
    startPfxIdx = int(args[1]) if len(args) > 1 else 1
    startSfxIdx = int(args[2]) if len(args) > 2 else 1

    harvest_utils.driver = getFirefox(dlDir)
    driver = harvest_utils.driver

    conn = sqlite3.connect('dlink_tsd.sqlite3')
    csr = conn.cursor()
    csr.execute("CREATE TABLE IF NOT EXISTS dlink("
                "model TEXT,"
                "file_name TEXT PRIMARY KEY,"
                "desc TEXT,"
                "href TEXT,"
                "file_sha1 TEXT)")
    conn.commit()

    driver.get('http://tsd.dlink.com.tw/')

    pfxCss = 'select.quickFindAndSearchForm:nth-child(4)'
    sfxCss = 'select.quickFindAndSearchForm:nth-child(6)'

    modelPfxSel = Select(waitClickable(pfxCss))
    numModelPfx = len(modelPfxSel.options)
    for pfxIdx in range(startPfxIdx, numModelPfx):
        modelPfxSel.select_by_index(pfxIdx)
        modelSfxSel = Select(waitClickable(sfxCss))
        numModelSfx = len(modelSfxSel.options)
        for sfxIdx in range(startSfxIdx, numModelSfx):
            print("pfxIdx=%d, sfxIdx=%d"%(pfxIdx,sfxIdx))
            # The resume offset applies only to the first suffix loop entered.
            startSfxIdx = 1
            modelSfxSel.select_by_index(sfxIdx)
            modelName = (modelPfxSel.options[pfxIdx].text
                         + '-'
                         + modelSfxSel.options[sfxIdx].text)
            print("Page1: modelName=",modelName)
            waitClickable('.prodtd > p:nth-child(3) > a:nth-child(7)').click()
            harvestPage2()
            driver.back()
            # Re-acquire both selectors after navigating back.
            modelPfxSel = Select(waitClickable(pfxCss))
            modelPfxSel.select_by_index(pfxIdx)
            modelSfxSel = Select(waitClickable(sfxCss))

    # wait until all '.part' vanished
    while True:
        downloading = [name for name in os.listdir(dlDir)
                       if name.endswith('.part')]
        if not downloading:
            break
        print('-- Downloading : %s wait 3 seconds'%downloading)
        time.sleep(3)

    print('-- terminate firefox')
    driver.quit()
def selectProduct(prev_url):
    """Visit every product link (``.items a``) on the current result page.

    Waits for the search-results notification text to change, then for each
    product index from ``getStartIdx()`` onward pushes the index on
    ``prevTrail``, enters the link with ``selectSupport`` as the handler, and
    pops the index again.  When done it navigates back to *prev_url* and
    waits for the notification text to change once more.

    Any exception opens an ipdb session, prints the traceback and saves a
    screenshot.
    """
    global prevTrail, searchResultsNotification
    try:
        searchResultsNotification = waitTextChanged(
            '.search-results-notification',
            searchResultsNotification).strip()

        products = getElems('.items a')
        retryUntilTrue(lambda:ulog('products=%s'%[(i,_.text) for i,_ in enumerate(products)])>=0)

        numProducts = len(products)
        idx = getStartIdx()
        while idx < numProducts:
            ulog('click %s,"%s"'%(idx,products[idx].text))
            prevTrail.append(idx)
            enterElem(products[idx], selectSupport)
            prevTrail.pop()
            # Fetch the elements again now that the handler has returned.
            products = getElems('.items a')
            idx += 1

        driver.get(prev_url)
        searchResultsNotification = waitTextChanged(
            '.search-results-notification',
            searchResultsNotification).strip()
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
        driver.save_screenshot(getScriptName()+'_'+getFuncName()+'_excep.png')
def selectCategory(prev_url):
    """Walk the category filter links (``.filter-list a``) of a search page.

    Behaviour depends on the current depth, ``len(prevTrail)``:

    * depth 1: wait for the filter list to appear; if it does not, log
      "No search results", go back to *prev_url* when one was given, and
      return.  Otherwise read the results notification.
    * depth 2: wait for the results notification to change.

    Each category from ``getStartIdx()`` onward is then entered.  When the
    trail (after pushing the index) has length 2 the handler is
    ``selectCategory`` again, otherwise ``selectProduct``.  Finally, if
    *prev_url* is set, the browser returns there and waits for the
    notification text to change.

    Args:
        prev_url: URL to return to when done, or ``None`` when there is no
            page to go back to.

    Any exception opens an ipdb session, prints the traceback and saves a
    screenshot.
    """
    global category, prevTrail, searchResultsNotification, driver
    try:
        if len(prevTrail)==1:
            try:
                waitVisible('.filter-list', 30, 0.4)
            except TimeoutException:
                ulog('No search results, url=%s'%driver.current_url)
                # prev_url may be None; navigating to None would raise
                # inside the WebDriver instead of skipping this search.
                if prev_url:
                    driver.get(prev_url)
                return
            searchResultsNotification=waitText('.search-results-notification').strip()
            # Your search for f returned 4196 results
        elif len(prevTrail)==2:
            searchResultsNotification=waitTextChanged('.search-results-notification',
                searchResultsNotification).strip()
            # Your search for f returned 67 results
        ulog('%s'%searchResultsNotification)
        category = waitText('.accordion-activate a')
        ulog('category="%s"'%category)
        cats=getElems('.filter-list a')
        retryUntilTrue(lambda:ulog('cats=%s'%[(i,_.text)for i,_ in enumerate(cats)]))
        numCats=len(cats)
        startIdx = getStartIdx()
        for idx in range(startIdx, numCats):
            ulog('click %s,"%s"'%(idx,cats[idx].text))
            prevTrail+=[idx]
            if len(prevTrail)==2:
                enterElem(cats[idx], selectCategory)
            else:
                enterElem(cats[idx], selectProduct)
            prevTrail.pop()
            # Fetch the elements again now that the handler has returned.
            cats = getElems('.filter-list a')
        if prev_url:
            driver.get(prev_url)
            searchResultsNotification=waitTextChanged('.search-results-notification',
                searchResultsNotification).strip()
    except Exception as ex:  # `ex` is kept so it can be inspected from ipdb
        ipdb.set_trace()
        traceback.print_exc()
        driver.save_screenshot(getScriptName()+'_'+getFuncName()+'_excep.png')
def main():
    """Search Belkin support for each keyword and record files in ``belkin.sqlite3``.

    The first integer found in each command-line argument becomes one element
    of the global ``startTrail``.  The function creates the ``TFiles`` table if
    needed, opens Firefox via ``harvest_utils.getFirefox()`` and, for every
    entry of ``keywords`` from ``getStartIdx()`` onward, loads the
    support-search page for that keyword and runs ``selectCategory(None)``
    with the keyword index pushed on ``prevTrail``.

    On any exception an ipdb session is opened, the traceback is printed and a
    screenshot is saved.  The browser and database connection are released in
    ``finally``.
    """
    global startTrail, prevTrail, driver, conn
    # Pre-bind both handles so the except/finally clauses can tell how far
    # set-up got instead of failing on an unbound name.
    driver = None
    conn = None
    try:
        startTrail = [int(re.search(r'\d+', _).group(0)) for _ in sys.argv[1:]]
        ulog('startTrail=%s'%startTrail)
        conn=sqlite3.connect('belkin.sqlite3')
        sql("CREATE TABLE IF NOT EXISTS TFiles("
            "id INTEGER NOT NULL,"
            "category TEXT,"  # ROUTER > N900 DB Wireless Router
            "product_name TEXT,"  # Advance N900 Dual-Band Wireless Router
            "model TEXT,"  # F9K1104
            "rel_date DATE,"  # Post Date: 06/20/2012
            "fw_ver TEXT,"  # Download version: 1.00.23
            "file_size INTEGER,"  # Size: 3.74 MB
            "page_url TEXT,"  # http://belkin.force.com/Articles/articles/en_US/Download/7371
            "download_url TEXT,"  # http://nextnet.belkin.com/update/files/F9K1104/v1/WW/F9K1104_WW_1.0.23.bin
            "tree_trail TEXT,"  # [26, 2, 1, 0, 0]
            "file_sha1 TEXT,"  # 5d3bc16eec2f6c34a5e46790b513093c28d8924a
            "PRIMARY KEY (id)"
            "UNIQUE(product_name,model,rel_date,fw_ver)"
            ")")
        driver=harvest_utils.getFirefox()
        # driver.implicitly_wait(2.0)
        harvest_utils.driver=driver
        # Start from an empty trail so the walk does not depend on whatever
        # value the global held before this call.
        prevTrail = []
        startIdx=getStartIdx()
        for idx in range(startIdx, len(keywords)):
            keyword = parse.quote_plus(keywords[idx])
            ulog('idx=%s, search "%s"'%(idx,keyword))
            driver.get('http://www.belkin.com/us/support-search?search=%s'
                       %keyword)
            prevTrail+=[idx]
            selectCategory(None)
            prevTrail.pop()
    except Exception as ex:  # `ex` is kept so it can be inspected from ipdb
        ipdb.set_trace()
        traceback.print_exc()
        if driver is not None:
            driver.save_screenshot(getScriptName()+'_'+getFuncName()+'_excep.png')
    finally:
        if driver is not None:
            driver.quit()
        if conn is not None:
            conn.close()
def selectSupport(prev_url):
    """Process one product page: follow each firmware download link.

    Reads the product name and model number from ``.product-name-price``
    (the model is the text after ``#`` in the "Part # ..." line).  It then
    looks for the ``.icon-list-header-container`` whose text starts with
    ``DOWNLOAD``:

    * If none exists, a row holding only category, product name, model and
      trail is upserted into ``TFiles`` and the browser returns to
      *prev_url*.
    * Otherwise every link in that container whose text contains the model
      number is entered with ``selectDownload`` as the handler; other links
      are skipped.

    Args:
        prev_url: URL to return to when this page has been processed.

    Any exception opens an ipdb session, prints the traceback and saves a
    screenshot.
    """
    global prevTrail,category,productName,model,driver
    CSS=driver.find_element_by_css_selector
    try:
        waitVisible('.product-name-price')
        productName=CSS('.product-name-price h2').text.strip()
        ulog('productName="%s"'%productName)
        # 'Wireless G Travel Router'
        model=CSS('.product-name-price p').text.strip()
        # 'Part # F5D7233'
        model = model.split('#')[1].strip()
        ulog('model="%s"'%model)
        # 'F5D7233'
        if not productName:
            ulog('productName is empty, bypass!')
            driver.get(prev_url)
            # waitText('.search-results-notification')
            return
        try:
            support = next(_ for _ in getElems('.icon-list-header-container')
                           if getElemText(_).startswith('DOWNLOAD'))
        except StopIteration:
            ulog('No download in '+driver.current_url)
            trailStr=str(prevTrail)
            # The bind parameters must follow the column order; previously
            # :model and :productName were swapped, which stored the model
            # in product_name and the product name in model.
            sql("INSERT OR REPLACE INTO TFiles(category, product_name, model, tree_trail) VALUES (:category, :productName, :model, :trailStr)",
                glocals())
            ulog('UPSERT "%(category)s", "%(model)s", "%(productName)s" %(prevTrail)s'%glocals())
            driver.get(prev_url)
            # waitText('.search-results-notification')
            return
        downloads = support.find_elements_by_css_selector('a')
        numDownloads = len(downloads)
        startIdx=getStartIdx()
        for idx in range(startIdx, numDownloads):
            txt=downloads[idx].text
            if model not in txt:
                ulog('bypass %s,"%s" because it\'s Portal'%(idx,txt))
                continue
            ulog('click %s,"%s"'%(idx,txt))
            prevTrail += [idx]
            enterElem(downloads[idx],selectDownload)
            prevTrail.pop()
            # Fetch the container and its links again now that the handler
            # has returned.
            support = next(_ for _ in getElems('.icon-list-header-container')
                           if getElemText(_).startswith('DOWNLOAD'))
            downloads = support.find_elements_by_css_selector('a')
        driver.get(prev_url)
        # waitText('.search-results-notification')
    except Exception as ex:  # `ex` is kept so it can be inspected from ipdb
        ipdb.set_trace()
        traceback.print_exc()
        driver.save_screenshot(getScriptName()+'_'+getFuncName()+'_excep.png')
def selectDownload(prev_url):
    """Parse one download article and upsert every file it lists.

    The article lives in an iframe whose ``name`` contains ``inlineFrame``;
    its ``src`` is loaded as the top-level page and the ``.sfdc_richtext``
    HTML is converted to Markdown text with ``html2text``.
    ``getSizeDateVersion(artTxt, idx)`` is then called for increasing
    ``idx`` (starting at ``getStartIdx()``) until it raises
    ``StopIteration``; each result is upserted into ``TFiles`` together with
    the current category, product name, model and trail.  Finally the
    browser returns to *prev_url*.

    If the iframe does not appear within the wait, the current URL is
    logged, the browser returns to *prev_url* and the function stops.

    Args:
        prev_url: URL to return to when this page has been processed.

    Any other exception opens an ipdb session, prints the traceback and
    saves a screenshot.
    """
    global driver,category,productName,model,prevTrail
    try:
        # switch to frame
        try:
            pageUrl=waitVisible('iframe[name~=inlineFrame]',30,0.4).get_attribute('src')
        except TimeoutException:
            ulog('url= '+driver.current_url)
            driver.get(prev_url)
            # Without this return the code fell through to
            # driver.get(pageUrl) with pageUrl never assigned.
            return
        # http://www.belkin.com/us/support-article?articleNum=4879
        driver.get(pageUrl)
        # convert html to Markdown Text
        page_src = waitVisible('.sfdc_richtext').get_attribute('innerHTML')
        h = html2text.HTML2Text()
        h.ignore_emphasis=True
        h.body_width=0
        artTxt = h.handle(page_src)
        startIdx=getStartIdx()
        for idx in range(startIdx, sys.maxsize):
            try:
                fileSize,relDate,fwVer,downUrl=getSizeDateVersion(artTxt, idx)
            except StopIteration:
                break
            prevTrail+=[idx]
            trailStr=str(prevTrail)
            # The local names above are the bind parameters picked up via
            # glocals(); do not rename them.
            sql("INSERT OR REPLACE INTO TFiles("
                " category, product_name, model"
                ",rel_date,fw_ver,file_size,page_url,download_url,tree_trail)"
                " VALUES"
                "(:category, :productName, :model,"
                ":relDate,:fwVer,:fileSize,:pageUrl,:downUrl,:trailStr)",
                glocals())
            ulog('UPSERT "%(category)s", "%(productName)s", "%(model)s",'
                 ' "%(relDate)s", "%(fwVer)s", %(fileSize)s,'
                 ' "%(downUrl)s", %(prevTrail)s '%glocals())
            prevTrail.pop()
        driver.get(prev_url)
        # waitVisible('.product-name-price')
    except Exception as ex:  # `ex` is kept so it can be inspected from ipdb
        ipdb.set_trace()
        traceback.print_exc()
        driver.save_screenshot(getScriptName()+'_'+getFuncName()+'_excep.png')
def goToUrl(url:str):
    """Log *url*, load it in the shared browser, then wait via ``waitUntil(isReadyState)``."""
    ulog('%s'%url)
    driver.get(url)
    waitUntil(isReadyState)
def main():
    """Drill down the Netgear download center by category, family and product.

    Optional command-line arguments give the option index to start from in
    the category (argv[1]), family (argv[2]) and product (argv[3]) selectors;
    all default to 0.  For every product the listed download links are read
    and upserted into the ``TFiles`` table of ``netgear.sqlite3``.

    Any exception inside the walk opens an ipdb session and prints the
    traceback; the browser is quit afterwards either way.
    """
    global driver, conn

    args = sys.argv
    startCatIdx = int(args[1]) if len(args) > 1 else 0
    startFamIdx = int(args[2]) if len(args) > 2 else 0
    startPrdIdx = int(args[3]) if len(args) > 3 else 0

    harvest_utils.driver = getFirefox(dlDir)
    driver = harvest_utils.driver

    conn = sqlite3.connect('netgear.sqlite3')
    csr = conn.cursor()
    csr.execute("CREATE TABLE IF NOT EXISTS TFiles("
                "brand TEXT,"
                "category TEXT,"
                "family TEXT,"
                "product TEXT,"  # -- is model
                "desc TEXT,"  # -- is fileName
                "href TEXT,"
                "file_sha1 TEXT,"
                "PRIMARY KEY (product,desc)"
                ")")
    conn.commit()

    driver.get('http://downloadcenter.netgear.com/')
    # click DrillDown
    waitClickable('#ctl00_ctl00_ctl00_mainContent_localizedContent_bodyCenter_BasicSearchPanel_btnAdvancedSearch').click()

    # wait Page2
    try:
        numCat = len(Select(waitClickable(catSelCss)).options)
        for catIdx in range(startCatIdx, numCat):
            catSel = Select(waitClickable(catSelCss))
            print('catIdx=',catIdx)
            catTxt = catSel.options[catIdx].text
            uprint('catTxt='+catTxt)
            catSel.select_by_index(catIdx)
            waitTextChanged(famSelCss)

            numFam = len(Select(waitClickable(famSelCss)).options)
            for famIdx in range(startFamIdx, numFam):
                famSel = Select(waitClickable(famSelCss))
                print('famIdx=',famIdx)
                # The resume offset applies only to the first family loop.
                startFamIdx = 0
                famTxt = famSel.options[famIdx].text
                uprint('famTxt='+famTxt)
                famSel.select_by_index(famIdx)
                waitTextChanged(prdSelCss)

                numPrd = len(Select(waitClickable(prdSelCss)).options)
                for prdIdx in range(startPrdIdx, numPrd):
                    prdSel = Select(waitClickable(prdSelCss))
                    # The resume offset applies only to the first product loop.
                    startPrdIdx = 0
                    print("catIdx=%d, famIdx=%d, prdIdx=%d"%(catIdx,famIdx,prdIdx))
                    prdTxt = prdSel.options[prdIdx].text
                    uprint('cat,fam,prd=("%s","%s","%s")'%(catTxt,famTxt,prdTxt))

                    # Wait for the "waiting" indicator to show up and then
                    # go away after selecting the product.
                    prdWaiting = waitElem(prdWaitingCss)
                    prdSel.select_by_index(prdIdx)
                    appeared = WebDriverWait(driver, 5, poll_frequency=0.5)
                    appeared.until(lambda _: prdWaiting.is_displayed())
                    vanished = WebDriverWait(driver, 60, poll_frequency=0.5)
                    vanished.until(lambda _: not prdWaiting.is_displayed())

                    numResults = waitText(numResultsCss,3)
                    print('numResults=',numResults)
                    if numResults is None:
                        continue
                    numResults = int(re.search(r"\d+", numResults).group(0))
                    if numResults > 10:
                        waitClickable("#lnkAllDownloadMore",3).click()

                    try:
                        erItems = getElems('a.register-product.navlistsearch',3)
                    except TimeoutException:
                        erItems = getElems('div#LargeFirmware > ul > li > div > p > a.navlistsearch',3)

                    if len(erItems) != numResults:
                        print('Error, numResults=%d, but len(erItems)=%d'
                              %(numResults,len(erItems)))

                    for erItem in erItems:
                        if not erItem.is_displayed():
                            continue
                        desc = getElemText(erItem)
                        uprint('desc="%s"'%desc)
                        href = (erItem.get_attribute('data-durl')
                                or erItem.get_attribute('href'))
                        print('href=',href)
                        if not href.startswith('http'):
                            print('Error: href=',href)
                        # catTxt/famTxt/prdTxt/desc/href are bound by name
                        # through locals(); do not rename them.
                        sql("INSERT OR REPLACE INTO TFiles"
                            "(brand,category,family,product,desc,href)VALUES"
                            "('Netgear',:catTxt,:famTxt,:prdTxt,:desc,:href)",
                            locals())
                        uprint('INSERT '
                               '("%(catTxt)s","%(famTxt)s","%(prdTxt)s","%(desc)s","%(href)s")'
                               %locals())
    except Exception as ex:
        import ipdb; ipdb.set_trace()
        print(ex)
        import traceback; traceback.print_exc()

    print('-- terminate firefox')
    driver.quit()
def main():
    """Harvest firmware listings from the Linksys support sitemap.

    Optional command-line arguments give the model index (argv[1]) and the
    revision index (argv[2]) to start from; both default to 0.  For every
    model link on the sitemap the function opens the model page, follows
    "Download Software", expands each ``.article-accordian`` section and
    inserts one row per "Download" entry into the ``TFiles`` table of
    ``Linksys.sqlite3``.  Models that already have a row in ``TFiles`` are
    skipped.

    ``http.client.IncompleteRead`` and any other ``Exception`` raised during
    the walk are handled here; the browser is quit at the end.
    """
    startModelIdx = int(sys.argv[1]) if len(sys.argv)>1 else 0
    startRevisionIdx = int(sys.argv[2]) if len(sys.argv)>2 else 0
    brand='Linksys'
    global driver,conn
    harvest_utils.driver=getFirefox()
    driver = harvest_utils.driver
    conn=sqlite3.connect('Linksys.sqlite3')
    csr=conn.cursor()
    csr.execute(
        "CREATE TABLE IF NOT EXISTS TFiles("
        "brand TEXT,"
        "model TEXT,"
        "revision TEXT," # hardware version
        "fw_date DATE,"
        "fw_ver TEXT,"
        "file_title TEXT,"
        "file_size INTEGER,"
        "href TEXT,"
        "file_sha1 TEXT,"
        "PRIMARY KEY (brand,model,revision,file_title)"
        ");")
    conn.commit()
    driver.get('http://www.linksys.com/us/support/sitemap/')
    try:
        numModels = getNumElem('.item ul li a')
        print('numModels=',numModels)
        for modelIdx in range(startModelIdx, numModels):
            # The start offset is consumed by the range() above; reset it.
            startModelIdx=0
            modelElm = getElems('.item ul li a')[modelIdx]
            modelText = getElemText(modelElm, 5)
            print('modelIdx=',modelIdx)
            uprint('modelText="%s"'%modelText)
            # guess Possible Model
            model = guessModel(modelText)
            print('model=',model)
            # Skip models that already have at least one row in TFiles.
            rows = csr.execute(
                "SELECT model from TFiles WHERE model=:model",locals()
                ).fetchall()
            if rows:
                print('model "%s" already in TFiles, bypass!!'%model)
                continue
            modelElm.click()
            # click 'Download Software'
            try:
                waitClickable('a[title="Download Software"]', 40).click()
            except TimeoutException:
                # No download link: record the model with an empty revision
                # and return to the sitemap.
                print('No "Download Software" link found, bypass!!')
                csr.execute(
                    "INSERT INTO TFiles(brand,model,revision)VALUES"
                    "(:brand,:model,'')", locals())
                conn.commit()
                print('INSERT model="%s"'%model)
                driver.back()
                continue
            # enumerate all accordians
            accordians = getElems('.article-accordian', 10)
            numAccordians=len(accordians)
            print('numAccordians=',numAccordians)
            print('driver.current_url=', driver.current_url)
            for revisionIdx in range(startRevisionIdx, numAccordians):
                # The start offset applies only to the first revision loop.
                startRevisionIdx=0
                accordians = getElems('.article-accordian')
                # expand accordian (one-based)
                accordian = accordians[revisionIdx]
                revisionTxt = getElemText(accordian)
                print('revisionIdx=',revisionIdx)
                uprint('revisionTxt="%s"'%revisionTxt)
                revision = guessRevision(revisionTxt)
                print('revision=',revision)
                divId = accordian.get_attribute('data-collapse-target')
                # expand accordian 'revision'='Hardware Version'
                driver.execute_script(
                    "document.querySelectorAll('.article-accordian')[%d].click()"
                    %(revisionIdx))
                divElm = waitVisible('#'+divId)
                divTxt = getElemTextUntilStabled(divElm,10,2.5)
                assert divTxt
                uprint('divTxt="%s"'%divTxt)
                numDowns = getCount(divTxt, 'Download')
                if numDowns ==0:
                    # No downloads for this revision: record model/revision only.
                    csr.execute(
                        "INSERT INTO TFiles(brand,model,revision)VALUES"
                        "(:brand,:model,:revision)",locals())
                    conn.commit()
                    print('INSERT "%(model)s","%(revision)s"'%locals())
                    continue
                # One shared iterator over the section's links: each download
                # entry below consumes links up to its own "Download" anchor.
                downElms =iter(divElm.find_elements_by_css_selector('a'))
                lastSpanEnd=0
                for downIdx in range(numDowns):
                    # Text span for this entry: from the end of the previous
                    # entry to the end of the line holding this 'Download'.
                    spanBegin = getNthIndex(divTxt, downIdx, 'Download')
                    spanEnd = divTxt.find('\n', spanBegin+len('Download'))
                    if spanEnd==-1:
                        spanEnd=len(divTxt)
                    # Lines of the span in reverse order (the 'Download' line
                    # comes first) before being handed to the guess* helpers.
                    foreword='\n'.join(reversed(divTxt[lastSpanEnd:spanEnd].splitlines()))
                    fwDate=guessDate(foreword)
                    fileSize = guessFileSize(foreword)
                    fwVer = guessVersion(foreword)
                    if fwVer:
                        fileTitle = guessFileTitle(foreword, fwVer)
                    else:
                        fileTitle = guessFileTitle2(foreword)
                    # Advance to the next link whose text starts with 'Download'.
                    while True:
                        downElm = next(downElms)
                        if downElm.text.strip().startswith('Download'):
                            break
                    href=downElm.get_attribute('href')
                    lastSpanEnd=spanEnd
                    # Bind parameters are taken by name from locals().
                    csr.execute(
                        "INSERT OR REPLACE INTO TFiles(brand,model,revision,"
                        "fw_date, fw_ver, file_title, file_size, "
                        "href) VALUES (:brand,:model,:revision,"
                        ":fwDate, :fwVer, :fileTitle,"
                        ":fileSize, :href)", locals())
                    conn.commit()
                    uprint("INSERT '%(model)s', '%(revision)s', '%(fwDate)s'"
                           ", '%(fwVer)s', '%(fileTitle)s', '%(fileSize)d'"
                           ", '%(href)s'" %locals())
            # Two steps back in browser history after the revisions are done.
            # NOTE(review): presumably download page -> model page -> sitemap;
            # confirm against the site flow.
            driver.back()
            driver.back()
    except http.client.IncompleteRead as ex:
        print(ex)
        import traceback; traceback.print_exc()
        print('-- Selenium exhausted')
        driver.quit()
    except Exception as ex:
        import ipdb; ipdb.set_trace()
        print(ex)
        print('driver.current_url=',driver.current_url)
        import traceback; traceback.print_exc()
    print('-- terminate firefox')
    driver.quit()