def queryByInput(broswer, area, tag):
    """Run a keyword search by typing ``area`` and ``tag`` into the search box.

    Both ``area`` and ``tag`` are decoded from UTF-8 before being typed.
    The search box (id ``sole-input``) is not cleared here; the text
    ``" <area> <tag>"`` is sent to it as-is.  The query is then submitted
    through ``gotoQuery`` and the pointer is moved over the element with
    id ``cards-level1``.

    The fixed sleeps give the page time to react between steps.
    """
    searchBox = util.findElementById(broswer, "sole-input")
    time.sleep(3)

    areaText = area.decode("utf-8")
    tagText = tag.decode("utf-8")
    searchBox.send_keys(" " + areaText + " " + tagText)
    time.sleep(1)
    time.sleep(3)

    # Submit the query, then hover the result card container.
    gotoQuery(broswer)
    resultCards = util.findElementById(broswer, "cards-level1")
    ActionChains(broswer).move_to_element(resultCards).perform()
    time.sleep(2)
def chooseArea(broswer, area, type_):
    """Pick an area and one of its sub-entries from the city panel.

    Steps, each followed by a short fixed pause:

    1. Click the ``li[data-type="city-panel"]`` filter to open the panel.
    2. Inside the ``hd-list-wrap`` list, hover the ``li`` whose
       ``data-value`` attribute equals ``area``.
    3. Inside the element with id ``bsAreaList``, click the link whose
       text is ``type_``.

    NOTE(review): the trailing ``False`` passed to the ``util`` finders
    presumably makes them return a single element instead of a list (the
    results are used as single elements here) -- confirm in ``util``.
    """
    time.sleep(2)
    cityPanelTab = util.findElementsByCssSelector(
        broswer, "li[data-type=\"city-panel\"]", False)
    time.sleep(1)
    cityPanelTab.click()
    time.sleep(1)

    util.switchToCurrentWindow(broswer)
    areaList = util.findElementsByClassName(broswer, "hd-list-wrap", False)
    time.sleep(1)

    # Hovering the area row is what precedes the lookup of "bsAreaList".
    areaSelector = 'li[data-value="' + area + '"]'
    areaRow = util.findElementsByCssSelector(areaList, areaSelector, False)
    time.sleep(1)
    ActionChains(broswer).move_to_element(areaRow).perform()
    time.sleep(1)

    subAreaList = util.findElementById(broswer, "bsAreaList")
    subAreaLink = util.findElementsByLinkText(subAreaList, type_, False)
    subAreaLink.click()
    time.sleep(1)
def getAllArea(broswer):
    """Collect every area of the city panel together with its sub-entries.

    Opens the ``li[data-type="city-panel"]`` filter, then hovers each
    ``li`` of the ``hd-list-wrap`` list in turn.  After each hover the
    ``li`` items under the element with id ``bsAreaList`` are read.

    Returns a dict mapping each area's ``data-value`` attribute to the
    list of ``innerHTML`` values of the first ``<a>`` in every sub-entry.
    The number of sub-entries per area and the final number of areas are
    printed as progress output.
    """
    time.sleep(2)
    cityPanelTab = util.findElementsByCssSelector(
        broswer, "li[data-type=\"city-panel\"]", False)
    time.sleep(1)
    cityPanelTab.click()
    time.sleep(1)

    util.switchToCurrentWindow(broswer)
    areaList = util.findElementsByClassName(broswer, "hd-list-wrap", False)
    time.sleep(1)

    areas = {}
    for areaRow in areaList.find_elements_by_tag_name("li"):
        time.sleep(1)
        ActionChains(broswer).move_to_element(areaRow).perform()
        time.sleep(1)

        areaName = areaRow.get_attribute("data-value")
        subAreaList = util.findElementById(broswer, "bsAreaList")
        subRows = subAreaList.find_elements_by_tag_name("li")
        areas[areaName] = [
            row.find_elements_by_tag_name("a")[0].get_attribute("innerHTML")
            for row in subRows
        ]
        print(len(areas[areaName]))

    print(len(areas))
    return areas
def queryByChooseLabel(broswer, area, area_type, tag, tag_type):
    """Run a query by selecting filter labels instead of typing keywords.

    The area is chosen first (``chooseArea``), then the category
    (``chooseType``).  After each selection the pointer is moved over the
    element with id ``cards-level1`` and the function waits three seconds.
    The element is looked up once, after the area has been chosen, and the
    same reference is reused for the second hover.
    """
    def hoverAndWait(target):
        # Move the pointer onto the result cards, then let the page settle.
        ActionChains(broswer).move_to_element(target).perform()
        time.sleep(3)

    chooseArea(broswer, area, area_type)
    time.sleep(2)
    resultCards = util.findElementById(broswer, "cards-level1")
    hoverAndWait(resultCards)

    chooseType(broswer, tag, tag_type)
    hoverAndWait(resultCards)
def _shutdownBrowser(broswer, pid):
    """Best-effort teardown of the browser windows and the driver process.

    A failure while closing the windows is printed and does not prevent
    the driver process from being terminated.  ``None`` values (nothing
    was started yet) are skipped.
    """
    if broswer is not None:
        try:
            util.closeAllHandle(broswer)
        except Exception as e:
            # Keep going: the driver process still has to be terminated.
            print(e)
    print(pid)
    if pid is not None:
        util.terminatePid(pid)


def getReady(cityName=None):
    """Start the browser on Baidu Maps and switch it to the given city.

    ``cityName`` is decoded from UTF-8 and typed into the search box.
    When it is omitted, the module-level ``cityName`` is used, which is
    what this function relied on before the parameter existed.

    Returns ``(pid, broswer)`` where ``pid`` is whatever ``util.getPid``
    derives from the process lists taken before and after the browser was
    created, for the driver executable named in ``driverType``.

    If any step after the browser has been created fails, the browser and
    the driver process are torn down before the exception is re-raised, so
    a failed start-up does not leave them behind.
    """
    if cityName is None:
        try:
            cityName = globals()["cityName"]
        except KeyError:
            raise NameError("global name 'cityName' is not defined")

    # Alternatives: "phantomjs.exe", "chromedriver.exe".
    driverType = "geckodriver.exe"

    pidsBefore = psutil.pids()
    broswer = util.createBrowser("https://map.baidu.com/")
    pid = None
    try:
        time.sleep(3)
        pidsAfter = psutil.pids()
        pid = util.getPid(pidsBefore, pidsAfter, driverType)
        util.switchToCurrentWindow(broswer)

        # Search for the city.
        inputElement = util.findElementById(broswer, "sole-input")
        inputElement.clear()
        inputElement.send_keys(cityName.decode("utf-8"))
        searchBtn = util.findElementById(broswer, "search-button")
        searchBtn.click()
        time.sleep(2)

        # Empty the box again and click it, then click the first
        # "city-special-item" entry.
        inputElement.clear()
        inputElement.click()
        caterElement = util.findElementsByClassName(
            broswer, "city-special-item")[0]
        caterElement.click()
    except Exception:
        _shutdownBrowser(broswer, pid)
        raise
    return pid, broswer


def main(cityName, area, area_type, tag, tag_type):
    """Scrape the POI result list for ``area`` + ``tag`` in ``cityName``.

    For every result page, each ``li`` of the first ``poilist`` element is
    parsed with ``parsePOI_Info``.  Entries whose uid is not yet listed in
    ``data/uid.txt`` are appended to ``data/info.txt`` and their uid is
    written to ``data/uid.txt``.

    ``area_type`` and ``tag_type`` are only needed by the label-based
    ``queryByChooseLabel`` alternative and are unused by the keyword
    search performed here.

    Any exception is printed rather than propagated.  The browser windows
    and the driver process are always cleaned up on exit.
    """
    pid = None
    broswer = None
    try:
        pid, broswer = getReady(cityName)
        # Alternative: queryByChooseLabel(broswer, area, area_type, tag, tag_type)
        queryByInput(broswer, area, tag)

        hasNext = True
        i = 1
        while hasNext:
            print(i)
            i += 1
            # Hard page cap: stop once the counter reaches 60, i.e. at
            # most 58 result pages are scraped.
            if i >= 60:
                break

            poiElement = util.findElementsByClassName(broswer, "poilist")[0]
            items = poiElement.find_elements_by_tag_name("li")
            already_scraw = file_func.readFileToList("data/uid.txt")
            for item in items:
                pageSource = util.getInnerHtml(item)
                uid, poiInfo = parsePOI_Info(pageSource)
                if uid not in already_scraw:
                    print(uid + " not in")
                    file_func.listAppendToTxt("data/info.txt", poiInfo)
                    file_func.writeToTxt("data/uid.txt", uid)
                else:
                    print(uid + " in")

            time.sleep(1)
            hasNext = goToNextPage(broswer)
            # An empty list after paging triggers a fresh search.
            if getItemCount(broswer) == 0:
                gotoQuery(broswer)

            util.switchToCurrentWindow(broswer)
            time.sleep(2)
            cardElement = util.findElementById(broswer, "toast-wrapper")
            time.sleep(2)
            ActionChains(broswer).move_to_element(cardElement).perform()
            time.sleep(10)
    except Exception as e:
        print(e)
    finally:
        # Single cleanup path for success, failure and interruption.
        _shutdownBrowser(broswer, pid)
def chooseOrder(broswer, sortType):
    """Select a sort order from the sort panel.

    ``sortType`` is the part of the option's ``data-value`` attribute
    after ``sort:``.  Values listed by the original author:
    ``"data_type,0"``, ``"overall_rating,0"``, ``"hot_value,0"``,
    ``"price,0"``, ``"price,1"``.

    The ``li[data-type="sort-panel"]`` filter is clicked to open the
    panel, then the matching ``<a>`` inside the element with id
    ``select_panel_sort`` is hovered and clicked.
    """
    time.sleep(1)
    sortPanelTab = util.findElementsByCssSelector(
        broswer, "li[data-type=\"sort-panel\"]", False)
    time.sleep(2)
    sortPanelTab.click()
    time.sleep(1)

    sortPanel = util.findElementById(broswer, "select_panel_sort")
    optionSelector = 'a[data-value="sort:' + sortType + '"]'
    sortOption = util.findElementsByCssSelector(
        sortPanel, optionSelector, False)
    time.sleep(1)

    ActionChains(broswer).move_to_element(sortOption).perform()
    sortOption.click()
    time.sleep(4)
def gotoQuery(broswer):
    """Click the element with id ``search-button`` and wait five seconds."""
    util.findElementById(broswer, "search-button").click()
    time.sleep(5)