Exemplo n.º 1
0
def queryByInput(broswer, area, tag):
    """Type " <area> <tag>" into the search box and submit the query.

    `area` and `tag` are decoded from UTF-8 before being typed, so both are
    expected to be byte strings. Once the search has been submitted through
    gotoQuery(), the mouse is moved over the element with id "cards-level1".
    """
    search_box = util.findElementById(broswer, "sole-input")
    time.sleep(3)

    area_text = area.decode("utf-8")
    tag_text = tag.decode("utf-8")
    # Each term is preceded by a single space.
    search_box.send_keys(" " + area_text + " " + tag_text)
    time.sleep(1)
    time.sleep(3)

    gotoQuery(broswer)

    cards = util.findElementById(broswer, "cards-level1")
    ActionChains(broswer).move_to_element(cards).perform()
    time.sleep(2)
Exemplo n.º 2
0
def chooseArea(broswer, area, type_):
    """Select an area in the city panel, then click one of its entries.

    Clicks the "city-panel" filter, hovers the list item whose data-value
    attribute equals `area`, and finally clicks the link whose text is
    `type_` inside the element with id "bsAreaList".
    """
    time.sleep(2)
    # NOTE(review): with the trailing False the util finders appear to return
    # a single element (the result is used directly) -- confirm in util.
    panel_toggle = util.findElementsByCssSelector(
        broswer, 'li[data-type="city-panel"]', False)
    time.sleep(1)
    panel_toggle.click()
    time.sleep(1)

    util.switchToCurrentWindow(broswer)
    area_list = util.findElementsByClassName(broswer, "hd-list-wrap", False)
    time.sleep(1)

    # Hover (not click) the requested area.
    area_selector = 'li[data-value="' + area + '"]'
    area_item = util.findElementsByCssSelector(area_list, area_selector, False)
    time.sleep(1)
    ActionChains(broswer).move_to_element(area_item).perform()
    time.sleep(1)

    sub_area_panel = util.findElementById(broswer, "bsAreaList")
    util.findElementsByLinkText(sub_area_panel, type_, False).click()
    time.sleep(1)
Exemplo n.º 3
0
def getAllArea(broswer):
    """Collect every area of the city panel together with its entries.

    Returns a dict that maps the "data-value" attribute of each <li> in the
    "hd-list-wrap" list to a list holding the innerHTML of the first <a> of
    every <li> found under "bsAreaList" while that item is hovered.
    The size of each list, and finally of the dict, is printed.
    """
    time.sleep(2)
    panel_toggle = util.findElementsByCssSelector(
        broswer, 'li[data-type="city-panel"]', False)
    time.sleep(1)
    panel_toggle.click()
    time.sleep(1)

    util.switchToCurrentWindow(broswer)
    area_list = util.findElementsByClassName(broswer, "hd-list-wrap", False)
    time.sleep(1)

    areas = {}
    for area_item in area_list.find_elements_by_tag_name("li"):
        time.sleep(1)
        ActionChains(broswer).move_to_element(area_item).perform()
        time.sleep(1)

        name = area_item.get_attribute("data-value")
        sub_area_panel = util.findElementById(broswer, "bsAreaList")
        areas[name] = [
            entry.find_elements_by_tag_name("a")[0].get_attribute("innerHTML")
            for entry in sub_area_panel.find_elements_by_tag_name("li")
        ]
        print(len(areas[name]))

    print(len(areas))
    return areas
Exemplo n.º 4
0
def queryByChooseLabel(broswer, area, area_type, tag, tag_type):
    """Run a query by picking filter labels instead of typing text.

    Applies chooseArea(area, area_type) and then chooseType(tag, tag_type),
    moving the mouse over the "cards-level1" element after each step.
    """
    chooseArea(broswer, area, area_type)
    time.sleep(2)

    # Looked up once and reused for both hovers.
    cards = util.findElementById(broswer, "cards-level1")

    def hover_cards():
        ActionChains(broswer).move_to_element(cards).perform()
        time.sleep(3)

    hover_cards()
    chooseType(broswer, tag, tag_type)
    hover_cards()
Exemplo n.º 5
0
def getReady():
    """Open Baidu Maps in a new browser and prepare it for searching.

    Types the city name into the "sole-input" box, clicks "search-button",
    then clears and clicks the box again and clicks the first
    "city-special-item" element.

    Returns:
        (pid, broswer): `pid` is the value util.getPid() derives from the
        process ids seen before and after the browser was started, and
        `broswer` is the object returned by util.createBrowser().

    Raises:
        Whatever a setup step raises. Before the exception leaves this
        function the browser is closed and, if it was already identified,
        the driver process is terminated, so a failed setup does not leave
        a stray browser behind that the caller has no handle on. If the
        cleanup itself fails, the cleanup error is what propagates.
    """
    # Name of the driver executable handed to util.getPid().
    # Alternatives tried by the original author: "phantomjs.exe",
    # "chromedriver.exe".
    driverType = "geckodriver.exe"

    pids_before = psutil.pids()
    broswer = util.createBrowser("https://map.baidu.com/")
    pid = None
    try:
        time.sleep(3)
        pids_after = psutil.pids()
        pid = util.getPid(pids_before, pids_after, driverType)
        util.switchToCurrentWindow(broswer)

        inputElement = util.findElementById(broswer, "sole-input")
        inputElement.clear()
        # NOTE(review): cityName is not defined in this function, so it has
        # to exist at module level; main() takes a cityName argument but
        # does not pass it here -- confirm the global is always set.
        inputElement.send_keys(cityName.decode("utf-8"))
        util.findElementById(broswer, "search-button").click()
        time.sleep(2)

        inputElement.clear()
        inputElement.click()
        util.findElementsByClassName(broswer, "city-special-item")[0].click()
    except Exception:
        # The caller never receives broswer/pid when setup fails, so this is
        # the only place that can release them.
        try:
            util.closeAllHandle(broswer)
        finally:
            if pid is not None:
                util.terminatePid(pid)
        raise
    return pid, broswer
Exemplo n.º 6
0
def main(cityName, area, area_type, tag, tag_type):
    """Search for "<area> <tag>" and store every POI not seen before.

    For each result page, every <li> of the first "poilist" element is
    parsed with parsePOI_Info(). A POI whose uid is not yet listed in
    "data/uid.txt" is appended to "data/info.txt" and its uid is written to
    "data/uid.txt". Paging continues while goToNextPage() returns a true
    value.

    Args:
        cityName: not used by this function; getReady() reads its own
            module-level cityName.
        area, tag: byte strings typed into the search box.
        area_type, tag_type: only needed by the label-based query, which is
            currently disabled in favour of queryByInput().

    Any exception is printed rather than propagated. The browser windows
    are closed and the driver process is terminated on every exit path.
    """
    pid = None
    broswer = None
    try:
        pid, broswer = getReady()

        # Alternative entry point:
        # queryByChooseLabel(broswer, area, area_type, tag, tag_type)
        queryByInput(broswer, area, tag)

        hasNext = True
        i = 1
        while hasNext:
            print(i)
            i += 1
            # i is incremented before this check, so at most 58 pages are
            # processed.
            if i >= 60:
                break
            poiElement = util.findElementsByClassName(broswer, "poilist")[0]
            items = poiElement.find_elements_by_tag_name("li")
            # A set gives constant-time membership tests; it is re-read for
            # every page and updated below so that a uid repeated on the
            # same page is only stored once.
            already_scraw = set(file_func.readFileToList("data/uid.txt"))
            for item in items:
                pageSource = util.getInnerHtml(item)
                uid, poiInfo = parsePOI_Info(pageSource)
                if uid not in already_scraw:
                    print(uid + " not in")
                    file_func.listAppendToTxt("data/info.txt", poiInfo)
                    file_func.writeToTxt("data/uid.txt", uid)
                    already_scraw.add(uid)
                else:
                    print(uid + " in")
            time.sleep(1)
            hasNext = goToNextPage(broswer)
            # Submit the search again when the new page shows no items.
            if getItemCount(broswer) == 0:
                gotoQuery(broswer)
            util.switchToCurrentWindow(broswer)
            time.sleep(2)
            cardElement = util.findElementById(broswer, "toast-wrapper")
            time.sleep(2)
            ActionChains(broswer).move_to_element(cardElement).perform()
            time.sleep(10)
    except Exception as e:
        print(e)
    finally:
        # Single cleanup path. broswer and pid are still None when
        # getReady() failed, and the driver process must be terminated even
        # if closing the windows raises.
        try:
            if broswer is not None:
                util.closeAllHandle(broswer)
        finally:
            print(pid)
            if pid is not None:
                util.terminatePid(pid)
Exemplo n.º 7
0
def chooseOrder(broswer, sortType):
    """Open the sort panel and click the option selected by `sortType`.

    Values listed by the original author for `sortType`:
    "data_type,0", "overall_rating,0", "hot_value,0", "price,0", "price,1".
    The option is located by its attribute data-value="sort:<sortType>".
    """
    time.sleep(1)
    sort_toggle = util.findElementsByCssSelector(
        broswer, 'li[data-type="sort-panel"]', False)
    time.sleep(2)
    sort_toggle.click()
    time.sleep(1)

    sort_panel = util.findElementById(broswer, "select_panel_sort")
    option_selector = 'a[data-value="sort:' + sortType + '"]'
    option = util.findElementsByCssSelector(sort_panel, option_selector, False)
    time.sleep(1)

    # Hover the option first, then click it.
    ActionChains(broswer).move_to_element(option).perform()
    option.click()
    time.sleep(4)
Exemplo n.º 8
0
def gotoQuery(broswer):
    """Click the element with id "search-button", then wait five seconds."""
    util.findElementById(broswer, "search-button").click()
    time.sleep(5)