def get_attraction_urls(search_results):
    """Extract attraction URL paths from a search-results HTML page.

    For every ``.result`` element the ``onclick`` attribute of its children
    is read and the text between the first ``'/`` and the following ``')``
    is taken as the URL path. If that yields nothing, the function falls back
    to the ``href`` attribute of the children of each ``.listing_title``
    element, dropping its first character (presumably the leading slash).

    Entries whose attribute is missing or does not contain the expected
    markers are skipped instead of aborting the whole extraction.

    Args:
        search_results: HTML markup of the search-results page.

    Returns:
        A list of URL path strings; empty when nothing could be extracted.
    """
    urls = []
    page = PyQuery(search_results, parser='html')
    for item in page(".result").items():
        onclick = PyQuery(item).children().attr['onclick']
        if onclick is None:
            # No onclick handler on this result: nothing to parse.
            continue
        start = onclick.find("'/")
        if start == -1:
            continue
        start += 2  # skip the opening quote and the leading slash
        # Look for the closing marker only after the start of the path so an
        # earlier "')" in the handler cannot produce a bogus slice.
        end = onclick.find("')", start)
        if end == -1:
            continue
        urls.append(onclick[start:end])
    if not urls:
        # Alternative page layout: plain links inside ".listing_title".
        for item in page(".listing_title").items():
            href = PyQuery(item).children().attr['href']
            if href is None:
                continue
            urls.append(href[1:])
    return urls
Пример #2
0
 def get_attraction_urls(search_results):
     """Collect attraction URL paths from the HTML of a search-results page.

     The primary source is the ``onclick`` attribute read from the children
     of each ``.result`` element: the path is the text between the first
     ``'/`` and the next ``')`` after it. When no path is found that way, the
     ``href`` of the children of each ``.listing_title`` element is used
     instead, with its first character removed (presumably a leading slash).

     Elements lacking the attribute or the expected markers are skipped
     rather than raising.

     Args:
         search_results: HTML markup of the search-results page.

     Returns:
         A list of URL path strings; empty when nothing could be extracted.
     """
     urls = []
     page = PyQuery(search_results, parser='html')
     for item in page(".result").items():
         handler = PyQuery(item).children().attr['onclick']
         if handler is None:
             continue
         begin = handler.find("'/")
         if begin == -1:
             continue
         begin += 2  # step over the quote and the leading slash
         # Search for the terminator from the path start onwards so that a
         # "')" appearing earlier in the handler is not picked up.
         finish = handler.find("')", begin)
         if finish == -1:
             continue
         urls.append(handler[begin:finish])
     if not urls:
         # Fallback layout: anchors nested under ".listing_title".
         for item in page(".listing_title").items():
             link = PyQuery(item).children().attr['href']
             if link is not None:
                 urls.append(link[1:])
     return urls
Пример #3
0
def get_info(detail_url, timeout=10):
    """Download a chapter page and pull out the fields embedded in it.

    The page is fetched with a desktop browser User-Agent and scraped for:

    * ``nonce``    - the second match of ``window["n...e"] = <value>;``
    * ``data``     - the quoted string following ``var DATA``
    * ``chapter``  - the current chapter heading (spaces removed), prefixed
      with its 1-based position in the chapter list, zero-padded to 3 digits
    * ``chapters`` - every chapter title from ``#catalogueList``, each
      prefixed with its zero-padded 1-based position
    * ``name``     - the text between ``《`` and ``》`` in ``<title>``

    Args:
        detail_url: URL of the chapter page.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        ``(nonce, data, chapter, chapters, name)`` on success, or ``None``
        when the request or any extraction step fails (the URL and the error
        are printed in that case).
    """
    headers = {
        "User-Agent":
        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
    }
    try:
        # A timeout keeps a stalled server from blocking the caller forever.
        res = requests.get(detail_url, headers=headers, timeout=timeout).text
        # The second match is the one used; the first is deliberately skipped.
        nonce = re.findall(r'window\["n.*?e"\]\s=\s(.*?);', res)[1]
        data = re.findall(r"var DATA.*?'(.*?)'", res)[0]
        chapter = re.findall(r'title-comicHeading">(.*?)<',
                             res)[0].replace(' ', '')
        titles = PyQuery(res)(
            '#catalogueList li .tool_chapters_list_title').text().split()
        chapter = str(titles.index(chapter) + 1).rjust(3, '0') + chapter
        # Number by position so duplicate titles still get distinct prefixes
        # and the list is built in a single pass.
        chapters = [
            str(position).rjust(3, '0') + title
            for position, title in enumerate(titles, 1)
        ]
        name = re.findall('<title>《(.*?)》', res)[0]
        return (nonce, data, chapter, chapters, name)
    except Exception as er:
        # Best-effort scraper boundary: report the failing URL and continue.
        print(detail_url, er)
        return None
Пример #4
0
 def verify_token(self):
     """Select the dataset listed after the "表名" label and return its id.

     Up to five attempts are made, sleeping 3 seconds after each failure.
     Each attempt waits for the settings list item to become clickable, reads
     the inner HTML of the first ``.card-control`` element, splits the text of
     its ``.list-group-item-action`` entries on spaces and newlines, takes the
     token that follows "表名", and clicks the element whose id is
     ``dp_ads.<token>``.

     Returns:
         The string ``"dp_ads.<token>"`` for the last token that was resolved.

     Raises:
         RuntimeError: If no attempt managed to resolve the token.
     """
     table_name = None
     last_error = None
     for _ in range(5):
         try:
             WebDriverWait(self.driver, wait).until(
                 EC.element_to_be_clickable((By.CSS_SELECTOR, "body > app-root > app-golden-main > app-golden-content > main > div > div > aside.block-setting > div > div.card-body.d-flex.flex-column > div.overflow-container.flex-grow-1 > ul > li")))
             # find_element(s)_by_* helpers were removed in Selenium 4.3;
             # the By-based form works on old and new versions alike.
             html = self.driver.find_elements(By.CSS_SELECTOR, ".card-control")[
                         0].get_attribute("innerHTML")
             text = PQ(html)('.list-group-item-action').text()
             tokens = text.replace(" ", "\n").split("\n")
             table_name = tokens[tokens.index("表名") + 1]
             xpath = '//*[@id="dp_ads.' + table_name + '"]'
             self.driver.find_element(By.XPATH, xpath).click()
             break
         except Exception as error:
             # The page may still be rendering; remember why and retry.
             last_error = error
             time.sleep(3)
     if table_name is None:
         # Previously this fell through to an unbound-variable error.
         raise RuntimeError(
             "could not resolve the dataset name after 5 attempts"
         ) from last_error
     return "dp_ads." + table_name
Пример #5
0
    def click_dataset(self, lan):
        """Click the dataset listed after the label ``lan`` and read a heading.

        Up to five attempts are made, sleeping 3 seconds after each failure.
        Each attempt reads the inner HTML of the first ``.card-control``
        element, splits the text of its ``.list-group-item-action`` entries on
        spaces and newlines, takes the token that follows ``lan``, and clicks
        the element whose id is ``dp_ads.<token>``. Failures are tolerated:
        after the attempts the method proceeds regardless.

        It then waits for the third-child ``h3`` heading in the filter panel
        to become visible and returns its text.

        Args:
            lan: Label text whose following token names the dataset to click.

        Returns:
            The ``innerText`` of the filter-panel heading.
        """
        # --- locate via PyQuery, then click via XPath ---
        for _ in range(5):
            try:
                # find_element(s)_by_* helpers were removed in Selenium 4.3;
                # the By-based form works on old and new versions alike.
                html = self.driver.find_elements(By.CSS_SELECTOR, ".card-control")[
                            0].get_attribute("innerHTML")
                text = PQ(html)('.list-group-item-action').text()
                tokens = text.replace(" ", "\n").split("\n")
                dataset = tokens[tokens.index(lan) + 1]
                xpath = '//*[@id="dp_ads.' + dataset + '"]'
                self.driver.find_element(By.XPATH, xpath).click()
                break
            except Exception:
                # The list may not be rendered yet; wait and retry.
                time.sleep(3)

        # Heading on the page used for comparison (original note: dimension
        # conditions).
        heading_selector = "body > app-root > app-golden-main > app-golden-content > main > div > div > aside.block-fitler > div > div.card-body > h3:nth-child(3)"
        WebDriverWait(self.driver, wait).until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, heading_selector)))
        check = self.driver.find_element(By.CSS_SELECTOR, heading_selector).get_attribute("innerText")
        return check