def GetContent(self):
    """Crawl every category and store the article details in the database.

    Creates the table ``self.table_name`` with the columns
    ``(category, content, content_detail)``, opens a Chrome session on
    ``self.page_address`` and, for each category returned by
    ``self.GetCategory()``, follows the category link, then every content
    link selected by ``self.content_tag``.  Each element matched by
    ``self.detail_content_tag`` on the article page is inserted as one row.

    Returns:
        None.
    """
    cur = self.connection.cursor()
    # Identifiers cannot be bound as SQL parameters, so the table name is
    # concatenated.  NOTE(review): assumes table_name is trusted
    # configuration, not user input -- confirm against the caller.
    cur.execute(
        "CREATE TABLE " + self.table_name
        + " (category, content, content_detail);"
    )
    insert_sql = "INSERT INTO " + self.table_name + " VALUES(?,?,?)"
    categories = self.GetCategory()

    my_driver = webdriver.Chrome(self.webdriver_address)
    try:
        # Implicit wait: how long element lookups keep polling before
        # giving up while page resources are still loading.
        my_driver.implicitly_wait(self.__waiting_time)
        my_driver.get(self.page_address)

        for category in categories:
            # By.LINK_TEXT is a plain strategy string, not a callable: the
            # strategy and the value are two separate arguments.
            my_driver.find_element(By.LINK_TEXT, category).send_keys(Keys.ENTER)
            my_soup = BeautifulSoup(my_driver.page_source, self.parser)
            contents_source = my_soup.select(self.content_tag)

            for content_source in contents_source:
                my_driver.find_element(
                    By.LINK_TEXT, content_source.text
                ).send_keys(Keys.ENTER)
                my_soup_for_article = BeautifulSoup(
                    my_driver.page_source, self.parser
                )
                detail_contents_source = my_soup_for_article.select(
                    self.detail_content_tag
                )
                for detail_content_source in detail_contents_source:
                    cur.execute(
                        insert_sql,
                        (
                            category,
                            content_source.text,
                            detail_content_source.text,
                        ),
                    )
                # Persist this article's rows before leaving its page.
                self.connection.commit()
                # Return to the category listing for the next content link.
                my_driver.back()
    finally:
        # The driver is local to this call; always release the browser,
        # even when a lookup or an insert fails.
        my_driver.quit()
    return
def main():
    """Log in to Taobao, select the whole cart and submit it at buy time.

    Opens Chrome, clicks the login link and waits 60 seconds for the user
    to scan the QR code, opens the cart page and ticks "select all".  It
    then polls the clock every 0.1 s; once the current time is later than
    the module-level ``buy_time_str`` (format ``%Y-%m-%d %H:%M:%S``) it
    clicks the checkout button and then the submit-order link.  Browser
    errors during that step are retried on the next poll.

    The loop never terminates on its own; stop the script with Ctrl-C.
    """
    # Function-scope import keeps this block self-contained.
    from selenium.common.exceptions import WebDriverException

    option = webdriver.ChromeOptions()
    option.add_argument('--start-maximized')
    option.add_argument(
        '--user-agent=Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/65.0.3325.181 Safari/537.36'
    )
    # `options=` is accepted by Selenium 3.8+ and 4.x; the older
    # `chrome_options=` keyword was removed in later Selenium 4 releases.
    browser = webdriver.Chrome(options=option)

    browser.get('https://www.taobao.com/')
    # find_element raises when the link is missing, so a truthiness check
    # on its result adds nothing; look the element up once and click it.
    browser.find_element(By.LINK_TEXT, "亲,请登录").click()
    print("请在60秒内完成扫码")
    time.sleep(60)
    browser.get('https://cart.taobao.com/cart.htm')
    time.sleep(3)

    browser.find_element(By.CSS_SELECTOR, "#J_SelectAll1").click()

    # The target time does not change, so parse it once outside the loop.
    buy_time = datetime.datetime.strptime(buy_time_str, "%Y-%m-%d %H:%M:%S")
    submit_locator = (By.LINK_TEXT, '提交订单')

    while True:
        now = datetime.datetime.now()
        now_str = now.strftime('%Y-%m-%d %H:%M:%S.%f')
        print('current time is :%s' % now_str)
        if now > buy_time:
            try:
                browser.find_element(By.CSS_SELECTOR, "#J_Go").click()
                # Wait for the submit link to become visible.  The locator
                # is a (strategy, value) tuple; By.LINK_TEXT is a string
                # and must not be called.
                submit_link = WebDriverWait(
                    browser, 2, poll_frequency=0.5
                ).until(
                    expected_conditions.visibility_of_element_located(
                        submit_locator
                    )
                )
                submit_link.click()
                print('OK!')
                time.sleep(5)
            except WebDriverException:
                # Element not there yet or wait timed out: retry shortly.
                # Only Selenium errors are swallowed so that programming
                # errors and Ctrl-C are no longer hidden.
                time.sleep(0.1)
                continue
        time.sleep(0.1)
def getLocator(self, locator):
    """Translate a ``"strategy:value"`` string into a Selenium locator.

    Args:
        locator: Either a bare value, which is treated as an element id,
            or ``"<strategy>:<value>"`` where strategy (case-insensitive)
            is one of ``id``, ``name``, ``xpath``, ``linktext`` or
            ``partiallinktext`` (the historical misspelling
            ``particallinktext`` is still accepted).

    Returns:
        A ``(by, value)`` tuple suitable for ``driver.find_element(*loc)``,
        or ``None`` (after printing ``NO LOCATOR MATCHED``) when the
        strategy prefix is not recognised.
    """
    strategies = {
        "id": By.ID,
        "name": By.NAME,
        "xpath": By.XPATH,
        "linktext": By.LINK_TEXT,
        "particallinktext": By.PARTIAL_LINK_TEXT,
        "partiallinktext": By.PARTIAL_LINK_TEXT,
    }

    # Split on the first colon only so values that themselves contain
    # colons (e.g. an XPath mentioning a URL) stay intact.
    key = locator.split(":", 1)
    if len(key) == 1:
        # No prefix: the whole string is an element id.
        return (By.ID, locator)

    strategy = strategies.get(key[0].lower())
    if strategy is None:
        print("NO LOCATOR MATCHED")
        return None
    # By.* members are plain strategy strings, not callables; a locator is
    # the pair (strategy, value).
    return (strategy, key[1].strip())
def to_book(self, book):
    """Open the product page of a book by clicking its link.

    Args:
        book: Exact visible text of the link to click.

    Returns:
        A ``ProductPage`` wrapping the same driver.
    """
    # By.LINK_TEXT is a strategy string, not a callable: pass the strategy
    # and the link text as two separate arguments.
    self.driver.find_element(By.LINK_TEXT, book).click()
    return ProductPage(self.driver)