def login_website(chrome_webdriver: WebDriver, email: str,
                  password: str) -> None:
    """Log in to Brainscape through the site's login form.

    Opens the login form, types the credentials, clicks the "Log In"
    label and then pauses for a fixed three seconds.

    Args:
        chrome_webdriver: Driver positioned on a page that shows the
            "login-link" element.
        email: Text typed into the field with id "email".
        password: Text typed into the field with id "password".
    """
    chrome_webdriver.find_element_by_class_name("login-link").click()

    # Fill the two form fields in the order the form presents them.
    for field_id, value in (("email", email), ("password", password)):
        chrome_webdriver.find_element_by_id(field_id).send_keys(value)

    submit_label = chrome_webdriver.find_element_by_xpath(
        "//span[contains(@class, 'label') and text() = 'Log In']")
    submit_label.click()

    # Fixed pause after submitting; nothing is checked afterwards.
    sleep(3)
def info_take_from(brower: WebDriver, tag: str) -> List[str]:
    """Collect the text of every "nearby" result page for *tag*.

    Clicks the first search result, opens its "nearby" search, submits
    *tag* and then walks the result pager, recording the text of the
    ``poilist`` element on each page. The browser window is closed before
    returning.

    Args:
        brower: Browser already showing a search result list.
        tag: Category to search nearby, e.g. food or lodging.

    Returns:
        One string per result page. Empty when the only page collected
        does not contain the "米" (metres) marker.
    """
    # Click the first search result, then "nearby", then submit the tag.
    brower.find_element_by_xpath('//*[@id="card-1"]/div/ul/li[1]').click()
    sleep(1)
    brower.find_element_by_xpath(
        '//*[@id="generalinfo"]/div[1]/div[1]').click()
    sleep(1)
    around_input = brower.find_element_by_xpath('//*[@id="nearby-input"]')
    around_input.send_keys(tag)
    around_input.send_keys(Keys.ENTER)
    sleep(2)

    info = [brower.find_element_by_class_name('poilist').text]
    # Keep paging only while the latest page contains the "米" marker.
    while '米' in info[-1]:
        try:
            page_location = brower.find_element_by_xpath(
                '//*[@id="poi_page"]/p')
            # Pager layout varies between locations; the last <span> is
            # taken as the "next page" button.
            next_page = page_location.find_elements_by_tag_name('span')[-1]
            # The button is clickable only when its link has an onclick.
            next_page_status = next_page.find_element_by_tag_name(
                'a').get_attribute("onclick")
            if next_page_status is None:
                break
            next_page.click()
            sleep(2)
            info.append(brower.find_element_by_class_name('poilist').text)
        except (NoSuchElementException, IndexError):
            # IndexError: the pager has no <span> at all, so there is no
            # further page to visit.
            break

    brower.close()
    if len(info) == 1 and '米' not in info[0]:
        return []
    return info
def GetEndpoints(driver: WebDriver, trace: bool = False) -> list:
    """Get a list of endpoints to fetch.

    Loads the TD Ameritrade API index, collects one link per category and
    then visits every category page to list its endpoints.

    Args:
        driver: Browser used to load the pages.
        trace: When true, pretty-print the category map and the endpoint
            list as they are built.

    Returns:
        A list of ``(category, function_name, method, url, link)`` tuples,
        ordered by category name. (The previous ``Dict[str, str]``
        annotation did not match what is returned.)

    Raises:
        ValueError: If an endpoint row has fewer than three text lines.
    """
    driver.get("https://developer.tdameritrade.com/apis")
    elem = driver.find_element_by_class_name('view-smartdocs-models')
    categories = {}
    for row in elem.find_elements_by_class_name('views-row'):
        # The first text line of a row is used as the category name.
        category = CleanName(row.text.splitlines()[0])
        link = row.find_element_by_tag_name('a').get_attribute('href')
        categories[category] = link
    if trace:
        pprint.pprint(categories)

    # Process each of the categories.
    endpoints = []
    for catname, catlink in sorted(categories.items()):
        logging.info("Getting %s", catlink)
        driver.get(catlink)
        for row in driver.find_elements_by_class_name('views-row'):
            link = row.find_element_by_tag_name('a').get_attribute('href')
            # The first three text lines are unpacked as method, name, URL.
            method, funcname, url = row.text.splitlines()[:3]
            funcname = CleanName(funcname.strip())
            endpoints.append((catname, funcname, method, url, link))
    if trace:
        pprint.pprint(endpoints)
    return endpoints
def get_children_decks(chrome_webdriver: WebDriver,
                       child: WebElement) -> List[WebElement]:
    """Return the deck rows found inside the page's deck list.

    Args:
        chrome_webdriver: Driver whose current page contains an element
            with class "deck-list".
        child: Not used by this implementation.
            NOTE(review): the lookup is page-wide rather than scoped to
            ``child`` - confirm that is intended.

    Returns:
        Every "dashboard-deck-row" element under the deck list.
    """
    deck_list = chrome_webdriver.find_element_by_class_name("deck-list")
    return deck_list.find_elements_by_class_name("dashboard-deck-row")
def GetErrorCodes(driver: WebDriver) -> Dict[int, str]:
    """Extract the error-code table as a ``code -> message`` mapping.

    Args:
        driver: Browser whose current page contains an element with class
            "table-error-codes".

    Returns:
        Integer error codes mapped to their message text.

    Raises:
        ValueError: If a row does not have exactly two cells, or its
            first cell is not an integer.
    """
    table = driver.find_element_by_class_name('table-error-codes')
    errcodes = {}
    for row in table.find_elements_by_class_name('listErrorCodes'):
        cells = row.find_elements_by_tag_name('td')
        # Exactly two cells are expected: the code and its message.
        code, message = (cell.text for cell in cells)
        errcodes[int(code)] = message
    return errcodes
def __init__(self, driver: WebDriver):
    """Read the story title, article HTML and answer links from the page.

    Args:
        driver: Browser currently showing a story page.
    """
    self.driver = driver

    title_el = driver.find_element_by_class_name('at-story__title')
    self.title = title_el.text

    # The article is stored as raw inner HTML, not as plain text.
    article_el = driver.find_element_by_css_selector(
        '.at-story__article article')
    self.article = article_el.get_attribute('innerHTML')

    self.answer_els = driver.find_elements_by_css_selector(
        '.at-story__answers ul li a')
def find_parent_decks(chrome_webdriver: WebDriver,
                      parent_deck_class: str) -> List[WebElement]:
    """Return the ``<li>`` elements under the "user-packs" container.

    Args:
        chrome_webdriver: Driver whose current page contains an element
            with class "user-packs".
        parent_deck_class: Not used by this implementation.
            NOTE(review): the class name "user-packs" is hard-coded -
            confirm whether this argument was meant to replace it.

    Returns:
        All ``<li>`` descendants of the container.
    """
    packs_container = chrome_webdriver.find_element_by_class_name(
        "user-packs")
    return packs_container.find_elements_by_tag_name("li")
def get_file_name_for_csv_files(chrome_webdriver: WebDriver) -> str:
    """Build a file name from the text of the "new-modal-title" element.

    Every character outside the allowed set (ASCII letters, digits, a
    fixed list of accented letters, space and ``-_.()``) is dropped.

    Args:
        chrome_webdriver: Driver whose current page shows the modal title.

    Returns:
        The title text with disallowed characters removed.
    """
    title_text = chrome_webdriver.find_element_by_class_name(
        "new-modal-title").text
    allowed = frozenset(
        f"-_.(){ascii_letters}{digits}ÁÀÂÃÉÈÊÍÏÓÔÕÖÚÇÑáàâãéèêíïóôõöúçñ ")
    kept = [symbol for symbol in title_text if symbol in allowed]
    return "".join(kept)
def check_dialog(driver: WebDriver, name, timeout=30):
    """Dismiss the TrustArc cookie-consent overlay if it is showing.

    Any ``WebDriverException`` (including the overlay not being present)
    is printed and swallowed. In both outcomes the driver is switched back
    to the top-level document.

    Args:
        driver: Browser to inspect.
        name: Label prefixed to every printed message.
        timeout: Seconds to wait for the consent iframe to appear.
    """
    frame_selector = "[title='TrustArc Cookie Consent Manager']"
    try:
        # Raises when no overlay is present, which skips the rest.
        driver.find_element_by_class_name("truste_overlay")
        print(f"{name}: show trust_overlay")

        frame_present = EC.presence_of_element_located(
            (By.CSS_SELECTOR, frame_selector))
        WebDriverWait(driver, timeout).until(frame_present)
        consent_frame = driver.find_element_by_css_selector(frame_selector)
        driver.switch_to.frame(consent_frame)

        button_present = EC.presence_of_element_located(
            (By.CLASS_NAME, "pdynamicbutton"))
        WebDriverWait(driver, 10).until(button_present)
        button_bar = driver.find_element_by_class_name("pdynamicbutton")
        button_bar.find_element_by_class_name("call").click()
        print(f"{name}: click truste_overlay")

        sleep(5)
        driver.switch_to.default_content()
    except WebDriverException as e:
        driver.switch_to.default_content()
        print(f'{name}: {e.msg}')
def find_more_butto_and_click(driver: WebDriver):
    """Click the "continue reading" button if one is on the page.

    Each class name in ``MORE_BUTTON_CLASS`` is tried. When several match,
    the element found for the last matching class is clicked (via
    JavaScript), followed by a two second pause. When none match, a
    message is printed and nothing is clicked.

    Args:
        driver: Browser showing the page to expand.
    """
    more_button = None
    for more_btn_class in MORE_BUTTON_CLASS:
        try:
            more_button = driver.find_element_by_class_name(more_btn_class)
        except Exception:
            # This class is not on the page; try the next candidate.
            # (Was a bare ``except:``, which also swallowed
            # KeyboardInterrupt and SystemExit.)
            continue

    if more_button is None:
        print("没有找到[继续阅读]按钮")
        return

    print("点击[继续阅读]按钮,加载所有子页面")
    # Click through JavaScript instead of WebElement.click().
    driver.execute_script("arguments[0].click();", more_button)
    time.sleep(2)
class SeleniumModule:
    """Small helper around a Chrome WebDriver plus a requests-based scraper."""

    def __init__(self):
        # NOTE(review): the executable path is a placeholder string
        # ("path to the Chrome driver"); it must be replaced to run.
        self.selenium = WebDriver(executable_path='Chromeドライバのパス')

    def quit(self):
        """Shut the browser down."""
        self.selenium.quit()

    def login(self, login_url, username='username', password='password'):
        """Open *login_url* and submit the login form.

        Args:
            login_url: Address of the login page.
            username: Text typed into the field named "username". The
                default is the placeholder that used to be hard-coded.
            password: Text typed into the field named "password". The
                default is the placeholder that used to be hard-coded.
        """
        self.selenium.get(login_url)
        username_input = self.selenium.find_element_by_name("username")
        username_input.send_keys(username)
        password_input = self.selenium.find_element_by_name("password")
        password_input.send_keys(password)
        self.selenium.find_element_by_class_name('btn').click()

    def get_page_data(self, url, tag, attribute, timeout=30):
        """Fetch *url* with ``requests`` and return the matching tags.

        This does not go through the browser, so it does not share the
        browser's session.

        Args:
            url: Page to download.
            tag: Tag name passed to ``find_all``, e.g. ``'a'``.
            attribute: Attribute filter passed to ``find_all``, e.g.
                ``{'class': 'r'}``.
            timeout: Seconds before the HTTP request is abandoned. Without
                a timeout ``requests`` can wait indefinitely.

        Returns:
            The result of ``soup.find_all(tag, attribute)``.
        """
        res = requests.get(url, timeout=timeout)
        soup = BeautifulSoup(res.text, 'html.parser')
        return soup.find_all(tag, attribute)
def _scrape_price_by_book_details(self, page_url, driver: WebDriver):
    """Read a book's price from its details page, opened in a new tab.

    Args:
        page_url: Address of the book details page.
        driver: Browser used to open the page.

    Returns:
        The text of the second "book-price" element inside
        ``.section-1 .container``, or ``"0"`` if anything fails.
    """
    fallback_price = "0"
    try:
        open_new_tab(driver)
        driver.get(page_url)
        price_nodes = (
            driver.find_element_by_class_name("section-1")
            .find_element_by_class_name("container")
            .find_elements_by_class_name("book-price"))
        # The second "book-price" element is the one that is read.
        price_text = price_nodes[1].text
        close_current_tab(driver)
    except Exception:
        # The tab is closed on failure too, and the fallback is reported.
        close_current_tab(driver)
        return fallback_price
    return price_text
def get_cards_info_of_deck(chrome_webdriver: WebDriver, deck: WebElement):
    """Export the cards of one deck to a CSV file.

    Opens the deck's preview, writes one ``front,back`` row per card into
    ``<cwd>/csv_sem_utf8_delimiter_virgula/<title>.csv``, prints a
    coloured confirmation and closes the preview. If any element is
    missing the deck is silently skipped.

    Args:
        chrome_webdriver: Browser showing the deck list.
        deck: Row element of the deck to export.
    """
    try:
        deck.find_element_by_class_name("ion-ios-glasses-outline").click()
        sleep(2)

        preview_table = chrome_webdriver.find_element_by_class_name(
            "preview-card-table")
        cards = preview_table.find_elements_by_class_name("preview-card")

        # The file name is the modal title without the word "Preview".
        modal_title = get_file_name_for_csv_files(chrome_webdriver)
        csv_file_name = modal_title.replace("Preview", "").strip()

        path = os.getcwd()
        path_csv = f"{path}/csv_sem_utf8_delimiter_virgula/"
        try:
            os.mkdir(path_csv)
        except OSError:
            # Creation errors (e.g. the folder already exists) are ignored.
            pass

        with open(f"{path_csv}{csv_file_name}.csv", "w",
                  newline="") as csv_file:
            writer = csv.writer(csv_file, delimiter=",")
            writer.writerows(
                [get_card_text_img_audio_data(card, "front"),
                 get_card_text_img_audio_data(card, "back")]
                for card in cards)

        color_options = [
            Fore.GREEN, Fore.MAGENTA, Fore.CYAN, Fore.BLUE, Fore.YELLOW
        ]
        print(choice(color_options) + f"{csv_file_name}.csv salvo!")
        chrome_webdriver.find_element_by_class_name("close-button").click()
    except NoSuchElementException:
        pass
def check_another_connection(driver: WebDriver, name) -> bool:
    """Report whether the session was moved to another browser tab.

    Args:
        driver: Browser to inspect.
        name: Label prefixed to the printed message.

    Returns:
        True when the notification banner carries the "session was
        transferred" text. False otherwise; when the lookup raises a
        ``WebDriverException`` the driver is first switched into the
        frame "iframetab1".
    """
    transferred_text = 'Your session was transferred to another browser tab.'
    try:
        driver.switch_to.default_content()
        banner = driver.find_element_by_class_name('notification-wrapper')
        banner_text = banner.find_element_by_tag_name('span').text
        if banner_text != transferred_text:
            return False
        print(f"{name}: run on another window")
        return True
    except WebDriverException:
        # No banner: go back into the working frame.
        driver.switch_to.frame("iframetab1")
        return False
def select(driver: WebDriver, first_menu_index: int, second_menu_index: int,
           third_menu_index: int):
    """Walk a three-level menu by index and enter the resulting iframe.

    Args:
        driver: Browser showing the application shell.
        first_menu_index: Index of the top-level menu item to click.
        second_menu_index: Index of the sub-menu to click in the sidebar.
        third_menu_index: Index of the item to click in the inline menu.
    """
    # Level 1 - top menu (original note: "click the CRM system").
    top_menu = driver.find_element_by_class_name("menu___1QsMw")
    top_items = top_menu.find_elements_by_class_name("menu-item___3QMia")
    top_items[first_menu_index].click()

    # Level 2 - sidebar sub-menu (original note: "click the personal
    # workbench").
    sidebar = WebDriverWait(driver, 5).until(
        EC.presence_of_element_located((By.CLASS_NAME, "sider___1t24v")))
    sidebar_groups = sidebar.find_elements_by_class_name("ant-menu-submenu")
    sidebar_groups[second_menu_index].click()

    # Level 3 - inline menu entry (original note: "click my public sea").
    inline_menu = WebDriverWait(driver, 5).until(
        EC.visibility_of_element_located(
            (By.XPATH, '//ul[contains(@id,"$Menu")]')))
    inline_items = inline_menu.find_elements_by_class_name("ant-menu-item")
    inline_items[third_menu_index].click()

    # The selected page is rendered inside an iframe; switch into it.
    content_frame = WebDriverWait(driver, 10).until(
        EC.visibility_of_element_located((By.TAG_NAME, "iframe")))
    driver.switch_to.frame(content_frame)
class Application:
    """Test fixture owning the WebDriver and the page/helper objects."""

    def __init__(self):
        self.wd = WebDriver()
        self.wd.implicitly_wait(5)
        self.session = SessionHelper(self)
        self.payment = PaymentHelper(self)
        self.localization = LocalizationHelper(self)
        self.navigation = NavigationHelper(self)
        self.login_page = LoginPage(self)
        self.currency = Currency(self)
        self.payment_systems = PaymentSystems(self)

    def experiment(self, a):
        # NOTE(review): this is scratch code and cannot run as written -
        # ``someText`` is not defined here, ``a`` is unused, and two
        # finders are called without a locator. It is left unchanged
        # because its intent cannot be determined from this file.
        self.wd.find_element(By.cssSelector('span[class=\"title\"]:contains(' + someText + ')'))
        self.wd.find_element_by_xpath()
        self.wd.find_element_by_class_name('forgot')
        self.wd.find_element_by_link_text('Forgot password')
        self.wd.find_element_by_css_selector()

    def check_exists_by_css_selector(self, selector):
        """Return True when at least one element matches *selector*.

        ``find_elements_*`` returns an empty list instead of raising when
        nothing matches, so the result list has to be inspected; the
        previous try/except always reported True.
        """
        try:
            matches = self.wd.find_elements_by_css_selector(selector)
        except NoSuchElementException:
            return False
        return len(matches) > 0

    def check_exists_by_xpath(self, xpath):
        """Return True when an element matching *xpath* is found."""
        try:
            self.wd.find_element_by_xpath(xpath)
        except NoSuchElementException:
            return False
        return True

    def destroy(self):
        """Shut the browser down."""
        self.wd.quit()
def _scrape_pages_count(self, url, driver: WebDriver):
    """Return the last number shown in the page's pagination control.

    Args:
        url: Listing page to open.
        driver: Browser used to open the page.

    Returns:
        The last all-digit token of the "pagination" element's text as an
        int, or 1 when the page has no pagination or anything else fails.
    """
    single_page = 1
    try:
        driver.get(url)
        pagination = driver.find_element_by_class_name("pagination")
        plain_text = unidecode(pagination.text)
        # Split on spaces first, then on any whitespace, and keep only
        # the all-digit tokens.
        page_numbers = [
            int(token)
            for chunk in plain_text.split(" ")
            for token in chunk.split()
            if token.isdigit()
        ]
        return page_numbers[-1]
    except Exception:
        return single_page
class WebDriverContainer:
    """Context manager owning a Chrome WebDriver with waiting finders.

    Usage::

        with WebDriverContainer(headless=True) as browser:
            browser.get(url)
            element = browser.find_element_by_id("main")
    """

    def __init__(self, headless: bool = False) -> None:
        ensure_driver_installed()
        print(f"Opening web browser...")
        options = Options()
        options.headless = headless
        self.driver = WebDriver(CHROME_DRIVER_INSTALL_PATH, options=options)
        self.driver.implicitly_wait(SELENIUM_TIME_TO_WAIT_IN_SECONDS)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        print(f"Closing web browser...")
        # quit() ends the whole driver session; close() only closes the
        # current window and can leave the driver process running.
        self.driver.quit()

    def get(self, url: str, print_log: bool = True):
        """Navigate to *url*, optionally logging the request."""
        if print_log:
            print(f"Retrieve url [{url}]...")
        self.driver.get(url)

    def _wait_for(self, by, value: str):
        """Wait until an element located by (*by*, *value*) is present.

        Returns the element produced by the wait itself, so no second
        lookup is needed once the wait succeeds.
        """
        return WebDriverWait(self.driver, SELENIUM_TIME_TO_WAIT_IN_SECONDS) \
            .until(expected_conditions.presence_of_element_located((by, value)))

    def find_element_by_tag_name(self, name: str):
        """Wait for and return the first element with tag *name*."""
        return self._wait_for(By.TAG_NAME, name)

    def find_element_by_id(self, id_: str):
        """Wait for and return the element with id *id_*."""
        return self._wait_for(By.ID, id_)

    def find_element_by_class_name(self, name: str):
        """Wait for and return the first element with class *name*."""
        return self._wait_for(By.CLASS_NAME, name)

    @property
    def current_url(self):
        """URL currently loaded in the browser."""
        return self.driver.current_url
class ElementActions:
    """Fluent element helper driven by ``"<strategy>:<value>"`` locators.

    Supported strategies: ``cssselector``, ``xpath``, ``id``, ``name`` and
    ``class``. Only the first colon separates strategy from value, so
    values may themselves contain colons (URLs in XPath, pseudo-classes in
    CSS).
    """

    # Locator strategy -> name of the driver method that implements it.
    _FINDERS = {
        'cssselector': 'find_element_by_css_selector',
        'xpath': 'find_element_by_xpath',
        'id': 'find_element_by_id',
        'name': 'find_element_by_name',
        'class': 'find_element_by_class_name',
    }

    def __init__(self, driver):
        """
        :WebDriver driver: object
        """
        # Use the driver that was handed in; previously it was ignored and
        # a second browser was started. A new one is created only when
        # None is passed.
        self.driver = driver if driver is not None else WebDriver()
        self.actions = ActionChains(self.driver)

    def __findElement(self, locator=''):
        """Resolve *locator* to an element.

        Raises:
            ValueError: If the locator has no ``:`` separator or names an
                unknown strategy.
        """
        strategy, separator, value = locator.partition(":")
        if not separator or strategy not in self._FINDERS:
            raise ValueError(
                "Unsupported locator %r; expected '<strategy>:<value>' with "
                "strategy in %s" % (locator, sorted(self._FINDERS)))
        return getattr(self.driver, self._FINDERS[strategy])(value)

    def clickThis(self, locator=""):
        """Click the element at *locator*; returns self for chaining."""
        self.__findElement(locator).click()
        return self

    def enterText(self, locator="", value="testData"):
        """Type *value* into the element at *locator*; returns self."""
        self.__findElement(locator).send_keys(value)
        return self

    def getWebElement(self, locator=""):
        """Return the element at *locator*."""
        return self.__findElement(locator)

    def hoverElement(self, locator=''):
        """Move the pointer over *locator*; returns self.

        *locator* may be a locator string or an already resolved element.
        """
        # move_to_element needs an element, not a locator string.
        if isinstance(locator, str):
            target = self.__findElement(locator)
        else:
            target = locator
        self.actions.move_to_element(target).perform()
        return self
def cloud189(chrome: WebDriver, url="", password="",
             username="18953197117", login_password="Cb19985466",
             max_attempts=None):
    """Open a cloud.189.cn share, start the download and log in if asked.

    Args:
        chrome: Browser to drive.
        url: Share link to open; skipped when empty.
        password: Access code of the share; skipped when empty.
        username: Account name typed into the login frame.
        login_password: Account password typed into the login frame.
        max_attempts: Upper bound on retries for the access-code and
            download steps. ``None`` (default) retries forever, as before.

    Raises:
        TimeoutError: If ``max_attempts`` is set and a step never succeeds.

    SECURITY NOTE(review): the default ``username``/``login_password`` are
    the credentials that used to be hard-coded in the body. They are kept
    only so existing callers keep working; pass them explicitly and remove
    the literals from source control.
    """

    def attempts():
        # Yields forever when max_attempts is None, else that many times.
        tried = 0
        while max_attempts is None or tried < max_attempts:
            tried += 1
            yield tried

    if url != '':
        chrome.get(url)

    if password != '':
        # Keep trying until the access-code form accepts the code.
        for _ in attempts():
            try:
                sleep(0.5)
                chrome.find_element_by_id("code_txt").send_keys(password)
                chrome.find_element_by_partial_link_text("访问").click()
                break
            except Exception:
                continue
        else:
            raise TimeoutError("access code form was never submitted")

    # Keep trying until the download button can be clicked.
    for _ in attempts():
        try:
            sleep(0.5)
            download_button = chrome.find_element_by_class_name("btn-download")
            ActionChains(chrome).move_to_element(download_button).perform()
            download_button.click()
            break
        except Exception:
            continue
    else:
        raise TimeoutError("download button was never clicked")

    # The login frame shows up only when no session exists, so this step
    # is best effort: at most four tries, failures are ignored.
    for _ in range(4):
        try:
            sleep(0.5)
            chrome.switch_to.frame("udb_login")
            chrome.find_element_by_xpath(
                '''//*[@id="userName"]''').send_keys(username)
            sleep(1)
            chrome.find_element_by_xpath(
                '''//*[@id="password"]''').send_keys(login_password)
            sleep(1)
            chrome.find_element_by_id("j-login").click()
            break
        except Exception:
            continue
def safely_find_class(driver: WebDriver, value, sleepTime: float = 0.2):
    """Return the first element with class *value*, retrying until found.

    Retries in a loop rather than by recursion: the recursive version
    added a stack frame per attempt and ended in ``RecursionError`` once
    the interpreter's recursion limit was reached.

    Args:
        driver: Browser to search.
        value: Class name to look for.
        sleepTime: Seconds to pause between attempts.

    Returns:
        The located element. Never returns while every lookup raises
        ``WebDriverException``.
    """
    while True:
        try:
            return driver.find_element_by_class_name(value)
        except WebDriverException:
            sleep(sleepTime)
return [row_anchor.get_attribute('href')] return [] url = 'http://www.apothekenindex.at' products = ['productListing-odd', 'productListing-even'] results = pd.DataFrame( columns=['Name', 'Address', 'Tel', 'Fax', 'Email', 'Schedule', 'Link']) options = Options() options.page_load_strategy = 'eager' # options.set_headless(False) driver = WebDriver(ChromeDriverManager().install(), options=options) driver.get(url) # step 1) get all states links table = driver.find_element_by_class_name('infoBoxContents') states = table.find_elements_by_class_name('parent') states_links = [] for state in states: state_list = state.find_element_by_tag_name('ul') state_anchors = state_list.find_elements_by_tag_name('a') states_links += [a.get_attribute('href') for a in state_anchors if a.get_attribute('title') == ''] # Step 2) get all pharmacies links pharmacies_links = [] for state_link in states_links: page = state_link has_next = True while has_next is True:
import time

from selenium.webdriver.chrome.webdriver import WebDriver

# Scripted logon: fills the user and password fields, asks the operator
# for the captcha on stdin, submits, then opens the item with id C1001.
# NOTE(review): the account id and password below are hard-coded in
# source; consider moving them out of the repository.
driver = WebDriver(
    executable_path=
    "/Users/magic/PycharmProjects/zywa-spider-xiaociwei/plug/chromedriver/mac/chromedriver"
)
driver.get("http://202.110.217.69:7001/hsp/logonDialog_113.jsp")

# Each credential field is followed by a short pause.
for field_id, field_text in (("yhmInput", "371083198706245037"),
                             ("mmInput", "2078")):
    driver.find_element_by_id(field_id).send_keys(field_text)
    time.sleep(0.5)

# The captcha has to be read and typed by a human.
print('等待用户输入验证码')
yymCode = input()
driver.find_element_by_id("validatecodevalue1").send_keys(yymCode)
driver.find_element_by_class_name("logonBtn").click()

# Pause after submitting before touching the next element.
time.sleep(5)
# C1001
driver.find_element_by_id("C1001").click()
class Scrap:
    """Scrape MercadoLibre Colombia search results into ``l_articulos``."""

    def __init__(self):
        # Configure the driver.
        options = Options()
        options.add_argument('start-maximized')
        options.add_argument('--incognito')
        self.l_articulos = []
        self.driver = WebDriver(executable_path=os.path.join(
            BASE_DIR, 'Driver', 'chromedriver'), options=options)
        self.driver.get('https://www.mercadolibre.com.co/')

    def page(self):
        """Scrape the current result page and every following page.

        The current page is always scraped before the "next" control is
        inspected, so the last page is included; previously the loop
        stopped as soon as it arrived on the last page, without scraping
        it. Paging stops when the control is missing or its link
        contains "#".
        """
        while True:
            time.sleep(1)
            self.run()
            try:
                next_item = self.driver.find_element_by_class_name(
                    "pagination__next")
                next_link = next_item.find_element_by_tag_name(
                    'a').get_attribute('href')
            except Exception:
                # No pagination control: single page of results.
                break
            if not next_link or "#" in next_link:
                break
            next_item.click()

    def run(self):
        """Append one dict per ``<li>`` of the "searchResults" element."""
        results = self.driver.find_element_by_id('searchResults')
        for item in results.find_elements_by_tag_name('li'):
            self.l_articulos.append(
                Articulo(
                    item.find_element_by_class_name('main-title').text,
                    item.find_element_by_class_name('price__fraction').text,
                    item.find_element_by_tag_name('a').get_attribute(
                        'href')).__dict__)

    def generar_articulos(self, string, min='0', max='0'):
        """Search for *string*, scrape all result pages, close the browser.

        Args:
            string: Search text.
            min: Currently unused (the price-range inputs are not filled).
            max: Currently unused (the price-range inputs are not filled).
        """
        try:
            search = self.driver.find_element_by_name('as_word')
            search.send_keys(string)
            button = self.driver.find_element_by_class_name('nav-icon-search')
            button.click()
            # Submit the price form as it stands.
            button_r = self.driver.find_element_by_xpath(
                '//*[@id="priceForm"]/div/button')
            button_r.click()
            time.sleep(5)
            self.page()
        finally:
            time.sleep(10)
            self.driver.close()
            self.driver.quit()
def run_selenium(driver: WebDriver, user, passwd, id):
    """Log *user* in, make sure the region is Tokyo, then run the shell step.

    The login has three stages: submit the user id, submit the password,
    then wait for the browser to land on ``address``. Any
    ``WebDriverException`` during login clears the cookies and restarts
    the whole procedure.

    Args:
        driver: Browser to drive.
        user: Login id; also used as the prefix of every printed message.
        passwd: Password for *user*.
        id: Passed through to ``switch_to_frame_execute``.

    Returns:
        Whatever ``switch_to_frame_execute`` returns.
    """
    driver.get(address)
    print(f"{user}: start get /shell")
    try:
        # Stage 1: submit the user id.
        while True:
            try:
                WebDriverWait(driver, 180).until(
                    EC.presence_of_element_located(
                        (By.CLASS_NAME, "login-form__realm-user-id-row")))
                user_form = driver.find_element_by_class_name(
                    "login-form__realm-user-id-row")
                user_form.find_element_by_id("userid").send_keys(user)
                user_form.find_element_by_tag_name("button").click()
                print(f"{user}: send username")
                try:
                    # An error header within 30 s means the attempt was
                    # rejected; a timeout here is the success path.
                    WebDriverWait(driver, 30).until(
                        EC.presence_of_element_located(
                            (By.CLASS_NAME, "error-header")))
                    print(
                        f'{user}: err={driver.find_element_by_class_name("error-header").get_attribute("innerHTML")}'
                    )
                    raise WebDriverException(f"{user} tor ip invalidate")
                except TimeoutException:
                    print(f"{user}: pass ip validate,start clear proxy")
                break
            except (ElementClickInterceptedException,
                    ElementNotInteractableException):
                # Something covers the form; try to dismiss it and retry.
                check_dialog(driver, user)
                continue

        # Stage 2: submit the password.
        while True:
            try:
                WebDriverWait(driver, 180).until(
                    EC.presence_of_element_located(
                        (By.CSS_SELECTOR,
                         "[class='login-form__password-row ']")))
                passwd_form = driver.find_element_by_class_name(
                    "login-form__password-row ")
                passwd_input = passwd_form.find_element_by_id("password")
                passwd_input.clear()
                passwd_input.send_keys(passwd)
                print(user + ": send passwd")
                driver.find_elements_by_css_selector(
                    '[class="login-form__button bx--btn bx--btn--primary"]'
                )[1].click()
                print(user + ": click login")
                break
            except (ElementClickInterceptedException,
                    ElementNotInteractableException) as e:
                print(f'{user}:do login failed,msg= {e.msg}')
                check_dialog(driver, user)
                continue

        # Stage 3: wait up to 180 s for the target page to load.
        s_time = time.time()
        while True:
            if address in driver.current_url:
                print(f"{user} enter shell success")
                break
            c_time = time.time()
            if c_time - s_time > 180:
                print(
                    f"{user}: enter shell too long,re enter,curentUrl={driver.current_url}"
                )
                raise WebDriverException(
                    f"{user}: userid or passwd error,relogin")
            sleep(2)
    except WebDriverException as e:
        print(f"{user}: login failed msg={e.msg}")
        driver.delete_all_cookies()
        # Return the retry's result. Without the ``return`` the outer call
        # carried on after the retry finished and repeated the region
        # switch and switch_to_frame_execute.
        return run_selenium(driver, user, passwd, id)

    # Switch the region to Tokyo if it is not already selected.
    while True:
        try:
            WebDriverWait(driver, 120).until(
                EC.presence_of_element_located(
                    (By.CLASS_NAME, "bx--header__global")))
            header = driver.find_element_by_class_name("bx--header__global")
            if header.find_element_by_class_name(
                    "header__location-name").text != "Tokyo":
                print(f"{user}: region is not tokyo,begin to switch")
                header.find_element_by_css_selector(
                    "[class='header__location-change-button bx--btn bx--btn--ghost']"
                ).click()
                selects = safelyFindId(driver, "selectRegion")
                selects.click()
                selects.find_element_by_css_selector(
                    "[value='jp-tok']").click()
                safelyFindCSS(driver,
                              "[class='bx--btn bx--btn--primary']").click()
            break
        except (ElementClickInterceptedException,
                ElementNotInteractableException):
            check_dialog(driver, user)
            continue
        except WebDriverException:
            print(user + ": enter shell too long,refresh page")
            driver.refresh()

    return switch_to_frame_execute(driver,
                                   str(user).split("@")[0].replace('.', "_"),
                                   id)
class Pimper:
    """Sort a folder of images into per-title folders via iqdb.org.

    Each image is uploaded to iqdb.org; the best matching booru page is
    opened (directly or through a proxied browser) and the title read from
    it becomes the destination folder name. Images without a usable match
    go to the "unknown" folder.

    Relies on a module-level ``debug`` flag for verbose output.
    """

    # (URL substring, debug label, priority). A lower priority number is
    # preferred. Entries are checked in this order and the first substring
    # found in the URL wins.
    _SITE_PRIORITIES = (
        ("danbooru", "danbooru", 3),
        ("sankaku", "sankaku", 4),
        ("gelbooru", "gelbooru", 5),
        ("shuushuu", "shuushuu", 2),
        ("yande", "yandere", 1),
    )

    def __init__(self,
                 src,
                 dest=None,
                 unknown=None,
                 chromedriver_location=None,
                 proxy_server=None,
                 fast_proxy=False):
        """Prepare folders, the log file and the two browsers.

        Args:
            src: Folder containing the images to sort.
            dest: Folder receiving the sorted images. Defaults to
                ``sorted_images`` next to the running script.
            unknown: Folder for images without a source. Defaults to
                ``unknown`` inside *dest*.
            chromedriver_location: Path to chromedriver. Defaults to
                ``chromedriver_win32\\chromedriver.exe`` next to the script.
            proxy_server: ``host:port`` for the proxied browser.
            fast_proxy: When true, most pauses around proxied lookups are
                skipped and a missing element is not retried.
        """
        if chromedriver_location is None:
            self.chromedriver_location = os.path.abspath(
                os.path.dirname(
                    sys.argv[0])) + r"\chromedriver_win32\chromedriver.exe"
        else:
            self.chromedriver_location = chromedriver_location
        if debug:
            print("Chrome location:", self.chromedriver_location)
            print("src:", src)
            print("dest:", dest)

        # NOTE(review): the log file location is hard-coded.
        self.f = open(r'C:\Python34\Projects\pimp-my-collection\text.txt', 'a')
        self.f.write('\n' + str(datetime.today()) + '\n')
        self.titles = []

        # Where the sorted images go.
        if dest is None:
            try:
                self.dest = os.path.abspath(os.path.dirname(sys.argv[0]))
                os.chdir(self.dest)
                os.mkdir("sorted_images")
            except OSError:
                if debug:
                    print("dest folder already exists")
            finally:
                self.dest = (self.dest + r"\sorted_images")
                os.chdir(self.dest)
        else:
            self.dest = dest
            try:
                os.chdir(self.dest)
            except FileNotFoundError:
                print("No such directory:", self.dest)
                exit(1)

        # Folder for images without a source.
        if unknown is None:
            try:
                os.mkdir("unknown")
            except OSError:
                if debug:
                    print("unkn folder already exists")
            finally:
                self.unknown = self.dest + r"\unknown"
        else:
            self.unknown = unknown
            try:
                os.mkdir(self.unknown)
            except OSError:
                if debug:
                    print("unknown folder already exists")
        if debug:
            print("dest:", self.dest)
            print("unknown:", self.unknown)

        # Where the images are taken from.
        self.folder = src
        try:
            self.images = os.listdir(path=self.folder)
        except FileNotFoundError:
            print("No such directory:", self.folder)
            exit(1)
        if debug:
            for i in self.images:
                try:
                    print(i)
                except UnicodeEncodeError:
                    i = i.encode('ascii', 'ignore')
                    print("bad unicode:", i)

        self.sleep_time = 3
        self.proxy_sleep_time = 3
        self.waiting_time = 15
        self.fast_proxy = fast_proxy

        # New version - new proxy.
        from selenium.webdriver import Proxy
        if proxy_server is None:
            proxy_server = "163.172.175.210:3128"  # https://free-proxy-list.net/
        settings = {"httpProxy": proxy_server, "sslProxy": proxy_server}
        self.proxy_server = Proxy(settings)

        from selenium.webdriver.chrome.webdriver import WebDriver as ChromeDriver
        from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
        cap = DesiredCapabilities.CHROME.copy()
        cap['platform'] = "WINDOWS"
        cap['version'] = "10"
        # Without proxy.
        self.driver = ChromeDriver(desired_capabilities=cap,
                                   executable_path=self.chromedriver_location)
        # With proxy.
        self.proxy_server.add_to_capabilities(cap)
        self.driver2 = ChromeDriver(desired_capabilities=cap,
                                    executable_path=self.chromedriver_location)

    def _match_site(self, url):
        """Return ``(label, priority)`` of the first known site in *url*, or None."""
        for needle, label, rank in self._SITE_PRIORITIES:
            if url.find(needle) != -1:
                return label, rank
        return None

    def _find_via_proxy(self, addr, class_name):
        """Locate *class_name* in the proxied browser, reloading *addr* once on failure.

        Returns the element, or None when it cannot be found.
        """
        try:
            source = self.driver2.find_element_by_class_name(class_name)
            if not self.fast_proxy:
                sleep(self.proxy_sleep_time)
        except TimeoutException:
            if debug:
                print("time out")
            self.driver2.get(addr)
            sleep(self.proxy_sleep_time)
            source = self.driver2.find_element_by_class_name(class_name)
            if not self.fast_proxy:
                sleep(self.proxy_sleep_time)
        except NoSuchElementException:
            if self.fast_proxy:
                return None
            if debug:
                print("no element")
            self.driver2.get(addr)
            sleep(self.proxy_sleep_time)
            try:
                source = self.driver2.find_element_by_class_name(class_name)
                sleep(self.proxy_sleep_time)
            except NoSuchElementException:
                if debug:
                    print("actually no element")
                return None
        return source

    def _links_via_proxy(self, source, timeout_note=None):
        """Return the ``<a>`` elements under *source*, or None on timeout.

        Previously a timeout left the result variable unbound and the
        caller failed with UnboundLocalError a few lines later.
        """
        try:
            links = source.find_elements_by_css_selector('a')
            if not self.fast_proxy:
                sleep(self.proxy_sleep_time)
        except TimeoutException:
            if debug and timeout_note:
                print(timeout_note)
            sleep(self.proxy_sleep_time)
            return None
        return links

    @staticmethod
    def _debug_links(links):
        """Print the list of link elements, then each link's text and href."""
        print(links)
        for i in links:
            print(i.text)
            print(i.get_attribute('href'))

    def find_on_yandere(self):
        """Return the copyright tag text on the current yande.re page, or None."""
        try:
            source = self.driver.find_element_by_class_name(
                'tag-type-copyright')
        except NoSuchElementException:
            if debug:
                print("no source")
            return None
        if debug:
            print(source)
            print(source.text)
        source2 = source.find_elements_by_css_selector('a')
        if debug:
            self._debug_links(source2)
            print(source2[1].text)
        return source2[1].text

    def find_on_sankaku(self, addr):
        """Return the copyright tag text on the sankaku page at *addr*, or None."""
        source = self._find_via_proxy(addr, 'tag-type-copyright')
        if source is None:
            return None
        if debug:
            print(source)
            print(source.text)
        if not self.fast_proxy:
            sleep(self.proxy_sleep_time)
        source2 = self._links_via_proxy(source)
        if source2 is None:
            return None
        if debug:
            self._debug_links(source2)
            print(source2[0].text)
        return source2[0].text

    def find_on_eshuushuu(self):
        """Return the source title on the current e-shuushuu page, or None.

        The title is the second "quicktag" element's text with its first
        and last characters removed. It is returned only when some
        ``<dt>`` on the page contains "Source".
        """
        source = self.driver.find_elements_by_class_name('quicktag')
        check = self.driver.find_elements_by_tag_name('dt')
        if debug:
            for i in source:
                it = i.text
                try:
                    print(it)
                    print(i.get_attribute('span'))
                except UnicodeEncodeError:
                    it = it.encode('ascii', 'ignore')
                    print("bad unicode:", it)
            print(check)
            print("possible source:", source[1].text[1:-1])
        for i in check:
            if debug:
                print(i.text)
            if i.text.find("Source") != -1:
                return source[1].text[1:-1]
        return None

    def find_on_danbooru(self, addr):
        """Return the copyright tag text on the danbooru page at *addr*, or None."""
        source = self._find_via_proxy(addr, 'category-3')
        if source is None:
            return None
        if debug:
            print(source)
        source2 = self._links_via_proxy(source, "time out source 2")
        if source2 is None:
            return None
        if debug:
            self._debug_links(source2)
            print("source:", source2[1].text)
        return source2[1].text

    def find_on_gelbooru(self, addr):
        """Return the copyright tag text on the gelbooru page at *addr*, or None."""
        source = self._find_via_proxy(addr, 'tag-type-copyright')
        if source is None:
            return None
        if debug:
            print(source)
        source2 = self._links_via_proxy(source, "time out source 2")
        if source2 is None:
            return None
        if debug:
            self._debug_links(source2)
            print("source:", source2[1].text)
        return source2[1].text

    def move_image(self, folder_name):
        """Move the current image into *folder_name* (or the unknown folder).

        Args:
            folder_name: Title read from the source page, or None when no
                source was found.
        """
        img = (self.img_name[1:]).encode('ascii', 'ignore')
        src_path = self.folder + self.img_name
        if folder_name is None:
            # Source was not found.
            dest = (self.unknown).encode('ascii', 'ignore')
            try:
                if debug:
                    print("src:", src_path)
                    print("dst:", dest)
                shutil.copy(src_path, self.unknown)
                os.remove(src_path)
                print("image", img, "successfully moved in", dest)
            except Exception:
                print("Error while moving image", img)
        else:
            # Source found: strip characters forbidden in folder names.
            forbidden_symbols = re.findall(r'[*|\:"<>?/]', folder_name)
            for symb in forbidden_symbols:
                if debug:
                    print(symb)
                folder_name = folder_name.replace(symb, "").lower()
            # Report the folder that is actually used, i.e. the name after
            # the forbidden characters were removed.
            dest = (self.dest + '\\' + folder_name).encode('ascii', 'ignore')
            if debug:
                print("new folder name:", folder_name)
                print("writing...")
            if folder_name not in self.titles:
                try:
                    self.f.write(folder_name + '\n')
                    self.titles.append(folder_name)
                except UnicodeEncodeError:
                    pass
            try:
                os.mkdir(folder_name)
            except OSError:
                if debug:
                    print("folder", folder_name, "already exists")
            try:
                shutil.copy(src_path, folder_name)
                os.remove(src_path)
                print("image", img, "successfully moved in", dest)
            except OSError:
                print("Error while moving image", img)
        sleep(self.sleep_time)

    # Site priority.
    def sort_addresses(self, pic_addr):
        """Pick the preferred match among the iqdb result links.

        Args:
            pic_addr: Link elements of the iqdb matches, best match first.

        Returns:
            ``(best_addr, priority)`` where priority is 1 (yande.re),
            2 (e-shuushuu), 3 (danbooru), 4 (sankaku), 5 (gelbooru) or
            6 (no known site).
        """
        variants = self.driver.find_element_by_id(
            'pages').find_elements_by_tag_name('td')
        if debug:
            print("find %")
            for i in variants:
                try:
                    print(i.text)
                except UnicodeEncodeError:
                    new_i = i.text.encode('ascii', 'ignore')
                    print("bad unicode:", new_i)
            for addr in pic_addr:
                addr2 = addr.get_attribute('href')
                print("trying", addr2)
            print("1st variant:", variants[6].text, "len =", len(variants),
                  "len var = ", len(variants[6].text))

        # Cell index of the second match's similarity.
        pos = 9 if len(variants[6].text) == 0 else 10

        priority = 6
        best_addr = pic_addr[0].get_attribute('href')
        site = self._match_site(best_addr)
        if site is not None:
            label, priority = site
            if debug:
                print(label + "[0]")

        if priority > 1:
            for addr in pic_addr[1:]:
                addr2 = addr.get_attribute('href')
                # ``>=``: index len(variants) is already out of range.
                if pos >= len(variants):
                    break
                similarity = int(re.search(r'\d+', variants[pos].text).group())
                if debug:
                    print("similarity =", similarity)
                # if similarity >= 70:
                site = self._match_site(addr2)
                if site is not None:
                    label, rank = site
                    if debug:
                        print(label, priority)
                    if priority > rank:
                        best_addr = addr2
                        priority = rank
                        # Stop searching once one of the two preferred
                        # sites (e-shuushuu, yande.re) has been chosen.
                        if rank <= 2:
                            break
                pos += 4  # next similarity cell
        if debug:
            print("best_addr:", best_addr)
        return best_addr, priority

    def _open_direct(self, url):
        """Load *url* in the direct browser; exit the program on driver errors."""
        try:
            self.driver.get(url)
        except WebDriverException as inst:
            if debug:
                print(inst)
            exit(1)

    def _open_via_proxy(self, url):
        """Load *url* in the proxied browser; a load timeout is tolerated."""
        try:
            self.driver2.get(url)
            sleep(self.proxy_sleep_time)
        except TimeoutException:
            if debug:
                print("time out in if")
            sleep(self.proxy_sleep_time)
        except WebDriverException as inst:
            if debug:
                print(inst)
            exit(1)

    def search_for_source(self, pic_addr):
        """Open the preferred match, read its title and move the image."""
        best_addr, priority = self.sort_addresses(pic_addr)
        folder_name = None
        if debug:
            print("trying", best_addr)
        if priority == 1:
            print("searching on yandere")
            self._open_direct(best_addr)
            folder_name = self.find_on_yandere()
        elif priority == 4:
            print("searching on sankaku")
            self._open_via_proxy(best_addr)
            folder_name = self.find_on_sankaku(best_addr)
        elif priority == 2:
            print("searching on e-shuushuu")
            self._open_direct(best_addr)
            folder_name = self.find_on_eshuushuu()
        elif priority == 3:
            print("searching on danbooru")
            self._open_via_proxy(best_addr)
            folder_name = self.find_on_danbooru(best_addr)
        elif priority == 5:
            print("searching on gelbooru")
            self._open_via_proxy(best_addr)
            folder_name = self.find_on_gelbooru(best_addr)
        if folder_name is None:
            print("No relevant match for", self.img_name[1:])
        self.move_image(folder_name)

    def iqdb_actions(self):
        """Upload every supported image to iqdb.org and sort it."""
        total = len(self.images)
        # enumerate instead of list.index(): index() rescans the list and
        # reports the first position for duplicate names.
        for number, image in enumerate(self.images, start=1):
            print("\nprocessing", number, "of", total)
            self.img_name = '\\' + image
            if debug:
                try:
                    print(self.folder + self.img_name)
                except UnicodeEncodeError:
                    print("bad unicode")
            sleep(self.sleep_time)
            if not image.endswith((".jpg", ".png", ".jpeg")):
                try:
                    print("Unsupported format:", image)
                except UnicodeEncodeError:
                    image = image.encode('ascii', 'ignore')
                    print(image)
            else:
                self.driver.get("http://iqdb.org/")
                # Insert the image.
                element = ui.WebDriverWait(
                    self.driver, self.waiting_time).until(
                        lambda driver: self.driver.find_element_by_id("file"))
                if debug:
                    print(element)
                element.send_keys(self.folder + self.img_name)
                # Submit.
                element = ui.WebDriverWait(
                    self.driver, self.waiting_time).until(
                        lambda driver: self.driver.find_element_by_xpath(
                            "//input[@value='submit']"))
                if debug:
                    print(element)
                try:
                    element.click()
                except TimeoutException:
                    sleep(self.sleep_time)
                sleep(self.sleep_time)
                # Look for the best match.
                try:
                    pic_addr = ui.WebDriverWait(
                        self.driver, self.waiting_time).until(
                            lambda driver: self.driver.
                            find_elements_by_css_selector('.image a'))
                except TimeoutException:
                    print("Image", image, "is too large")
                    self.move_image(None)
                else:
                    if debug:
                        print(pic_addr)
                    matches = ui.WebDriverWait(
                        self.driver, self.waiting_time
                    ).until(lambda driver: self.driver.find_element_by_xpath(
                        '//*[@id="pages"]/div[2]/table/tbody/tr[1]/th'))
                    if debug:
                        print("matches:", matches)
                        print(matches.text)
                    if (matches.text.find("No")) != -1:
                        print(matches.text, "for", image)
                        self.move_image(None)
                    else:
                        self.search_for_source(pic_addr)
            sleep(self.sleep_time)

    def pimp(self):
        """Process all images, then always shut down browsers and the log."""
        if debug:
            print("proxy mode:", self.fast_proxy)
        try:
            self.iqdb_actions()
        except KeyboardInterrupt:
            print("Stop working...")
        finally:
            self.driver.quit()
            self.driver2.quit()
            self.f.close()
            print("Job's done")
import math

from selenium.webdriver.chrome.webdriver import WebDriver

# The link to follow is labelled with ceil(pi ** e * 10000).
string = str(math.ceil(math.pow(math.pi, math.e) * 10000))

con = WebDriver()
con.get("http://suninjuly.github.io/find_link_text")
con.find_element_by_partial_link_text(string).click()

# Each form field is located with a different strategy, then filled in
# the order listed here.
form_fields = (
    (con.find_element_by_tag_name, "input", "Ivan"),
    (con.find_element_by_name, "last_name", "Petrov"),
    (con.find_element_by_class_name, "city", "Smolensk"),
    (con.find_element_by_id, "country", "Russia"),
)
for locate, target, text in form_fields:
    locate(target).send_keys(text)

con.find_element_by_css_selector("button.btn").click()