def login(username, password):
    """Open Instagram in Firefox, submit the login form and hand back
    the live driver so the caller can keep using the session."""
    browser = Firefox(executable_path=gecko_path)
    browser.get("https://instagram.com.br")
    time.sleep(3)  # crude wait for the login form to render
    user_field = browser.find_element(By.XPATH, xpath['username'])
    user_field.send_keys(username)
    pass_field = browser.find_element(By.XPATH, xpath['password'])
    # ENTER appended to the password submits the form in one go.
    pass_field.send_keys(password, Keys.ENTER)
    return browser
class TestNavigation(StaticLiveServerTestCase):
    """Browser-level (Selenium) tests for navigation from the homepage."""

    def setUp(self):
        # Fresh Firefox per test; the implicit wait keeps element lookups
        # from failing on slow renders.
        self.browser = Firefox()
        self.browser.implicitly_wait(10)

    def tearDown(self):
        # NOTE(review): close() only closes the window; quit() would also end
        # the geckodriver process — confirm the leak is acceptable here.
        self.browser.close()

    def test_bad_address_returns_handler404(self):
        """ Test bad address is caught by the handler and redirect to error page """
        print(inspect.currentframe().f_code.co_name)
        # NOTE(review): selenium's get() returns None, so `response` is None
        # and assertTemplateUsed cannot inspect it — this assertion almost
        # certainly does not test what it claims.  A live-server test should
        # assert on the rendered page (e.g. the h1 text below) instead.
        response = self.browser.get('%s%s' % (self.live_server_url, '/test'))
        # message = self.browser.find_element_by_tag_name('h1').text
        self.assertTemplateUsed(response, 'errors/errors.html')

    def test_click_mentions(self):
        """ Test the click on mentions redirect to mentions page """
        print(inspect.currentframe().f_code.co_name)
        # The actual navigation check is currently disabled; the string below
        # is dead code kept as reference.
        """
        self.browser.get(self.live_server_url)
        user_url = self.live_server_url + reverse('home_app:mentions')
        element = self.browser.find_element_by_partial_link_text('mentions')
        self.scroll_shim(self.browser, element)
        actions = ActionChains(self.browser)
        actions.move_to_element(element)
        actions.click(element)
        actions.perform()
        self.assertEquals(self.browser.current_url, user_url)
        """

    def test_click_icon_person_to_user(self):
        """ Test click on the person image redirect to user page """
        print(inspect.currentframe().f_code.co_name)
        self.browser.get(self.live_server_url)
        user_url = self.live_server_url + reverse('user_app:login')
        self.browser.find_element(By.CSS_SELECTOR, ".nav-item img").click()
        # NOTE(review): assertEquals is a deprecated alias of assertEqual.
        self.assertEquals(self.browser.current_url, user_url)

    def scroll_shim(self, passed_in_driver, object):
        # Scroll the target element into view, then back up 120px so a fixed
        # navbar cannot cover it.  NOTE(review): the parameter name `object`
        # shadows the builtin of the same name.
        x = object.location['x']
        y = object.location['y']
        scroll_by_coord = 'window.scrollTo(%s,%s);' % (x, y)
        scroll_nav_out_of_way = 'window.scrollBy(0, -120);'
        passed_in_driver.execute_script(scroll_by_coord)
        passed_in_driver.execute_script(scroll_nav_out_of_way)
def __call__(
        self, driver: webdriver.Firefox
) -> Union[WebElement, Literal[False]]:
    """Custom expected-condition: return the located element once its
    class attribute contains ``self.cls_name``; otherwise return False
    so the surrounding WebDriverWait keeps polling."""
    element = driver.find_element(*self.locator)
    # get_attribute('class') returns None when the element has no class
    # attribute at all; fall back to '' so .split() cannot raise.
    if self.cls_name in (element.get_attribute('class') or '').split():
        return element
    return False
class WhatsAppBot:
    """Sends a fixed message to every contact in ``self.pessoas`` through
    WhatsApp Web, falling back to the contact search when the chat is not
    in the active-conversation list."""

    def __init__(self):
        self.pessoas = ['Jhonnatan']
        self.mensagem = 'Olá mundo, sou um botzinho que está aprendendo a digitar sozinho'
        options = Options()
        # 'eager' loads only the essential parts of the page to speed things up.
        options.page_load_strategy = 'eager'
        # FIX: the original never passed `options` to Firefox, so the eager
        # page-load strategy was silently ignored.
        self.driver = Firefox(executable_path=r'./geckodriver.exe',
                              options=options)

    def enviar_mensagem(self):
        """Open WhatsApp Web and send ``self.mensagem`` to each person."""
        self.driver.get('https://web.whatsapp.com/')
        time.sleep(5)
        for pessoa in self.pessoas:
            try:
                # Try to find the person among the active conversations.
                encontrar_pessoa = self.driver.find_element(
                    By.XPATH, f"//span[@title='{pessoa}']")
                encontrar_pessoa.click()
                caixa_de_mensagem = self.driver.find_element(
                    By.XPATH,
                    '//*[@id="main"]/footer/div[1]/div[2]/div/div[2]')
                caixa_de_mensagem.click()
                time.sleep(3)
                caixa_de_mensagem.send_keys(self.mensagem)
                enviar = self.driver.find_element(
                    By.XPATH,
                    '/html/body/div[1]/div/div/div[4]/div/footer/div[1]/div['
                    '3]/button')
                enviar.click()
            # Was a bare `except:`, which would also swallow KeyboardInterrupt.
            except Exception:
                # Chat not found: search the full contact list instead.
                time.sleep(4)
                nova_conversa = self.driver.find_element(
                    By.XPATH, '//*[@id="side"]/div[1]/div/label/div')
                nova_conversa.click()
                time.sleep(3)
                keyboard.write(f'{pessoa}')
                time.sleep(3)
                achar_pessoa = self.driver.find_element(
                    By.XPATH, f"//span[@title='{pessoa}']")
                achar_pessoa.click()
                caixa_de_mensagem = self.driver.find_element(
                    By.XPATH,
                    '//*[@id="main"]/footer/div[1]/div[2]/div/div[2]')
                caixa_de_mensagem.click()
                time.sleep(3)
                caixa_de_mensagem.send_keys(self.mensagem)
                enviar = self.driver.find_element(
                    By.XPATH, '//span[@data-icon="send"]')
                enviar.click()
            time.sleep(5)  # pause per loop iteration to go easy on the machine
def login1():
    """Log in to the CAS test portal with headless Firefox and return the
    session cookie.

    Returns:
        dict: {'JSESSIONID': value} suitable for ``requests``' ``cookies=``.

    Raises:
        RuntimeError: if no JSESSIONID cookie was issued after login
            (the original code raised a confusing NameError instead).
    """
    opt = FirefoxOptions()
    opt.headless = True  # run Firefox without a visible window
    driver = Firefox(options=opt)
    try:
        driver.get("http://192.168.6.27:6030/passports/login?service=http%3A%2F%2F192.168.6.27%3A6030%2Fportals%2Fcas&tenantCode=cqsh&trial=false")
        driver.find_element(By.ID, "username").send_keys("test")
        driver.find_element(By.ID, "pwd1").send_keys("1")
        driver.find_element(By.CSS_SELECTOR, ".justUse").click()
        time.sleep(5)  # wait for the CAS redirect so the session cookie exists
        # Pull JSESSIONID out of the browser's cookie jar.
        session_cookie = next(
            (c for c in driver.get_cookies() if c['name'] == 'JSESSIONID'),
            None)
    finally:
        # quit() alone is enough; the original close()+quit() was redundant,
        # and the driver now gets released even when login fails.
        driver.quit()
    if session_cookie is None:
        raise RuntimeError('JSESSIONID cookie not found after login')
    return {'JSESSIONID': session_cookie['value']}
def take_screenshot(name):
    """Screenshot the faceit game-card stats for player *name*.

    Saves the cropped image to screenshots/<name>.png.
    """
    driver_options = Options()
    driver_options.add_argument("--headless")
    driver = Firefox(options=driver_options)
    try:
        driver.get(f"https://www.faceit.com/en/players/{name}")
        # We wait for the stats to be loaded.
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located(
                (By.CLASS_NAME, "game-card__info")))
        try:
            # If there are cookies to be accepted we accept them.
            driver.find_element(
                By.XPATH,
                "//*[text()[contains(., 'I understand')]]").click()
        except Exception as err:
            print("No cookie button: " + str(err))
        stats = driver.find_element(By.CLASS_NAME, "game-card__info")
        # makedirs(exist_ok=True) is idempotent and race-free, unlike the
        # original os.listdir() membership check.
        os.makedirs("screenshots", exist_ok=True)
        stats.screenshot(f"screenshots/{name}.png")
    finally:
        # Always release the browser, even when the page never loads —
        # the original leaked the driver on any exception above.
        driver.quit()
    image = Image.open(f"screenshots/{name}.png")
    new_image = image.crop((5, 1, 630, 188))
    new_image.save(f"screenshots/{name}.png")
def login1():
    """Headless CAS login; fetch the portal page and extract its CSRF token.

    Retries the login/scrape cycle up to three times with growing delays.

    Returns:
        tuple: ({'JSESSIONID': ...} or None, csrf token or the last retry
        index when the token was never found).
    """
    print("开始登录")
    opt = FirefoxOptions()
    opt.headless = True  # headless mode, works on Windows and Linux
    driver = Firefox(options=opt)
    cookies = None
    csrf = None
    try:
        for i in range(3):
            time.sleep(i)  # back off a little more on every retry
            driver.get(
                "http://192.168.6.156/passports/login?service=http%3A%2F%2F192.168.6.156%2Fportals%2Fcas&tenantCode=clsh&trial=false"
            )
            driver.find_element(By.ID, "name").send_keys("clshadmin")
            driver.find_element(By.ID, "pwd1").send_keys("1")
            driver.find_element(By.LINK_TEXT, "登录").click()
            wait_seconds = i + 10
            # Pull the session id out of the browser's cookie jar.
            session_cookie = next(
                (c for c in driver.get_cookies() if c['name'] == 'JSESSIONID'),
                None)
            if session_cookie is None:
                # Login did not stick; try again.  (The original would have
                # raised NameError on an undefined `b` here.)
                time.sleep(1)
                continue
            cookies = {'JSESSIONID': session_cookie['value']}
            print("登录成功,cookies", cookies)
            time.sleep(wait_seconds)
            url = 'http://192.168.6.156/portals/clsh'
            rs = requests.get(url=url, cookies=cookies)
            print(rs.status_code, i)
            data = rs.content.decode('utf-8')
            searchObj = re.search("window.csrf = '(.+?)';", data, re.M | re.I)
            if searchObj:
                csrf = searchObj.group(1)
                print("csrf found!!", csrf)
                break
            time.sleep(1)
        if csrf is None:
            # The original guarded this with `if i == 19`, which can never be
            # true for range(3), leaving `csrf` unbound on total failure.
            print("Csrf Not found!!")
            csrf = i
    finally:
        # A single quit() suffices; the original's close()/quit()/close()/quit()
        # sequence raised once the session was already terminated.
        driver.quit()
        print("quit browser")
    return cookies, csrf
class Navegador():
    """Thin wrapper around a Firefox driver that automates liking posts on
    a Facebook target page (locators supplied by the Facebook() helper)."""

    def __init__(self):
        self.__options = Options()
        self.__options.page_load_strategy = "normal"
        self.__driver = Firefox(executable_path=Caminhos().getGecko,
                                options=self.__options)

    def AbreFacebook(self):
        """Open the Facebook landing page."""
        self.__driver.get(Caminhos().getFacebook)

    def FazerLogin(self):
        """Fill in the stored credentials and submit the login form."""
        self.__driver.find_element(By.ID, Facebook().getEmail).send_keys(
            Email().getEmail)
        self.__driver.find_element(By.ID, Facebook().getSenha).send_keys(
            Senha().getSenha)
        try:
            self.__driver.find_element(By.ID, Facebook().getLogin).click()
        # Was a bare `except:`; narrowed so Ctrl-C is not swallowed.
        except Exception:
            # Fallback id used by the alternative login layout.
            self.__driver.find_element(By.ID, Facebook().getLogin2).click()

    def VisitaAlvo(self):
        """Navigate to the target page/profile."""
        self.__driver.get(Caminhos().getAlvo)

    def ListaPublicacoes(self):
        """Return the post elements found on the current page."""
        return self.__driver.find_elements(By.CLASS_NAME, Facebook().getPubs)

    def CurtePublicacoes(self):
        """Click the like button of every listed post."""
        for publicacao in self.ListaPublicacoes():
            publicacao.find_element(By.CLASS_NAME,
                                    Facebook().getCurtir).click()

    def Encerra(self):
        """Close the browser window and drop the driver reference."""
        self.__driver.close()
        self.__driver = None
from selenium.webdriver import Firefox
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.expected_conditions import (
    text_to_be_present_in_element_value)

# Demo: each input only becomes usable once its value reads 'disponível'.
url = 'https://selenium.dunossauro.live/aula_10_d.html'

browser = Firefox()
browser.get(url)

wdw = WebDriverWait(browser, 50)

# Locators for the two form fields.  (The unused h4 locator was removed;
# By.CSS_SELECTOR is literally the string 'css selector', so naming the
# email locator is behaviour-neutral.)
locator_nome = (By.CSS_SELECTOR, 'input[name="nome"]')
locator_email = (By.CSS_SELECTOR, 'input[name="email"]')

# Wait for the name field to signal availability, then fill it.
wdw.until(text_to_be_present_in_element_value(locator_nome, 'disponível'))
browser.find_element(*locator_nome).send_keys('Fausto')

# Same dance for the e-mail field.
wdw.until(text_to_be_present_in_element_value(locator_email, 'disponível'))
browser.find_element(*locator_email).send_keys('*****@*****.**')
def check_exists_by_xpath(by: By, value: str, driv: webdriver.Firefox):
    """Return True when the driver can locate an element with (by, value),
    False when the lookup raises NoSuchElementException."""
    exists = True
    try:
        driv.find_element(by, value)
    except NoSuchElementException:
        exists = False
    return exists
from selenium.webdriver import Firefox
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.expected_conditions import (
    presence_of_element_located)

# Demo: block until the #request element enters the DOM, then fetch it.
url = 'https://selenium.dunossauro.live/aula_10_a.html'

browser = Firefox()
browser.get(url)

wdw = WebDriverWait(browser, 30)
locator = (By.CSS_SELECTOR, '#request')

# Poll (up to 30 s) for the element to appear …
wdw.until(presence_of_element_located(locator))
# … then grab it directly.
browser.find_element(*locator)
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver import Firefox

# Launch Firefox.
driver = Firefox()
# Open the local demo page 01-id.html.
driver.get("file:///C:/Users/ZhangQi/Downloads/seleniumday0401demo/01-id.html")

# Type "zhangsan" into the username box.
# (find_element_by_id was removed in Selenium 4; the script already used the
# By API for the password field, so it is now used consistently throughout.)
unInput = driver.find_element(By.ID, "username")
unInput.send_keys("zhangsan")

# Type "123456" into the password box.
pwInput = driver.find_element(By.ID, "password")
pwInput.send_keys("123456")

# Submit the form.
tjButton = driver.find_element(By.ID, "tijiao")
tjButton.click()

# Pause 4 seconds, then shut the browser down.
sleep(4)
driver.quit()
text2 = text1.value
#print(text2)

# Submit the query form.  The field is re-located before every interaction
# because keystrokes (RETURN in particular) may re-render the page and make
# a cached element stale.  (find_element_by_name was removed in Selenium 4;
# the By API is already used below, so it is used consistently here.)
driver.get(url)
driver.find_element(By.NAME, "seq").clear()
driver.find_element(By.NAME, 'seq').send_keys(text)
driver.find_element(By.NAME, 'seq').send_keys(Keys.RETURN)
driver.find_element(By.NAME, 'seq').send_keys(text2)
driver.find_element(By.NAME, 'B1').click()
#time.sleep(20)

# Wait for the results page, then copy the first result row's first four
# cells into the worksheet.
wait = WebDriverWait(driver, 60)
wait.until(EC.title_is('COMSPA - Results Page'))
table_id = driver.find_element(By.ID, 'scores')
rows = table_id.find_elements(By.TAG_NAME, "tr")
# Fetch the row's cells once instead of re-querying the table four times.
cells = rows[1].find_elements(By.TAG_NAME, "td")
values = [cells[col].text for col in range(4)]
for value in values:
    print(value)  # prints text from the element
print("--------")
for col, value in enumerate(values):
    ws1.write(current_row, col, value)
from selenium.webdriver import Firefox
from selenium.webdriver.common.by import By
from selenium.webdriver.remote.webelement import WebElement


class Link(WebElement):
    """WebElement subclass exposing the href attribute directly."""

    def get_href(self):
        return self.get_attribute('href')


driver = Firefox()
driver.get('http://ya.ru')
element = driver.find_element(By.XPATH, 'id("mail")/a')
# Re-class the found element in place so it gains the get_href helper.
element.__class__ = Link
# Was a Python-2 print statement; the call form works on both 2 and 3.
print(element.get_href())
driver.close()
#navigate landing page try: browser.get( 'https://www.oracle.com/java/technologies/javase/javase-jdk8-downloads.html' ) except: print >> stderr, "Could not reach jdk8 landing page." exit() #select cookie consent iframe try: WebDriverWait(browser, 20).until( EC.presence_of_element_located( (By.XPATH, '//*[@title="TrustArc Cookie Consent Manager"]'))) browser.switch_to.frame( browser.find_element(By.XPATH, '//*[@title="TrustArc Cookie Consent Manager"]')) #click cookie consent button WebDriverWait(browser, 20).until( EC.element_to_be_clickable( (By.XPATH, "/html/body/div[8]/div[1]/div/div[2]/div[2]/a[1]"))).click() except: print >> stderr, "Cookie consent iframe interaction failed." exit() #click linux x64 button try: browser.switch_to.default_content() browser.execute_script("window.scrollTo(0, 873)") actions.move_to_element(
path = './scraper/bin/geckodriver' web_scrapper = Firefox(executable_path=path, options=options) ''' Ahora que tenemos una instancia del navegador, hemos de enviar un dominio para realizar nuestra busqueda, para este caso; usaremos la imagenes de google como ejemplo ''' web_scrapper.get('https://github.com/') ''' El parametro de busqueda "query" se refiere a la barra de busqueda que vamos a usar, ya que el dominio que usamos es "simple", solo necesitamos seleccionar una input ''' query = web_scrapper.find_element(By.CSS_SELECTOR, 'input.form-control.header-search-input') ''' Ya que tenemos el buscador de la respectiva pagina, ahora vamos a usar el metodo send_keys() para enviar nuestra busqueda al navegador respectivo, para el caso de ejemplo usaremos la palabra "gatitos", luego al mismo parametro le daremos la instrucción Keys.RETURN para ejecutar la busqueda (Basicamente seria como dar enter) ''' query.send_keys(ar[1]) query.send_keys(Keys.RETURN) ''' Ahora que a hemos ingresado una consulta al buscador falta recoger los datos que esta nos arroje, para ello usaremos una variable llamada "search_res" (Resultado de la busqueda), dado que dependiendo de una serie de variable como la conección a internet, la pagina, los servidores, etc; la consulta que se ingresa puede llegar a tardar un rato, usaremos la la función WebDriverWait (importada como "wait") para dar una espera a la consulta que
class InstagramScraper():
    """Scrapes Instagram profile stats, post metadata (likes, caption,
    commenters, comment text) and follow lists with a Firefox webdriver."""

    def __init__(self):
        options = Options()
        # options.add_argument('-headless')  # headless mode so the window doesnt actually appear (disabled currently)
        caps = DesiredCapabilities().FIREFOX
        caps["pageLoadStrategy"] = "none"  # start scraping before the page finishes loading
        self.all_data = [
        ]  # a collection of all the scraped user data held in a dictionary
        #
        # TODO: The executable_path will have to be reconfigured for the
        # current location in instagram_scrape/webdrivers/geckodriver.exe
        #
        self.driver = Firefox(executable_path='webdrivers/geckodriver.exe',
                              options=options,
                              capabilities=caps)  # driver to access webpages
        self.username = None  # username of the profile currently being scraped

    # log a user in from either input arguments or stored json
    def authenticate(self, username=False, password=False):
        """Log in, reading credentials from authentication/config.json
        when none are passed."""
        LOGIN_URL = 'https://www.instagram.com/accounts/login/'
        # self.driver.switch_to.frame(self.driver.find_element_by_name('spout-unit-iframe'))  # switch iframe code

        # get login details from json if none specified
        if not (username and password):
            with open(r'authentication/config.json') as file:
                user_details = json.load(file)
                username = user_details['username']
                password = user_details['password']

        self.driver.get(LOGIN_URL)  # load up the instagram login page

        # username field
        username_elem = WebDriverWait(self.driver, 15).until(
            EC.presence_of_element_located(
                (By.XPATH, "//input[@name='username']")))
        username_elem.click()
        username_elem.send_keys(username)  # send the keys for the username

        # password field
        password_elem = WebDriverWait(self.driver, 15).until(
            EC.presence_of_element_located(
                (By.XPATH, "//input[@name='password']")))
        password_elem.click()
        password_elem.send_keys(password)  # send the keys for the password

        # click the submit button
        submit_elem = WebDriverWait(self.driver, 10).until(
            EC.presence_of_element_located(
                (By.XPATH, "//button[@type='submit']")))
        submit_elem.click()

        # when launched without cookies Instagram prompts to set things up;
        # this clicks "not now"
        not_now_elem = WebDriverWait(self.driver, 10).until(
            EC.presence_of_element_located(
                (By.XPATH, "/html/body/span/div/div[2]/a[2]")))
        not_now_elem.click()

    # open a user's instagram page
    def open_user(self, username):
        """Navigate to *username*'s profile, archiving any previously
        scraped user's data into self.all_data first."""
        print('opened a new user {}'.format(username))
        # if we have previously scraped data for a user, add it to the
        # total data before moving on
        if self.username is not None:
            self.all_data.append({
                'username': self.username,
                'data': self.user_data
            })
        self.user_data = {
        }  # stores all the current information about the user being scraped

        # direct to the new user's webpage
        self.url_to_user = r'https://www.instagram.com/' + username
        self.driver.get(self.url_to_user)
        self.username = username
        print('loaded user {} , exiting function'.format(self.url_to_user))

    # get profile stats for a user
    def get_user_stats(self):
        """Read post / follower / following counts into self.user_data."""

        # helper to strip punctuation (e.g. thousands separators) from a count
        def remove_punctuation(input_elem):
            return input_elem.text.translate(
                str.maketrans('', '', string.punctuation))

        # XPATHS for post \ follower \ following counts
        posts = remove_punctuation(WebDriverWait(self.driver, 15).until(
            EC.presence_of_element_located((
                By.XPATH,
                "//*[@id='react-root']/section/main/div/header/section/ul/li[1]/span/span"))))
        followers = remove_punctuation(WebDriverWait(self.driver, 15).until(
            EC.presence_of_element_located((
                By.XPATH,
                "//*[@id='react-root']/section/main/div/header/section/ul/li[2]/a/span"))))
        following = remove_punctuation(WebDriverWait(self.driver, 15).until(
            EC.presence_of_element_located((
                By.XPATH,
                "/html/body/span/section/main/div/header/section/ul/li[3]/a/span"))))

        # for debug: print out all the data for the currently loaded user
        print(
            f'the user data:\nposts : {posts}\nfollowers: {followers}\nfollowing: {following}'
        )

        # store data in the main dictionary for later
        self.user_data['post_count'] = int(posts)
        self.user_data['follower_count'] = int(followers)
        self.user_data['following_count'] = int(following)

    def get_image_data(self):
        """Open every post of the current user and collect likes, caption,
        commenters and comment text into self.user_data['posts']."""

        # helper: visit each post url, scrape its metadata, and return the
        # urls that failed so the caller can retry them
        def gather_data_from_url(picture_links):
            def get_likes():
                like_path = "/html/body/span/section/main/div/div/article/div[2]/section[2]/div/div/a/span"
                try:
                    likecount = self.driver.find_element(By.XPATH,
                                                         like_path).text
                except NoSuchElementException:
                    likecount = 0  # post uses views instead of likes
                return likecount

            def get_comments():
                text = self.driver.find_elements(By.XPATH, text_path)
                text = [i.text for i in text]
                return text

            def get_users():
                # get all users that have commented
                users_path = "/html/body/span/section/main/div/div/article/div[2]/div[1]/ul/li/div/div/div/h3/a"
                users = self.driver.find_elements(By.XPATH, users_path)
                users = [i.text for i in users]
                return users

            missed_urls = []
            old_caption = False  # ensures the first run of the while loop stops early

            # open each url and collect metadata from the post
            for url in picture_links:
                try:
                    # url = self.url_to_user + element.get_attribute('href')
                    print(f'url to picture is {url}')
                    self.driver.get(url)
                    text_path = "/html/body/span/section/main/div/div/article/div/div/ul/li/div/div/div/span"
                    # Loop until the freshly loaded page differs from the one
                    # just scraped: with pageLoadStrategy 'none' the previous
                    # DOM may still be visible when polling starts, so the
                    # same page could otherwise be parsed repeatedly.
                    while True:
                        # the page can reload mid-scrape; retry on staleness
                        try:
                            caption_text = WebDriverWait(
                                self.driver, 15).until(
                                    EC.presence_of_element_located(
                                        (By.XPATH, text_path))).text
                        except StaleElementReferenceException:
                            print('Refresh page exception ! ')
                            continue

                        # first post of the run: nothing to compare against
                        if old_caption == False:
                            print('the old caption was false, continuing onward')
                            text = get_comments()
                            users = get_users()
                            likecount = get_likes()
                            break

                        # get all text from the comments
                        print('::::the caption is {} \n'.format(caption_text))
                        print('::::the old caption was {}'.format(old_caption))
                        if caption_text == old_caption:
                            print('!!!captions match, try again')
                            continue
                        else:
                            print('!!!!captions dont match, break to continue')
                            text = get_comments()
                            users = get_users()
                            likecount = get_likes()
                            # captionless posts look identical, so also compare
                            # comment counts and likes before accepting
                            if len(users) == len(
                                    text) and old_caption == '<empty>':
                                if len(
                                        text
                                ) == old_text_len and likecount == old_like_count:
                                    continue
                            break

                    print(
                        f"like count {likecount} usernames {len(users)} text{len(text)}"
                    )
                    print(users)
                    print(text)

                    # This if/else block checks whether there is a caption:
                    # case 1: there is no caption
                    if len(text) == len(users):
                        caption_text = '<empty>'
                        print('!!!!!::::: WARNING: NO CAPTION TEXT FOUND!!!!')
                    # case 2: there is a caption (1 more text than users)
                    elif len(text) + 1 != len(users):
                        caption_text = text.pop(0)
                    # case 3: the lengths don't match how we expect
                    else:
                        raise ValueError(
                            'holy shit they are supposed to be the same length and they are not some bad shit happened'
                        )
                    print('\n\n')

                    parsed_comment_data = []
                    for i in range(len(text)):
                        parsed_comment_data.append([users[i], text[i]])

                    self.user_data['posts'].append({
                        'comments': parsed_comment_data,
                        'like_count': likecount,
                        'caption': caption_text
                    })
                    # remember this post so the next iteration can detect
                    # when the new page has actually loaded
                    old_caption = caption_text
                    old_text_len = len(text)
                    old_like_count = likecount
                except StaleElementReferenceException:
                    missed_urls.append(url)
            return missed_urls

        # try to open the first n images and gather captions
        current_count = 1
        self.user_data['posts'] = []  # {'comments': [], likecount: 0}

        # make sure the first few images are loaded
        WebDriverWait(self.driver, 10).until(
            EC.presence_of_element_located(
                (By.XPATH, '//a[contains(@href, "/p/")]')))
        #'//a[contains(@href, "WO20")]'
        print('here')
        body_elem = self.driver.find_element(By.XPATH, '/html')
        picture_links = set()
        picture_url_xpath = '//a[contains(@href, "/p/")]'

        # push down the user's page while we are still finding more pictures
        while current_count < self.user_data['post_count']:
            print(f'in loop with a picture count of {current_count}')
            # collect the total set of post links currently on the page
            all_picture_link_elements = self.driver.find_elements(
                By.XPATH, picture_url_xpath)
            try:
                picture_links.update({
                    i.get_attribute('href')
                    for i in all_picture_link_elements
                    if i not in picture_links
                })
            except StaleElementReferenceException:
                continue
            current_count = len(picture_links)
            print('total len: {} current len: {}'.format(
                current_count, len(all_picture_link_elements)))
            # page-down must be sent to an element, so scroll via <html>
            for _ in range(3):
                time.sleep(.1)
                print('scroll...')
                body_elem.send_keys(Keys.PAGE_DOWN)

        print(
            f'about to parse through the {len(all_picture_link_elements)} pictures found'
        )
        # retry until every post url has been scraped successfully
        while True:
            picture_links = gather_data_from_url(picture_links)
            if len(picture_links) == 0:
                break

    def clear_data_dict(self):
        """Reset the per-user scratch dictionary."""
        self.user_data = {}

    #
    # THIS FUNCTION NEEDS TO BE FIXED TO FIND FOLLOWING
    #
    def get_user_followers(self):
        """Open the 'following' dialog and scroll until every followed
        username is loaded, storing them in self.user_data['following']."""

        def parse_html_for_usernames(input_html, return_names=False):
            # beautiful soup setup to find all candidate elements with <a> tag
            soup = bs4.BeautifulSoup(input_html, features="html.parser")
            links = soup.find_all('a')
            usernames = []
            for j in links:
                try:
                    j['href']
                    usernames.append(j['title'])
                except Exception:
                    pass
            if return_names == True:
                return usernames
            else:
                return len(usernames)

        following_path = '/html/body/span/section/main/div/header/section/ul/li[3]/a/span'
        user_path = '/html/body/div[2]/div/div[2]/ul/div/li/div/div[1]/div[1]/a'
        header_path = '/html/body/div[2]/div/div[2]'
        # /html/body/div[2]/div/div[2]/ul/div/li[119]/div/div[1]/div[2]/div[1]/a

        following_elem = WebDriverWait(self.driver, 10).until(
            EC.presence_of_element_located((
                By.XPATH, following_path)))
        following_elem.click()
        WebDriverWait(self.driver, 10).until(
            EC.presence_of_element_located((
                By.XPATH, user_path)))
        link_count = 0

        # the header we will page down from
        header = WebDriverWait(self.driver, 10).until(
            EC.presence_of_element_located((
                By.XPATH, header_path)))

        # this ensures we dont start loading suggestions for popular users
        for _ in range(3):
            time.sleep(1)
            header.send_keys(Keys.PAGE_DOWN)
        time.sleep(1.5)

        # keep scrolling until every entry of the follow list has loaded
        while True:
            source = self.driver.page_source
            link_count = parse_html_for_usernames(source)
            print(f'total links found: {link_count}')
            for _ in range(30):
                header.send_keys(Keys.PAGE_DOWN)
            if link_count == self.user_data['following_count']:
                break

        # out of the loop: every username link was found; store the names
        self.user_data['following'] = parse_html_for_usernames(source, True)
class Insta_automate:
    """Instagram automation: login, profile scraping, follower/following
    scraping and a follow/unfollow loop, driven by a (headless) browser."""

    def __init__(self, username, passwd):
        self.username, self.passwd = username, passwd
        self.open_instagram()
        print(its_ok + "Opened instagram")
        self.login()
        print(its_ok + "logged in")

    def open_instagram(self):
        """Start a driver (Chrome on Windows, otherwise Firefox, installing
        geckodriver on demand), open instagram.com and fill the login form."""
        if os.name == "nt":  # if its windows
            try:
                self.brows = Chrome(executable_path=chrome_path,
                                    chrome_options=headless_for_chrome())
            except WebDriverException:
                print(its_not_ok +
                      "Cannot find Chrome binary...\nTrying Firefox")
                self.brows = Firefox(service=Service(firefox_path),
                                     options=headles_for_firefox())
        else:
            try:
                self.brows = Firefox(service=Service(firefox_path),
                                     options=headles_for_firefox())
            except WebDriverException:
                print(its_not_ok + "Cannot find gecko...\nTrying install")
                install_to_os()
                print(
                    its_ok +
                    "Installed Successfully Again do you want to headles or not ?"
                )
                self.brows = Firefox(service=Service(firefox_path),
                                     options=headles_for_firefox())
        self.brows.maximize_window()
        self.brows.implicitly_wait(20)
        self.brows.get("https://www.instagram.com/")
        # NOTE(review): send_keys() returns None, so these two attributes
        # are always None — they only exist for the typing side effect.
        self.find_username = self.brows.find_element(
            By.NAME, "username").send_keys(self.username)
        self.find_passwd = self.brows.find_element(
            By.NAME, "password").send_keys(self.passwd)

    def login(self):
        """Submit the login form and dismiss the 'save login info' prompt."""
        self.login_but = self.brows.find_element(By.XPATH,
                                                 xpaths["login_but"]).click()
        self.dont_save_but = self.brows.find_element(By.CLASS_NAME,
                                                     "cmbtv").click()

    def go_to_my_profile(self):
        # NOTE(review): this uses the module-level `username`, not
        # self.username — confirm that is intentional.
        self.brows.get("https://www.instagram.com/{}/".format(username))
        self.get_profile_info(self.brows.page_source)

    def go_to_user_page(self, name_list):
        """Visit each profile in name_list in turn."""
        for name in name_list:
            self.brows.get("https://www.instagram.com/{}/".format(name))

    def get_followers_and_followings(self, name):
        """Interactively scrape *name*'s follower or following list by
        repeatedly paging the dialog down and collecting usernames."""
        self.brows.get("https://www.instagram.com/{}/".format(
            name))  #go to desired userpage
        print(
            f"Scrape {Fore.MAGENTA}{name}{Fore.RESET}'s follower or following users [Default is 1]"
        )
        print(f"""
 [{Fore.BLUE} {Fore.RESET}] POST : {Fore.GREEN} {self.get_profile_info()["post"]} {Fore.YELLOW}(in maintenance) {Fore.RESET}
 [{Fore.BLUE}1{Fore.RESET}] FOLLOWING : {Fore.GREEN} {self.get_profile_info()["following"]} {Fore.RESET}
 [{Fore.BLUE}2{Fore.RESET}] FOLLOWERS : {Fore.GREEN} {self.get_profile_info()["followers"]} {Fore.RESET}
 """)
        self.which = input(f"\n -->")
        # self.phr selects which <li> in the profile header gets clicked:
        # 3rd = following, 2nd = followers.
        if self.which == "1":
            self.phr = "3"
            self.total_ = self.get_profile_info()["following"]
        elif self.which == "2":
            self.phr = "2"
            self.total_ = self.get_profile_info()["followers"]
        else:
            self.phr = "3"
            self.total_ = self.get_profile_info()["following"]
        self.brows.find_element(By.XPATH,
                                f"//ul/li[{self.phr}]/a/div").click(
                                )  #click followers or followed button
        self.data = []
        # rough number of END-key scrolls needed to load the whole list
        if self.total_ < 24:
            self.total_ = 5
        else:
            self.total_ = round(self.total_ / 3)
        self.start_time = time.time()
        for i in range(self.total_):  #page down
            print(f" Loop : {self.total_}/{Fore.CYAN}{i}",
                  end="\r",
                  flush=True)
            self.brows.find_element(By.CLASS_NAME,
                                    "isgrP").send_keys(Keys.END)
            self.soup = BS(
                self.brows.page_source, "lxml"
            )  #in every loop after page down make new instance for new usernames
            self.all_span = [
                i.text for i in self.soup.find_all("span",
                                                   attrs={"class": "Jv7Aj"})
            ][len(self.data):]
            for name in self.all_span:
                if name not in self.data:
                    self.data.append(name)
            print(
                f"\t[Users Scraped : {Fore.MAGENTA}{len(self.data)} {Fore.RESET}] Sec: {Fore.YELLOW} {round(time.time() - self.start_time)}"
                .expandtabs(30),
                end="\r",
                flush=True)
        print(
            f"\t[Users Scraped : {Fore.MAGENTA}{len(self.data)} {Fore.RESET}] | Sec: {Fore.YELLOW} {round(time.time() - self.start_time)}"
            .expandtabs(30))

    def get_profile_info(self):
        """Parse post/followers/following counts from the current page."""
        self.info = {"post": "", "followers": "", "following": ""}
        self.soup = BS(self.brows.page_source, "lxml")
        for data, key in zip(self.soup.find_all("li"), self.info.keys()):
            checked_str = data.text.split()[0]
            try:
                self.info[key] = int(checked_str)
            except ValueError:
                # counts like '1.2k' go through the text_to_num helper
                self.info[key] = text_to_num(checked_str)
        return self.info

    def follow(self, username):
        """Open *username*'s profile and press the follow button."""
        self.brows.get("https://www.instagram.com/{}/".format(username))
        self.brows.find_element(By.XPATH, xpaths["follow_but"]).click()
        print(its_ok + f"Followed > {Fore.BLUE}{username}")

    def unfollow(self, username):
        """Open *username*'s profile, press unfollow, confirm the dialog."""
        self.brows.get("https://www.instagram.com/{}/".format(username))
        self.brows.find_element(By.XPATH, xpaths["follow_but"]).click()
        self.notfy_but = WebDriverWait(self.brows, 10).until(
            EC.element_to_be_clickable(mark=(By.XPATH,
                                             xpaths["unfollow_notfy"])))
        self.notfy_but.click()
        print(its_ok + f"Unfollowed > {Fore.RED}{username}")

    def hack_with_foll_unfoll(self, user_list: list):
        """Repeatedly follow then unfollow everyone in user_list, with
        user-configurable delays between actions."""
        self.follow_count = default_val("How much loop? [default 8] : ", "8")
        self.delay_unfollow = default_val(
            "Delay for unfollows [second] [default 10 sec] :", "10")
        self.delay_follow = default_val(
            "Delay for follows [second] [default 4 sec] :", "4")
        for i in range(self.follow_count):
            for name in user_list:
                try:
                    self.follow(name)
                except:
                    pass
                self.delay(self.delay_unfollow)
            for name in user_list:
                try:
                    self.unfollow(name)
                except:
                    pass
                self.delay(self.delay_follow)
            print(f"\nLoop : {i+1}/{self.follow_count}")

    def close_tab(self):
        self.brows.close()

    def close_all(self):
        self.brows.quit()

    @staticmethod
    def delay(delay):
        # countdown sleep with an in-place status line
        for i in range(delay, 0, -1):
            time.sleep(1)
            print(f" Waiting for [{Fore.YELLOW}{i}{Fore.RESET}] ",
                  end="\r")
from selenium.webdriver import Firefox
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.expected_conditions import presence_of_element_located
from time import sleep

# Open the demo page, wait for #request to appear, click it, then shut down.
url = 'https://selenium.dunossauro.live/aula_10_a.html'

driver = Firefox()
driver_wait = WebDriverWait(driver, 30)

driver.get(url)
driver.maximize_window()

locator = (By.CSS_SELECTOR, '#request')
# Block (up to 30 s) until the element is in the DOM, then click it.
driver_wait.until(presence_of_element_located(locator))
driver.find_element(*locator).click()

sleep(10)

driver.close()
driver.quit()
def scrape_save(self):
    """Scrape the data of each member.

    Signs in to LinkedIn, then visits every profile URL in self._profiles,
    extracting name, title, location, connections, experience, education
    and certifications, saving after each profile.  The driver is quit in
    the outer finally.  NOTE(review): if the Firefox() constructor itself
    raises, `linkedIn_driver` is unbound when the finally runs — confirm.
    """
    try:
        ## navigate to the linkedIN login page
        linkedIn_driver = Firefox(executable_path='./geckodriver')
        linkedIn_driver.get(self.URL)
        # Sign In...
        try:
            toggle_button = linkedIn_driver.find_element(
                By.CLASS_NAME, 'nav__button-secondary')
            toggle_button.click()
            time.sleep(2)
            ## fill the form
            try:
                ## find the username input
                username = linkedIn_driver.find_element(By.ID, 'username')
                ## find the password input
                password = linkedIn_driver.find_element(By.ID, 'password')
                ## find the sign In button
                sign_in = linkedIn_driver.find_element(
                    By.XPATH, '//button[@type="submit"]')
            except Exception as e:
                print('An error occured, probably one of the inputs was not found. Check error log!')
                print()
                print(e)
            ## no exception was raised, meaning all inputs were found
            else:
                print("Entering your username...")
                username.send_keys(self._email)
                time.sleep(1)
                print('Entering your password...')
                password.send_keys(self._password)
                time.sleep(1)
                print('Signing you in...')
                sign_in.click()
                print('Successfully signed in.')
                time.sleep(5)
                self.counter = 0
                ## start scraping all profiles
                for profile in self._profiles:
                    self._scientist_Id = str(uuid.uuid4())
                    ## navigate to the new profile
                    try:
                        linkedIn_driver.get(profile)
                        print("Scraping {}...".format(profile))
                        # wait for the page to finish loading
                        time.sleep(5)
                        # for redirects
                        if (profile != linkedIn_driver.current_url):
                            linkedIn_driver.get(linkedIn_driver.current_url)
                            time.sleep(5)
                    except Exception as e:
                        print("Unable to navigate to: {}.".format(profile))
                        print()
                        print(e)
                    else:
                        ## Scientist name
                        try:
                            name = linkedIn_driver.find_element(
                                By.CSS_SELECTOR,
                                ".pv-top-card--list li:nth-of-type(1)").text
                        except:
                            self.scientist_firstname, self.scientist_lastname = None, None
                        else:
                            scientist = name.split(" ")
                            # if the scientist has one name
                            if len(scientist) == 1:
                                self.scientist_firstname = scientist[0].strip()
                                self.scientist_lastname = None
                                # NOTE(review): this `return` exits scrape_save
                                # entirely, skipping every remaining profile —
                                # a `continue` looks intended.  Confirm.
                                return
                            self.scientist_firstname = scientist[0].strip()
                            self.scientist_lastname = scientist[1].strip()
                        ## Title
                        try:
                            self.scientist_title = linkedIn_driver.find_element(
                                By.CSS_SELECTOR, ".pb5 h2").text
                        except:
                            print("No title")
                            self.scientist_title = None
                        ## Location
                        try:
                            self.location = linkedIn_driver.find_element(
                                By.CSS_SELECTOR,
                                ".pv-top-card--list-bullet li:nth-of-type(1)").text
                        except:
                            print("No location")
                            self.location = None
                        ## Connections
                        try:
                            number = linkedIn_driver.find_element(
                                By.CSS_SELECTOR,
                                '.pv-top-card--list-bullet li:nth-of-type(2) span').text
                            self.connections = self.extract_connections(number)
                        except Exception as e:
                            print("No connections")
                            print(e)
                            self.connections = None
                        ## Experience
                        try:
                            exp_section = linkedIn_driver.find_element(
                                By.CSS_SELECTOR,
                                ".pv-profile-section.experience-section"
                            ).get_attribute('innerHTML')
                        except:
                            print("No experience")
                            self.all_experiences = None
                        else:
                            experience = BeautifulSoup(exp_section, 'lxml')
                            experience_list = experience.find_all(
                                'li', class_="pv-entity__position-group-pager")
                            self.all_experiences = []
                            for item in experience_list:
                                self.all_experiences.extend(
                                    self.parse_experience(item, self._scientist_Id))
                        ## Education
                        try:
                            edu_section = linkedIn_driver.find_element(
                                By.CSS_SELECTOR,
                                ".education-section").get_attribute('innerHTML')
                        except:
                            print("No education")
                            self.all_education = None
                        else:
                            education = BeautifulSoup(edu_section, 'lxml')
                            education_list = education.find_all('li')
                            self.all_education = []
                            for edu_item in education_list:
                                self.all_education.extend(
                                    self.parse_education(edu_item, self._scientist_Id))
                        ## Certifications
                        try:
                            cert = linkedIn_driver.find_element(
                                By.CSS_SELECTOR,
                                '.certifications-section').get_attribute("innerHTML")
                        except:
                            print("No certifications")
                            self.all_certifications = None
                        else:
                            certifications = BeautifulSoup(cert, 'lxml')
                            cert_list = certifications.find_all('li')
                            self.all_certifications = []
                            for cert_item in cert_list:
                                self.all_certifications.extend(
                                    self.parse_certificate(cert_item, self._scientist_Id))
                        print("Done scraping")
                        ## save the profile
                        self.save_success(profile)
                        ## increase the counter
                        self.counter += 1
                        if (self.all_education and self.all_experiences):
                            ## save attributes
                            self.save()
                        if (self.counter >= 50):
                            print("You've scraped more than {} profiles today. Please quit the scraper".format(self.counter))
                        ## sleep a random interval between profiles
                        sleep = randrange(60, 200, 1)
                        print("Sleeping for approximately {} minutes".format(ceil(sleep / 60)))
                        time.sleep(sleep)
                        print()
        except Exception as e:
            print("Sign In screen probably did not appear. Check error logs!")
            print(e)
    except Exception as e:
        print("Unable to navigate to linkedIn.com. Perhaps you have a bad internet connection. Check error logs!")
        print()
        print(e)
    finally:
        linkedIn_driver.quit()
class eksi_engelle:
    """Automates blocking ("engelleme") of users on eksisozluk via a Firefox webdriver.

    Workflow: log in, collect the users who favourited a given entry, then open
    each user's page and click the "block" and "block their titles" controls.
    Relies on module-level constants URL_LOGIN, URL_ENTRY and URL_SUSER
    (defined elsewhere in this file).
    """

    def __init__(self):
        """Start a headless Firefox instance used by all subsequent actions."""
        super().__init__()
        opts = Options()
        # TODO: keep headless configurable?
        opts.headless = True
        self.browser = Firefox(options = opts)

    def login(self,USERNAME,PASSWORD):
        """Log in with the given credentials.

        Returns True when the login form is gone afterwards (login succeeded),
        False when the form container is still present.
        """
        self.browser.get(URL_LOGIN)
        self.browser.find_element(By.ID,value='username').send_keys(USERNAME)  # username field
        self.browser.find_element(By.ID,value='password').send_keys(PASSWORD)  # password field
        # submit button
        self.browser.find_element(By.CSS_SELECTOR, value='button.btn.btn-primary.btn-lg.btn-block').click()
        # EAFP check: if the login form is still on the page, login failed
        try :
            self.browser.find_element(By.CSS_SELECTOR, value='#login-form-container')
        except NoSuchElementException :
            return True
        else:
            return False

    def popupClick(self):
        # open the account pop-up menu (top-right dropdown)
        self.browser.find_element(By.CSS_SELECTOR, value='#options-dropdown > a:nth-child(1)').click()

    def logout(self):
        """Log out via the account pop-up menu."""
        # open the pop-up menu
        self.popupClick()
        # click the "leave" (logout) entry
        self.browser.find_element(By.CSS_SELECTOR, value='li.separated:nth-child(6) > a:nth-child(1)').click()

    def close(self):
        # close the current browser window/tab only
        self.browser.close()

    def quit(self):
        # quit terminates the whole webdriver session
        self.browser.quit()

    def byEntry(self,value):
        """Open the entry page `value`, collect its favouriting users and block them all."""
        self.browser.get(URL_ENTRY + value)
        self.browser.implicitly_wait(10)
        suserList = self.collectFavs()
        #return print(suserList)
        self.runEngelleToCollectedUsers(suserList)

    def bySuser(self,suser):
        """Open a single user's page and block the user and their titles."""
        self.browser.get(URL_SUSER + suser)
        self.browser.implicitly_wait(10)
        sleep(5)
        self.engelle()
        sleep(5)
        self.bEngelle()

    def collectFavs(self):
        """Return the list of usernames that favourited the currently open entry.

        Spaces in display names are replaced with '-' and the '@' prefix is
        stripped, so the results can be appended to URL_SUSER directly.
        """
        # selenium.common.exceptions.ElementClickInterceptedException:
        # Message: Element <a class="favorite-count toggles">
        # is not clickable at point (491,684)
        # because another element <div class="toast-bottom-content"> obscures it
        # Fix: WebDriverWait added
        WebDriverWait(self.browser, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR,'a.toggles:nth-child(2)'))).click()
        #self.browser.find_element_by_css_selector('a.toggles:nth-child(2)').click()
        # also expand the "caylak" (novice) favourites
        WebDriverWait(self.browser, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR,'#show-caylak-favs-link'))).click()
        # favouriting-user list; text before the first '\n.' holds the usernames
        collection = self.browser.find_element(By.CSS_SELECTOR, value='div.toggles-menu:nth-child(3)')
        susers = collection.text[0:(collection.text.find('\n.'))].replace(' ', '-').replace('@','').split()
        return susers

    def engelle(self):
        """Click the "block" button on the currently open user page.

        Returns 'Blocked' on success, 'NoSuchElementException' when the
        button is not found (e.g. user already blocked).
        """
        sleep(5)
        try:
            self.browser.find_element(By.XPATH, value='//*[@id="blocked-link"]').click()
            self.browser.implicitly_wait(10)
        except NoSuchElementException:
            return 'NoSuchElementException'
        else :
            return 'Blocked'

    def bEngelle(self):
        """Click the "block their titles" button on the currently open user page.

        Returns 'B-Blocked' on success, 'NoSuchElementException' when the
        button is not found.
        """
        sleep(5)
        try:
            self.browser.find_element(By.ID, value='blocked-index-title-link').click()
            self.browser.implicitly_wait(10)
        except NoSuchElementException:
            return 'NoSuchElementException'
        else:
            return 'B-Blocked'

    def runEngelleToCollectedUsers(self,suserCollection):
        """Open each user's page in a new tab and perform both blocking actions.

        Parameters
        ----------
        suserCollection : list
            List of usernames (as produced by ``collectFavs``).
        """
        for suser in suserCollection:
            # open the user's page in a new tab
            new_url = URL_SUSER + suser
            new_tab = "window.open('" + new_url + "\', \'new tab\')"
            self.browser.execute_script(new_tab)
            # switch to the freshly opened tab
            self.browser.switch_to.window(self.browser.window_handles[1])
            # block the user
            self.engelle()
            self.browser.implicitly_wait(10)
            # block the user's titles
            self.bEngelle()
            self.browser.implicitly_wait(10)
            # close the user's tab
            self.close()
            # return to the main tab
            self.browser.switch_to.window(self.browser.window_handles[0])

    def blockedSuserList(self):
        """Navigate to the followed/blocked page and return the blocked-count text."""
        # open the account pop-up menu
        self.popupClick()
        # click the followed/blocked entry
        self.browser.find_element(By.CSS_SELECTOR, value='.open > li:nth-child(5) > a:nth-child(1)').click()
        #WebDriverWait(self.browser, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR,'#show-caylak-favs-link'))).click()
        self.browser.implicitly_wait(10)
        # return the blocked-count paragraph text
        return self.browser.find_element(By.CSS_SELECTOR, value='div.relation-block:nth-child(4) > p:nth-child(2)').text
from selenium.webdriver.common.keys import Keys
import time
import traceback

# Screenshot script: opens the VPS price board and uses the Firefox DevTools
# console (chrome context) to click an element and take a full-page screenshot.
profile = FirefoxProfile()
# Allow autoplay
profile.set_preference("media.autoplay.default", 0)
cap = DesiredCapabilities.FIREFOX
options = Options()
# options.headless = True
driver = Firefox(firefox_profile=profile, capabilities=cap, options=options)
driver.get('https://banggia.vps.com.vn/#PhaiSinh/VN30')
try:
    time.sleep(3)
    # Switch to the browser chrome context so keystrokes reach the
    # DevTools console instead of the page content.
    with driver.context(driver.CONTEXT_CHROME):
        console = driver.find_element(By.ID, "tabbrowser-tabs")
        # Ctrl+Shift+K opens the web console
        console.send_keys(Keys.LEFT_CONTROL + Keys.LEFT_SHIFT + 'k')
        time.sleep(3)
        # Click the first row's link via an XPath query typed into the console
        console.send_keys(
            '$x("/html/body/div[2]/div/div/div[2]/div/div/table/tbody/tr[1]/td[1]/a")[0].click()'
            + Keys.ENTER)
        time.sleep(3)
        # DevTools built-in full-page screenshot command
        console.send_keys(':screenshot --full-page' + Keys.ENTER)
        # close the console again
        console.send_keys(Keys.LEFT_CONTROL + Keys.LEFT_SHIFT + 'k')
except Exception:
    # Best effort: the console interaction is fragile, but don't swallow the
    # failure silently — log it so broken runs are diagnosable. (A bare
    # `except: pass` also ate KeyboardInterrupt/SystemExit.)
    traceback.print_exc()
driver.get_screenshot_as_file("/home/sotola/Desktop/screenshot.png")
#%%
from selenium.webdriver import Firefox
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support.expected_conditions import (
    element_to_be_clickable)

# Demo: wait until a button becomes clickable, click it, then check the
# resulting paragraph text.
url = 'https://selenium.dunossauro.live/aula_10_b.html'

browser = Firefox()
browser.get(url)

wdw = WebDriverWait(browser, 60)
locator = (By.TAG_NAME, 'button')

button = browser.find_element(*locator)

wdw.until(element_to_be_clickable(locator),
          'Elemento é clicavel, espera de 60 seg')

button.click()

# Fix: find_element_by_tag_name was removed in Selenium 4.3; use the
# By-locator API already used elsewhere in this script.
texto = browser.find_element(By.TAG_NAME, 'p').text

assert 'que' in texto

browser.quit()
""" Expected conditions São classes prontas para esperas "comuns" (usuais). Algumas categorias (não-oficiais) de espera: - Existência do elemento: saber se o elemento está na tela, ou existe na tela - Visibilidade do elemento: saber se o elemento está desenhado na tela ou não, e também se ele está ativo ou não - Navegação - Verificação de texto """ from selenium.webdriver import Firefox from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.common.by import By from selenium.webdriver.support.expected_conditions import presence_of_element_located url = 'https://selenium.dunossauro.live/aula_10_a.html' b = Firefox() b.get(url) wdw = WebDriverWait(b, 30) locator = (By.CSS_SELECTOR, '#request') # Existência do elemento wdw.until(presence_of_element_located(locator)) print ('Apareceu o request!') b.find_element(*locator).click()
# Automates DMart login and polls the delivery page for slot availability.
DMART = 'https://dmart.in/'

opts = Options()
browser = Firefox(options=opts)
browser.get(DMART)
action = ActionChains(browser)
print("Dmart Home Page Opened......")

# /html/body/div[19]/div[1]
# Enter the delivery pincode in the modal shown on first load.
init_addr = "<Insert Your Pincode with Local Area>"
# Fix: find_element_by_id was removed in Selenium 4.3; use the By-locator
# API already used for the XPath lookups below.
pin_code_form = browser.find_element(By.ID, 'pinNumberModal')
pin_code_form.send_keys(init_addr)
sleep(2)
pin_code_form.send_keys(Keys.RETURN)
proceed_btn = browser.find_element(By.ID, 'btnPinSubmit')
proceed_btn.click()

# Open the sign-in form and log in.
browser.find_element(By.XPATH, "/html/body/div[1]/header[1]/div[3]/div/div[3]/section/div/div[3]/div/div[2]/ul/li/a[1]").click()
mob_no = browser.find_element(By.XPATH, "/html/body/div[1]/header[1]/div[3]/div/div[3]/section/div/div[3]/div/div[2]/ul/li/div[1]/div/div/form/div[3]/div/input")
mob_no.send_keys("<Enter Mobile Number>")
pwd = browser.find_element(By.XPATH, "/html/body/div[1]/header[1]/div[3]/div/div[3]/section/div/div[3]/div/div[2]/ul/li/div[1]/div/div/form/div[4]/div[1]/div/input")
pwd.send_keys("<Enter Password>")
browser.find_element(By.XPATH, "/html/body/div[1]/header[1]/div[3]/div/div[3]/section/div/div[3]/div/div[2]/ul/li/div[1]/div/div/form/div[7]/button[1]").click()
print("Now Logged in....")
sleep(2)

# Poll the delivery page until a slot is available.
# TODO(review): nothing in this loop sets isAvail = True, so as written it
# never exits — confirm where availability is detected and set the flag there.
isAvail = False
count = 0
while not isAvail:
    count += 1
    # Bounce through google to force a fresh navigation before re-checking.
    browser.get("https://www.google.com/")
    browser.get(DMART)
    sleep(1)
    browser.get("https://dmart.in/delivery?currentAction=Delivery")
from selenium.webdriver import Firefox
from selenium.webdriver.common.by import By

# Minimal demo: open Google, locate the search box by name, then quit.
driver = Firefox()
driver.get('http://www.google.com')

# The search input is named "q" on the Google homepage.
element = driver.find_element(By.NAME, 'q')

driver.quit()
from selenium.webdriver import Firefox
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.expected_conditions import (
    text_to_be_present_in_element
)

# Demo: wait until the <h4> prompt contains 'Digite', then fill in the
# name input.
url = 'https://selenium.dunossauro.live/aula_10_d.html'

browser = Firefox()
browser.get(url)

wdw = WebDriverWait(browser, 50)

# Locators: the heading we watch, and the input we type into.
locator_h4 = (By.TAG_NAME, 'h4')
locator_nome = (By.CSS_SELECTOR, 'input[name="nome"]')

# Block until the prompt text shows up, then type the name.
wdw.until(text_to_be_present_in_element(locator_h4, 'Digite'))
browser.find_element(*locator_nome).send_keys('Fausto')
class SeleniumTestCase(StaticLiveServerTestCase):
    # Fixed port + reusable server thread so the live server URL is stable
    # across test cases.
    port = 8001
    server_thread_class = LiveServerThreadWithReuse
    # NOTE(review): the string below is NOT the class docstring — it follows
    # the attribute assignments, so Python treats it as a bare expression.
    # Move it above `port` to make it a real docstring.
    """Selenium test cases are only run in CI or if configured explicitly"""

    def setUp(self):
        # In CI, run against Sauce Labs; locally, use a headless Firefox.
        if _CI:
            self.driver = self.sauce_chrome_webdriver()
        else:
            options = FirefoxOptions()
            options.add_argument('-headless')
            self.driver = Firefox(options=options)
        self.driver.implicitly_wait(10)

    def sauce_chrome_webdriver(self):
        """Build a Remote webdriver pointed at Sauce Labs for this test method.

        NOTE(review): despite the "chrome" in the name, this configures
        SafariOptions (Safari 14 on macOS 11) — confirm which browser is
        actually intended.
        """
        # Name the Sauce job after the test class and method for traceability.
        class_name = self.__class__.__name__
        method_name = self._testMethodName
        options = SafariOptions()
        options.browser_version = '14'
        options.platform_name = 'macOS 11.00'
        sauce_options = {
            'name': f'{class_name}.{method_name}',
            'build': _SAUCE_BUILD,
            'tunnelIdentifier': _SAUCE_TUNNEL,
            'username': _SAUCE_USER,
            'accessKey': _SAUCE_KEY,
        }
        options.set_capability('sauce:options', sauce_options)
        remote_url = "https://ondemand.us-west-1.saucelabs.com:443/wd/hub"
        return Remote(command_executor=remote_url, options=options)

    def tearDown(self):
        # Always quit the driver; in CI also report pass/fail back to Sauce.
        self.driver.quit()
        if _CI:
            sauce_client = SauceClient(_SAUCE_USER, _SAUCE_KEY)
            # Test passed iff no exception is currently being handled.
            status = (sys.exc_info() == (None, None, None))
            sauce_client.jobs.update_job(job_id=self.driver.session_id,
                                         build=_SAUCE_TUNNEL, passed=status)

    def navigate(self, view_name: str, *args, **kwargs):
        """Drive the browser to the named Django view on the live server."""
        path = reverse(view_name, args=args, kwargs=kwargs)
        self.driver.get(self.live_server_url + path)

    def assert_view(self, view_name: str):
        """Assert the browser's current URL resolves to the given view name."""
        path: str = self.driver.current_url.replace(self.live_server_url, '')
        resolved: ResolverMatch = resolve(path)
        self.assertEqual(resolved.view_name, view_name)

    @contextmanager
    def load(self, timeout=1):
        """Context manager that waits for a page reload.

        Captures the current <html> element; the wait completes once that
        element goes stale (i.e. the page has been replaced).
        """
        page = self.driver.find_element(By.TAG_NAME, 'html')
        yield WebDriverWait(self.driver, timeout).until(staleness_of(page))

    @contextmanager
    def wait(self, timeout=settings.SELENIUM_TIMEOUT):
        """Context manager that waits until the browser URL changes."""
        condition = _UrlHasChanged(self.driver.current_url)
        yield WebDriverWait(self.driver, timeout).until(condition)
from selenium.webdriver import Firefox
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support.expected_conditions import (
    text_to_be_present_in_element,
    text_to_be_present_in_element_value)

# Demo: wait for the <h4> prompt to contain 'Digite', then fill the name
# field on the page.
url = 'https://selenium.dunossauro.live/aula_10_d.html'

browser = Firefox()
browser.get(url)

wdw = WebDriverWait(browser, 60)

# Heading we watch and input we fill.
locator_h4 = (By.TAG_NAME, 'h4')
locator_nome = (By.CSS_SELECTOR, 'input[name="nome"]')

wdw.until(text_to_be_present_in_element(locator_h4, 'Digite'))

browser.find_element(*locator_nome).send_keys('Teste')