def _selenium_lang(lang):
    """Start a headless Chrome session with the ``--lang`` switch set to ``lang``.

    The driver executable path is whatever ``CM().install()`` returns.

    Returns:
        The newly created ``webdriver.Chrome`` instance.
    """
    chrome_opts = webdriver.ChromeOptions()
    for switch in ("--headless", f'--lang={lang}'):
        chrome_opts.add_argument(switch)
    return webdriver.Chrome(executable_path=CM().install(), options=chrome_opts)
def _selenium():
    """Start a headless Chrome session with a 1400x1000 window-size switch.

    The driver executable path is whatever ``CM().install()`` returns.

    Returns:
        The newly created ``webdriver.Chrome`` instance.
    """
    chrome_opts = webdriver.ChromeOptions()
    for switch in ("--headless", "window-size=1400,1000"):
        chrome_opts.add_argument(switch)
    return webdriver.Chrome(executable_path=CM().install(), options=chrome_opts)
def initialize_browser():
    """Create a (non-headless) Chrome browser with console logging turned down.

    Returns:
        The newly created ``webdriver.Chrome`` instance.
    """
    chrome_opts = webdriver.ChromeOptions()
    # Per the original author: these two settings keep the DevTools and
    # Default Adapter failure messages from being shown.
    chrome_opts.add_experimental_option('excludeSwitches', ['enable-logging'])
    chrome_opts.add_argument("--log-level=3")

    # Resolve the chromedriver path, then start Chrome with it.
    driver_path = CM().install()
    return webdriver.Chrome(executable_path=driver_path, options=chrome_opts)
def youtube_login(email, password):
    """Sign in to YouTube through the Google account login form.

    Args:
        email: Account identifier typed into the e-mail field.
        password: Password typed into the password field.

    Returns:
        The ``webdriver.Chrome`` instance, once the YouTube masthead avatar
        button is present (used here as the "logged in" signal).

    Raises:
        Whatever Selenium raises while locating or waiting for page elements
        (for example a timeout). The browser is closed before the exception
        propagates, so a failed login does not leave a Chrome process behind.
    """
    op = webdriver.ChromeOptions()
    op.add_argument('--disable-dev-shm-usage')
    op.add_argument('--disable-gpu')
    op.add_argument("--disable-infobars")
    op.add_argument("--log-level=3")
    op.add_argument("--disable-extensions")

    driver = webdriver.Chrome(options=op, executable_path=CM().install())
    try:
        # NOTE(review): this runs against the initial blank page, before the
        # navigation below, so it presumably does not affect the login page.
        # Left in place to avoid changing page layout; confirm the intent.
        driver.execute_script("document.body.style.zoom='80%'")
        driver.get(
            'https://accounts.google.com/signin/v2/identifier?service=youtube&uilel=3&passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Faction_handle_signin%3Dtrue%26app%3Ddesktop%26hl%3Den%26next%3Dhttps%253A%252F%252Fwww.youtube.com%252F&hl=en&ec=65620&flowName=GlifWebSignIn&flowEntry=ServiceLogin'
        )
        print(
            "============================================================================================================="
        )
        print("Google Login")

        # Fill in the e-mail field and move on to the password step.
        email_field = driver.find_element(By.XPATH, '//*[@id="identifierId"]')
        email_field.send_keys(email)
        driver.find_element(By.ID, "identifierNext").click()
        stop(5)  # helper defined elsewhere; presumably a fixed pause -- confirm
        print("email - done")

        # Wait until the password input is both present and clickable, then
        # type into the element the wait itself returns.
        pass_locator = (By.XPATH, '//*[@id="password"]/div[1]/div/div[1]/input')
        WebDriverWait(driver, 50).until(
            EC.presence_of_element_located(pass_locator))
        pass_field = WebDriverWait(driver, 50).until(
            EC.element_to_be_clickable(pass_locator))
        pass_field.send_keys(password)
        driver.find_element(By.ID, "passwordNext").click()
        stop(5)
        print("password - done")

        WebDriverWait(driver, 200).until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, "ytd-masthead button#avatar-btn")))
        print("Successfully login")
        print(
            "============================================================================================================"
        )
    except BaseException:
        # The caller never receives the driver on failure, so close it here
        # instead of leaking the browser; the error is re-raised unchanged.
        driver.quit()
        raise
    return driver
def LoadNaukri(headless):
    """Open Chrome to load Naukri.com.

    Args:
        headless: When truthy, adds ``--disable-dev-shm-usage`` and the
            ``headless`` switch to the Chrome options.

    Returns:
        The ``webdriver.Chrome`` instance after navigating to ``NaukriURL``
        (a module-level name defined outside this block).
    """
    options = webdriver.ChromeOptions()
    options.add_argument("--disable-notifications")
    options.add_argument("--start-maximized")  # ("--kiosk") for MAC
    options.add_argument("--disable-popups")
    options.add_argument("--disable-gpu")
    if headless:
        options.add_argument("--disable-dev-shm-usage")
        options.add_argument("headless")

    # Use ChromeDriverManager to match the correct chromedriver automatically.
    try:
        driver = webdriver.Chrome(executable_path=CM().install(), options=options)
    except Exception as exc:
        # Best-effort fallback to Selenium's default driver lookup. Only
        # ordinary errors are caught, so Ctrl+C / SystemExit still propagate,
        # and the reason for the fallback is reported instead of being lost.
        log_msg("ChromeDriverManager failed (%s); using default chromedriver" % exc)
        driver = webdriver.Chrome(options=options)

    log_msg("Google Chrome Launched!")
    driver.implicitly_wait(3)
    driver.get(NaukriURL)
    return driver
def scrape(username):
    """Log in to Instagram and save the followers of ``username`` to followers.txt.

    Login credentials are read from the module-level ``USERNAME`` and
    ``PASSWORD``; the number of scroll passes comes from ``TIME``. None of
    those names are defined in this block.

    Args:
        username: Instagram handle whose followers list is opened. The profile
            URL is built from this argument.

    Side effects:
        Overwrites ``followers.txt`` with one follower handle per line. The
        browser is always closed, even when a step fails.
    """
    options = webdriver.ChromeOptions()
    mobile_emulation = {
        "userAgent": 'Mozilla/5.0 (Linux; Android 4.0.3; HTC One X Build/IML74K) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/83.0.1025.133 Mobile Safari/535.19'
    }
    options.add_experimental_option("mobileEmulation", mobile_emulation)

    bot = webdriver.Chrome(executable_path=CM().install(), options=options)
    try:
        bot.get('https://instagram.com/')
        bot.set_window_size(500, 950)
        time.sleep(5)

        # Open the login form.
        bot.find_element(
            By.XPATH,
            '/html/body/div[1]/section/main/article/div/div/div/div[2]/button').click()
        print("Logging in...")
        time.sleep(1)

        username_field = bot.find_element(
            By.XPATH,
            '/html/body/div[1]/section/main/article/div/div/div/form/div[1]/div[3]/div/label/input')
        username_field.send_keys(USERNAME)

        # Wait until the password input is present and clickable, then type
        # into the element the wait returns.
        pass_locator = (
            By.XPATH,
            '/html/body/div[1]/section/main/article/div/div/div/form/div[1]/div[4]/div/label/input')
        WebDriverWait(bot, 50).until(
            EC.presence_of_element_located(pass_locator))
        pass_field = WebDriverWait(bot, 50).until(
            EC.element_to_be_clickable(pass_locator))
        pass_field.send_keys(PASSWORD)

        bot.find_element(
            By.XPATH,
            '/html/body/div[1]/section/main/article/div/div/div/form/div[1]/div[6]/button').click()
        time.sleep(5)

        # Build the profile URL from the parameter (the previous code
        # referenced an undefined name here).
        bot.get('https://www.instagram.com/{}/'.format(username))
        time.sleep(5)

        bot.find_element(
            By.XPATH,
            '//*[@id="react-root"]/section/main/div/ul/li[2]/a').click()
        time.sleep(3)

        print('Scrapping...')
        # Press END repeatedly so the page loads more follower rows.
        for _ in range(round(TIME)):
            ActionChains(bot).send_keys(Keys.END).perform()
            time.sleep(3)

        # Only the final state of the list is used, so read it once after
        # scrolling has finished.
        followers = bot.find_elements(
            By.XPATH,
            '//*[@id="react-root"]/section/main/div/ul/div/li/div/div[1]/div[2]/div[1]/a')
        hrefs = (link.get_attribute('href') for link in followers)
        urls = [href for href in hrefs if href is not None]
    finally:
        bot.quit()

    print('Converting...')
    users = [
        url.replace('https://www.instagram.com/', '').replace('/', '')
        for url in urls
    ]

    print('Saving...')
    with open('followers.txt', 'w') as f:
        f.write('\n'.join(users))
def scrape(username):
    """Log in to Instagram and save follower profile URLs of ``username`` to followers.csv.

    Login credentials are read from the module-level ``USR_LOGIN`` and
    ``USR_PASSWORD``, the number of scroll passes from ``TIME`` and the
    maximum number of followers from ``user_input``. None of those names are
    defined in this block.

    Args:
        username: Instagram handle whose followers list is opened. The profile
            URL is built from this argument.

    Side effects:
        Overwrites ``followers.csv`` with one follower profile URL per line.
        The browser is always closed, even when a step fails.
    """
    options = webdriver.ChromeOptions()
    # Present the browser as a mobile device.
    mobile_emulation = {
        "userAgent": 'Mozilla/5.0 (Linux; Android 4.0.3; HTC One X Build/IML74K) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/83.0.1025.133 Mobile Safari/535.19'
    }
    options.add_experimental_option("mobileEmulation", mobile_emulation)

    # Driver executable is resolved through the driver manager.
    browser = webdriver.Chrome(executable_path=CM().install(), options=options)
    try:
        browser.get('https://instagram.com/')
        browser.set_window_size(500, 950)
        time.sleep(1)

        # Accept cookies.
        browser.find_element(
            By.XPATH, '/html/body/div[2]/div/div/div/div[2]/button[1]').click()
        time.sleep(1)

        # Open the login form.
        browser.find_element(
            By.XPATH,
            '/html/body/div[1]/section/main/article/div/div/div/div[2]/button').click()
        print("Logging in...")
        time.sleep(1)

        # Username input.
        username_field = browser.find_element(
            By.XPATH,
            '/html/body/div[1]/section/main/article/div/div/div/form/div[1]/div[3]/div/label/input')
        username_field.send_keys(USR_LOGIN)

        # Password input: wait until present and clickable, then type into
        # the element the wait returns.
        pass_locator = (
            By.XPATH,
            '/html/body/div[1]/section/main/article/div/div/div/form/div[1]/div[4]/div/label/input')
        WebDriverWait(browser, 50).until(
            EC.presence_of_element_located(pass_locator))
        pass_field = WebDriverWait(browser, 50).until(
            EC.element_to_be_clickable(pass_locator))
        pass_field.send_keys(USR_PASSWORD)

        # Submit the login form.
        browser.find_element(
            By.XPATH,
            '/html/body/div[1]/section/main/article/div/div/div/form/div[1]/div[6]/button').click()
        time.sleep(2)

        # Build the profile URL from the parameter (the previous code
        # referenced an undefined name here).
        browser.get('https://www.instagram.com/{}/'.format(username))
        time.sleep(2)

        # Open the followers list.
        browser.find_element(
            By.XPATH,
            '//*[@id="react-root"]/section/main/div/ul/li[2]/a').click()
        time.sleep(3)

        print('Scrapping...')
        # Press END repeatedly so the page loads more follower rows.
        for _ in range(round(TIME)):
            ActionChains(browser).send_keys(Keys.END).perform()
            time.sleep(2)

        # Only the final state of the list is used, so read it once after
        # scrolling has finished.
        followers_xpath = '//*[@id="react-root"]/section/main/div/ul/div/li/div/div[1]/div[2]/div[1]/a'
        followers = browser.find_elements(By.XPATH, followers_xpath)

        # Collect hrefs until exactly ``limit`` have been gathered. A limit
        # of zero or less never matches, so every href is kept, as before.
        limit = int(user_input)
        urls = []
        for link in followers:
            href = link.get_attribute('href')
            if href is None:
                continue
            urls.append(href)
            if len(urls) == limit:
                break
    finally:
        browser.quit()

    print('Converting...')
    users = list(urls)

    print('Saving...')
    with open('followers.csv', 'w') as f:
        f.write('\n'.join(users))
def __init__(self, username, password, headless=True, instapy_workspace=None, profileDir=None):
    """Start a mobile-emulated Chrome session, prepare the message table and log in.

    Args:
        username: Passed to ``self.login``.
        password: Passed to ``self.login``.
        headless: When truthy, Chrome is started with ``--headless``.
        instapy_workspace: Directory containing ``InstaPy/db/instapy.db``.
            When not ``None``, that SQLite database is opened and a
            ``message`` table is created if it does not exist yet. A trailing
            path separator is optional.
        profileDir: When truthy, Chrome uses ``profiles/<profileDir>`` as its
            user-data directory.

    Login failures are logged and printed, not raised.
    """
    import os  # local import: only needed to build the database path

    self.selectors = {
        "accept_cookies": "//button[text()='Accept']",
        "home_to_login_button": "//button[text()='Log In']",
        "username_field": "username",
        "password_field": "password",
        "button_login": "******",
        "login_check": "//*[@aria-label='Home'] | //button[text()='Save Info'] | //button[text()='Not Now']",
        "search_user": "******",
        "select_user": "******",
        "name": "((//div[@aria-labelledby]/div/span//img[@data-testid='user-avatar'])[1]//..//..//..//div[2]/div[2]/div)[1]",
        "next_button": "//button/*[text()='Next']",
        "textarea": "//textarea[@placeholder]",
        "send": "//button[text()='Send']"
    }

    # Selenium config
    options = webdriver.ChromeOptions()
    if profileDir:
        options.add_argument("user-data-dir=profiles/" + profileDir)
    if headless:
        options.add_argument("--headless")
    mobile_emulation = {
        "userAgent": 'Mozilla/5.0 (Linux; Android 4.0.3; HTC One X Build/IML74K) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.133 Mobile Safari/535.19'
    }
    options.add_experimental_option("mobileEmulation", mobile_emulation)

    self.driver = webdriver.Chrome(executable_path=CM().install(), options=options)
    self.driver.set_window_position(0, 0)
    self.driver.set_window_size(414, 736)

    # Instapy init DB
    self.instapy_workspace = instapy_workspace
    self.conn = None
    self.cursor = None
    if self.instapy_workspace is not None:
        # os.path.join yields the same path as the old string concatenation
        # when the workspace ends with a separator, and a valid one when it
        # does not.
        db_path = os.path.join(self.instapy_workspace, "InstaPy", "db", "instapy.db")
        self.conn = sqlite3.connect(db_path)
        self.cursor = self.conn.cursor()

        # Create the "message" table on first use.
        table_check = self.conn.execute(""" SELECT count(*) FROM sqlite_master WHERE type='table' AND name='message'; """)
        if table_check.fetchone()[0] == 0:
            self.conn.execute(""" CREATE TABLE "message" ( "username" TEXT NOT NULL UNIQUE, "message" TEXT DEFAULT NULL, "sent_message_at" TIMESTAMP ); """)

    try:
        self.login(username, password)
    except Exception as e:
        # Deliberate best-effort: construction succeeds even if login fails.
        logging.error(e)
        print(str(e))
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
from webdriver_manager.chrome import ChromeDriverManager as CM

# Tell the user that the TikTok login has to be done by hand, then pause
# (8 s + 4 s) before the browser is opened.
print('=====================================================================================================')
print('Heyy, you have to login manully on tiktok, so the bot will wait you 1 minute for loging in manually!')
print('=====================================================================================================')
time.sleep(8)
print('Running bot now, get ready and login manually...')
time.sleep(4)

# Start Chrome with default options and open the TikTok login page.
options = webdriver.ChromeOptions()
bot = webdriver.Chrome(options=options, executable_path=CM().install())
bot.set_window_size(1680, 900)
bot.get('https://www.tiktok.com/login')

# Send Ctrl + '-' twice (presumably the browser zoom-out shortcut -- confirm).
ActionChains(bot).key_down(Keys.CONTROL).send_keys(
    '-').key_up(Keys.CONTROL).perform()
ActionChains(bot).key_down(Keys.CONTROL).send_keys(
    '-').key_up(Keys.CONTROL).perform()

# Fixed 50-second pause for the manual login; nothing checks that the login
# actually succeeded before the upload page is opened.
print('Waiting 50s for manual login...')
time.sleep(50)
bot.get('https://www.tiktok.com/upload/?lang=en')
time.sleep(3)


def check_exists_by_xpath(driver, xpath):
    try:
def scrape():
    """Interactively scrape Instagram follower handles into followers.txt.

    Prompts for the target account and for how many followers to scrape, logs
    in with the module-level ``USERNAME`` / ``PASSWORD`` (waits use the
    module-level ``TIMEOUT``), scrolls the followers dialog and collects the
    handles found in it. If ``followers.txt`` already exists the user is asked
    whether to overwrite it; otherwise, or on any answer other than "y", the
    handles are appended.

    Raises:
        ValueError: If the follower count entered is not an integer.

    The browser is always closed once scraping has finished or failed.
    """
    usr = input('Whose followers do you want to scrape: ')
    user_input = int(
        input(
            'How many followers do you want to scrape (60-500 recommended): '))

    options = webdriver.ChromeOptions()
    options.add_argument('--no-sandbox')
    options.add_argument("--log-level=3")

    bot = webdriver.Chrome(executable_path=CM().install(), options=options)
    users = set()
    try:
        bot.get('https://www.instagram.com/accounts/login/')
        time.sleep(2)

        print("Logging in...")
        user_element = WebDriverWait(bot, TIMEOUT).until(
            EC.presence_of_element_located(
                (By.XPATH, '//*[@id="loginForm"]/div/div[1]/div/label/input')))
        user_element.send_keys(USERNAME)

        pass_element = WebDriverWait(bot, TIMEOUT).until(
            EC.presence_of_element_located(
                (By.XPATH, '//*[@id="loginForm"]/div/div[2]/div/label/input')))
        pass_element.send_keys(PASSWORD)

        login_button = WebDriverWait(bot, TIMEOUT).until(
            EC.presence_of_element_located(
                (By.XPATH, '//*[@id="loginForm"]/div/div[3]')))
        time.sleep(0.4)
        login_button.click()
        time.sleep(5)

        bot.get('https://www.instagram.com/{}/'.format(usr))
        time.sleep(3.5)

        # Open the followers dialog from the profile header.
        WebDriverWait(bot, TIMEOUT).until(
            EC.presence_of_element_located(
                (By.XPATH,
                 '//*[@id="react-root"]/section/main/div/header/section/ul/li[2]/a'
                 ))).click()
        time.sleep(2)

        followers_elem = WebDriverWait(bot, TIMEOUT).until(
            EC.presence_of_element_located(
                (By.XPATH, '/html/body/div[5]/div/div/div[2]/ul/div/li[1]')))

        print('Scraping...')
        # One scroll pass per 10 requested followers; handles accumulate in a
        # set across passes, so repeats are dropped.
        for _ in range(user_input // 10):
            followers_elem.click()
            ActionChains(bot).send_keys(Keys.END).perform()
            time.sleep(2)

            followers = bot.find_elements(
                By.XPATH,
                '/html/body/div[5]/div/div/div[2]/ul/div/li/div/div[1]/div/div/a')

            # The handle is the first path segment of the profile URL.
            for link in followers:
                href = link.get_attribute('href')
                if href:
                    users.add(href.split("/")[3])
    finally:
        bot.quit()

    mode = "a"
    if os.path.exists("followers.txt"):
        choice = input("You already have a file named 'followers.txt'\n"
                       "Do you want to delete it's content? (y/N): ").lower()
        mode = "w" if choice == "y" else mode

    print('Saving...')
    with open('followers.txt', mode) as file:
        file.write('\n'.join(users) + "\n")