Exemplo n.º 1
0
 def _selenium_lang(lang):
     """Start a headless Chrome session with a ``--lang`` switch.

     ``lang`` is interpolated into the ``--lang=<lang>`` command-line switch.
     The chromedriver path is whatever ``CM().install()`` returns.

     Returns the newly created ``webdriver.Chrome`` instance.
     """
     chrome_opts = webdriver.ChromeOptions()
     for switch in ("--headless", f'--lang={lang}'):
         chrome_opts.add_argument(switch)
     driver_path = CM().install()
     return webdriver.Chrome(executable_path=driver_path, options=chrome_opts)
Exemplo n.º 2
0
 def _selenium():
     """Start a headless Chrome session with a 1400x1000 window-size switch.

     The chromedriver path is whatever ``CM().install()`` returns.

     Returns the newly created ``webdriver.Chrome`` instance.
     """
     chrome_opts = webdriver.ChromeOptions()
     for switch in ("--headless", "window-size=1400,1000"):
         chrome_opts.add_argument(switch)
     driver_path = CM().install()
     return webdriver.Chrome(executable_path=driver_path, options=chrome_opts)
def initialize_browser():
    """Create a Chrome WebDriver with console logging turned down.

    Returns the newly created ``webdriver.Chrome`` instance; the chromedriver
    path is whatever ``CM().install()`` returns.
    """
    chrome_opts = webdriver.ChromeOptions()

    # Excluding the 'enable-logging' switch and raising the log level is what
    # keeps the DevTools / Default Adapter failure messages off the console.
    chrome_opts.add_experimental_option('excludeSwitches', ['enable-logging'])
    chrome_opts.add_argument("--log-level=3")

    driver_path = CM().install()
    return webdriver.Chrome(executable_path=driver_path, options=chrome_opts)
Exemplo n.º 4
0
def youtube_login(email, password):
    """Sign in to YouTube through the Google account sign-in page.

    Opens a Chrome session, submits ``email`` and ``password`` on the Google
    sign-in form and then waits (up to 200 seconds) for the YouTube masthead
    avatar button to be present, which this function treats as a successful
    login.

    Args:
        email: Text typed into the Google identifier field.
        password: Text typed into the Google password field.

    Returns:
        The ``webdriver.Chrome`` instance used for the login.

    Raises:
        Exception: Whatever Selenium raises while driving the page (for
            example a wait timing out). The browser is closed before the
            exception propagates, so a failed login does not leave a Chrome
            process running.
    """
    op = webdriver.ChromeOptions()
    # op.binary_location = os.environ.get("GOOGLE_CHROME_BIN")
    # op.add_argument('--headless')
    op.add_argument('--disable-dev-shm-usage')
    # op.add_argument('--no-sandbox')
    op.add_argument('--disable-gpu')
    # op.add_argument("--window-size=1920,1080")
    op.add_argument("--disable-infobars")
    op.add_argument("--log-level=3")
    op.add_argument("--disable-extensions")
    # op.add_argument('--proxy-server=%s' % PROXY)
    # op.add_argument("--proxy-bypass-list=*")
    driver = webdriver.Chrome(options=op, executable_path=CM().install())

    try:
        # NOTE(review): this runs before any page is loaded, so the zoom is
        # presumably lost on navigation - confirm whether it is still wanted.
        driver.execute_script("document.body.style.zoom='80%'")
        driver.get(
            'https://accounts.google.com/signin/v2/identifier?service=youtube&uilel=3&passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Faction_handle_signin%3Dtrue%26app%3Ddesktop%26hl%3Den%26next%3Dhttps%253A%252F%252Fwww.youtube.com%252F&hl=en&ec=65620&flowName=GlifWebSignIn&flowEntry=ServiceLogin'
        )

        print(
            "============================================================================================================="
        )
        print("Google Login")

        # Fill in the e-mail field and move on to the password step.
        email_field = driver.find_element(By.XPATH, '//*[@id="identifierId"]')
        email_field.send_keys(email)
        driver.find_element(By.ID, "identifierNext").click()
        # `stop` is defined outside this block; presumably a fixed pause in
        # seconds - confirm against its definition.
        stop(5)
        print("email - done")

        # Wait for the password input to exist and become clickable; the
        # second wait hands back the element, so no extra lookup is needed.
        find_pass_field = (By.XPATH,
                           '//*[@id="password"]/div[1]/div/div[1]/input')
        WebDriverWait(driver, 50).until(
            EC.presence_of_element_located(find_pass_field))
        pass_field = WebDriverWait(driver, 50).until(
            EC.element_to_be_clickable(find_pass_field))
        pass_field.send_keys(password)
        driver.find_element(By.ID, "passwordNext").click()
        stop(5)
        print("password - done")

        # The avatar button in the YouTube masthead is the success marker.
        WebDriverWait(driver, 200).until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, "ytd-masthead button#avatar-btn")))
    except Exception:
        # The caller never receives the driver on failure, so close the
        # browser here instead of leaking the process.
        driver.quit()
        raise

    print("Successfully login")
    print(
        "============================================================================================================"
    )

    return driver
Exemplo n.º 5
0
def LoadNaukri(headless):
    """Open Chrome and load Naukri.com.

    Args:
        headless: When truthy, Chrome is additionally started with the
            ``--disable-dev-shm-usage`` and ``headless`` switches.

    Returns:
        The ``webdriver.Chrome`` instance, configured with a 3 second
        implicit wait and already navigated to ``NaukriURL``.
    """
    options = webdriver.ChromeOptions()
    options.add_argument("--disable-notifications")
    options.add_argument("--start-maximized")  # ("--kiosk") for MAC
    options.add_argument("--disable-popups")
    options.add_argument("--disable-gpu")
    if headless:
        options.add_argument("--disable-dev-shm-usage")
        options.add_argument("headless")

    # Prefer the chromedriver resolved by CM().install(); if that fails for
    # any ordinary reason, fall back to Selenium's default driver lookup.
    # `Exception` (not a bare except) lets Ctrl+C / SystemExit propagate.
    try:
        driver = webdriver.Chrome(executable_path=CM().install(),
                                  options=options)
    except Exception as err:
        log_msg("ChromeDriverManager setup failed (%s); "
                "falling back to the default chromedriver" % err)
        driver = webdriver.Chrome(options=options)
    log_msg("Google Chrome Launched!")

    driver.implicitly_wait(3)
    driver.get(NaukriURL)
    return driver
Exemplo n.º 6
0
def scrape(username):
    """Log in to Instagram and save a profile's followers to followers.txt.

    Logs in with the externally defined ``USERNAME`` / ``PASSWORD``, opens the
    profile page of ``username``, opens its followers list, presses END
    ``round(TIME)`` times to scroll it, and writes the collected follower
    names (one per line) to ``followers.txt`` in the working directory.

    Args:
        username: Instagram handle whose followers are scraped.

    Returns:
        None. The browser is closed once scraping finishes or fails.
    """
    options = webdriver.ChromeOptions()
    # options.add_argument("--headless")

    # Present a mobile user agent to the site.
    mobile_emulation = {
        "userAgent": 'Mozilla/5.0 (Linux; Android 4.0.3; HTC One X Build/IML74K) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/83.0.1025.133 Mobile Safari/535.19'
    }
    options.add_experimental_option("mobileEmulation", mobile_emulation)

    bot = webdriver.Chrome(executable_path=CM().install(), options=options)

    try:
        bot.get('https://instagram.com/')
        bot.set_window_size(500, 950)
        time.sleep(5)

        # Open the login form from the landing page.
        bot.find_element(
            By.XPATH,
            '/html/body/div[1]/section/main/article/div/div/div/div[2]/button').click()
        print("Logging in...")
        time.sleep(1)
        username_field = bot.find_element(
            By.XPATH,
            '/html/body/div[1]/section/main/article/div/div/div/form/div[1]/div[3]/div/label/input')
        username_field.send_keys(USERNAME)

        # Wait for the password input to exist and become clickable; the
        # second wait returns the element itself.
        find_pass_field = (
            By.XPATH, '/html/body/div[1]/section/main/article/div/div/div/form/div[1]/div[4]/div/label/input')
        WebDriverWait(bot, 50).until(
            EC.presence_of_element_located(find_pass_field))
        pass_field = WebDriverWait(bot, 50).until(
            EC.element_to_be_clickable(find_pass_field))
        pass_field.send_keys(PASSWORD)
        bot.find_element(
            By.XPATH,
            '/html/body/div[1]/section/main/article/div/div/div/form/div[1]/div[6]/button').click()
        time.sleep(5)

        # Use the function argument; the previous code referenced `usr`,
        # a name this function never defines.
        link = 'https://www.instagram.com/{}/'.format(username)
        bot.get(link)
        time.sleep(5)

        # Open the followers list.
        bot.find_element(
            By.XPATH,
            '//*[@id="react-root"]/section/main/div/ul/li[2]/a').click()

        time.sleep(3)
        print('Scrapping...')

        # Scroll the list by pressing END, pausing so more entries can load.
        for _ in range(round(TIME)):
            ActionChains(bot).send_keys(Keys.END).perform()
            time.sleep(3)

        # Collect once after scrolling. Earlier passes were discarded anyway,
        # and `urls` is now defined even when no scroll round runs.
        followers = bot.find_elements(
            By.XPATH,
            '//*[@id="react-root"]/section/main/div/ul/div/li/div/div[1]/div[2]/div[1]/a')
        urls = []
        for anchor in followers:
            href = anchor.get_attribute('href')
            if href is not None:
                urls.append(href)
    finally:
        # The driver is never handed to the caller, so always release it.
        bot.quit()

    print('Converting...')
    # Reduce each profile URL to the bare user name.
    users = [
        url.replace('https://www.instagram.com/', '').replace('/', '')
        for url in urls
    ]

    print('Saving...')
    with open('followers.txt', 'w') as f:
        f.write('\n'.join(users))
Exemplo n.º 7
0
def scrape(username):
    """Log in to Instagram and save follower profile URLs to followers.csv.

    Logs in with the externally defined ``USR_LOGIN`` / ``USR_PASSWORD``,
    opens the profile page of ``username``, opens its followers list, presses
    END ``round(TIME)`` times to scroll it, and writes the collected follower
    profile URLs (one per line) to ``followers.csv`` in the working directory.
    Collection stops once ``int(user_input)`` URLs were gathered;
    ``user_input`` is defined outside this function.

    Args:
        username: Instagram handle whose followers are scraped.

    Returns:
        None. The browser is closed once scraping finishes or fails.
    """
    options = webdriver.ChromeOptions()
    # options.add_argument("--headless")

    # Present a mobile user agent to the site.
    mobile_emulation = {
        "userAgent": 'Mozilla/5.0 (Linux; Android 4.0.3; HTC One X Build/IML74K) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/83.0.1025.133 Mobile Safari/535.19'
    }
    options.add_experimental_option("mobileEmulation", mobile_emulation)

    # The chromedriver path comes from CM().install().
    browser = webdriver.Chrome(executable_path=CM().install(), options=options)

    try:
        browser.get('https://instagram.com/')
        browser.set_window_size(500, 950)
        time.sleep(1)

        # Accept cookies.
        browser.find_element(
            By.XPATH, '/html/body/div[2]/div/div/div/div[2]/button[1]').click()
        time.sleep(1)

        # Open the login form from the landing page.
        browser.find_element(
            By.XPATH,
            '/html/body/div[1]/section/main/article/div/div/div/div[2]/button').click()
        print("Logging in...")
        time.sleep(1)

        # Username input.
        username_field = browser.find_element(
            By.XPATH,
            '/html/body/div[1]/section/main/article/div/div/div/form/div[1]/div[3]/div/label/input')
        username_field.send_keys(USR_LOGIN)

        # Password input: wait until it exists and is clickable; the second
        # wait returns the element itself.
        find_pass_field = (
            By.XPATH, '/html/body/div[1]/section/main/article/div/div/div/form/div[1]/div[4]/div/label/input')
        WebDriverWait(browser, 50).until(
            EC.presence_of_element_located(find_pass_field))
        pass_field = WebDriverWait(browser, 50).until(
            EC.element_to_be_clickable(find_pass_field))
        pass_field.send_keys(USR_PASSWORD)

        # Submit the login form.
        browser.find_element(
            By.XPATH,
            '/html/body/div[1]/section/main/article/div/div/div/form/div[1]/div[6]/button').click()
        time.sleep(2)

        # Use the function argument; the previous code referenced `usr`,
        # a name this function never defines.
        link = 'https://www.instagram.com/{}/'.format(username)
        browser.get(link)
        time.sleep(2)

        # Open the followers list.
        browser.find_element(
            By.XPATH,
            '//*[@id="react-root"]/section/main/div/ul/li[2]/a').click()

        time.sleep(3)
        print('Scrapping...')

        # Scroll the list by pressing END, pausing so more entries can load.
        for _ in range(round(TIME)):
            ActionChains(browser).send_keys(Keys.END).perform()
            time.sleep(2)

        # Collect once after scrolling. Earlier passes were discarded anyway,
        # and `urls` is now defined even when no scroll round runs.
        followers_xpath = '//*[@id="react-root"]/section/main/div/ul/div/li/div/div[1]/div[2]/div[1]/a'
        followers = browser.find_elements(By.XPATH, followers_xpath)

        urls = []
        for anchor in followers:
            href = anchor.get_attribute('href')
            if href is None:
                continue
            urls.append(href)
            # Stop as soon as the requested number of followers is reached.
            if len(urls) == int(user_input):
                break
    finally:
        # The driver is never handed to the caller, so always release it.
        browser.quit()

    print('Converting...')
    # The full profile URLs are stored as-is.
    users = list(urls)

    print('Saving...')
    with open('followers.csv', 'w') as f:
        f.write('\n'.join(users))
Exemplo n.º 8
0
    def __init__(self, username, password, headless=True, instapy_workspace=None, profileDir=None):
        """Set up the Chrome driver, the optional message DB, and log in.

        Args:
            username: Passed to ``self.login``.
            password: Passed to ``self.login``.
            headless: When truthy, Chrome is started with ``--headless``.
            instapy_workspace: Directory that contains ``InstaPy/db/instapy.db``.
                When not ``None`` the database is opened and a ``message``
                table is created if it does not exist yet.
            profileDir: When truthy, Chrome uses ``profiles/<profileDir>`` as
                its user data directory.

        A failing ``self.login`` is logged and printed, not raised.
        """
        import os

        # Locators used by the other methods of this class.
        self.selectors = {
            "accept_cookies": "//button[text()='Accept']",
            "home_to_login_button": "//button[text()='Log In']",
            "username_field": "username",
            "password_field": "password",
            "button_login": "******",
            "login_check": "//*[@aria-label='Home'] | //button[text()='Save Info'] | //button[text()='Not Now']",
            "search_user": "******",
            "select_user": "******",
            "name": "((//div[@aria-labelledby]/div/span//img[@data-testid='user-avatar'])[1]//..//..//..//div[2]/div[2]/div)[1]",
            "next_button": "//button/*[text()='Next']",
            "textarea": "//textarea[@placeholder]",
            "send": "//button[text()='Send']"
        }

        # Selenium config
        options = webdriver.ChromeOptions()

        if profileDir:
            options.add_argument("user-data-dir=profiles/" + profileDir)

        if headless:
            options.add_argument("--headless")

        # Present a mobile user agent and a phone-sized window.
        mobile_emulation = {
            "userAgent": 'Mozilla/5.0 (Linux; Android 4.0.3; HTC One X Build/IML74K) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.133 Mobile Safari/535.19'
        }
        options.add_experimental_option("mobileEmulation", mobile_emulation)

        self.driver = webdriver.Chrome(executable_path=CM().install(), options=options)
        self.driver.set_window_position(0, 0)
        self.driver.set_window_size(414, 736)

        # Instapy init DB
        self.instapy_workspace = instapy_workspace
        self.conn = None
        self.cursor = None
        if self.instapy_workspace is not None:
            # os.path.join copes with a workspace path given with or without
            # a trailing separator; plain concatenation required the slash.
            db_path = os.path.join(self.instapy_workspace,
                                   "InstaPy/db/instapy.db")
            self.conn = sqlite3.connect(db_path)
            self.cursor = self.conn.cursor()

            # Let SQLite perform the existence check instead of querying
            # sqlite_master by hand.
            self.conn.execute("""
                CREATE TABLE IF NOT EXISTS "message" (
                    "username"    TEXT NOT NULL UNIQUE,
                    "message"    TEXT DEFAULT NULL,
                    "sent_message_at"    TIMESTAMP
                );
            """)

        # Best effort: the object is still constructed when login fails.
        try:
            self.login(username, password)
        except Exception as e:
            logging.error(e)
            print(str(e))
Exemplo n.º 9
0
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
from webdriver_manager.chrome import ChromeDriverManager as CM

# Tell the user that the login has to be done by hand.
# NOTE(review): the banner mentions 1 minute, but the pause further down is
# 50 seconds.
print(
    '=====================================================================================================',
    'Heyy, you have to login manully on tiktok, so the bot will wait you 1 minute for loging in manually!',
    '=====================================================================================================',
    sep='\n',
)
time.sleep(8)
print('Running bot now, get ready and login manually...')
time.sleep(4)

# Start Chrome with default options and a fixed window size.
options = webdriver.ChromeOptions()
bot = webdriver.Chrome(options=options,  executable_path=CM().install())
bot.set_window_size(1680, 900)

bot.get('https://www.tiktok.com/login')

# Send Ctrl + '-' twice.
for _ in range(2):
    ActionChains(bot).key_down(Keys.CONTROL).send_keys(
        '-').key_up(Keys.CONTROL).perform()

# Give the user time to log in manually, then open the upload page.
print('Waiting 50s for manual login...')
time.sleep(50)
bot.get('https://www.tiktok.com/upload/?lang=en')
time.sleep(3)


def check_exists_by_xpath(driver, xpath):
    try:
Exemplo n.º 10
0
def scrape():
    """Interactively scrape an Instagram profile's followers to followers.txt.

    Prompts for the target profile and the number of followers wanted, logs
    in with the externally defined ``USERNAME`` / ``PASSWORD``, opens the
    target's followers dialog and scrolls it roughly once per ten requested
    followers. The collected user names are written to ``followers.txt``,
    appending by default or overwriting if the user confirms.

    Returns:
        None. The browser is closed once scraping finishes or fails.

    Raises:
        ValueError: If the follower count entered is not an integer.
    """
    usr = input('Whose followers do you want to scrape: ')

    user_input = int(
        input(
            'How many followers do you want to scrape (60-500 recommended): '))

    options = webdriver.ChromeOptions()
    # options.add_argument("--headless")
    options.add_argument('--no-sandbox')
    options.add_argument("--log-level=3")

    bot = webdriver.Chrome(executable_path=CM().install(), options=options)

    users = set()

    try:
        wait = WebDriverWait(bot, TIMEOUT)

        bot.get('https://www.instagram.com/accounts/login/')

        time.sleep(2)

        print("Logging in...")

        user_element = wait.until(
            EC.presence_of_element_located(
                (By.XPATH, '//*[@id="loginForm"]/div/div[1]/div/label/input')))
        user_element.send_keys(USERNAME)

        pass_element = wait.until(
            EC.presence_of_element_located(
                (By.XPATH, '//*[@id="loginForm"]/div/div[2]/div/label/input')))
        pass_element.send_keys(PASSWORD)

        login_button = wait.until(
            EC.presence_of_element_located(
                (By.XPATH, '//*[@id="loginForm"]/div/div[3]')))

        time.sleep(0.4)

        login_button.click()

        time.sleep(5)

        bot.get('https://www.instagram.com/{}/'.format(usr))

        time.sleep(3.5)

        # Open the followers dialog from the profile header.
        wait.until(
            EC.presence_of_element_located(
                (By.XPATH,
                 '//*[@id="react-root"]/section/main/div/header/section/ul/li[2]/a'
                 ))).click()

        time.sleep(2)

        # First entry of the dialog; clicked before every scroll below.
        followers_elem = wait.until(
            EC.presence_of_element_located(
                (By.XPATH, '/html/body/div[5]/div/div/div[2]/ul/div/li[1]')))

        print('Scraping...')

        # One scroll round per ten requested followers, rounded up so that a
        # request below ten still performs one round.
        scroll_rounds = (user_input + 9) // 10

        for _ in range(scroll_rounds):
            followers_elem.click()

            ActionChains(bot).send_keys(Keys.END).perform()

            time.sleep(2)

            followers = bot.find_elements(
                By.XPATH,
                '/html/body/div[5]/div/div/div[2]/ul/div/li/div/div[1]/div/div/a')

            # The user name is the first path segment of each profile URL.
            for anchor in followers:
                href = anchor.get_attribute('href')
                if href:
                    users.add(href.split("/")[3])
    finally:
        # The driver is never handed to the caller, so always release it.
        bot.quit()

    mode = "a"

    if os.path.exists("followers.txt"):
        choice = input("You already have a file named 'followers.txt'\n"
                       "Do you want to delete it's content? (y/N): ").lower()
        mode = "w" if choice == "y" else mode

    print('Saving...')

    with open('followers.txt', mode) as file:
        file.write('\n'.join(users) + "\n")