def __play_at_fastest_speed(browser: webdriver.Firefox):
    js = 'return document.getElementById("movie_player").getAvailablePlaybackRates()'
    playback_rates: list = list(browser.execute_script(js))
    fast_js = 'document.getElementById("movie_player").setPlaybackRate({})' \
        .format(playback_rates[-1])
    browser.execute_script(fast_js)
    log.info("\tPlay at fastest speed {}.".format(playback_rates[-1]))
def get_praise():
    browser = Firefox(
        executable_path=r'C:\Program Files\Mozilla Firefox\geckodriver.exe')

    browser.get("https://www.google.com/search?q=Lenovo IdeaPad 330"
                )  #.format(object_p)

    results = browser.find_elements_by_css_selector('div.g')
    href = []
    for result in results:
        link = result.find_element_by_tag_name("a")
        href.append(link.get_attribute("href"))

    iterator = 0
    print(len(href))
    list_price = []
    for link in href:
        try:
            #time.sleep(5)
            browser.execute_script("window.open('');")
            browser.switch_to.window(
                browser.window_handles[iterator])  # Switch to the new window
            print(link)
            browser.get(link)
            print(browser.find_elements_by_css_selector('div'))
            iterator += 1
        except:
            print("Error: unable to open link in a new window")


    # object_p = input()
    print(list_price)
def recent_post_links(username, post_count=10):
    """
    With the input of an account page, scrape the 10 most recent posts urls

    Args:
    username: Instagram username
    post_count: default of 10, set as many or as few as you want

    Returns:
    A list with the unique url links for the most recent posts for the provided user
    """
    url = "https://www.instagram.com/" + username + "/"
    firefox_options = Options()
    firefox_options.add_argument("--headless")
    browser = Firefox(firefox_options=firefox_options)
    browser.get(url)
    post = 'https://www.instagram.com/p/'
    post_links = []
    while len(post_links) < post_count:
        links = [
            a.get_attribute('href')
            for a in browser.find_elements_by_tag_name('a')
        ]
        for link in links:
            if post in link and link not in post_links:
                post_links.append(link)
        scroll_down = "window.scrollTo(0, document.body.scrollHeight);"
        browser.execute_script(scroll_down)
        time.sleep(10)
    else:
        browser.stop_client()
        return post_links[:post_count]
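
A hedged usage sketch of the function above (the username and count are illustrative, not from the original):

# Hypothetical call: collect the 5 most recent post URLs for a public account.
links = recent_post_links("instagram", post_count=5)
for link in links:
    print(link)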
Example #4
def html2png(tid):
    # print('into html2png')

    url = 'file:///' + path.join(path.dirname(__file__), str(tid), 'cvt.html')
    # "file:///D:/source/ngapost2md/22143850/cvt.html"
    save_fn = path.join(path.dirname(__file__), str(tid), 'cvt.png')

    option = Options()
    option.add_argument('--headless')
    option.add_argument('--disable-gpu')
    option.add_argument("--window-size=800,600")
    option.add_argument("--hide-scrollbars")

    driver = Firefox(executable_path='geckodriver', firefox_options=option)
    # print(url)
    driver.get(url)
    # print(driver.title)

    scroll_width = driver.execute_script('return document.body.parentNode.scrollWidth')
    scroll_height = driver.execute_script('return document.body.parentNode.scrollHeight')
    driver.set_window_size(scroll_width, scroll_height)
    # try:
    driver.save_screenshot(save_fn)
    # except:
    #     print("ERROR")
    driver.quit()
    time.sleep(5)
    os.system("taskkill /F /IM firefox.exe")
Example #5
def load_localstorage(self, session_id):
    sessions = json.load(open(self.sessions_file))
    storage_path = sessions[str(session_id)]["session_path"]
    url = sessions[str(session_id)]["web_url"]
    # Setting useragent to the same one the session saved with
    useragent = sessions[str(session_id)]["useragent"]
    profile = FirefoxProfile()
    profile.set_preference("general.useragent.override", useragent)
    localStorage = pickle.load(open(storage_path, "rb"))
    try:
        browser = Firefox(profile)
    except:
        error("Couldn't open browser to view session!")
        return
    browser.get(url)
    browser.delete_all_cookies()
    browser.execute_script(
        "window.localStorage.clear()")  # clear the current localStorage
    for key, value in localStorage.items():
        browser.execute_script(
            "window.localStorage.setItem(arguments[0], arguments[1]);",
            key, value)
    status(f"Session {session_id} loaded")
    browser.refresh()
    self.browsers.append(browser)
Example #6
def getPage(url):
    driver = Firefox(executable_path='geckodriver', firefox_options=options)
    limit = 2
    driver.set_page_load_timeout(limit)
    driver.set_script_timeout(limit)
    try:
        driver.get(url)
    except:
        pass
    locator = (By.CSS_SELECTOR, 'div.capsule__action--buttons')
    while True:
        try:
            WebDriverWait(driver, 400, 0.5).until(EC.presence_of_element_located(locator))
            result = getPaper(driver.page_source.encode('utf-8'), url)
            driver.execute_script('window.stop()')
            driver.close()
            return result
        except BaseException as e:
            driver.execute_script('window.stop()')
            driver.close()
            sleep(10)
            driver = Firefox(executable_path='geckodriver', firefox_options=options)
            limit = 2
            driver.set_page_load_timeout(limit)
            driver.set_script_timeout(limit)
            print('base', type(e), url)
Example #7
def get_inner_html(driver: webdriver.Firefox):
    """ Get html from inner iframe """
    # poll until the inner iframe reports ready (INNER_FRAME_READY is assumed to be a module-level JS snippet)
    while driver.execute_script(INNER_FRAME_READY) != 0:
        time.sleep(0.5)
    return driver.execute_script(
        "return document.querySelector('iframe#tab-content').contentWindow.document.body.innerHTML"
    )
Example #8
class Posts:
    """Scapes Instagram Posts information and returns them as dynamically created attributes."""

    # handles scraping of information as well as setting them as attributes
    scrape = Scraper()

    # handles creation of DataFrame
    df = Create_DataFrame()

    # checks .env for credentials
    user = CheckEnv()
    password = CheckEnv()

    def __init__(self, term, n=9, user=None, password=None):
        self.user = user
        self.password = password
        self.browser = Firefox()
        self.scrape = {
            "post_urls": self.get_post_urls(term, n),
            "browser": self.browser,
        }
        self.df = self.scrape

    def get_post_urls(self, term, n):
        """
        Retrieves urls of Instagram posts based on hashtag or username
        :param term: hashtag or username
        :param n: number of posts to be scraped
        :return: list of post urls
        """

        # gives appropriate url based on term
        url = user_or_tag(term)

        # logs in if credentials are provided
        if self.user and self.password:
            login(self.browser, self.user, self.password)
        self.browser.get(url)
        post_links = []

        # appends to post url until number of posts specified
        while len(post_links) < n:

            # gathers all links on webpage
            lis = self.browser.find_elements_by_tag_name("a")
            for web_element in lis:
                href = web_element.get_attribute("href")

                # checks if links are Instagram posts and prevent repeat posts
                if "https://www.instagram.com/p/" in href and href not in post_links:
                    post_links.append(href)

            # scrolls down to retrieve more posts
            scroll_down = "window.scrollTo(0, document.body.scrollHeight);"
            self.browser.execute_script(scroll_down)

            # sleeps to prevent being banned
            time.sleep(3)
        posts = post_links[:n]
        return posts
Example #9
def browser(request, config_browser, config_wait_time):
    # Initialize WebDriver
    if config_browser == 'chrome':
        driver = webdriver.Chrome(ChromeDriverManager().install())
    elif config_browser == 'chrome-local':
        options = webdriver.ChromeOptions()
        options.add_argument(
            "--unsafely-treat-insecure-origin-as-secure=http://swarm_nginx,ws://eth-node-nginx:8546,http://eth-node-nginx:8545,http://client-backend-nginx,http://service-cdp:3010,http://client-api:10130,http://core-rpc:3700,http://notary_nginx"
        )

        driver = webdriver.Remote(
            command_executor=SELENIUM_HUB_URL,
            desired_capabilities=options.to_capabilities())
    elif config_browser == 'firefox':
        driver = Firefox()
    else:
        raise Exception(f'"{config_browser}" is not a supported browser')

    # Wait implicitly for elements to be ready before attempting interactions
    driver.implicitly_wait(config_wait_time)
    # open browser fullscreen
    driver.maximize_window()

    # @todo implement video_recording again with docker
    # video_recorder_process = subprocess.Popen(['python3', 'helpers/video_recording.py']).pid

    # Return the driver object at the end of setup
    yield driver
    if request.node.rep_call.failed:
        # Make the screen-shot if test failed:
        try:
            driver.execute_script("document.body.bgColor = 'white';")
            # Attaching video to Allure
            # @todo implement video_recording again with docker
            # screenshot_capturing.terminate(video_recorder_process)
            # _attach_video_to_allure()

            # Attaching screenshot to Allure
            allure.attach(driver.get_screenshot_as_png(),
                          name=request.function.__name__,
                          attachment_type=allure.attachment_type.PNG)
        except:
            pass  # just ignore
    else:
        # @todo implement video_recording again with docker
        # screenshot_capturing.terminate(video_recorder_process)
        if logger.soft_assert_fail:
            logger.soft_assert_fail = False
            allure.attach("Traceback: \n" + logger.logs)
            # @todo implement video_recording again with docker
            # _attach_video_to_allure()
            raise Exception("There is a soft assertion failure")
        logger.logs = ""

    # For cleanup, quit the driver
    def fin():
        driver.close()

    request.addfinalizer(fin)
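
Both this fixture and the shorter variant later in this collection read request.node.rep_call, which only exists if a pytest_runtest_makereport hook stores each report on the test item. A minimal conftest.py sketch of that standard hook, assumed rather than taken from the original project:

# conftest.py -- assumed companion hook: store each test phase's report on the
# item so fixtures can inspect request.node.rep_call.failed during teardown.
import pytest


@pytest.hookimpl(hookwrapper=True)
def pytest_runtest_makereport(item, call):
    outcome = yield
    rep = outcome.get_result()
    # rep.when is "setup", "call", or "teardown"
    setattr(item, "rep_" + rep.when, rep)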
Example #10
def get_signature():
    firefox = Firefox()
    firefox.get('https://www.toutiao.com/ch/news_fashion/')
    ascp = firefox.execute_script('return ascp.getHoney()')
    signature = firefox.execute_script('return TAC.sign(' + str(time.time()) + ')')
    print(ascp)
    print(signature)
def callWebsite():

    try:

        opts = Options()

        opts.set_headless()

        assert opts.headless

        browser = Firefox(options=opts)
        browser.implicitly_wait(3)

        browser.get(
            'https://ebilet.tcddtasimacilik.gov.tr/view/eybis/tnmGenel/tcddWebContent.jsf'
        )

        nereden = browser.find_element_by_id('nereden')
        nereden.clear()
        nereden.send_keys('İstanbul(Halkalı)')

        print(
            browser.find_element_by_xpath(
                "//input[contains(@id,'nereden')]").get_attribute('value'))

        nereye = browser.find_element_by_id('nereye')
        nereye.clear()
        nereye.send_keys('Ankara Gar')

        print(
            browser.find_element_by_xpath(
                "//input[contains(@id,'nereye')]").get_attribute('value'))

        tarih = browser.find_element_by_id('trCalGid_input')
        tarih.clear()
        tarih.send_keys(takipEdilecekTarih)
        print(
            browser.find_element_by_xpath(
                "//input[contains(@id,'trCalGid_input')]").get_attribute(
                    'value'))

        ara = browser.find_element_by_id('btnSeferSorgula')

        browser.execute_script("arguments[0].click()", ara)

        time.sleep(5)

        results = browser.find_element_by_id(
            'mainTabView:gidisSeferTablosu:1:j_idt104:0:somVagonTipiGidis1_label'
        )

        a = results.text.split(" ")[3]
        global number
        number = a[a.find("(") + 1:a.find(")")]
    except:
        callWebsite()

    return int(number)
Example #12
class Downloader:
    """Deprecated 动态页面下载器"""
    def __init__(self, driver="Chrome", load_time=10):
        start_time = time.time()
        if driver == "Chrome":
            self.driver = Chrome()
        else:
            options = Options()
            options.add_argument('-headless')
            self.driver = Firefox(firefox_options=options)
        logging.info("Webdriver init spent " + str(time.time() - start_time) +
                     "s.")
        self.driver.set_page_load_timeout(load_time)  # wait at most load_time seconds for a page to load on get()

    def __enter__(self):
        """在使用with语句时使用,返回值与as后的参数绑定"""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """在with语句完成时,对象销毁前调用"""
        self.driver.quit()

    def download(self, url, after_scroll_time=3):
        """下载一个web页面"""

        start_time = time.time()
        try:
            self.driver.get(url)  # request the page
            # TODO: store a screenshot: screenshot_base64 = self.driver.get_screenshot_as_base64()
        except TimeoutException as e:
            logging.info("Get url:" + url + ", msg: " + e.msg)
            self.driver.execute_script("window.stop()")
        finally:
            load_time = time.time() - start_time
            logging.info("Get url:" + url + " spend " + str(load_time) + "s.")
        js_scroll = """
                    function go_down() {
                        var h = document.documentElement.scrollHeight || document.body.scrollHeight;
                        window.scroll(h, h);
                    }
                    go_down()
                """  # 翻页JS
        self.driver.execute_script(js_scroll)  # 执行翻页
        time.sleep(after_scroll_time)  # 执行了翻页后等待页面加载nS

        current_url = self.driver.current_url
        page_source = self.driver.page_source

        download_item = MainItem()  # initialize the result object
        # fill in the fields we already have
        download_item.request_url = url
        download_item.final_url = current_url
        # download_item.screen_shot = screenshot_base64
        download_item.load_time = load_time
        download_item.html = page_source
        download_item.get_time = int(time.time())  # timestamp

        return download_item
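
Because __enter__ and __exit__ are defined, the class is meant to be driven through a with statement. A minimal usage sketch, assuming the Downloader and MainItem definitions above are importable:

# Hypothetical usage: download one page with headless Firefox and inspect the result.
with Downloader(driver="Firefox", load_time=10) as downloader:
    item = downloader.download("https://example.com", after_scroll_time=3)
    print(item.request_url, item.final_url, item.load_time)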
Example #13
def lms_login(driver: webdriver.Firefox, url=LMS_LOGIN):
    driver.get(url)
    login_input = driver.find_element_by_id("username")
    for c in LOGIN:
        login_input.send_keys(c)
        time.sleep(0.1)
    pass_input = driver.find_element_by_id("password")
    for c in PASSWORD:
        pass_input.send_keys(c)
        time.sleep(0.1)
    login_btn = driver.find_element_by_id("loginbtn")
    login_btn.click()
    time.sleep(1)
    # card = driver.find_element_by_link_text("CSR-1. 12.01.2021_Python Programming For Penetration Testing")
    # card = driver.find_element_by_link_text("CSR-23_ 12.05.2021_ Python Programming For Penetration Testing")
    # card = driver.find_element_by_xpath(xpath="//a/span[contains(text(),'CSR-23_ 12.05.2021_ Python Programming For Penetration Testing')]")
    # card = driver.find_element_by_xpath(xpath="//a/span[@class='foo']")
    # card = card.find_element_by_xpath("..")

    WebDriverWait(driver, 20).until(
        expected_conditions.element_to_be_clickable((
            By.XPATH,
            "//a/span[contains(text(),'CSR-23_ 12.05.2021_ Python Programming For Penetration Testing')]"
        ))).click()

    # time.sleep(1)
    # card.click()

    # print(card.text)
    # a = card.parent
    # print("a", a.tag_name)
    # print("a", a.text)

    time.sleep(1)
    module = driver.find_element_by_id("module-11224")
    # tasks = module.find_element_by_tag_name("a")
    tasks = module.find_element_by_xpath(".//a[1]")
    # tasks.click()
    url = tasks.get_attribute("href")
    print("task url:", url)
    driver.get(url)

    time.sleep(1)
    table = driver.find_element_by_class_name("generaltable")

    # row = driver.find_element_by_partial_link_text(datetime.datetime.today().strftime("%d.%m.%y"))
    row = table.find_element_by_xpath(".//*[contains(text(),'{}')]".format(
        datetime.datetime.today().strftime('%-d.%m.%y')))
    parent_tr = row.find_element_by_xpath('..').find_element_by_xpath("..")
    # x,y = row.location_once_scrolled_into_view
    print(parent_tr.tag_name)
    a = parent_tr.find_element_by_xpath(".//*[@title='Пароль']")
    # a = row.find_element_by_class_name("helptooltip")
    url = a.get_attribute("href")
    driver.get(url)
    # a.click()
    time.sleep(2)
    driver.execute_script("alert('end')")
Example #14
def hide_wayback_machine_bar(driver: webdriver.Firefox,
                             element_id: str = 'wm-ipp-base'):
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, element_id)))
        driver.execute_script('document.body.removeChild'
                              f"(document.getElementById('{element_id}'))")
        return True
    except TimeoutException:
        return False
def run_browser_and_crawl_urls(urls: list, need_scrolling=False, already_downloaded: list = None) -> list:
    """
    Start browser & crawl urls. Parse Sources url and return it.

    :param urls: Urls for crawling
    :param need_scrolling: Scrollings is needed?
    :param already_downloaded: Downloaded url for breakepoints in scrolling
    :return:
    """
    # Function for get sources links from HTML
    def get_sources(brwr) -> list:
        """
        :param brwr: Selenium Driver instance
        :return:
        """
        _sources = []

        for selector, attr in CSS_SELECTORS:
            for e in brwr.find_elements_by_css_selector(selector):
                _sources.append(e.get_attribute(attr))

        return _sources

    browser = Firefox()
    sources = []

    for url in urls:
        browser.get(url)
        page = browser.page_source
        # If scrolling is needed, scroll until no new content loads.
        while need_scrolling:
            browser.execute_script('window.scrollTo(0, 9999999999999)')
            sleep(SCROLL_WAIT_TIME)  # Wait for Async loading

            if already_downloaded:  # breakpoint: stop once an already-downloaded URL appears
                if len(set(already_downloaded) & set(get_sources(browser))) > 0:
                    break

            if page == browser.page_source:  # no new content loaded, stop scrolling
                break

            page = browser.page_source

        sources += get_sources(browser)

    browser.close()
    return sources
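
The function depends on two module-level names that are not shown in this snippet; a purely hypothetical sketch of what they might look like (the selectors and the wait time are assumptions, not the original values):

# Hypothetical module-level configuration assumed by run_browser_and_crawl_urls:
# (CSS selector, attribute) pairs to harvest, and seconds to wait after each scroll.
CSS_SELECTORS = [
    ('img', 'src'),
    ('video source', 'src'),
]
SCROLL_WAIT_TIME = 2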
Example #16
def test_google_search():
    driver = Firefox(executable_path='/home/udtech3574/user/geckodriver')
    driver.get('http://circa.udtech.global/en/')

    driver.find_element_by_xpath(
        './/div/div/a[@class="big Button-sc-1g20tyc-0 dxdNr"]').click()
    driver.find_element_by_xpath(
        './/div/div/a[@class="ClosestLocationBlock__SchedulerButton-c1i0bd-6 gwLpBX '
        'small Button-sc-1g20tyc-0 dxdNr"]').click()
    # def check_result_count(driver):

    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    a = driver.find_elements_by_xpath("//label[contains(text(), '0')]")
    if len(a) < 2:
        print('Expected at least two matching labels, found', len(a))
    else:
        a[1].click()
Example #17
def __call__(self, driver: webdriver.Firefox) -> bool:
    try:
        # intentionally conflating null and undefined with !=
        is_ready = driver.execute_script(f"return {self.obj} != null")
    except JavascriptException:
        return False
    else:
        return is_ready
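
A callable like this plugs straight into WebDriverWait. The snippet only shows __call__, so the wrapper class and the probed object below are assumptions:

from selenium import webdriver
from selenium.common.exceptions import JavascriptException
from selenium.webdriver.support.ui import WebDriverWait


class JsObjectReady:
    """Hypothetical wrapper; only the __call__ body above comes from the snippet."""

    def __init__(self, obj: str):
        self.obj = obj  # JavaScript expression to probe, e.g. "window.jQuery"

    def __call__(self, driver: webdriver.Firefox) -> bool:
        try:
            return bool(driver.execute_script(f"return {self.obj} != null"))
        except JavascriptException:
            return False


# Usage inside code that already has a Firefox driver:
# WebDriverWait(driver, 15).until(JsObjectReady("window.jQuery"))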
Example #18
def checkout(web_driver: webdriver.Firefox, user_selections):
    web_driver.get(cart_url)

    #Starts the checkout process
    web_driver.find_element_by_id(checkout_button_id).click()

    #Verifies and ships using default shipping info
    web_driver.execute_script("CheckShippingAddress()")
    time.sleep(2)
    web_driver.execute_script("ChoiceAddress()")
    time.sleep(2)
    web_driver.find_element_by_id(agree_tos_checkbox_id).click()
    web_driver.find_element_by_id(checkout_continue_button_id).click()
    time.sleep(2)

    #I only support credit card purchases at this time.
    web_driver.find_element_by_id(credit_card_radio_id).click()
    web_driver.find_element_by_id(checkout_continue_button_id).click()

    #Populate credit card info
    web_driver.find_element_by_id(cardholder_name_input_id).send_keys(
        user_selections["cardholder_name"])
    web_driver.find_element_by_id(credit_card_input_id).send_keys(
        user_selections["card_number"])
    web_driver.find_element_by_id(cvv_input_id).send_keys(
        user_selections["security_code"])
    Select(web_driver.find_element_by_id(expiration_month_id)).select_by_value(
        user_selections["expiration_month"])
    Select(web_driver.find_element_by_id(expiration_year_id)).select_by_value(
        user_selections["expiration_year"])

    #Submit and wait for card info to get validated
    web_driver.find_element_by_id(modal_checkout_continue_button_id).click()
    time.sleep(10)

    #Press final agree box
    web_driver.find_element_by_id(agree_tos_checkbox_part_two_id).click()
    time.sleep(2)

    #Complete the order
    web_driver.find_element_by_id(checkout_continue_button_id).click()
    print(
        "we believe we have placed your order. Please verify, and if this worked enjoy your new card :)."
    )
Example #19
def read_url(url):
    driver = Firefox(options=options)
    driver.maximize_window()
    driver.get(url)
    time.sleep(4)
    height = driver.execute_script("return document.body.scrollHeight")
    print(height)

    position = 0

    while position < height:
        driver.execute_script(f"window.scrollTo(0, {position});")
        delta = random.randint(50, 500)
        position += delta
        duration = delta // 20
        # print(height, position, delta, duration)
        time.sleep(duration)

    driver.close()
Example #20
def get_localstorage(driver: webdriver.Firefox) -> Dict[str, str]:
    """ Load all data from localstorage """
    return driver.execute_script(r"""var ls = window.localStorage;
    var items = {};
    var key;
    for (var i = 0; i < ls.length; i += 1) {
        key = ls.key(i)
        items[key] = ls.getItem(key);
    }
    return items;""")
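
A hedged sketch of how this helper pairs with the pickle-based session restore shown in load_localstorage earlier in this collection (the file path is illustrative):

# Hypothetical usage: snapshot a page's localStorage to disk so it can later be
# replayed with window.localStorage.setItem(...), as in load_localstorage earlier.
# Assumes an already-initialized `driver` on the page whose storage you want.
import pickle

storage = get_localstorage(driver)
with open("session_localstorage.pkl", "wb") as fh:  # illustrative path
    pickle.dump(storage, fh)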
Example #21
def screenshot_title(browser: webdriver.Firefox, link: str, directoryname: str, filename: str, id: str):
    """The browser takes a screenshot of the title

    Args:
        link (str): The link to the post's title
        directoryname (str): The directory to save the screenshot in
        filename (str): The name of the file to save as
        id (str): The id of the title to move to on the page

    Returns:
        None
    """
    browser.get(link)
    nsfw_check(browser)

    if id:
        element = browser.find_element_by_id(id)
        browser.execute_script("arguments[0].scrollIntoView(alignToTop=false);", element)

    browser.save_screenshot("{}/pictures/{}.png".format(directoryname, filename))
def __get_player_status(browser: webdriver.Firefox) -> str:
    status_check_list = {
        -1: 'unstarted',
        0: 'ended',
        1: 'playing',
        2: 'paused',
        3: 'buffering',
        5: 'video cued'
    }
    js = 'return document.getElementById("movie_player").getPlayerState()'
    return status_check_list[browser.execute_script(js)]
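
This helper and __play_at_fastest_speed at the top of this collection appear to come from the same YouTube automation module; a hedged sketch of how they might be combined (the polling loop is an assumption, not the original flow):

# Hypothetical flow, assuming an already-initialized `browser` on a YouTube watch page:
# speed the video up, then poll until the player reports it has ended.
import time

__play_at_fastest_speed(browser)
while __get_player_status(browser) != 'ended':
    time.sleep(5)  # re-check the movie_player state every few seconds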
Example #23
def result():
    if request.method == 'POST':
        print(request.form["phoneNumber"])
        phoneNumber=request.form["phoneNumber"]
        driver = Firefox()
        driver.get("https://voip.ms/login")
        email = driver.find_elements_by_xpath('//input[@id="username"]')[1]
        email.send_keys('*****@*****.**')
        password = driver.find_elements_by_xpath('//input[@id="password"]')[1]
        password.send_keys('Developer44$')

        login_btn = driver.find_elements_by_xpath('//input[@class="btn btn-ghost btn-big"]')[1]
        login_btn.click()


        time.sleep(5)

        #wait = WebDriverWait(driver, 10);
        #wait.until(EC.visbtibility_of_element_located((By.XPATH, '//a[contains(@href,"/m/managesubaccount.php")]'))).click()
        btn = driver.find_element_by_link_text("Sub Accounts")
        hover = ActionChains(driver).move_to_element(btn)
        hover.perform()

        subbtn = driver.find_element_by_link_text("Manage Sub Accounts")
        subbtn.click()
        time.sleep(5)
        #element = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, '//a[contains(@href,"editsub.php?action=edit&id=266419")')))
        #element.click()
        el = driver.find_elements_by_class_name("edit-icon")[1]
        el.click()
        time.sleep(5)
        el1 = driver.find_element_by_name("callerid")
        el1.clear()
        el1.send_keys(phoneNumber)

        update = driver.find_element_by_id("button")
        driver.execute_script("arguments[0].click();", update)
        time.sleep(5)
        driver.close()
        return "Success"
Example #24
def load_cookie(self, session_id):
    sessions = json.load(open(self.sessions_file))
    cookie_path = sessions[str(session_id)]["session_path"]
    url = sessions[str(session_id)]["web_url"]
    # Setting useragent to the same one the session saved with
    useragent = sessions[str(session_id)]["useragent"]
    profile = FirefoxProfile()
    profile.set_preference("general.useragent.override", useragent)
    cookies = pickle.load(open(cookie_path, "rb"))
    try:
        browser = Firefox(profile)
    except:
        error("Couldn't open browser to view session!")
        return
    browser.get(url)
    browser.delete_all_cookies()
    browser.execute_script("window.localStorage.clear()")  # clear the current localStorage
    for cookie in cookies:
        browser.add_cookie(cookie)
    status(f"Session {session_id} loaded")
    browser.refresh()
    self.browsers.append(browser)
Example #25
def driver_open_noBS(url):
    from selenium.webdriver import Firefox
    from selenium.webdriver.firefox.options import Options
    options = Options()
    options.add_argument('--headless')
    options.add_argument('--disable-gpu')
    driver = Firefox(executable_path='/usr/local/bin/geckodriver',
                     options=options)
    driver.get(url)
    html1 = driver.page_source
    html2 = driver.execute_script("return document.documentElement.innerHTML;")
    driver.close()
    return html2
def last_5_post(username):
    url = "https://www.instagram.com/" + username
    options = Options()
    options.add_argument('-headless')
    browser = Firefox(executable_path='geckodriver', options=options)
    browser.get(url)
    post = 'https://www.instagram.com/p/'
    link_post_temp = []
    link_post = []
    while len(link_post) < 2:
        links = browser.find_elements_by_xpath("//a[@href]")
        for a in links:
            link_post_temp.append(a.get_attribute("href"))
        for link in link_post_temp:
            if post in link and link not in link_post:
                link_post.append(link)
        scroll_down = "window.scrollTo(0, document.body.scrollHeight);"
        browser.execute_script(scroll_down)
        time.sleep(10)
    else:
        browser.stop_client()
        return link_post[:2]
Example #27
def recent_post_links(username, post_count=10):
    """
    With the input of an account page, scrape the 10 most recent posts urls

    Args:
    username: Instagram username
    post_count: default of 10, set as many or as few as you want

    Returns:
    A list with the unique url links for the most recent posts for the provided user
    """
    url = "https://www.instagram.com/" + username + "/"
    firefox_options = Options()
    firefox_options.add_argument("--headless")
    browser = Firefox(
        executable_path=r'/Users/Akira/Desktop/WebScrapperrrrr/geckodriver.exe',
        firefox_options=firefox_options)
    browser.get(url)
    post = 'https://www.instagram.com/p/'
    post_links = []
    while len(post_links) < post_count:
        b = browser.find_elements_by_xpath(
            '//*[@id="react-root"]/section/main/div/div[3]/article/div[1]/div')
        links = [
            a.get_attribute('href')
            for a in browser.find_elements_by_tag_name('a')
        ]

        for link in links:
            if post in link and link not in post_links:
                post_links.append(link)
        scroll_down = "window.scrollTo(0, document.body.scrollHeight);"
        browser.execute_script(scroll_down)
    else:
        browser.stop_client()
        return post_links[:post_count]

    browser.close()
Example #28
def browser(request, config_browser, config_wait_time):
    # Initialize WebDriver
    if config_browser == 'chrome':
        driver = webdriver.Chrome(ChromeDriverManager().install())
    elif config_browser == 'firefox':
        driver = Firefox()
    else:
        raise Exception(f'"{config_browser}" is not a supported browser')

    # Wait implicitly for elements to be ready before attempting interactions
    driver.implicitly_wait(config_wait_time)
    # open browser fullscreen
    driver.maximize_window()

    # Return the driver object at the end of setup
    yield driver
    if request.node.rep_call.failed:
        # Make the screen-shot if test failed:
        try:
            driver.execute_script("document.body.bgColor = 'white';")
            # Attaching video to Allure
            # @todo implement video_recording again with docker
            # screenshot_capturing.terminate(video_recorder_process)
            # _attach_video_to_allure()

            # Attaching screenshot to Allure
            allure.attach(driver.get_screenshot_as_png(),
                          name=request.function.__name__,
                          attachment_type=allure.attachment_type.PNG)
        except:
            pass  # just ignore

    # For cleanup, quit the driver
    def fin():
        driver.close()

    request.addfinalizer(fin)
Example #29
    def scrape(self):

        # CONNECT THE DATABASE
        connector = sqlite3.connect('games.db')
        cursor = connector.cursor()
        cursor.execute('CREATE TABLE IF NOT EXISTS allGames('
                       ' minute TEXT, home_team TEXT, away_team TEXT,'
                       ' home_score DECIMAL, away_score DECIMAL,'
                       ' home_odd REAL, draw_odd REAL, away_odd REAL)')

        options = FirefoxOptions()
        options.headless = True
        driver = Firefox(options=options,
                         executable_path=r'C:\Windows\geckodriver.exe')
        driver.get(self.WEB_LINKS["football"])
        sleep(2)
        driver.find_element_by_xpath(
            '/html/body/div[1]/div/div[2]/div[1]/div[2]/div/div[1]/div/div[1]/div[1]/i'
        ).click()
        sleep(1)
        html = driver.execute_script(
            'return document.documentElement.outerHTML;')

        soup = BeautifulSoup(html, 'html.parser')
        games = soup.find_all(
            class_=re.compile("sr-match-container sr-border"))
        all_games = [game for game in games]
        driver.close()

        for game in all_games:
            print(str(game))

            # TAKE THE MINUTE
            minute_pattern = r'[n]\"\>([A-z0-9]{1,2})'
            minute_token = re.search(minute_pattern, str(game))
            minute = minute_token.group(1)

            # FIND THE TEAMS
            teams_pattern = r'[e]\=\"(.{1,40})\"\>'
            teams_token = re.findall(teams_pattern, str(game))
            home_team, away_team = teams_token[0], teams_token[1]

            # FIND THE SCORE
            home_score_pattern = r'[e]\"\>(\d{1,2})\<'
            away_score_pattern = r'[y]\"\>(\d{1,2})\<'
            home_score_token = re.search(home_score_pattern, str(game))
            away_score_token = re.search(away_score_pattern, str(game))
            home_score = home_score_token.group(1)
            away_score = away_score_token.group(1)
Example #30
def upload_source(driver: webdriver.Firefox, file: Path):
    """Upload file to Karma"""
    driver.find_elements_by_css_selector("ul.nav > li.dropdown")[0].click()
    short_delay()

    file_input = driver.find_element_by_css_selector("form#fileupload input")
    driver.execute_script(
        '''
arguments[0].style = ""; 
arguments[0].style.display = "block"; 
arguments[0].style.visibility = "visible";''', file_input)
    file_input.send_keys(str(file))

    delay()
    # select file format
    driver.find_element_by_css_selector("#btnSaveFormat").click()

    delay()
    # select #objects to import
    driver.find_element_by_css_selector("#btnSaveOptions").click()

    total_wait_seconds = 0
    while total_wait_seconds < 30:
        total_wait_seconds += delay()

        # check if worksheet is loaded
        try:
            if driver.find_element_by_css_selector(
                    "#WorksheetOptionsDiv a").text.strip() == file.name:
                break
        except NoSuchElementException:
            pass
    else:
        raise Exception("Cannot load worksheet of source: %s" % file.name)

    delay()
Example #31
    def get_selenium_js_html(self, url):
        options = Options()
        options.add_argument('-headless')  # headless mode
        driver = Firefox(executable_path='geckodriver',
                         firefox_options=options)  # executable_path can be omitted if geckodriver is on PATH; otherwise pass an absolute path
        wait = WebDriverWait(driver, timeout=10)
        driver.get(url)
        #browser = webdriver.PhantomJS()
        #browser.get(url)
        time.sleep(3)

        # run JS to get the full rendered page HTML
        html = driver.execute_script(
            "return document.documentElement.outerHTML")
        driver.quit()
        return html
Example #32
def login(username, password):
  """Login into website, return cookies, api and sso token using geckodriver/firefox headless"""

  display = Display(visible=0, size=(800, 600))
  display.start()
#  options = Options()
#  options.add_argument('-headless')
#  driver = Firefox(executable_path='/usr/local/bin/geckodriver', firefox_options=options)
  driver = Firefox()
  wait = WebDriverWait(driver, timeout=10)

  driver.get(url)
  time.sleep(10)

  username_field = driver.find_element_by_name("emailOrPcrNumber")
#  There are multiple entries with the name pin, use the xpath instead even though it is more error prone
#  password_field = driver.find_element_by_name("pin")
  password_field = driver.find_element_by_xpath('/html/body/div[1]/div/div/div[2]/div[1]/div[2]/form/div/div[1]/div[2]/input')

  username_field.clear()
  username_field.send_keys(username)

  password_field.clear()
  password_field.send_keys(password)

  time.sleep(2)
  driver.find_element_by_id("tpiSubmitButton").click()

  time.sleep(3)
  cookies = driver.get_cookies()
  for cookie in cookies:
    if cookie['name'] == 'X-IHG-SSO-TOKEN':
      sso_token = cookie['value']
  api_key = driver.execute_script('return AppConfig.featureToggle.apiKey')

  driver.get('https://apis.ihg.com')
  cookies.extend(driver.get_cookies())
  driver.quit()
  display.stop()
  return api_key, sso_token, cookies
class Hertz():
	def __init__( self ):
		self.driver = Firefox()
		#self.driver.get( 'https://www.hertzequip.com/herc/rental-equipment/aerial-equipment+manlifts+articulating-boom-lift' )
		self.driver.implicitly_wait( 15 )
		
	def perform_search( self, zip_code ):
		self.driver.get( 'https://www.hertzequip.com/herc/rental-equipment/aerial-equipment+manlifts+articulating-boom-lift' )
		#  wait for javascript to load the page before we begin
		self.driver.find_element_by_xpath( '//span[text()="View Rates"]' )
		#  selenium commands can be very unreliable
		#  since Jquery is already loaded, I'll use that
		self.driver.execute_script( """
			$('span:contains("View Rates")')[0].click()
			$('input[id="postalCode"]').val('""" + zip_code + """')
			$('a[class="button_four button_four_skin"]')[0].click()
			""" )
		#  wait a few seconds for page element to disappear
		sleep( 3 )
		#  wait until javascript has loaded the page again
		self.driver.find_element_by_xpath( '//span[text()="View Rates"]' )
			
	def get_rates( self ):
		rates_info_json = self.driver.execute_script( """	
			var xmlhttp = new XMLHttpRequest()
			var url = "https://www.hertzequip.com/herc/rest/rentals/getBuilderEquipmentDetailsItemInfo?catalogType=" + String( catalogType ) + "&categoryId=" + categoryId + "&subcategory1Id=" + subcategory1Id + "&subcategory2Id=" + subcategory2Id + "&_=" + String( new Date().getTime() )
			xmlhttp.open( "GET", url, false )
			xmlhttp.send()
			return xmlhttp.responseText
			""" )
		return rates_info_json
			
	def get_store_info( self ):
		store_info_json = self.driver.execute_script( """
			var xmlhttp = new XMLHttpRequest()
			var url = "https://www.hertzequip.com/herc/rest/rentals/getBuilderJobLocationsItemInfo?_=" + String( new Date().getTime() )
			xmlhttp.open( "GET", url, false )
			xmlhttp.send()
			return xmlhttp.responseText
			""" )
		return store_info_json
		
	def perform_search_get_data( self, zip_code ):
		self.perform_search( zip_code )
		location_data = json.loads( self.get_store_info() )
		equipment_rates_data = json.loads( self.get_rates() )
		specific_data = OrderedDict()
		specific_data['branch_zip_code'] = location_data['data']['jobLocation']['branchLocation']['zip']
		specific_data['branch_id'] = location_data['data']['jobLocation']['branchLocation']['branch']
		specific_data['equipment_list'] = []
		for equipment in equipment_rates_data['data']['equipmentInfo']['m_list']:
			equipment_data = OrderedDict()
			equipment_data['specs'] = [ equipment['spec1'], equipment['spec2'] ]
			equipment_data['makes & models'] = equipment['makesModels']['m_list']
			equipment_data['rates'] = equipment['ratesInfo']
			specific_data['equipment_list'].append( equipment_data )
		return { 
			'location_data' : location_data,
			'equipment_rates_data' : equipment_rates_data,
			'specific_data' : specific_data
			}
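
A hedged usage sketch of the scraper class above (the ZIP code is illustrative):

# Hypothetical driver code for the Hertz scraper defined above.
import json

hertz = Hertz()
data = hertz.perform_search_get_data('30301')  # illustrative ZIP code
print(json.dumps(data['specific_data'], indent=2))
hertz.driver.quit()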
		
		
		
Example #34
def test_we_can_switch_context_to_chrome(self, capabilities):
    driver = Firefox(capabilities=capabilities)
    driver.set_context('chrome')
    assert 1 == driver.execute_script("var c = Components.classes; return 1;")